diff --git a/upb/def.c b/upb/def.c index b92f6176ac..626214f454 100644 --- a/upb/def.c +++ b/upb/def.c @@ -44,7 +44,14 @@ typedef struct { char str[1]; /* Null-terminated string data follows. */ } str_t; +/* The upb core does not generally have a concept of default instances. However + * for descriptor options we make an exception since the max size is known and + * modest (<200 bytes). All types can share a default instance since it is + * initialized to zeroes. */ +static const char opt_default[_UPB_MAXOPT_SIZE] = {0}; + struct upb_fielddef { + const google_protobuf_FieldOptions *opts; const upb_filedef *file; const upb_msgdef *msgdef; const char *full_name; @@ -65,9 +72,9 @@ struct upb_fielddef { } sub; uint32_t number_; uint16_t index_; - uint16_t layout_index; + uint16_t layout_index; /* Index into msgdef->layout->fields or file->ext_layouts */ + bool has_default; bool is_extension_; - bool lazy_; bool packed_; bool proto3_optional_; upb_descriptortype_t type_; @@ -81,31 +88,40 @@ struct upb_extrange { }; struct upb_msgdef { + const google_protobuf_MessageOptions *opts; const upb_msglayout *layout; const upb_filedef *file; + const upb_msgdef *containing_type; const char *full_name; /* Tables for looking up fields by number and name. */ upb_inttable itof; upb_strtable ntof; - const upb_extrange *ext_ranges; + /* All nested defs. + * MEM: We could save some space here by putting nested defs in a contiguous + * region and calculating counts from offsets or vice-versa. */ const upb_fielddef *fields; const upb_oneofdef *oneofs; + const upb_extrange *ext_ranges; + const upb_msgdef *nested_msgs; + const upb_enumdef *nested_enums; + const upb_fielddef *nested_exts; int field_count; - int oneof_count; int real_oneof_count; + int oneof_count; int ext_range_count; - - /* Is this a map-entry message? 
*/ - bool map_entry; - bool is_message_set; + int nested_msg_count; + int nested_enum_count; + int nested_ext_count; + bool in_message_set; upb_wellknowntype_t well_known_type; - const upb_fielddef *message_set_ext; }; struct upb_enumdef { + const google_protobuf_EnumOptions *opts; const upb_filedef *file; + const upb_msgdef *containing_type; // Could be merged with "file". const char *full_name; upb_strtable ntoi; upb_inttable iton; @@ -115,12 +131,14 @@ struct upb_enumdef { }; struct upb_enumvaldef { + const google_protobuf_EnumValueOptions *opts; const upb_enumdef *enum_; const char *full_name; int32_t number; }; struct upb_oneofdef { + const google_protobuf_OneofOptions *opts; const upb_msgdef *parent; const char *full_name; int field_count; @@ -131,25 +149,48 @@ struct upb_oneofdef { }; struct upb_filedef { + const google_protobuf_FileOptions *opts; const char *name; const char *package; - const char *phpprefix; - const char *phpnamespace; const upb_filedef **deps; - const upb_msgdef *msgs; - const upb_enumdef *enums; - const upb_fielddef *exts; + const int32_t* public_deps; + const upb_msgdef *top_lvl_msgs; + const upb_enumdef *top_lvl_enums; + const upb_fielddef *top_lvl_exts; + const upb_servicedef *services; const upb_msglayout_ext **ext_layouts; const upb_symtab *symtab; int dep_count; - int msg_count; - int enum_count; - int ext_count; + int public_dep_count; + int top_lvl_msg_count; + int top_lvl_enum_count; + int top_lvl_ext_count; + int service_count; + int ext_count; /* All exts in the file. 
*/ upb_syntax_t syntax; }; +struct upb_methoddef { + const google_protobuf_MethodOptions *opts; + upb_servicedef *service; + const char *full_name; + const upb_msgdef *input_type; + const upb_msgdef *output_type; + bool client_streaming; + bool server_streaming; +}; + +struct upb_servicedef { + const google_protobuf_ServiceOptions *opts; + const upb_filedef *file; + const char *full_name; + upb_methoddef *methods; + int method_count; + int index; +}; + struct upb_symtab { upb_arena *arena; upb_strtable syms; /* full_name -> packed def ptr */ @@ -157,22 +198,33 @@ struct upb_symtab { upb_inttable exts; /* upb_msglayout_ext* -> upb_fielddef* */ upb_extreg *extreg; size_t bytes_loaded; + + // For compatibility with proto2, we have to accept json_names that conflict + // with field names or other json_names. This is very ill-advised, so we only + // allow this when it is needed (and hopefully these cases can be cleaned up + // and eliminated). When this is enabled, the results are not well-defined. + bool allow_name_conflicts; }; /* Inside a symtab we store tagged pointers to specific def types. */ typedef enum { UPB_DEFTYPE_MASK = 7, - UPB_DEFTYPE_FIELD = 0, - /* Only inside symtab table. */ + UPB_DEFTYPE_EXT = 0, UPB_DEFTYPE_MSG = 1, UPB_DEFTYPE_ENUM = 2, UPB_DEFTYPE_ENUMVAL = 3, + UPB_DEFTYPE_SERVICE = 4, /* Only inside message table. */ + UPB_DEFTYPE_FIELD = 0, UPB_DEFTYPE_ONEOF = 1, - UPB_DEFTYPE_FIELD_JSONNAME = 2 + UPB_DEFTYPE_FIELD_JSONNAME = 2, + + /* Only inside file table. */ + UPB_DEFTYPE_FILE = 0, + UPB_DEFTYPE_LAYOUT = 1 } upb_deftype_t; static upb_deftype_t deftype(upb_value v) { @@ -195,12 +247,12 @@ static upb_value pack_def(const void *ptr, upb_deftype_t type) { } /* isalpha() etc. from <ctype.h> are locale-dependent, which we don't want. 
*/ -static bool upb_isbetween(char c, char low, char high) { +static bool upb_isbetween(uint8_t c, uint8_t low, uint8_t high) { return c >= low && c <= high; } static bool upb_isletter(char c) { - return upb_isbetween(c, 'A', 'Z') || upb_isbetween(c, 'a', 'z') || c == '_'; + return upb_isbetween(c | 0x20, 'a', 'z') || c == '_'; } static bool upb_isalphanum(char c) { @@ -288,6 +340,14 @@ static void assign_msg_wellknowntype(upb_msgdef *m) { /* upb_enumdef ****************************************************************/ +const google_protobuf_EnumOptions *upb_enumdef_options(const upb_enumdef *e) { + return e->opts; +} + +bool upb_enumdef_hasoptions(const upb_enumdef *e) { + return e->opts != (void*)opt_default; +} + const char *upb_enumdef_fullname(const upb_enumdef *e) { return e->full_name; } @@ -300,11 +360,19 @@ const upb_filedef *upb_enumdef_file(const upb_enumdef *e) { return e->file; } +const upb_msgdef *upb_enumdef_containingtype(const upb_enumdef *e) { + return e->containing_type; +} + int32_t upb_enumdef_default(const upb_enumdef *e) { UPB_ASSERT(upb_enumdef_lookupnum(e, e->defaultval)); return e->defaultval; } +int upb_enumdef_valuecount(const upb_enumdef *e) { + return e->value_count; +} + const upb_enumvaldef *upb_enumdef_lookupname(const upb_enumdef *def, const char *name, size_t len) { upb_value v; @@ -349,6 +417,15 @@ int32_t upb_enum_iter_number(upb_enum_iter *iter) { /* upb_enumvaldef *************************************************************/ +const google_protobuf_EnumValueOptions *upb_enumvaldef_options( + const upb_enumvaldef *e) { + return e->opts; +} + +bool upb_enumvaldef_hasoptions(const upb_enumvaldef *e) { + return e->opts != (void*)opt_default; +} + const upb_enumdef *upb_enumvaldef_enum(const upb_enumvaldef *ev) { return ev->enum_; } @@ -365,9 +442,36 @@ int32_t upb_enumvaldef_number(const upb_enumvaldef *ev) { return ev->number; } +/* upb_extrange ***************************************************************/ + +const 
google_protobuf_ExtensionRangeOptions *upb_extrange_options( + const upb_extrange *r) { + return r->opts; +} + +bool upb_extrange_hasoptions(const upb_extrange *r) { + return r->opts != (void*)opt_default; +} + +int32_t upb_extrange_start(const upb_extrange *e) { + return e->start; +} + +int32_t upb_extrange_end(const upb_extrange *e) { + return e->end; +} /* upb_fielddef ***************************************************************/ +const google_protobuf_FieldOptions *upb_fielddef_options( + const upb_fielddef *f) { + return f->opts; +} + +bool upb_fielddef_hasoptions(const upb_fielddef *f) { + return f->opts != (void*)opt_default; +} + const char *upb_fielddef_fullname(const upb_fielddef *f) { return f->full_name; } @@ -427,10 +531,6 @@ bool upb_fielddef_isextension(const upb_fielddef *f) { return f->is_extension_; } -bool upb_fielddef_lazy(const upb_fielddef *f) { - return f->lazy_; -} - bool upb_fielddef_packed(const upb_fielddef *f) { return f->packed_; } @@ -549,6 +649,10 @@ const upb_msglayout_ext *_upb_fielddef_extlayout(const upb_fielddef *f) { return f->file->ext_layouts[f->layout_index]; } +bool _upb_fielddef_proto3optional(const upb_fielddef *f) { + return f->proto3_optional_; +} + bool upb_fielddef_issubmsg(const upb_fielddef *f) { return upb_fielddef_type(f) == UPB_TYPE_MESSAGE; } @@ -571,6 +675,10 @@ bool upb_fielddef_ismap(const upb_fielddef *f) { upb_msgdef_mapentry(upb_fielddef_msgsubdef(f)); } +bool upb_fielddef_hasdefault(const upb_fielddef *f) { + return f->has_default; +} + bool upb_fielddef_hassubdef(const upb_fielddef *f) { return upb_fielddef_issubmsg(f) || upb_fielddef_type(f) == UPB_TYPE_ENUM; } @@ -595,6 +703,14 @@ bool upb_fielddef_checkdescriptortype(int32_t type) { /* upb_msgdef *****************************************************************/ +const google_protobuf_MessageOptions *upb_msgdef_options(const upb_msgdef *m) { + return m->opts; +} + +bool upb_msgdef_hasoptions(const upb_msgdef *m) { + return m->opts != 
(void*)opt_default; +} + const char *upb_msgdef_fullname(const upb_msgdef *m) { return m->full_name; } @@ -603,6 +719,10 @@ const upb_filedef *upb_msgdef_file(const upb_msgdef *m) { return m->file; } +const upb_msgdef *upb_msgdef_containingtype(const upb_msgdef *m) { + return m->containing_type; +} + const char *upb_msgdef_name(const upb_msgdef *m) { return shortdefname(m->full_name); } @@ -640,16 +760,19 @@ const upb_oneofdef *upb_msgdef_ntoo(const upb_msgdef *m, const char *name, } bool upb_msgdef_lookupname(const upb_msgdef *m, const char *name, size_t len, - const upb_fielddef **f, const upb_oneofdef **o) { + const upb_fielddef **out_f, + const upb_oneofdef **out_o) { upb_value val; if (!upb_strtable_lookup2(&m->ntof, name, len, &val)) { return false; } - *o = unpack_def(val, UPB_DEFTYPE_ONEOF); - *f = unpack_def(val, UPB_DEFTYPE_FIELD); - return *o || *f; /* False if this was a JSON name. */ + const upb_fielddef *f = unpack_def(val, UPB_DEFTYPE_FIELD); + const upb_oneofdef *o = unpack_def(val, UPB_DEFTYPE_ONEOF); + if (out_f) *out_f = f; + if (out_o) *out_o = o; + return f || o; /* False if this was a JSON name. 
*/ } const upb_fielddef *upb_msgdef_lookupjsonname(const upb_msgdef *m, @@ -691,6 +814,18 @@ int upb_msgdef_oneofcount(const upb_msgdef *m) { return m->oneof_count; } +int upb_msgdef_nestedmsgcount(const upb_msgdef *m) { + return m->nested_msg_count; +} + +int upb_msgdef_nestedenumcount(const upb_msgdef *m) { + return m->nested_enum_count; +} + +int upb_msgdef_nestedextcount(const upb_msgdef *m) { + return m->nested_ext_count; +} + int upb_msgdef_realoneofcount(const upb_msgdef *m) { return m->real_oneof_count; } @@ -714,8 +849,19 @@ const upb_oneofdef *upb_msgdef_oneof(const upb_msgdef *m, int i) { return &m->oneofs[i]; } -bool upb_msgdef_mapentry(const upb_msgdef *m) { - return m->map_entry; +const upb_msgdef *upb_msgdef_nestedmsg(const upb_msgdef *m, int i) { + UPB_ASSERT(i >= 0 && i < m->nested_msg_count); + return &m->nested_msgs[i]; +} + +const upb_enumdef *upb_msgdef_nestedenum(const upb_msgdef *m, int i) { + UPB_ASSERT(i >= 0 && i < m->nested_enum_count); + return &m->nested_enums[i]; +} + +const upb_fielddef *upb_msgdef_nestedext(const upb_msgdef *m, int i) { + UPB_ASSERT(i >= 0 && i < m->nested_ext_count); + return &m->nested_exts[i]; } upb_wellknowntype_t upb_msgdef_wellknowntype(const upb_msgdef *m) { @@ -793,6 +939,14 @@ bool upb_msg_oneof_iter_isequal(const upb_msg_oneof_iter *iter1, /* upb_oneofdef ***************************************************************/ +const google_protobuf_OneofOptions *upb_oneofdef_options(const upb_oneofdef *o) { + return o->opts; +} + +bool upb_oneofdef_hasoptions(const upb_oneofdef *o) { + return o->opts != (void*)opt_default; +} + const char *upb_oneofdef_name(const upb_oneofdef *o) { return shortdefname(o->full_name); } @@ -857,54 +1011,174 @@ void upb_oneof_iter_setdone(upb_oneof_iter *iter) { /* upb_filedef ****************************************************************/ -const char *upb_filedef_name(const upb_filedef *f) { - return f->name; +const google_protobuf_FileOptions *upb_filedef_options(const 
upb_filedef *f) { + return f->opts; } -const char *upb_filedef_package(const upb_filedef *f) { - return f->package; +bool upb_filedef_hasoptions(const upb_filedef *f) { + return f->opts != (void*)opt_default; } -const char *upb_filedef_phpprefix(const upb_filedef *f) { - return f->phpprefix; +const char *upb_filedef_name(const upb_filedef *f) { + return f->name; } -const char *upb_filedef_phpnamespace(const upb_filedef *f) { - return f->phpnamespace; +const char *upb_filedef_package(const upb_filedef *f) { + return f->package; } upb_syntax_t upb_filedef_syntax(const upb_filedef *f) { return f->syntax; } -int upb_filedef_msgcount(const upb_filedef *f) { - return f->msg_count; +int upb_filedef_toplvlmsgcount(const upb_filedef *f) { + return f->top_lvl_msg_count; } int upb_filedef_depcount(const upb_filedef *f) { return f->dep_count; } -int upb_filedef_enumcount(const upb_filedef *f) { - return f->enum_count; +int upb_filedef_publicdepcount(const upb_filedef *f) { + return f->public_dep_count; +} + +const int32_t *_upb_filedef_publicdepnums(const upb_filedef *f) { + return f->public_deps; +} + +int upb_filedef_toplvlenumcount(const upb_filedef *f) { + return f->top_lvl_enum_count; +} + +int upb_filedef_toplvlextcount(const upb_filedef *f) { + return f->top_lvl_ext_count; +} + +int upb_filedef_servicecount(const upb_filedef *f) { + return f->service_count; } const upb_filedef *upb_filedef_dep(const upb_filedef *f, int i) { - return i < 0 || i >= f->dep_count ? NULL : f->deps[i]; + UPB_ASSERT(0 <= i && i < f->dep_count); + return f->deps[i]; } -const upb_msgdef *upb_filedef_msg(const upb_filedef *f, int i) { - return i < 0 || i >= f->msg_count ? NULL : &f->msgs[i]; +const upb_filedef *upb_filedef_publicdep(const upb_filedef *f, int i) { + UPB_ASSERT(0 <= i && i < f->public_dep_count); + return f->deps[f->public_deps[i]]; } -const upb_enumdef *upb_filedef_enum(const upb_filedef *f, int i) { - return i < 0 || i >= f->enum_count ? 
NULL : &f->enums[i]; +const upb_msgdef *upb_filedef_toplvlmsg(const upb_filedef *f, int i) { + UPB_ASSERT(0 <= i && i < f->top_lvl_msg_count); + return &f->top_lvl_msgs[i]; +} + +const upb_enumdef *upb_filedef_toplvlenum(const upb_filedef *f, int i) { + UPB_ASSERT(0 <= i && i < f->top_lvl_enum_count); + return &f->top_lvl_enums[i]; +} + +const upb_fielddef *upb_filedef_toplvlext(const upb_filedef *f, int i) { + UPB_ASSERT(0 <= i && i < f->top_lvl_ext_count); + return &f->top_lvl_exts[i]; +} + +const upb_servicedef *upb_filedef_service(const upb_filedef *f, int i) { + UPB_ASSERT(0 <= i && i < f->service_count); + return &f->services[i]; } const upb_symtab *upb_filedef_symtab(const upb_filedef *f) { return f->symtab; } +/* upb_methoddef **************************************************************/ + +const google_protobuf_MethodOptions *upb_methoddef_options( + const upb_methoddef *m) { + return m->opts; +} + +bool upb_methoddef_hasoptions(const upb_methoddef *m) { + return m->opts != (void*)opt_default; +} + +const char *upb_methoddef_fullname(const upb_methoddef *m) { + return m->full_name; +} + +const char *upb_methoddef_name(const upb_methoddef *m) { + return shortdefname(m->full_name); +} + +const upb_servicedef *upb_methoddef_service(const upb_methoddef *m) { + return m->service; +} + +const upb_msgdef *upb_methoddef_inputtype(const upb_methoddef *m) { + return m->input_type; +} + +const upb_msgdef *upb_methoddef_outputtype(const upb_methoddef *m) { + return m->output_type; +} + +bool upb_methoddef_clientstreaming(const upb_methoddef *m) { + return m->client_streaming; +} + +bool upb_methoddef_serverstreaming(const upb_methoddef *m) { + return m->server_streaming; +} + +/* upb_servicedef *************************************************************/ + +const google_protobuf_ServiceOptions *upb_servicedef_options( + const upb_servicedef *s) { + return s->opts; +} + +bool upb_servicedef_hasoptions(const upb_servicedef *s) { + return s->opts != 
(void*)opt_default; +} + +const char *upb_servicedef_fullname(const upb_servicedef *s) { + return s->full_name; +} + +const char *upb_servicedef_name(const upb_servicedef *s) { + return shortdefname(s->full_name); +} + +int upb_servicedef_index(const upb_servicedef *s) { + return s->index; +} + +const upb_filedef *upb_servicedef_file(const upb_servicedef *s) { + return s->file; +} + +int upb_servicedef_methodcount(const upb_servicedef *s) { + return s->method_count; +} + +const upb_methoddef *upb_servicedef_method(const upb_servicedef *s, int i) { + return i < 0 || i >= s->method_count ? NULL : &s->methods[i]; +} + +const upb_methoddef *upb_servicedef_lookupmethod(const upb_servicedef *s, + const char *name) { + for (int i = 0; i < s->method_count; i++) { + if (strcmp(name, upb_methoddef_name(&s->methods[i])) == 0) { + return &s->methods[i]; + } + } + return NULL; +} + +/* upb_symtab *****************************************************************/ + void upb_symtab_free(upb_symtab *s) { upb_arena_free(s->arena); upb_gfree(s); @@ -919,6 +1193,7 @@ upb_symtab *upb_symtab_new(void) { s->arena = upb_arena_new(); s->bytes_loaded = 0; + s->allow_name_conflicts = false; if (!upb_strtable_init(&s->syms, 32, s->arena) || !upb_strtable_init(&s->files, 4, s->arena) || @@ -936,10 +1211,14 @@ err: return NULL; } -const upb_msgdef *upb_symtab_lookupmsg(const upb_symtab *s, const char *sym) { +void _upb_symtab_allownameconflicts(upb_symtab *s) { + s->allow_name_conflicts = true; +} + +static const void *symtab_lookup(const upb_symtab *s, const char *sym, + upb_deftype_t type) { upb_value v; - return upb_strtable_lookup(&s->syms, sym, &v) ? - unpack_def(v, UPB_DEFTYPE_MSG) : NULL; + return upb_strtable_lookup(&s->syms, sym, &v) ? 
unpack_def(v, type) : NULL; } static const void *symtab_lookup2(const upb_symtab *s, const char *sym, @@ -949,22 +1228,37 @@ static const void *symtab_lookup2(const upb_symtab *s, const char *sym, : NULL; } +const upb_msgdef *upb_symtab_lookupmsg(const upb_symtab *s, const char *sym) { + return symtab_lookup(s, sym, UPB_DEFTYPE_MSG); +} + const upb_msgdef *upb_symtab_lookupmsg2(const upb_symtab *s, const char *sym, size_t len) { return symtab_lookup2(s, sym, len, UPB_DEFTYPE_MSG); } const upb_enumdef *upb_symtab_lookupenum(const upb_symtab *s, const char *sym) { - upb_value v; - return upb_strtable_lookup(&s->syms, sym, &v) ? - unpack_def(v, UPB_DEFTYPE_ENUM) : NULL; + return symtab_lookup(s, sym, UPB_DEFTYPE_ENUM); } const upb_enumvaldef *upb_symtab_lookupenumval(const upb_symtab *s, const char *sym) { + return symtab_lookup(s, sym, UPB_DEFTYPE_ENUMVAL); +} + +const upb_filedef *upb_symtab_lookupfile(const upb_symtab *s, + const char *name) { upb_value v; - return upb_strtable_lookup(&s->syms, sym, &v) - ? unpack_def(v, UPB_DEFTYPE_ENUMVAL) + return upb_strtable_lookup(&s->files, name, &v) + ? unpack_def(v, UPB_DEFTYPE_FILE) + : NULL; +} + +const upb_filedef *upb_symtab_lookupfile2(const upb_symtab *s, const char *name, + size_t len) { + upb_value v; + return upb_strtable_lookup2(&s->files, name, len, &v) + ? unpack_def(v, UPB_DEFTYPE_FILE) : NULL; } @@ -978,7 +1272,7 @@ const upb_fielddef *upb_symtab_lookupext2(const upb_symtab *s, const char *name, return unpack_def(v, UPB_DEFTYPE_FIELD); case UPB_DEFTYPE_MSG: { const upb_msgdef *m = unpack_def(v, UPB_DEFTYPE_MSG); - return m->message_set_ext; /* May be NULL if not in MessageeSet. */ + return m->in_message_set ? 
&m->nested_exts[0] : NULL; } default: break; @@ -991,21 +1285,55 @@ const upb_fielddef *upb_symtab_lookupext(const upb_symtab *s, const char *sym) { return upb_symtab_lookupext2(s, sym, strlen(sym)); } -const upb_filedef *upb_symtab_lookupfile(const upb_symtab *s, const char *name) { - upb_value v; - return upb_strtable_lookup(&s->files, name, &v) ? upb_value_getconstptr(v) - : NULL; +const upb_servicedef *upb_symtab_lookupservice(const upb_symtab *s, + const char *name) { + return symtab_lookup(s, name, UPB_DEFTYPE_SERVICE); } -const upb_filedef *upb_symtab_lookupfile2( - const upb_symtab *s, const char *name, size_t len) { +const upb_filedef *upb_symtab_lookupfileforsym(const upb_symtab *s, + const char *name) { upb_value v; - return upb_strtable_lookup2(&s->files, name, len, &v) ? - upb_value_getconstptr(v) : NULL; -} + // TODO(haberman): non-extension fields and oneofs. + if (upb_strtable_lookup(&s->syms, name, &v)) { + switch (deftype(v)) { + case UPB_DEFTYPE_EXT: { + const upb_fielddef *f = unpack_def(v, UPB_DEFTYPE_EXT); + return upb_fielddef_file(f); + } + case UPB_DEFTYPE_MSG: { + const upb_msgdef *m = unpack_def(v, UPB_DEFTYPE_MSG); + return upb_msgdef_file(m); + } + case UPB_DEFTYPE_ENUM: { + const upb_enumdef *e = unpack_def(v, UPB_DEFTYPE_ENUM); + return upb_enumdef_file(e); + } + case UPB_DEFTYPE_ENUMVAL: { + const upb_enumvaldef *ev = unpack_def(v, UPB_DEFTYPE_ENUMVAL); + return upb_enumdef_file(upb_enumvaldef_enum(ev)); + } + case UPB_DEFTYPE_SERVICE: { + const upb_servicedef *service = unpack_def(v, UPB_DEFTYPE_SERVICE); + return upb_servicedef_file(service); + } + default: + UPB_UNREACHABLE(); + } + } + + const char *last_dot = strrchr(name, '.'); + if (last_dot) { + const upb_msgdef *parent = upb_symtab_lookupmsg2(s, name, last_dot - name); + if (parent) { + const char *shortname = last_dot + 1; + if (upb_msgdef_lookupname(parent, shortname, strlen(shortname), NULL, + NULL)) { + return upb_msgdef_file(parent); + } + } + } -int 
upb_symtab_filecount(const upb_symtab *s) { - return (int)upb_strtable_count(&s->files); + return NULL; } /* Code to build defs from descriptor protos. *********************************/ @@ -1021,6 +1349,7 @@ typedef struct { upb_symtab *symtab; upb_filedef *file; /* File we are building. */ upb_arena *arena; /* Allocate defs here. */ + upb_arena *tmp_arena; /* For temporary allocations. */ const upb_msglayout_file *layout; /* NULL if we should build layouts. */ int enum_count; /* Count of enums built so far. */ int msg_count; /* Count of messages built so far. */ @@ -1045,11 +1374,26 @@ static void symtab_oomerr(symtab_addctx *ctx) { } void *symtab_alloc(symtab_addctx *ctx, size_t bytes) { + if (bytes == 0) return NULL; void *ret = upb_arena_malloc(ctx->arena, bytes); if (!ret) symtab_oomerr(ctx); return ret; } +// We want to copy the options verbatim into the destination options proto. +// We use serialize+parse as our deep copy. +#define SET_OPTIONS(target, desc_type, options_type, proto) \ + if (google_protobuf_##desc_type##_has_options(proto)) { \ + size_t size; \ + char *pb = google_protobuf_##options_type##_serialize( \ + google_protobuf_##desc_type##_options(proto), ctx->tmp_arena, &size); \ + CHK_OOM(pb); \ + target = google_protobuf_##options_type##_parse(pb, size, ctx->arena); \ + CHK_OOM(target); \ + } else { \ + target = (const google_protobuf_##options_type *)&opt_default; \ + } + static void check_ident(symtab_addctx *ctx, upb_strview name, bool full) { const char *str = name.data; size_t len = name.size; @@ -1120,13 +1464,18 @@ static uint8_t upb_msg_fielddefsize(const upb_fielddef *f) { } } -static uint32_t upb_msglayout_place(upb_msglayout *l, size_t size) { - uint32_t ret; +static uint32_t upb_msglayout_place(symtab_addctx *ctx, upb_msglayout *l, + size_t size, const upb_msgdef *m) { + size_t ofs = UPB_ALIGN_UP(l->size, size); + size_t next = ofs + size; - l->size = UPB_ALIGN_UP(l->size, size); - ret = l->size; - l->size += size; - return ret; + 
if (next > UINT16_MAX) { + symtab_errf(ctx, "size of message %s exceeded max size of %zu bytes", + upb_msgdef_fullname(m), (size_t)UINT16_MAX); + } + + l->size = next; + return ofs; } static int field_number_cmp(const void *p1, const void *p2) { @@ -1152,16 +1501,22 @@ static void assign_layout_indices(const upb_msgdef *m, upb_msglayout *l, l->dense_below = dense_below; } +static uint8_t map_descriptortype(const upb_fielddef *f) { + uint8_t type = upb_fielddef_descriptortype(f); + /* See TableDescriptorType() in upbc/generator.cc for details and + * rationale of these exceptions. */ + if (type == UPB_DTYPE_STRING && f->file->syntax == UPB_SYNTAX_PROTO2) { + return UPB_DTYPE_BYTES; + } else if (type == UPB_DTYPE_ENUM && + f->sub.enumdef->file->syntax == UPB_SYNTAX_PROTO3) { + return UPB_DTYPE_INT32; + } + return type; +} + static void fill_fieldlayout(upb_msglayout_field *field, const upb_fielddef *f) { field->number = upb_fielddef_number(f); - field->descriptortype = upb_fielddef_descriptortype(f); - - if (field->descriptortype == UPB_DTYPE_STRING && - f->file->syntax == UPB_SYNTAX_PROTO2) { - /* See TableDescriptorType() in upbc/generator.cc for details and - * rationale. */ - field->descriptortype = UPB_DTYPE_BYTES; - } + field->descriptortype = map_descriptortype(f); if (upb_fielddef_ismap(f)) { field->mode = _UPB_MODE_MAP | (_UPB_REP_PTR << _UPB_REP_SHIFT); @@ -1207,9 +1562,6 @@ static void fill_fieldlayout(upb_msglayout_field *field, const upb_fielddef *f) * It computes a dynamic layout for all of the fields in |m|. */ static void make_layout(symtab_addctx *ctx, const upb_msgdef *m) { upb_msglayout *l = (upb_msglayout*)m->layout; - upb_msg_field_iter it; - upb_msg_oneof_iter oit; - size_t hasbit; size_t field_count = upb_msgdef_numfields(m); size_t sublayout_count = 0; upb_msglayout_sub *subs; @@ -1219,7 +1571,12 @@ static void make_layout(symtab_addctx *ctx, const upb_msgdef *m) { /* Count sub-messages. 
*/ for (size_t i = 0; i < field_count; i++) { - if (upb_fielddef_issubmsg(&m->fields[i])) { + const upb_fielddef *f = &m->fields[i]; + if (upb_fielddef_issubmsg(f)) { + sublayout_count++; + } + if (upb_fielddef_type(f) == UPB_TYPE_ENUM && + f->sub.enumdef->file->syntax == UPB_SYNTAX_PROTO2) { sublayout_count++; } } @@ -1233,7 +1590,7 @@ static void make_layout(symtab_addctx *ctx, const upb_msgdef *m) { l->table_mask = 0; if (upb_msgdef_extrangecount(m) > 0) { - if (m->is_message_set) { + if (google_protobuf_MessageOptions_message_set_wire_format(m->opts)) { l->ext = _UPB_MSGEXT_MSGSET; } else { l->ext = _UPB_MSGEXT_EXTENDABLE; @@ -1258,8 +1615,8 @@ static void make_layout(symtab_addctx *ctx, const upb_msgdef *m) { fields[1].mode = _UPB_MODE_SCALAR; fields[0].presence = 0; fields[1].presence = 0; - fields[0].descriptortype = upb_fielddef_descriptortype(key); - fields[1].descriptortype = upb_fielddef_descriptortype(val); + fields[0].descriptortype = map_descriptortype(key); + fields[1].descriptortype = map_descriptortype(val); fields[0].offset = 0; fields[1].offset = sizeof(upb_strview); fields[1].submsg_index = 0; @@ -1268,9 +1625,16 @@ static void make_layout(symtab_addctx *ctx, const upb_msgdef *m) { subs[0].submsg = upb_fielddef_msgsubdef(val)->layout; } + upb_fielddef *fielddefs = (upb_fielddef*)&m->fields[0]; + UPB_ASSERT(fielddefs[0].number_ == 1); + UPB_ASSERT(fielddefs[1].number_ == 2); + fielddefs[0].layout_index = 0; + fielddefs[1].layout_index = 1; + l->field_count = 2; l->size = 2 * sizeof(upb_strview); l->size = UPB_ALIGN_UP(l->size, 8); + l->dense_below = 2; return; } @@ -1283,23 +1647,25 @@ static void make_layout(symtab_addctx *ctx, const upb_msgdef *m) { * OPT: There is a lot of room for optimization here to minimize the size. */ + /* Assign hasbits for required fields first. */ + size_t hasbit = 0; + /* Allocate hasbits and set basic field attributes. 
*/ sublayout_count = 0; - for (upb_msg_field_begin(&it, m), hasbit = 0; - !upb_msg_field_done(&it); - upb_msg_field_next(&it)) { - upb_fielddef* f = upb_msg_iter_field(&it); + for (int i = 0; i < m->field_count; i++) { + const upb_fielddef* f = &m->fields[i]; upb_msglayout_field *field = &fields[upb_fielddef_index(f)]; fill_fieldlayout(field, f); if (upb_fielddef_issubmsg(f)) { - const upb_msgdef *subm = upb_fielddef_msgsubdef(f); field->submsg_index = sublayout_count++; - subs[field->submsg_index].submsg = subm->layout; + subs[field->submsg_index].submsg = upb_fielddef_msgsubdef(f)->layout; } - if (upb_fielddef_haspresence(f) && !upb_fielddef_realcontainingoneof(f)) { + if (upb_fielddef_label(f) == UPB_LABEL_REQUIRED) { + /* Hasbit was already assigned. */ + } else if (upb_fielddef_haspresence(f) && !upb_fielddef_realcontainingoneof(f)) { /* We don't use hasbit 0, so that 0 can indicate "no presence" in the * table. This wastes one hasbit, but we don't worry about it for now. */ field->presence = ++hasbit; @@ -1312,9 +1678,8 @@ static void make_layout(symtab_addctx *ctx, const upb_msgdef *m) { l->size = div_round_up(hasbit, 8); /* Allocate non-oneof fields. */ - for (upb_msg_field_begin(&it, m); !upb_msg_field_done(&it); - upb_msg_field_next(&it)) { - const upb_fielddef* f = upb_msg_iter_field(&it); + for (int i = 0; i < m->field_count; i++) { + const upb_fielddef* f = &m->fields[i]; size_t field_size = upb_msg_fielddefsize(f); size_t index = upb_fielddef_index(f); @@ -1323,14 +1688,13 @@ static void make_layout(symtab_addctx *ctx, const upb_msgdef *m) { continue; } - fields[index].offset = upb_msglayout_place(l, field_size); + fields[index].offset = upb_msglayout_place(ctx, l, field_size, m); } /* Allocate oneof fields. Each oneof field consists of a uint32 for the case * and space for the actual data. 
*/ - for (upb_msg_oneof_begin(&oit, m); !upb_msg_oneof_done(&oit); - upb_msg_oneof_next(&oit)) { - const upb_oneofdef* o = upb_msg_iter_oneof(&oit); + for (int i = 0; i < m->oneof_count; i++) { + const upb_oneofdef* o = &m->oneofs[i]; upb_oneof_iter fit; size_t case_size = sizeof(uint32_t); /* Could potentially optimize this. */ @@ -1349,13 +1713,11 @@ static void make_layout(symtab_addctx *ctx, const upb_msgdef *m) { } /* Align and allocate case offset. */ - case_offset = upb_msglayout_place(l, case_size); - data_offset = upb_msglayout_place(l, field_size); + case_offset = upb_msglayout_place(ctx, l, case_size, m); + data_offset = upb_msglayout_place(ctx, l, field_size, m); - for (upb_oneof_begin(&fit, o); - !upb_oneof_done(&fit); - upb_oneof_next(&fit)) { - const upb_fielddef* f = upb_oneof_iter_field(&fit); + for (int i = 0; i < o->field_count; i++) { + const upb_fielddef* f = o->fields[i]; fields[upb_fielddef_index(f)].offset = data_offset; fields[upb_fielddef_index(f)].presence = ~case_offset; } @@ -1480,6 +1842,9 @@ static char* makejsonname(symtab_addctx *ctx, const char* name) { return json_name; } +/* Adds a symbol to the symtab. The def's pointer to upb_filedef* must be set + * before adding, so we know which entries to remove if building this file + * fails. */ static void symtab_add(symtab_addctx *ctx, const char *name, upb_value v) { // TODO: table should support an operation "tryinsert" to avoid the double // lookup. @@ -1491,55 +1856,98 @@ static void symtab_add(symtab_addctx *ctx, const char *name, upb_value v) { ctx->symtab->arena)); } +static bool remove_component(char *base, size_t *len) { + if (*len == 0) return false; + + for (size_t i = *len - 1; i > 0; i--) { + if (base[i] == '.') { + *len = i; + return true; + } + } + + *len = 0; + return true; +} + /* Given a symbol and the base symbol inside which it is defined, find the * symbol's definition in t. 
*/ -static const void *symtab_resolve(symtab_addctx *ctx, const upb_fielddef *f, - const char *base, upb_strview sym, - upb_deftype_t type) { +static const void *symtab_resolveany(symtab_addctx *ctx, + const char *from_name_dbg, + const char *base, upb_strview sym, + upb_deftype_t *type) { const upb_strtable *t = &ctx->symtab->syms; if(sym.size == 0) goto notfound; + upb_value v; if(sym.data[0] == '.') { /* Symbols starting with '.' are absolute, so we do a single lookup. * Slice to omit the leading '.' */ - upb_value v; if (!upb_strtable_lookup2(t, sym.data + 1, sym.size - 1, &v)) { goto notfound; } - - const void *ret = unpack_def(v, type); - if (!ret) { - symtab_errf(ctx, "type mismatch when resolving field %s, name %s", - f->full_name, sym.data); - } - return ret; } else { - /* Remove components from base until we find an entry or run out. - * TODO: This branch is totally broken, but currently not used. */ - (void)base; - UPB_ASSERT(false); - goto notfound; + /* Remove components from base until we find an entry or run out. 
*/ + size_t baselen = strlen(base); + char *tmp = malloc(sym.size + strlen(base) + 1); + while (1) { + char *p = tmp; + if (baselen) { + memcpy(p, base, baselen); + p[baselen] = '.'; + p += baselen + 1; + } + memcpy(p, sym.data, sym.size); + p += sym.size; + if (upb_strtable_lookup2(t, tmp, p - tmp, &v)) { + break; + } + if (!remove_component(tmp, &baselen)) { + free(tmp); + goto notfound; + } + } + free(tmp); } + *type = deftype(v); + return unpack_def(v, *type); + notfound: symtab_errf(ctx, "couldn't resolve name '" UPB_STRVIEW_FORMAT "'", UPB_STRVIEW_ARGS(sym)); } +static const void *symtab_resolve(symtab_addctx *ctx, const char *from_name_dbg, + const char *base, upb_strview sym, + upb_deftype_t type) { + upb_deftype_t found_type; + const void *ret = + symtab_resolveany(ctx, from_name_dbg, base, sym, &found_type); + if (ret && found_type != type) { + symtab_errf( + ctx, + "type mismatch when resolving %s: couldn't find name %s with type=%d", + from_name_dbg, sym.data, (int)type); + } + return ret; +} + static void create_oneofdef( symtab_addctx *ctx, upb_msgdef *m, - const google_protobuf_OneofDescriptorProto *oneof_proto) { - upb_oneofdef *o; + const google_protobuf_OneofDescriptorProto *oneof_proto, + const upb_oneofdef *_o) { + upb_oneofdef *o = (upb_oneofdef *)_o; upb_strview name = google_protobuf_OneofDescriptorProto_name(oneof_proto); upb_value v; - o = (upb_oneofdef*)&m->oneofs[m->oneof_count++]; o->parent = m; o->full_name = makefullname(ctx, m->full_name, name); o->field_count = 0; o->synthetic = false; + SET_OPTIONS(o->opts, OneofDescriptorProto, OneofOptions, oneof_proto); + v = pack_def(o, UPB_DEFTYPE_ONEOF); - symtab_add(ctx, o->full_name, v); CHK_OOM(upb_strtable_insert(&m->ntof, name.data, name.size, v, ctx->arena)); CHK_OOM(upb_inttable_init(&o->itof, ctx->arena)); @@ -1644,6 +2052,7 @@ static void parse_default(symtab_addctx *ctx, const char *str, size_t len, } else if (streql2(str, len, "true")) { f->defaultval.boolean = true; } else { + goto 
invalid; } break; } @@ -1663,15 +2072,14 @@ static void parse_default(symtab_addctx *ctx, const char *str, size_t len, return; invalid: - symtab_errf(ctx, "Invalid default '%.*s' for field %s", (int)len, str, - upb_fielddef_fullname(f)); + symtab_errf(ctx, "Invalid default '%.*s' for field %s of type %d", (int)len, str, + upb_fielddef_fullname(f), (int)upb_fielddef_descriptortype(f)); } static void set_default_default(symtab_addctx *ctx, upb_fielddef *f) { switch (upb_fielddef_type(f)) { case UPB_TYPE_INT32: case UPB_TYPE_INT64: - case UPB_TYPE_ENUM: f->defaultval.sint = 0; break; case UPB_TYPE_UINT64: @@ -1689,6 +2097,8 @@ static void set_default_default(symtab_addctx *ctx, upb_fielddef *f) { case UPB_TYPE_BOOL: f->defaultval.boolean = false; break; + case UPB_TYPE_ENUM: + f->defaultval.sint = f->sub.enumdef->values[0].number; case UPB_TYPE_MESSAGE: break; } @@ -1696,15 +2106,17 @@ static void set_default_default(symtab_addctx *ctx, upb_fielddef *f) { static void create_fielddef( symtab_addctx *ctx, const char *prefix, upb_msgdef *m, - const google_protobuf_FieldDescriptorProto *field_proto) { - upb_fielddef *f; - const google_protobuf_FieldOptions *options; + const google_protobuf_FieldDescriptorProto *field_proto, + const upb_fielddef *_f) { + upb_fielddef *f = (upb_fielddef*)_f; upb_strview name; const char *full_name; const char *json_name; const char *shortname; uint32_t field_number; + f->file = ctx->file; /* Must happen prior to symtab_add(). 
*/ + if (!google_protobuf_FieldDescriptorProto_has_name(field_proto)) { symtab_errf(ctx, "field has no name (%s)", upb_msgdef_fullname(m)); } @@ -1727,41 +2139,84 @@ static void create_fielddef( symtab_errf(ctx, "invalid field number (%u)", field_number); } + f->full_name = full_name; + f->json_name = json_name; + f->label_ = (int)google_protobuf_FieldDescriptorProto_label(field_proto); + f->number_ = field_number; + f->oneof = NULL; + f->proto3_optional_ = + google_protobuf_FieldDescriptorProto_proto3_optional(field_proto); + + bool has_type = google_protobuf_FieldDescriptorProto_has_type(field_proto); + bool has_type_name = + google_protobuf_FieldDescriptorProto_has_type_name(field_proto); + + f->type_ = (int)google_protobuf_FieldDescriptorProto_type(field_proto); + + if (has_type) { + switch (f->type_) { + case UPB_DTYPE_MESSAGE: + case UPB_DTYPE_GROUP: + case UPB_DTYPE_ENUM: + if (!has_type_name) { + symtab_errf(ctx, "field of type %d requires type name (%s)", + (int)f->type_, full_name); + } + break; + default: + if (has_type_name) { + symtab_errf(ctx, "invalid type for field with type_name set (%s, %d)", + full_name, (int)f->type_); + } + } + } else if (has_type_name) { + f->type_ = 0; // We'll fill this in in resolve_fielddef(). + } + if (m) { /* direct message field. 
*/ - upb_value v, field_v, json_v; + upb_value v, field_v, json_v, existing_v; size_t json_size; - f = (upb_fielddef*)&m->fields[m->field_count]; - f->index_ = m->field_count++; + f->index_ = f - m->fields; f->msgdef = m; f->is_extension_ = false; - if (upb_strtable_lookup(&m->ntof, shortname, NULL)) { - symtab_errf(ctx, "duplicate field name (%s)", shortname); - } - - if (upb_strtable_lookup(&m->ntof, json_name, NULL)) { - symtab_errf(ctx, "duplicate json_name (%s)", json_name); - } - - if (upb_inttable_lookup(&m->itof, field_number, NULL)) { - symtab_errf(ctx, "duplicate field number (%u)", field_number); - } - field_v = pack_def(f, UPB_DEFTYPE_FIELD); json_v = pack_def(f, UPB_DEFTYPE_FIELD_JSONNAME); v = upb_value_constptr(f); json_size = strlen(json_name); + if (upb_strtable_lookup(&m->ntof, shortname, &existing_v)) { + if (ctx->symtab->allow_name_conflicts && + deftype(existing_v) == UPB_DEFTYPE_FIELD_JSONNAME) { + // Field name takes precedence over json name. + upb_strtable_remove(&m->ntof, shortname, strlen(shortname), NULL); + } else { + symtab_errf(ctx, "duplicate field name (%s)", shortname); + } + } + CHK_OOM(upb_strtable_insert(&m->ntof, name.data, name.size, field_v, ctx->arena)); - CHK_OOM(upb_inttable_insert(&m->itof, field_number, v, ctx->arena)); if (strcmp(shortname, json_name) != 0) { - upb_strtable_insert(&m->ntof, json_name, json_size, json_v, ctx->arena); + if (upb_strtable_lookup(&m->ntof, json_name, &v)) { + if (!ctx->symtab->allow_name_conflicts) { + symtab_errf(ctx, "duplicate json_name (%s)", json_name); + } + } else { + CHK_OOM(upb_strtable_insert(&m->ntof, json_name, json_size, json_v, + ctx->arena)); + } + } + + if (upb_inttable_lookup(&m->itof, field_number, NULL)) { + symtab_errf(ctx, "duplicate field number (%u)", field_number); } + CHK_OOM(upb_inttable_insert(&m->itof, field_number, v, ctx->arena)); + if (ctx->layout) { const upb_msglayout_field *fields = m->layout->fields; int count = m->layout->field_count; @@ -1777,26 +2232,23 
@@ static void create_fielddef( } } else { /* extension field. */ - uint16_t layout_index = ctx->ext_count++; - f = (upb_fielddef*)&ctx->file->exts[layout_index]; - f->layout_index = layout_index; f->is_extension_ = true; - symtab_add(ctx, full_name, pack_def(f, UPB_DEFTYPE_FIELD)); + symtab_add(ctx, full_name, pack_def(f, UPB_DEFTYPE_EXT)); + f->layout_index = ctx->ext_count++; if (ctx->layout) { UPB_ASSERT(ctx->file->ext_layouts[f->layout_index]->field.number == field_number); } } - f->full_name = full_name; - f->json_name = json_name; - f->file = ctx->file; - f->type_ = (int)google_protobuf_FieldDescriptorProto_type(field_proto); - f->label_ = (int)google_protobuf_FieldDescriptorProto_label(field_proto); - f->number_ = field_number; - f->oneof = NULL; - f->proto3_optional_ = - google_protobuf_FieldDescriptorProto_proto3_optional(field_proto); + if (f->type_ < UPB_DTYPE_DOUBLE || f->type_ > UPB_DTYPE_SINT64) { + symtab_errf(ctx, "invalid type for field %s (%d)", f->full_name, f->type_); + } + + if (f->label_ < UPB_LABEL_OPTIONAL || f->label_ > UPB_LABEL_REPEATED) { + symtab_errf(ctx, "invalid label for field %s (%d)", f->full_name, + f->label_); + } /* We can't resolve the subdef or (in the case of extensions) the containing * message yet, because it may not have been defined yet. We stash a pointer @@ -1845,36 +2297,79 @@ static void create_fielddef( } } - options = google_protobuf_FieldDescriptorProto_has_options(field_proto) ? - google_protobuf_FieldDescriptorProto_options(field_proto) : NULL; + SET_OPTIONS(f->opts, FieldDescriptorProto, FieldOptions, field_proto); - if (options && google_protobuf_FieldOptions_has_packed(options)) { - f->packed_ = google_protobuf_FieldOptions_packed(options); + if (google_protobuf_FieldOptions_has_packed(f->opts)) { + f->packed_ = google_protobuf_FieldOptions_packed(f->opts); } else { /* Repeated fields default to packed for proto3 only. 
*/ f->packed_ = upb_fielddef_isprimitive(f) && f->label_ == UPB_LABEL_REPEATED && f->file->syntax == UPB_SYNTAX_PROTO3; } +} - if (options) { - f->lazy_ = google_protobuf_FieldOptions_lazy(options); - } else { - f->lazy_ = false; +static void create_service( + symtab_addctx *ctx, const google_protobuf_ServiceDescriptorProto *svc_proto, + const upb_servicedef *_s) { + upb_servicedef *s = (upb_servicedef*)_s; + upb_strview name; + const google_protobuf_MethodDescriptorProto *const *methods; + size_t i, n; + + s->file = ctx->file; /* Must happen prior to symtab_add. */ + + name = google_protobuf_ServiceDescriptorProto_name(svc_proto); + check_ident(ctx, name, false); + s->full_name = makefullname(ctx, ctx->file->package, name); + symtab_add(ctx, s->full_name, pack_def(s, UPB_DEFTYPE_SERVICE)); + + methods = google_protobuf_ServiceDescriptorProto_method(svc_proto, &n); + + s->method_count = n; + s->methods = symtab_alloc(ctx, sizeof(*s->methods) * n); + + SET_OPTIONS(s->opts, ServiceDescriptorProto, ServiceOptions, svc_proto); + + for (i = 0; i < n; i++) { + const google_protobuf_MethodDescriptorProto *method_proto = methods[i]; + upb_methoddef *m = (upb_methoddef*)&s->methods[i]; + upb_strview name = google_protobuf_MethodDescriptorProto_name(method_proto); + + m->service = s; + m->full_name = makefullname(ctx, s->full_name, name); + m->client_streaming = + google_protobuf_MethodDescriptorProto_client_streaming(method_proto); + m->server_streaming = + google_protobuf_MethodDescriptorProto_server_streaming(method_proto); + m->input_type = symtab_resolve( + ctx, m->full_name, m->full_name, + google_protobuf_MethodDescriptorProto_input_type(method_proto), + UPB_DEFTYPE_MSG); + m->output_type = symtab_resolve( + ctx, m->full_name, m->full_name, + google_protobuf_MethodDescriptorProto_output_type(method_proto), + UPB_DEFTYPE_MSG); + + SET_OPTIONS(m->opts, MethodDescriptorProto, MethodOptions, method_proto); } } static void create_enumdef( symtab_addctx *ctx, const char 
*prefix, - const google_protobuf_EnumDescriptorProto *enum_proto) { - upb_enumdef *e; + const google_protobuf_EnumDescriptorProto *enum_proto, + const upb_msgdef *containing_type, + const upb_enumdef *_e) { + upb_enumdef *e = (upb_enumdef*)_e;; const google_protobuf_EnumValueDescriptorProto *const *values; upb_strview name; size_t i, n; + e->file = ctx->file; /* Must happen prior to symtab_add() */ + e->containing_type = containing_type; + name = google_protobuf_EnumDescriptorProto_name(enum_proto); check_ident(ctx, name, false); - e = (upb_enumdef*)&ctx->file->enums[ctx->enum_count++]; e->full_name = makefullname(ctx, prefix, name); symtab_add(ctx, e->full_name, pack_def(e, UPB_DEFTYPE_ENUM)); @@ -1882,7 +2377,6 @@ static void create_enumdef( CHK_OOM(upb_strtable_init(&e->ntoi, n, ctx->arena)); CHK_OOM(upb_inttable_init(&e->iton, ctx->arena)); - e->file = ctx->file; e->defaultval = 0; e->value_count = n; e->values = symtab_alloc(ctx, sizeof(*e->values) * n); @@ -1892,17 +2386,21 @@ static void create_enumdef( e->full_name); } + SET_OPTIONS(e->opts, EnumDescriptorProto, EnumOptions, enum_proto); + for (i = 0; i < n; i++) { const google_protobuf_EnumValueDescriptorProto *val_proto = values[i]; upb_enumvaldef *val = (upb_enumvaldef*)&e->values[i]; upb_strview name = google_protobuf_EnumValueDescriptorProto_name(val_proto); upb_value v = upb_value_constptr(val); - val->enum_ = e; + val->enum_ = e; /* Must happen prior to symtab_add(). 
*/ val->full_name = makefullname(ctx, prefix, name); val->number = google_protobuf_EnumValueDescriptorProto_number(val_proto); symtab_add(ctx, val->full_name, pack_def(val, UPB_DEFTYPE_ENUMVAL)); + SET_OPTIONS(val->opts, EnumValueDescriptorProto, EnumValueOptions, val_proto); + if (i == 0 && e->file->syntax == UPB_SYNTAX_PROTO3 && val->number != 0) { symtab_errf(ctx, "for proto3, the first enum value must be zero (%s)", e->full_name); @@ -1920,9 +2418,10 @@ static void create_enumdef( } static void create_msgdef(symtab_addctx *ctx, const char *prefix, - const google_protobuf_DescriptorProto *msg_proto) { - upb_msgdef *m; - const google_protobuf_MessageOptions *options; + const google_protobuf_DescriptorProto *msg_proto, + const upb_msgdef *containing_type, + const upb_msgdef *_m) { + upb_msgdef *m = (upb_msgdef*)_m; const google_protobuf_OneofDescriptorProto *const *oneofs; const google_protobuf_FieldDescriptorProto *const *fields; const google_protobuf_EnumDescriptorProto *const *enums; @@ -1931,13 +2430,13 @@ static void create_msgdef(symtab_addctx *ctx, const char *prefix, size_t i, n_oneof, n_field, n_ext_range, n; upb_strview name; + m->file = ctx->file; /* Must happen prior to symtab_add(). 
*/ + m->containing_type = containing_type; + name = google_protobuf_DescriptorProto_name(msg_proto); check_ident(ctx, name, false); - int msg_index = ctx->msg_count; - m = (upb_msgdef*)&ctx->file->msgs[msg_index]; m->full_name = makefullname(ctx, prefix, name); - ctx->msg_count++; symtab_add(ctx, m->full_name, pack_def(m, UPB_DEFTYPE_MSG)); oneofs = google_protobuf_DescriptorProto_oneof_decl(msg_proto, &n_oneof); @@ -1948,22 +2447,10 @@ static void create_msgdef(symtab_addctx *ctx, const char *prefix, CHK_OOM(upb_inttable_init(&m->itof, ctx->arena)); CHK_OOM(upb_strtable_init(&m->ntof, n_oneof + n_field, ctx->arena)); - m->file = ctx->file; - m->map_entry = false; - m->is_message_set = false; - m->message_set_ext = NULL; - - options = google_protobuf_DescriptorProto_options(msg_proto); - - if (options) { - m->map_entry = google_protobuf_MessageOptions_map_entry(options); - m->is_message_set = - google_protobuf_MessageOptions_message_set_wire_format(options); - } - if (ctx->layout) { /* create_fielddef() below depends on this being set. */ - m->layout = ctx->layout->msgs[msg_index]; + UPB_ASSERT(ctx->msg_count < ctx->layout->msg_count); + m->layout = ctx->layout->msgs[ctx->msg_count++]; UPB_ASSERT(n_field == m->layout->field_count); } else { /* Allocate now (to allow cross-linking), populate later. 
*/ @@ -1971,16 +2458,18 @@ static void create_msgdef(symtab_addctx *ctx, const char *prefix, ctx, sizeof(*m->layout) + sizeof(_upb_fasttable_entry)); } - m->oneof_count = 0; + SET_OPTIONS(m->opts, DescriptorProto, MessageOptions, msg_proto); + + m->oneof_count = n_oneof; m->oneofs = symtab_alloc(ctx, sizeof(*m->oneofs) * n_oneof); for (i = 0; i < n_oneof; i++) { - create_oneofdef(ctx, m, oneofs[i]); + create_oneofdef(ctx, m, oneofs[i], &m->oneofs[i]); } - m->field_count = 0; + m->field_count = n_field; m->fields = symtab_alloc(ctx, sizeof(*m->fields) * n_field); for (i = 0; i < n_field; i++) { - create_fielddef(ctx, m->full_name, m, fields[i]); + create_fielddef(ctx, m->full_name, m, fields[i], &m->fields[i]); } m->ext_range_count = n_ext_range; @@ -1990,71 +2479,86 @@ static void create_msgdef(symtab_addctx *ctx, const char *prefix, upb_extrange *r_def = (upb_extrange*)&m->ext_ranges[i]; r_def->start = google_protobuf_DescriptorProto_ExtensionRange_start(r); r_def->end = google_protobuf_DescriptorProto_ExtensionRange_end(r); + SET_OPTIONS(r_def->opts, DescriptorProto_ExtensionRange, + ExtensionRangeOptions, r); } finalize_oneofs(ctx, m); assign_msg_wellknowntype(m); upb_inttable_compact(&m->itof, ctx->arena); - /* This message is built. Now build nested messages and enums. */ + /* This message is built. Now build nested entities. 
*/ enums = google_protobuf_DescriptorProto_enum_type(msg_proto, &n); + m->nested_enum_count = n; + m->nested_enums = symtab_alloc(ctx, sizeof(*m->nested_enums) * n); for (i = 0; i < n; i++) { - create_enumdef(ctx, m->full_name, enums[i]); + m->nested_enum_count = i + 1; + create_enumdef(ctx, m->full_name, enums[i], m, &m->nested_enums[i]); } fields = google_protobuf_DescriptorProto_extension(msg_proto, &n); + m->nested_ext_count = n; + m->nested_exts = symtab_alloc(ctx, sizeof(*m->nested_exts) * n); for (i = 0; i < n; i++) { - create_fielddef(ctx, m->full_name, NULL, fields[i]); - } - - msgs = google_protobuf_DescriptorProto_nested_type(msg_proto, &n); - for (i = 0; i < n; i++) { - create_msgdef(ctx, m->full_name, msgs[i]); + create_fielddef(ctx, m->full_name, NULL, fields[i], &m->nested_exts[i]); + ((upb_fielddef*)&m->nested_exts[i])->index_ = i; } -} - -static void count_types_in_msg(const google_protobuf_DescriptorProto *msg_proto, - upb_filedef *file) { - const google_protobuf_DescriptorProto *const *msgs; - size_t i, n; - - file->msg_count++; msgs = google_protobuf_DescriptorProto_nested_type(msg_proto, &n); + m->nested_msg_count = n; + m->nested_msgs = symtab_alloc(ctx, sizeof(*m->nested_msgs) * n); for (i = 0; i < n; i++) { - count_types_in_msg(msgs[i], file); - } - - google_protobuf_DescriptorProto_enum_type(msg_proto, &n); - file->enum_count += n; - - google_protobuf_DescriptorProto_extension(msg_proto, &n); - file->ext_count += n; -} - -static void count_types_in_file( - const google_protobuf_FileDescriptorProto *file_proto, - upb_filedef *file) { - const google_protobuf_DescriptorProto *const *msgs; - size_t i, n; - - msgs = google_protobuf_FileDescriptorProto_message_type(file_proto, &n); - for (i = 0; i < n; i++) { - count_types_in_msg(msgs[i], file); + create_msgdef(ctx, m->full_name, msgs[i], m, &m->nested_msgs[i]); } - - google_protobuf_FileDescriptorProto_enum_type(file_proto, &n); - file->enum_count += n; - - 
google_protobuf_FileDescriptorProto_extension(file_proto, &n); - file->ext_count += n; } static void resolve_fielddef(symtab_addctx *ctx, const char *prefix, upb_fielddef *f) { - upb_strview name; const google_protobuf_FieldDescriptorProto *field_proto = f->sub.unresolved; + upb_strview name = + google_protobuf_FieldDescriptorProto_type_name(field_proto); + bool has_name = + google_protobuf_FieldDescriptorProto_has_type_name(field_proto); + + // Resolve subdef by type name, if necessary. + switch ((int)f->type_) { + case 0: { + // Type was not specified and must be inferred. + UPB_ASSERT(has_name); + upb_deftype_t type; + const void *def = + symtab_resolveany(ctx, f->full_name, prefix, name, &type); + switch (type) { + case UPB_DEFTYPE_ENUM: + f->sub.enumdef = def; + f->type_ = UPB_DTYPE_ENUM; + break; + case UPB_DEFTYPE_MSG: + f->sub.msgdef = def; + f->type_ = UPB_DTYPE_MESSAGE; // It appears there is no way of this + // being a group. + break; + default: + symtab_errf(ctx, "Couldn't resolve type name for field %s", + f->full_name); + } + } + case UPB_DTYPE_MESSAGE: + case UPB_DTYPE_GROUP: + UPB_ASSERT(has_name); + f->sub.msgdef = + symtab_resolve(ctx, f->full_name, prefix, name, UPB_DEFTYPE_MSG); + break; + case UPB_DTYPE_ENUM: + UPB_ASSERT(has_name); + f->sub.enumdef = + symtab_resolve(ctx, f->full_name, prefix, name, UPB_DEFTYPE_ENUM); + break; + default: + // No resolution necessary. 
+ break; + } if (f->is_extension_) { if (!google_protobuf_FieldDescriptorProto_has_extendee(field_proto)) { @@ -2063,7 +2567,8 @@ static void resolve_fielddef(symtab_addctx *ctx, const char *prefix, } name = google_protobuf_FieldDescriptorProto_extendee(field_proto); - f->msgdef = symtab_resolve(ctx, f, prefix, name, UPB_DEFTYPE_MSG); + f->msgdef = + symtab_resolve(ctx, f->full_name, prefix, name, UPB_DEFTYPE_MSG); const upb_msglayout_ext *ext = ctx->file->ext_layouts[f->layout_index]; if (ctx->layout) { @@ -2082,26 +2587,6 @@ static void resolve_fielddef(symtab_addctx *ctx, const char *prefix, upb_value_constptr(f), ctx->arena)); } - if ((upb_fielddef_issubmsg(f) || f->type_ == UPB_DESCRIPTOR_TYPE_ENUM) && - !google_protobuf_FieldDescriptorProto_has_type_name(field_proto)) { - symtab_errf(ctx, "field '%s' is missing type name", f->full_name); - } - - name = google_protobuf_FieldDescriptorProto_type_name(field_proto); - - if (upb_fielddef_issubmsg(f)) { - f->sub.msgdef = symtab_resolve(ctx, f, prefix, name, UPB_DEFTYPE_MSG); - if (f->is_extension_ && f->msgdef->is_message_set && - f->file == f->msgdef->file) { - // TODO: When defs are restructured to follow message nesting, we can make - // this check more robust. The actual rules for what make something - // qualify as a MessageSet item are more strict. - ((upb_msgdef*)f->sub.msgdef)->message_set_ext = f; - } - } else if (f->type_ == UPB_DESCRIPTOR_TYPE_ENUM) { - f->sub.enumdef = symtab_resolve(ctx, f, prefix, name, UPB_DEFTYPE_ENUM); - } - /* Have to delay resolving of the default value until now because of the enum * case, since enum defaults are specified with a label. 
*/ if (google_protobuf_FieldDescriptorProto_has_default_value(field_proto)) { @@ -2119,35 +2604,75 @@ static void resolve_fielddef(symtab_addctx *ctx, const char *prefix, } parse_default(ctx, defaultval.data, defaultval.size, f); + f->has_default = true; } else { set_default_default(ctx, f); + f->has_default = false; + } +} + +static void resolve_msgdef(symtab_addctx *ctx, upb_msgdef *m) { + for (int i = 0; i < m->field_count; i++) { + resolve_fielddef(ctx, m->full_name, (upb_fielddef *)&m->fields[i]); + } + + for (int i = 0; i < m->nested_ext_count; i++) { + resolve_fielddef(ctx, m->full_name, (upb_fielddef *)&m->nested_exts[i]); + } + + if (!ctx->layout) make_layout(ctx, m); + + m->in_message_set = false; + if (m->nested_ext_count == 1) { + const upb_fielddef *ext = &m->nested_exts[0]; + if (ext->type_ == UPB_DTYPE_MESSAGE && ext->label_ == UPB_LABEL_OPTIONAL && + ext->sub.msgdef == m && + google_protobuf_MessageOptions_message_set_wire_format( + ext->msgdef->opts)) { + m->in_message_set = true; + } + } + + for (int i = 0; i < m->nested_msg_count; i++) { + resolve_msgdef(ctx, (upb_msgdef*)&m->nested_msgs[i]); } } +static int count_exts_in_msg(const google_protobuf_DescriptorProto *msg_proto) { + size_t n; + google_protobuf_DescriptorProto_extension(msg_proto, &n); + int ext_count = n; + + const google_protobuf_DescriptorProto *const *nested_msgs = + google_protobuf_DescriptorProto_nested_type(msg_proto, &n); + for (size_t i = 0; i < n; i++) { + ext_count += count_exts_in_msg(nested_msgs[i]); + } + + return ext_count; +} + static void build_filedef( symtab_addctx *ctx, upb_filedef *file, const google_protobuf_FileDescriptorProto *file_proto) { - const google_protobuf_FileOptions *file_options_proto; const google_protobuf_DescriptorProto *const *msgs; const google_protobuf_EnumDescriptorProto *const *enums; const google_protobuf_FieldDescriptorProto *const *exts; - const upb_strview* strs; + const google_protobuf_ServiceDescriptorProto *const *services; + const 
upb_strview *strs; + const int32_t *public_deps; size_t i, n; file->symtab = ctx->symtab; - /* One pass to count and allocate. */ - file->msg_count = 0; - file->enum_count = 0; - file->ext_count = 0; - count_types_in_file(file_proto, file); - file->msgs = symtab_alloc(ctx, sizeof(*file->msgs) * file->msg_count); - file->enums = symtab_alloc(ctx, sizeof(*file->enums) * file->enum_count); - file->exts = symtab_alloc(ctx, sizeof(*file->exts) * file->ext_count); - - ctx->msg_count = 0; - ctx->enum_count = 0; - ctx->ext_count = 0; + /* Count all extensions in the file, to build a flat array of layouts. */ + google_protobuf_FileDescriptorProto_extension(file_proto, &n); + int ext_count = n; + msgs = google_protobuf_FileDescriptorProto_message_type(file_proto, &n); + for (int i = 0; i < n; i++) { + ext_count += count_exts_in_msg(msgs[i]); + } + file->ext_count = ext_count; if (ctx->layout) { /* We are using the ext layouts that were passed in. */ @@ -2172,8 +2697,6 @@ static void build_filedef( file->name = strviewdup(ctx, google_protobuf_FileDescriptorProto_name(file_proto)); - file->phpprefix = NULL; - file->phpnamespace = NULL; if (google_protobuf_FileDescriptorProto_has_package(file_proto)) { upb_strview package = @@ -2185,8 +2708,7 @@ static void build_filedef( } if (google_protobuf_FileDescriptorProto_has_syntax(file_proto)) { - upb_strview syntax = - google_protobuf_FileDescriptorProto_syntax(file_proto); + upb_strview syntax = google_protobuf_FileDescriptorProto_syntax(file_proto); if (streql_view(syntax, "proto2")) { file->syntax = UPB_SYNTAX_PROTO2; @@ -2201,93 +2723,111 @@ static void build_filedef( } /* Read options. 
*/ - file_options_proto = google_protobuf_FileDescriptorProto_options(file_proto); - if (file_options_proto) { - if (google_protobuf_FileOptions_has_php_class_prefix(file_options_proto)) { - file->phpprefix = strviewdup( - ctx, - google_protobuf_FileOptions_php_class_prefix(file_options_proto)); - } - if (google_protobuf_FileOptions_has_php_namespace(file_options_proto)) { - file->phpnamespace = strviewdup( - ctx, google_protobuf_FileOptions_php_namespace(file_options_proto)); - } - } + SET_OPTIONS(file->opts, FileDescriptorProto, FileOptions, file_proto); /* Verify dependencies. */ strs = google_protobuf_FileDescriptorProto_dependency(file_proto, &n); + file->dep_count = n; file->deps = symtab_alloc(ctx, sizeof(*file->deps) * n); for (i = 0; i < n; i++) { - upb_strview dep_name = strs[i]; - upb_value v; - if (!upb_strtable_lookup2(&ctx->symtab->files, dep_name.data, - dep_name.size, &v)) { + upb_strview str = strs[i]; + file->deps[i] = upb_symtab_lookupfile2(ctx->symtab, str.data, str.size); + if (!file->deps[i]) { symtab_errf(ctx, "Depends on file '" UPB_STRVIEW_FORMAT "', but it has not been loaded", - UPB_STRVIEW_ARGS(dep_name)); + UPB_STRVIEW_ARGS(str)); } - file->deps[i] = upb_value_getconstptr(v); } - /* Create messages. */ - msgs = google_protobuf_FileDescriptorProto_message_type(file_proto, &n); + public_deps = + google_protobuf_FileDescriptorProto_public_dependency(file_proto, &n); + file->public_dep_count = n; + file->public_deps = symtab_alloc(ctx, sizeof(*file->public_deps) * n); + int32_t *mutable_public_deps = (int32_t*)file->public_deps; for (i = 0; i < n; i++) { - create_msgdef(ctx, file->package, msgs[i]); + if (public_deps[i] >= file->dep_count) { + symtab_errf(ctx, "public_dep %d is out of range", (int)public_deps[i]); + } + mutable_public_deps[i] = public_deps[i]; } /* Create enums. 
*/ enums = google_protobuf_FileDescriptorProto_enum_type(file_proto, &n); + file->top_lvl_enum_count = n; + file->top_lvl_enums = symtab_alloc(ctx, sizeof(*file->top_lvl_enums) * n); for (i = 0; i < n; i++) { - create_enumdef(ctx, file->package, enums[i]); + create_enumdef(ctx, file->package, enums[i], NULL, &file->top_lvl_enums[i]); } /* Create extensions. */ exts = google_protobuf_FileDescriptorProto_extension(file_proto, &n); + file->top_lvl_ext_count = n; + file->top_lvl_exts = symtab_alloc(ctx, sizeof(*file->top_lvl_exts) * n); for (i = 0; i < n; i++) { - create_fielddef(ctx, file->package, NULL, exts[i]); + create_fielddef(ctx, file->package, NULL, exts[i], &file->top_lvl_exts[i]); + ((upb_fielddef*)&file->top_lvl_exts[i])->index_ = i; } - UPB_ASSERT(ctx->ext_count == file->ext_count); + /* Create messages. */ + msgs = google_protobuf_FileDescriptorProto_message_type(file_proto, &n); + file->top_lvl_msg_count = n; + file->top_lvl_msgs = symtab_alloc(ctx, sizeof(*file->top_lvl_msgs) * n); + for (i = 0; i < n; i++) { + create_msgdef(ctx, file->package, msgs[i], NULL, &file->top_lvl_msgs[i]); + } - /* Now that all names are in the table, build layouts and resolve refs. */ - for (i = 0; i < (size_t)file->ext_count; i++) { - resolve_fielddef(ctx, file->package, (upb_fielddef*)&file->exts[i]); + /* Create services. */ + services = google_protobuf_FileDescriptorProto_service(file_proto, &n); + file->service_count = n; + file->services = symtab_alloc(ctx, sizeof(*file->services) * n); + for (i = 0; i < n; i++) { + create_service(ctx, services[i], &file->services[i]); + ((upb_servicedef*)&file->services[i])->index = i; } - for (i = 0; i < (size_t)file->msg_count; i++) { - const upb_msgdef *m = &file->msgs[i]; - int j; - for (j = 0; j < m->field_count; j++) { - resolve_fielddef(ctx, m->full_name, (upb_fielddef*)&m->fields[j]); - } + /* Now that all names are in the table, build layouts and resolve refs. 
*/ + for (i = 0; i < (size_t)file->top_lvl_ext_count; i++) { + resolve_fielddef(ctx, file->package, (upb_fielddef*)&file->top_lvl_exts[i]); } - if (!ctx->layout) { - for (i = 0; i < (size_t)file->msg_count; i++) { - const upb_msgdef *m = &file->msgs[i]; - make_layout(ctx, m); - } + for (i = 0; i < (size_t)file->top_lvl_msg_count; i++) { + resolve_msgdef(ctx, (upb_msgdef*)&file->top_lvl_msgs[i]); } CHK_OOM( _upb_extreg_add(ctx->symtab->extreg, file->ext_layouts, file->ext_count)); } -static void remove_filedef(symtab_addctx *ctx, upb_symtab *s, upb_filedef *file) { - int i; - for (i = 0; i < ctx->msg_count; i++) { - const char *name = file->msgs[i].full_name; - upb_strtable_remove(&s->syms, name, strlen(name), NULL); - } - for (i = 0; i < ctx->enum_count; i++) { - const char *name = file->enums[i].full_name; - upb_strtable_remove(&s->syms, name, strlen(name), NULL); - } - for (i = 0; i < ctx->ext_count; i++) { - const char *name = file->exts[i].full_name; - upb_strtable_remove(&s->syms, name, strlen(name), NULL); +static void remove_filedef(upb_symtab *s, upb_filedef *file) { + intptr_t iter = UPB_INTTABLE_BEGIN; + upb_strview key; + upb_value val; + while (upb_strtable_next2(&s->syms, &key, &val, &iter)) { + const upb_filedef *f; + switch (deftype(val)) { + case UPB_DEFTYPE_EXT: + f = upb_fielddef_file(unpack_def(val, UPB_DEFTYPE_EXT)); + break; + case UPB_DEFTYPE_MSG: + f = upb_msgdef_file(unpack_def(val, UPB_DEFTYPE_MSG)); + break; + case UPB_DEFTYPE_ENUM: + f = upb_enumdef_file(unpack_def(val, UPB_DEFTYPE_ENUM)); + break; + case UPB_DEFTYPE_ENUMVAL: + f = upb_enumdef_file( + upb_enumvaldef_enum(unpack_def(val, UPB_DEFTYPE_ENUMVAL))); + break; + case UPB_DEFTYPE_SERVICE: + f = upb_servicedef_file(unpack_def(val, UPB_DEFTYPE_SERVICE)); + break; + default: + UPB_UNREACHABLE(); + } + + if (f == file) upb_strtable_removeiter(&s->syms, &iter); } } @@ -2296,20 +2836,38 @@ static const upb_filedef *_upb_symtab_addfile( const upb_msglayout_file *layout, upb_status 
*status) { symtab_addctx ctx; upb_strview name = google_protobuf_FileDescriptorProto_name(file_proto); + upb_value v; - if (upb_strtable_lookup2(&s->files, name.data, name.size, NULL)) { - upb_status_seterrf(status, "duplicate file name (%.*s)", - UPB_STRVIEW_ARGS(name)); - return NULL; + if (upb_strtable_lookup2(&s->files, name.data, name.size, &v)) { + if (unpack_def(v, UPB_DEFTYPE_FILE)) { + upb_status_seterrf(status, "duplicate file name (%.*s)", + UPB_STRVIEW_ARGS(name)); + return NULL; + } + const upb_msglayout_file *registered = unpack_def(v, UPB_DEFTYPE_LAYOUT); + UPB_ASSERT(registered); + if (layout && layout != registered) { + upb_status_seterrf( + status, "tried to build with a different layout (filename=%.*s)", + UPB_STRVIEW_ARGS(name)); + return NULL; + } + layout = registered; } ctx.symtab = s; ctx.layout = layout; + ctx.msg_count = 0; + ctx.enum_count = 0; + ctx.ext_count = 0; ctx.status = status; ctx.file = NULL; ctx.arena = upb_arena_new(); + ctx.tmp_arena = upb_arena_new(); - if (!ctx.arena) { + if (!ctx.arena && !ctx.tmp_arena) { + if (ctx.arena) upb_arena_free(ctx.arena); + if (ctx.tmp_arena) upb_arena_free(ctx.tmp_arena); upb_status_setoom(status); return NULL; } @@ -2317,27 +2875,28 @@ static const upb_filedef *_upb_symtab_addfile( if (UPB_UNLIKELY(UPB_SETJMP(ctx.err))) { UPB_ASSERT(!upb_ok(status)); if (ctx.file) { - remove_filedef(&ctx, s, ctx.file); + remove_filedef(s, ctx.file); ctx.file = NULL; } } else { ctx.file = symtab_alloc(&ctx, sizeof(*ctx.file)); build_filedef(&ctx, ctx.file, file_proto); upb_strtable_insert(&s->files, name.data, name.size, - upb_value_constptr(ctx.file), ctx.arena); + pack_def(ctx.file, UPB_DEFTYPE_FILE), ctx.arena); UPB_ASSERT(upb_ok(status)); upb_arena_fuse(s->arena, ctx.arena); } upb_arena_free(ctx.arena); + upb_arena_free(ctx.tmp_arena); return ctx.file; } -const upb_filedef *upb_symtab_addfile( - upb_symtab *s, const google_protobuf_FileDescriptorProto *file_proto, - upb_status *status) { - return 
_upb_symtab_addfile(s, file_proto, NULL, status); -} + const upb_filedef *upb_symtab_addfile( + upb_symtab * s, const google_protobuf_FileDescriptorProto *file_proto, + upb_status *status) { + return _upb_symtab_addfile(s, file_proto, NULL, status); + } /* Include here since we want most of this file to be stdio-free. */ #include <stdio.h> @@ -2352,7 +2911,7 @@ bool _upb_symtab_loaddefinit(upb_symtab *s, const upb_def_init *init) { upb_status_clear(&status); - if (upb_strtable_lookup(&s->files, init->filename, NULL)) { + if (upb_symtab_lookupfile(s, init->filename)) { return true; } @@ -2376,7 +2935,9 @@ bool _upb_symtab_loaddefinit(upb_symtab *s, const upb_def_init *init) { goto err; } - if (!_upb_symtab_addfile(s, file, init->layout, &status)) goto err; + if (!_upb_symtab_addfile(s, file, init->layout, &status)) { + goto err; + } upb_arena_free(arena); return true; @@ -2406,8 +2967,48 @@ const upb_fielddef *_upb_symtab_lookupextfield(const upb_symtab *s, return upb_value_getconstptr(v); } +const upb_fielddef *upb_symtab_lookupextbynum(const upb_symtab *s, + const upb_msgdef *m, + int32_t fieldnum) { + const upb_msglayout *l = upb_msgdef_layout(m); + const upb_msglayout_ext *ext = _upb_extreg_get(s->extreg, l, fieldnum); + return ext ? _upb_symtab_lookupextfield(s, ext) : NULL; +} + +bool _upb_symtab_registerlayout(upb_symtab *s, const char *filename, + const upb_msglayout_file *file) { + if (upb_symtab_lookupfile(s, filename)) return false; + upb_value v = pack_def(file, UPB_DEFTYPE_LAYOUT); + return upb_strtable_insert(&s->files, filename, strlen(filename), v, + s->arena); +} + const upb_extreg *upb_symtab_extreg(const upb_symtab *s) { return s->extreg; } +const upb_fielddef **upb_symtab_getallexts(const upb_symtab *s, + const upb_msgdef *m, size_t *count) { + size_t n = 0; + intptr_t iter = UPB_INTTABLE_BEGIN; + uintptr_t key; + upb_value val; + // This is O(all exts) instead of O(exts for m). 
If we need this to be + // efficient we may need to make extreg into a two-level table, or have a + // second per-message index. + while (upb_inttable_next2(&s->exts, &key, &val, &iter)) { + const upb_fielddef *f = upb_value_getconstptr(val); + if (upb_fielddef_containingtype(f) == m) n++; + } + const upb_fielddef **exts = malloc(n * sizeof(*exts)); + iter = UPB_INTTABLE_BEGIN; + size_t i = 0; + while (upb_inttable_next2(&s->exts, &key, &val, &iter)) { + const upb_fielddef *f = upb_value_getconstptr(val); + if (upb_fielddef_containingtype(f) == m) exts[i++] = f; + } + *count = n; + return exts; +} + #undef CHK_OOM diff --git a/upb/def.h b/upb/def.h index b548e2b360..88cc5613be 100644 --- a/upb/def.h +++ b/upb/def.h @@ -62,10 +62,16 @@ struct upb_fielddef; typedef struct upb_fielddef upb_fielddef; struct upb_filedef; typedef struct upb_filedef upb_filedef; +struct upb_methoddef; +typedef struct upb_methoddef upb_methoddef; struct upb_msgdef; typedef struct upb_msgdef upb_msgdef; struct upb_oneofdef; typedef struct upb_oneofdef upb_oneofdef; +struct upb_servicedef; +typedef struct upb_servicedef upb_servicedef; +struct upb_streamdef; +typedef struct upb_streamdef upb_streamdef; struct upb_symtab; typedef struct upb_symtab upb_symtab; @@ -106,6 +112,8 @@ typedef enum { * protobuf wire format. 
*/ #define UPB_MAX_FIELDNUMBER ((1 << 29) - 1) +const google_protobuf_FieldOptions *upb_fielddef_options(const upb_fielddef *f); +bool upb_fielddef_hasoptions(const upb_fielddef *f); const char *upb_fielddef_fullname(const upb_fielddef *f); upb_fieldtype_t upb_fielddef_type(const upb_fielddef *f); upb_descriptortype_t upb_fielddef_descriptortype(const upb_fielddef *f); @@ -126,6 +134,7 @@ bool upb_fielddef_isstring(const upb_fielddef *f); bool upb_fielddef_isseq(const upb_fielddef *f); bool upb_fielddef_isprimitive(const upb_fielddef *f); bool upb_fielddef_ismap(const upb_fielddef *f); +bool upb_fielddef_hasdefault(const upb_fielddef *f); int64_t upb_fielddef_defaultint64(const upb_fielddef *f); int32_t upb_fielddef_defaultint32(const upb_fielddef *f); uint64_t upb_fielddef_defaultuint64(const upb_fielddef *f); @@ -140,11 +149,14 @@ const upb_msgdef *upb_fielddef_msgsubdef(const upb_fielddef *f); const upb_enumdef *upb_fielddef_enumsubdef(const upb_fielddef *f); const upb_msglayout_field *upb_fielddef_layout(const upb_fielddef *f); const upb_msglayout_ext *_upb_fielddef_extlayout(const upb_fielddef *f); +bool _upb_fielddef_proto3optional(const upb_fielddef *f); /* upb_oneofdef ***************************************************************/ typedef upb_inttable_iter upb_oneof_iter; +const google_protobuf_OneofOptions *upb_oneofdef_options(const upb_oneofdef *o); +bool upb_oneofdef_hasoptions(const upb_oneofdef *o); const char *upb_oneofdef_name(const upb_oneofdef *o); const upb_msgdef *upb_oneofdef_containingtype(const upb_oneofdef *o); uint32_t upb_oneofdef_index(const upb_oneofdef *o); @@ -196,11 +208,13 @@ typedef upb_strtable_iter upb_msg_oneof_iter; #define UPB_TIMESTAMP_SECONDS 1 #define UPB_TIMESTAMP_NANOS 2 +const google_protobuf_MessageOptions *upb_msgdef_options(const upb_msgdef *m); +bool upb_msgdef_hasoptions(const upb_msgdef *m); const char *upb_msgdef_fullname(const upb_msgdef *m); const upb_filedef *upb_msgdef_file(const upb_msgdef *m); +const 
upb_msgdef *upb_msgdef_containingtype(const upb_msgdef *m); const char *upb_msgdef_name(const upb_msgdef *m); upb_syntax_t upb_msgdef_syntax(const upb_msgdef *m); -bool upb_msgdef_mapentry(const upb_msgdef *m); upb_wellknowntype_t upb_msgdef_wellknowntype(const upb_msgdef *m); bool upb_msgdef_iswrapper(const upb_msgdef *m); bool upb_msgdef_isnumberwrapper(const upb_msgdef *m); @@ -227,6 +241,18 @@ UPB_INLINE const upb_fielddef *upb_msgdef_ntofz(const upb_msgdef *m, return upb_msgdef_ntof(m, name, strlen(name)); } +UPB_INLINE bool upb_msgdef_mapentry(const upb_msgdef *m) { + return google_protobuf_MessageOptions_map_entry(upb_msgdef_options(m)); +} + +/* Nested entities. */ +int upb_msgdef_nestedmsgcount(const upb_msgdef *m); +int upb_msgdef_nestedenumcount(const upb_msgdef *m); +int upb_msgdef_nestedextcount(const upb_msgdef *m); +const upb_msgdef *upb_msgdef_nestedmsg(const upb_msgdef *m, int i); +const upb_enumdef *upb_msgdef_nestedenum(const upb_msgdef *m, int i); +const upb_fielddef *upb_msgdef_nestedext(const upb_msgdef *m, int i); + /* Lookup of either field or oneof by name. Returns whether either was found. * If the return is true, then the found def will be set, and the non-found * one set to NULL. */ @@ -242,6 +268,10 @@ UPB_INLINE bool upb_msgdef_lookupnamez(const upb_msgdef *m, const char *name, /* Returns a field by either JSON name or regular proto name. 
*/ const upb_fielddef *upb_msgdef_lookupjsonname(const upb_msgdef *m, const char *name, size_t len); +UPB_INLINE const upb_fielddef *upb_msgdef_lookupjsonnamez(const upb_msgdef *m, + const char *name) { + return upb_msgdef_lookupjsonname(m, name, strlen(name)); +} /* DEPRECATED, slated for removal */ int upb_msgdef_numfields(const upb_msgdef *m); @@ -275,9 +305,12 @@ int32_t upb_extrange_end(const upb_extrange *r); typedef upb_strtable_iter upb_enum_iter; +const google_protobuf_EnumOptions *upb_enumdef_options(const upb_enumdef *e); +bool upb_enumdef_hasoptions(const upb_enumdef *e); const char *upb_enumdef_fullname(const upb_enumdef *e); const char *upb_enumdef_name(const upb_enumdef *e); const upb_filedef *upb_enumdef_file(const upb_enumdef *e); +const upb_msgdef *upb_enumdef_containingtype(const upb_enumdef *e); int32_t upb_enumdef_default(const upb_enumdef *e); int upb_enumdef_valuecount(const upb_enumdef *e); const upb_enumvaldef *upb_enumdef_value(const upb_enumdef *e, int i); @@ -303,6 +336,9 @@ UPB_INLINE const upb_enumvaldef *upb_enumdef_lookupnamez(const upb_enumdef *e, /* upb_enumvaldef *************************************************************/ +const google_protobuf_EnumValueOptions *upb_enumvaldef_options( + const upb_enumvaldef *e); +bool upb_enumvaldef_hasoptions(const upb_enumvaldef *e); const char *upb_enumvaldef_fullname(const upb_enumvaldef *e); const char *upb_enumvaldef_name(const upb_enumvaldef *e); int32_t upb_enumvaldef_number(const upb_enumvaldef *e); @@ -310,18 +346,54 @@ const upb_enumdef *upb_enumvaldef_enum(const upb_enumvaldef *e); /* upb_filedef ****************************************************************/ +const google_protobuf_FileOptions *upb_filedef_options(const upb_filedef *f); +bool upb_filedef_hasoptions(const upb_filedef *f); const char *upb_filedef_name(const upb_filedef *f); const char *upb_filedef_package(const upb_filedef *f); const char *upb_filedef_phpprefix(const upb_filedef *f); const char 
*upb_filedef_phpnamespace(const upb_filedef *f); upb_syntax_t upb_filedef_syntax(const upb_filedef *f); int upb_filedef_depcount(const upb_filedef *f); -int upb_filedef_msgcount(const upb_filedef *f); -int upb_filedef_enumcount(const upb_filedef *f); +int upb_filedef_publicdepcount(const upb_filedef *f); +int upb_filedef_toplvlmsgcount(const upb_filedef *f); +int upb_filedef_toplvlenumcount(const upb_filedef *f); +int upb_filedef_toplvlextcount(const upb_filedef *f); +int upb_filedef_servicecount(const upb_filedef *f); const upb_filedef *upb_filedef_dep(const upb_filedef *f, int i); -const upb_msgdef *upb_filedef_msg(const upb_filedef *f, int i); -const upb_enumdef *upb_filedef_enum(const upb_filedef *f, int i); +const upb_filedef *upb_filedef_publicdep(const upb_filedef *f, int i); +const upb_msgdef *upb_filedef_toplvlmsg(const upb_filedef *f, int i); +const upb_enumdef *upb_filedef_toplvlenum(const upb_filedef *f, int i); +const upb_fielddef *upb_filedef_toplvlext(const upb_filedef *f, int i); +const upb_servicedef *upb_filedef_service(const upb_filedef *f, int i); const upb_symtab *upb_filedef_symtab(const upb_filedef *f); +const int32_t *_upb_filedef_publicdepnums(const upb_filedef *f); + +/* upb_methoddef **************************************************************/ + +const google_protobuf_MethodOptions *upb_methoddef_options( + const upb_methoddef *m); +bool upb_methoddef_hasoptions(const upb_methoddef *m); +const char *upb_methoddef_fullname(const upb_methoddef *m); +const char *upb_methoddef_name(const upb_methoddef *m); +const upb_servicedef *upb_methoddef_service(const upb_methoddef *m); +const upb_msgdef *upb_methoddef_inputtype(const upb_methoddef *m); +const upb_msgdef *upb_methoddef_outputtype(const upb_methoddef *m); +bool upb_methoddef_clientstreaming(const upb_methoddef *m); +bool upb_methoddef_serverstreaming(const upb_methoddef *m); + +/* upb_servicedef *************************************************************/ + +const 
google_protobuf_ServiceOptions *upb_servicedef_options( + const upb_servicedef *s); +bool upb_servicedef_hasoptions(const upb_servicedef *s); +const char *upb_servicedef_fullname(const upb_servicedef *s); +const char *upb_servicedef_name(const upb_servicedef *s); +int upb_servicedef_index(const upb_servicedef *s); +const upb_filedef *upb_servicedef_file(const upb_servicedef *s); +int upb_servicedef_methodcount(const upb_servicedef *s); +const upb_methoddef *upb_servicedef_method(const upb_servicedef *s, int i); +const upb_methoddef *upb_servicedef_lookupmethod(const upb_servicedef *s, + const char *name); /* upb_symtab *****************************************************************/ @@ -337,9 +409,12 @@ const upb_fielddef *upb_symtab_lookupext(const upb_symtab *s, const char *sym); const upb_fielddef *upb_symtab_lookupext2(const upb_symtab *s, const char *sym, size_t len); const upb_filedef *upb_symtab_lookupfile(const upb_symtab *s, const char *name); +const upb_servicedef *upb_symtab_lookupservice(const upb_symtab *s, + const char *name); +const upb_filedef *upb_symtab_lookupfileforsym(const upb_symtab *s, + const char *name); const upb_filedef *upb_symtab_lookupfile2( const upb_symtab *s, const char *name, size_t len); -int upb_symtab_filecount(const upb_symtab *s); const upb_filedef *upb_symtab_addfile( upb_symtab *s, const google_protobuf_FileDescriptorProto *file, upb_status *status); @@ -347,7 +422,12 @@ size_t _upb_symtab_bytesloaded(const upb_symtab *s); upb_arena *_upb_symtab_arena(const upb_symtab *s); const upb_fielddef *_upb_symtab_lookupextfield(const upb_symtab *s, const upb_msglayout_ext *ext); +const upb_fielddef *upb_symtab_lookupextbynum(const upb_symtab *s, + const upb_msgdef *m, + int32_t fieldnum); const upb_extreg *upb_symtab_extreg(const upb_symtab *s); +const upb_fielddef **upb_symtab_getallexts(const upb_symtab *s, + const upb_msgdef *m, size_t *count); /* For generated code only: loads a generated descriptor. 
*/ typedef struct upb_def_init { @@ -358,6 +438,7 @@ typedef struct upb_def_init { } upb_def_init; bool _upb_symtab_loaddefinit(upb_symtab *s, const upb_def_init *init); +void _upb_symtab_allownameconflicts(upb_symtab *s); #include "upb/port_undef.inc" diff --git a/upb/table.c b/upb/table.c index 21e42ac816..dabcff0482 100644 --- a/upb/table.c +++ b/upb/table.c @@ -805,6 +805,101 @@ void upb_inttable_next(upb_inttable_iter *iter) { } } +bool upb_inttable_next2(const upb_inttable *t, uintptr_t *key, upb_value *val, + intptr_t *iter) { + intptr_t i = *iter; + if ((size_t)(i + 1) <= t->array_size) { + while ((size_t)++i < t->array_size) { + upb_tabval ent = t->array[i]; + if (upb_arrhas(ent)) { + *key = i; + *val = _upb_value_val(ent.val); + *iter = i; + return true; + } + } + i--; /* Step back so the hash part resumes at slot 0. */ + } + + size_t tab_idx = next(&t->t, i - t->array_size); + if (tab_idx < upb_table_size(&t->t)) { + upb_tabent *ent = &t->t.entries[tab_idx]; + *key = ent->key; + *val = _upb_value_val(ent->val.val); + *iter = tab_idx + t->array_size; + return true; + } + + return false; +} + +void upb_inttable_removeiter(upb_inttable *t, intptr_t *iter) { + intptr_t i = *iter; + if (i < t->array_size) { + t->array_count--; + mutable_array(t)[i].val = -1; + } else { + upb_tabent *ent = &t->t.entries[i - t->array_size]; + upb_tabent *prev = NULL; + + // Linear search, not great. 
+ upb_tabent *end = &t->t.entries[upb_table_size(&t->t)]; + for (upb_tabent *e = t->t.entries; e != end; e++) { + if (e->next == ent) { + prev = e; + break; + } + } + + if (prev) { + prev->next = ent->next; + } + + t->t.count--; + ent->key = 0; + ent->next = NULL; + } +} + +bool upb_strtable_next2(const upb_strtable *t, upb_strview *key, upb_value *val, + intptr_t *iter) { + size_t tab_idx = next(&t->t, *iter); + if (tab_idx < upb_table_size(&t->t)) { + upb_tabent *ent = &t->t.entries[tab_idx]; + uint32_t len; + key->data = upb_tabstr(ent->key, &len); + key->size = len; + *val = _upb_value_val(ent->val.val); + *iter = tab_idx; + return true; + } + + return false; +} + +void upb_strtable_removeiter(upb_strtable *t, intptr_t *iter) { + intptr_t i = *iter; + upb_tabent *ent = &t->t.entries[i]; + upb_tabent *prev = NULL; + + // Linear search, not great. + upb_tabent *end = &t->t.entries[upb_table_size(&t->t)]; + for (upb_tabent *e = t->t.entries; e != end; e++) { + if (e->next == ent) { + prev = e; + break; + } + } + + if (prev) { + prev->next = ent->next; + } + + t->t.count--; + ent->key = 0; + ent->next = NULL; +} + bool upb_inttable_done(const upb_inttable_iter *i) { if (!i->t) return true; if (i->array_part) { diff --git a/upb/table_internal.h b/upb/table_internal.h index 1313686848..3e65db7718 100644 --- a/upb/table_internal.h +++ b/upb/table_internal.h @@ -263,6 +263,38 @@ bool upb_strtable_resize(upb_strtable *t, size_t size_lg2, upb_arena *a); /* Iterators ******************************************************************/ +/* New-style iterators. Much simpler, iterator state is held in intptr_t. + * + * intptr_t iter = UPB_INTTABLE_BEGIN; + * uintptr_t key; + * upb_value val; + * while (upb_inttable_next2(t, &key, &val, &iter)) { + * // ... 
+ * } + */ + +#define UPB_INTTABLE_BEGIN -1 + +bool upb_inttable_next2(const upb_inttable *t, uintptr_t *key, upb_value *val, + intptr_t *iter); +void upb_inttable_removeiter(upb_inttable *t, intptr_t *iter); + +/* New-style iterators. Much simpler, iterator state is held in intptr_t. + * + * intptr_t iter = UPB_STRTABLE_BEGIN; + * upb_strview key; + * upb_value val; + * while (upb_strtable_next2(t, &key, &val, &iter)) { + * // ... + * } + */ + +#define UPB_STRTABLE_BEGIN -1 + +bool upb_strtable_next2(const upb_strtable *t, upb_strview *key, upb_value *val, + intptr_t *iter); +void upb_strtable_removeiter(upb_strtable *t, intptr_t *iter); + /* Iterators for int and string tables. We are subject to some kind of unusual * design constraints: * diff --git a/upbc/protoc-gen-upb.cc b/upbc/protoc-gen-upb.cc index ba58c25ebb..dffec31f84 100644 --- a/upbc/protoc-gen-upb.cc +++ b/upbc/protoc-gen-upb.cc @@ -138,12 +138,12 @@ void AddExtensionsFromMessage( std::vector<const protobuf::FieldDescriptor*> SortedExtensions( const protobuf::FileDescriptor* file) { std::vector<const protobuf::FieldDescriptor*> ret; - for (int i = 0; i < file->message_type_count(); i++) { - AddExtensionsFromMessage(file->message_type(i), &ret); - } for (int i = 0; i < file->extension_count(); i++) { ret.push_back(file->extension(i)); } + for (int i = 0; i < file->message_type_count(); i++) { + AddExtensionsFromMessage(file->message_type(i), &ret); + } return ret; } @@ -729,8 +729,42 @@ void WriteHeader(const protobuf::FileDescriptor* file, Output& output) { GenerateExtensionInHeader(ext, output); } + output("extern const upb_msglayout_file $0;\n\n", FileLayoutName(file)); + if (file->name() == protobuf::FileDescriptorProto::descriptor()->file()->name()) { + // This is gratuitously inefficient with how many times it rebuilds + // MessageLayout objects for the same message. But we only do this for one + // proto (descriptor.proto) so we don't worry about it. 
+ const protobuf::Descriptor* max32 = nullptr; + const protobuf::Descriptor* max64 = nullptr; + for (auto message : this_file_messages) { + if (absl::EndsWith(message->name(), "Options")) { + MessageLayout layout(message); + if (max32 == nullptr) { + max32 = message; + max64 = message; + } else { + if (layout.message_size().size32 > + MessageLayout(max32).message_size().size32) { + max32 = message; + } + if (layout.message_size().size64 > + MessageLayout(max64).message_size().size64) { + max64 = message; + } + } + } + } + + output("/* Max size 32 is $0 */\n", max32->full_name()); + output("/* Max size 64 is $0 */\n", max64->full_name()); + MessageLayout::Size size; + size.size32 = MessageLayout(max32).message_size().size32; + size.size64 = MessageLayout(max64).message_size().size64; + output("#define _UPB_MAXOPT_SIZE $0\n\n", GetSizeInit(size)); + } + output( "#ifdef __cplusplus\n" "} /* extern \"C\" */\n"