From 1b9d37a00ebae8b59773c8501d8712e1c3335302 Mon Sep 17 00:00:00 2001 From: Joshua Haberman Date: Sat, 1 Jul 2017 15:15:52 -0700 Subject: [PATCH] Start migrating upb_msglayout to be suitable for generated code. This involves: - remove upb_msglayout -> upb_msgfactory dependency. - remove upb_msglayout -> upb_msgdef dependency (in progress). - make upb_msglayout use a representation that can be statically initialized by generated code. The goal here is that upb_msglayout becomes a kind of "descriptor lite": it contains enough data to parse and serialize protobufs and manipulate a upb_msg in memory, while being far smaller and simpler than a full descriptor. It also does not include field names, which can be a benefit for applications that do not want to leak field names. Generated code can then create a upb_msglayout, and do most things without ever needing to construct full descriptors/defs if they don't want to. --- Makefile | 2 +- tests/bindings/lua/test_upb.lua | 35 +++--- upb/bindings/lua/msg.c | 24 ++-- upb/bindings/lua/upb.h | 1 + upb/bindings/lua/upb/pb.c | 2 +- upb/msg.c | 210 +++++++++++++++++++------------- upb/msg.h | 69 ++++++++--- 7 files changed, 217 insertions(+), 126 deletions(-) diff --git a/Makefile b/Makefile index 09b71c4545..581993bffe 100644 --- a/Makefile +++ b/Makefile @@ -463,7 +463,7 @@ testlua: lua echo LUA $$test; \ LUA_PATH="third_party/lunit/?.lua;upb/bindings/lua/?.lua" \ LUA_CPATH=upb/bindings/lua/?.so \ - lua $$test; \ + $(RUN_UNDER) lua $$test; \ done clean: clean_lua diff --git a/tests/bindings/lua/test_upb.lua b/tests/bindings/lua/test_upb.lua index 1dc07178f4..07d794ce91 100644 --- a/tests/bindings/lua/test_upb.lua +++ b/tests/bindings/lua/test_upb.lua @@ -565,13 +565,20 @@ function test_msg_primitives() local symtab = upb.SymbolTable{ upb.MessageDef{full_name = "TestMessage", fields = { - upb.FieldDef{name = "i32", number = 1, type = upb.TYPE_INT32}, - upb.FieldDef{name = "u32", number = 2, type = upb.TYPE_UINT32}, - 
upb.FieldDef{name = "i64", number = 3, type = upb.TYPE_INT64}, - upb.FieldDef{name = "u64", number = 4, type = upb.TYPE_UINT64}, - upb.FieldDef{name = "dbl", number = 5, type = upb.TYPE_DOUBLE}, - upb.FieldDef{name = "flt", number = 6, type = upb.TYPE_FLOAT}, - upb.FieldDef{name = "bool", number = 7, type = upb.TYPE_BOOL}, + upb.FieldDef{ + name = "i32", number = 1, type = upb.TYPE_INT32, default = 1}, + upb.FieldDef{ + name = "u32", number = 2, type = upb.TYPE_UINT32, default = 2}, + upb.FieldDef{ + name = "i64", number = 3, type = upb.TYPE_INT64, default = 3}, + upb.FieldDef{ + name = "u64", number = 4, type = upb.TYPE_UINT64, default = 4}, + upb.FieldDef{ + name = "dbl", number = 5, type = upb.TYPE_DOUBLE, default = 5}, + upb.FieldDef{ + name = "flt", number = 6, type = upb.TYPE_FLOAT, default = 6}, + upb.FieldDef{ + name = "bool", number = 7, type = upb.TYPE_BOOL, default = true}, } } } @@ -581,13 +588,13 @@ function test_msg_primitives() msg = TestMessage() -- Unset member returns default value. - assert_equal(0, msg.i32) - assert_equal(0, msg.u32) - assert_equal(0, msg.i64) - assert_equal(0, msg.u64) - assert_equal(0, msg.dbl) - assert_equal(0, msg.flt) - assert_equal(false, msg.bool) + assert_equal(1, msg.i32) + assert_equal(2, msg.u32) + assert_equal(3, msg.i64) + assert_equal(4, msg.u64) + assert_equal(5, msg.dbl) + assert_equal(6, msg.flt) + assert_equal(true, msg.bool) -- Attempts to access non-existent fields fail. assert_error_match("no such field", function() msg.no_such = 1 end) diff --git a/upb/bindings/lua/msg.c b/upb/bindings/lua/msg.c index 64c8e7c285..c222ca5444 100644 --- a/upb/bindings/lua/msg.c +++ b/upb/bindings/lua/msg.c @@ -189,7 +189,8 @@ typedef struct lupb_msgfactory { upb_msgfactory *factory; } lupb_msgfactory; -static int lupb_msgclass_pushnew(lua_State *L, int factory, const upb_msglayout *l); +static int lupb_msgclass_pushnew(lua_State *L, int factory, + const upb_msgdef *md); /* lupb_msgfactory helpers. 
*/ @@ -199,8 +200,6 @@ static lupb_msgfactory *lupb_msgfactory_check(lua_State *L, int narg) { static void lupb_msgfactory_pushmsgclass(lua_State *L, int narg, const upb_msgdef *md) { - const lupb_msgfactory *lfactory = lupb_msgfactory_check(L, narg); - lupb_getuservalue(L, narg); lua_pushlightuserdata(L, (void*)md); lua_rawget(L, -2); @@ -208,8 +207,7 @@ static void lupb_msgfactory_pushmsgclass(lua_State *L, int narg, if (lua_isnil(L, -1)) { lua_pop(L, 1); /* TODO: verify md is in symtab? */ - lupb_msgclass_pushnew(L, narg, - upb_msgfactory_getlayout(lfactory->factory, md)); + lupb_msgclass_pushnew(L, narg, md); /* Set in userval. */ lua_pushlightuserdata(L, (void*)md); @@ -317,6 +315,10 @@ const upb_handlers *lupb_msgclass_getmergehandlers(lua_State *L, int narg) { lmsgclass->lfactory->factory, upb_msglayout_msgdef(lmsgclass->layout)); } +upb_msgfactory *lupb_msgclass_getfactory(const lupb_msgclass *lmsgclass) { + return lmsgclass->lfactory->factory; +} + /** * lupb_msgclass_typecheck() * @@ -360,13 +362,13 @@ static const lupb_msgclass *lupb_msgclass_getsubmsgclass(lua_State *L, int narg, return lupb_msgclass_msgclassfor(L, narg, upb_fielddef_msgsubdef(f)); } -static int lupb_msgclass_pushnew(lua_State *L, int factory, const upb_msglayout *l) { +static int lupb_msgclass_pushnew(lua_State *L, int factory, + const upb_msgdef *md) { const lupb_msgfactory *lfactory = lupb_msgfactory_check(L, factory); lupb_msgclass *lmc = lupb_newuserdata(L, sizeof(*lmc), LUPB_MSGCLASS); - UPB_ASSERT(l); lupb_uservalseti(L, -1, LUPB_MSGCLASS_FACTORY, factory); - lmc->layout = l; + lmc->layout = upb_msgfactory_getlayout(lfactory->factory, md); lmc->lfactory = lfactory; return 1; @@ -933,7 +935,7 @@ static const lupb_msgclass *lupb_msg_getsubmsgclass(lua_State *L, int narg, return lupb_msgclass_getsubmsgclass(L, -1, f); } -int lupb_msg_pushref(lua_State *L, int msgclass, void *msg) { +int lupb_msg_pushref(lua_State *L, int msgclass, upb_msg *msg) { const lupb_msgclass *lmsgclass = 
lupb_msgclass_check(L, msgclass); lupb_msg *lmsg = lupb_newuserdata(L, sizeof(lupb_msg), LUPB_MSG); @@ -966,8 +968,8 @@ static int lupb_msg_pushnew(lua_State *L, int narg) { lupb_msg *lmsg = lupb_newuserdata(L, size, LUPB_MSG); lmsg->lmsgclass = lmsgclass; - lmsg->msg = ADD_BYTES(lmsg, sizeof(*lmsg)); - upb_msg_init(lmsg->msg, lmsgclass->layout, lupb_alloc_get(L)); + lmsg->msg = upb_msg_init( + ADD_BYTES(lmsg, sizeof(*lmsg)), lmsgclass->layout, lupb_alloc_get(L)); lupb_uservalseti(L, -1, LUPB_MSG_MSGCLASSINDEX, narg); diff --git a/upb/bindings/lua/upb.h b/upb/bindings/lua/upb.h index 88a201cee8..982ae57b5b 100644 --- a/upb/bindings/lua/upb.h +++ b/upb/bindings/lua/upb.h @@ -135,6 +135,7 @@ const upb_msg *lupb_msg_checkmsg(lua_State *L, int narg, const lupb_msgclass *lupb_msgclass_check(lua_State *L, int narg); const upb_msglayout *lupb_msgclass_getlayout(lua_State *L, int narg); const upb_handlers *lupb_msgclass_getmergehandlers(lua_State *L, int narg); +upb_msgfactory *lupb_msgclass_getfactory(const lupb_msgclass *lmsgclass); void lupb_msg_registertypes(lua_State *L); #endif /* UPB_LUA_UPB_H_ */ diff --git a/upb/bindings/lua/upb/pb.c b/upb/bindings/lua/upb/pb.c index a25d1ac8a6..731430d691 100644 --- a/upb/bindings/lua/upb/pb.c +++ b/upb/bindings/lua/upb/pb.c @@ -106,7 +106,7 @@ static int lupb_pb_makemsgtostrencoder(lua_State *L) { const upb_msglayout *layout = lupb_msgclass_getlayout(L, 1); const lupb_msgclass *lmsgclass = lupb_msgclass_check(L, 1); const upb_msgdef *md = upb_msglayout_msgdef(layout); - upb_msgfactory *factory = upb_msglayout_factory(layout); + upb_msgfactory *factory = lupb_msgclass_getfactory(lmsgclass); const upb_handlers *encode_handlers; const upb_visitorplan *vp; diff --git a/upb/msg.c b/upb/msg.c index 39e3035779..26d0e9847f 100644 --- a/upb/msg.c +++ b/upb/msg.c @@ -24,7 +24,7 @@ bool upb_fieldtype_mapkeyok(upb_fieldtype_t type) { void *upb_array_pack(const upb_array *arr, void *p, size_t *ofs, size_t size); void *upb_map_pack(const 
upb_map *map, void *p, size_t *ofs, size_t size); -#define CHARPTR_AT(msg, ofs) ((char*)msg + ofs) +#define VOIDPTR_AT(msg, ofs) (void*)((char*)msg + ofs) #define ENCODE_MAX_NESTING 64 #define CHECK_TRUE(x) if (!(x)) { return false; } @@ -150,16 +150,8 @@ static upb_msgval upb_msgval_fromdefault(const upb_fielddef *f) { /** upb_msglayout *************************************************************/ struct upb_msglayout { - upb_msgfactory *factory; - const upb_msgdef *msgdef; - size_t size; - size_t extdict_offset; - void *default_msg; - uint32_t *field_offsets; - uint32_t *case_offsets; - uint32_t *hasbits; - bool has_extdict; - uint8_t align; + const upb_msgdef *msgdef; /* TODO(haberman): remove. */ + struct upb_msglayout_msginit_v1 data; }; static void upb_msg_checkfield(const upb_msglayout *l, const upb_fielddef *f) { @@ -167,7 +159,7 @@ static void upb_msg_checkfield(const upb_msglayout *l, const upb_fielddef *f) { } static void upb_msglayout_free(upb_msglayout *l) { - upb_gfree(l->default_msg); + upb_gfree(l->data.default_msg); upb_gfree(l); } @@ -178,35 +170,35 @@ const upb_msgdef *upb_msglayout_msgdef(const upb_msglayout *l) { static size_t upb_msglayout_place(upb_msglayout *l, size_t size) { size_t ret; - l->size = align_up(l->size, size); - l->align = align_up(l->align, size); - ret = l->size; - l->size += size; + l->data.size = align_up(l->data.size, size); + l->data.align = align_up(l->data.align, size); + ret = l->data.size; + l->data.size += size; return ret; } static uint32_t upb_msglayout_offset(const upb_msglayout *l, const upb_fielddef *f) { - return l->field_offsets[upb_fielddef_index(f)]; + return l->data.fields[upb_fielddef_index(f)].offset; } static uint32_t upb_msglayout_hasbit(const upb_msglayout *l, const upb_fielddef *f) { - return l->hasbits[upb_fielddef_index(f)]; + return l->data.fields[upb_fielddef_index(f)].hasbit; } static bool upb_msglayout_initdefault(upb_msglayout *l) { const upb_msgdef *m = l->msgdef; upb_msg_field_iter it; - if 
(upb_msgdef_syntax(m) == UPB_SYNTAX_PROTO2 && l->size) { + if (upb_msgdef_syntax(m) == UPB_SYNTAX_PROTO2 && l->data.size) { /* Allocate default message and set default values in it. */ - l->default_msg = upb_gmalloc(l->size); - if (!l->default_msg) { + l->data.default_msg = upb_gmalloc(l->data.size); + if (!l->data.default_msg) { return false; } - memset(l->default_msg, 0, l->size); + memset(l->data.default_msg, 0, l->data.size); for (upb_msg_field_begin(&it, m); !upb_msg_field_done(&it); upb_msg_field_next(&it)) { @@ -219,7 +211,7 @@ static bool upb_msglayout_initdefault(upb_msglayout *l) { if (!upb_fielddef_isstring(f) && !upb_fielddef_issubmsg(f) && !upb_fielddef_isseq(f)) { - upb_msg_set(l->default_msg, f, upb_msgval_fromdefault(f), l); + upb_msg_set(l->data.default_msg, f, upb_msgval_fromdefault(f), l); } } } @@ -232,22 +224,29 @@ static upb_msglayout *upb_msglayout_new(const upb_msgdef *m) { upb_msg_oneof_iter oit; upb_msglayout *l; size_t hasbit; - size_t array_size = upb_msgdef_numfields(m) + upb_msgdef_numoneofs(m); + size_t submsg_count = 0; - if (upb_msgdef_syntax(m) == UPB_SYNTAX_PROTO2) { - array_size += upb_msgdef_numfields(m); /* hasbits. */ + for (upb_msg_field_begin(&it, m), hasbit = sizeof(void*) * 8; + !upb_msg_field_done(&it); + upb_msg_field_next(&it)) { + const upb_fielddef* f = upb_msg_iter_field(&it); + if (upb_fielddef_issubmsg(f)) { + submsg_count++; + } } - l = upb_gmalloc(sizeof(*l) + (sizeof(uint32_t) * array_size)); + l = upb_gmalloc(sizeof(*l)); if (!l) return NULL; memset(l, 0, sizeof(*l)); - l->msgdef = m; - l->align = 1; - l->field_offsets = (uint32_t*)CHARPTR_AT(l, sizeof(*l)); - l->case_offsets = l->field_offsets + upb_msgdef_numfields(m); - l->hasbits = l->case_offsets + upb_msgdef_numoneofs(m); + + /* TODO(haberman): check OOM. 
*/ + l->data.fields = upb_gmalloc(upb_msgdef_numfields(m) * + sizeof(struct upb_msglayout_fieldinit_v1)); + l->data.submsgs = upb_gmalloc(submsg_count * sizeof(void*)); + l->data.case_offsets = upb_gmalloc(upb_msgdef_numoneofs(m) * + sizeof(*l->data.case_offsets)); /* Allocate data offsets in three stages: * @@ -265,12 +264,13 @@ static upb_msglayout *upb_msglayout_new(const upb_msgdef *m) { const upb_fielddef* f = upb_msg_iter_field(&it); if (upb_fielddef_haspresence(f) && !upb_fielddef_containingoneof(f)) { - l->hasbits[upb_fielddef_index(f)] = hasbit++; + l->data.fields[upb_fielddef_index(f)].hasbit = hasbit++; } } /* Account for space used by hasbits. */ - l->size = div_round_up(hasbit, 8); + l->data.size = div_round_up(hasbit, 8); + l->data.align = 1; /* Allocate non-oneof fields. */ for (upb_msg_field_begin(&it, m); !upb_msg_field_done(&it); @@ -279,13 +279,12 @@ static upb_msglayout *upb_msglayout_new(const upb_msgdef *m) { size_t field_size = upb_msg_fieldsize(f); size_t index = upb_fielddef_index(f); - if (upb_fielddef_containingoneof(f)) { /* Oneofs are handled separately below. */ continue; } - l->field_offsets[index] = upb_msglayout_place(l, field_size); + l->data.fields[index].offset = upb_msglayout_place(l, field_size); } /* Allocate oneof fields. Each oneof field consists of a uint32 for the case @@ -311,19 +310,19 @@ static upb_msglayout *upb_msglayout_new(const upb_msgdef *m) { case_offset = upb_msglayout_place(l, case_size); val_offset = upb_msglayout_place(l, field_size); - l->case_offsets[upb_oneofdef_index(oneof)] = case_offset; + l->data.case_offsets[upb_oneofdef_index(oneof)] = case_offset; /* Assign all fields in the oneof this same offset. 
*/ for (upb_oneof_begin(&fit, oneof); !upb_oneof_done(&fit); upb_oneof_next(&fit)) { const upb_fielddef* f = upb_oneof_iter_field(&fit); - l->field_offsets[upb_fielddef_index(f)] = val_offset; + l->data.fields[upb_fielddef_index(f)].offset = val_offset; } } /* Size of the entire structure should be a multiple of its greatest * alignment. */ - l->size = align_up(l->size, l->align); + l->data.size = align_up(l->data.size, l->data.align); if (upb_msglayout_initdefault(l)) { return l; @@ -333,8 +332,17 @@ static upb_msglayout *upb_msglayout_new(const upb_msgdef *m) { } } -upb_msgfactory *upb_msglayout_factory(const upb_msglayout *layout) { - return layout->factory; +upb_msglayout *upb_msglayout_frominit_v1( + const struct upb_msglayout_msginit_v1 *init, upb_alloc *a) { + UPB_UNUSED(a); + /* If upb upgrades to a v2, this would create a heap-allocated v2. */ + return (upb_msglayout*)init; +} + +void upb_msglayout_uninit_v1(upb_msglayout *layout, upb_alloc *a) { + UPB_UNUSED(layout); + UPB_UNUSED(a); + /* If upb upgrades to a v2, this would free the heap-allocated v2. */ } @@ -393,7 +401,6 @@ const upb_msglayout *upb_msgfactory_getlayout(upb_msgfactory *f, upb_msglayout *l = upb_msglayout_new(m); upb_inttable_insertptr(&mutable_f->layouts, m, upb_value_ptr(l)); UPB_ASSERT(l); - l->factory = f; return l; } } @@ -402,8 +409,7 @@ const upb_msglayout *upb_msgfactory_getlayout(upb_msgfactory *f, void *upb_msg_startstr(void *msg, const void *hd, size_t size_hint) { uint32_t ofs = (uintptr_t)hd; - /* We pass NULL here because we know we can get away with it. */ - upb_alloc *alloc = upb_msg_alloc(msg, NULL); + upb_alloc *alloc = upb_msg_alloc(msg); upb_msgval val; UPB_UNUSED(size_hint); @@ -420,8 +426,7 @@ void *upb_msg_startstr(void *msg, const void *hd, size_t size_hint) { size_t upb_msg_str(void *msg, const void *hd, const char *ptr, size_t size, const upb_bufhandle *handle) { uint32_t ofs = (uintptr_t)hd; - /* We pass NULL here because we know we can get away with it. 
*/ - upb_alloc *alloc = upb_msg_alloc(msg, NULL); + upb_alloc *alloc = upb_msg_alloc(msg); upb_msgval val; size_t newsize; UPB_UNUSED(handle); @@ -628,20 +633,58 @@ bool upb_visitor_visitmsg(upb_visitor *visitor, const upb_msg *msg) { /* If we always read/write as a consistent type to each address, this shouldn't * violate aliasing. */ -#define DEREF(msg, ofs, type) *(type*)CHARPTR_AT(msg, ofs) +#define DEREF(msg, ofs, type) *(type*)VOIDPTR_AT(msg, ofs) + +/* Internal members of a upb_msg. We can change this without breaking binary + * compatibility. We put these before the user's data. The user's upb_msg* + * points after the upb_msg_internal. */ + +/* Used when a message is not extendable. */ +typedef struct { + /* TODO(haberman): add unknown fields. */ + upb_alloc *alloc; +} upb_msg_internal; + +/* Used when a message is extendable. */ +typedef struct { + upb_inttable *extdict; + upb_msg_internal base; +} upb_msg_internal_withext; + +#define INTERNAL_MEMBERS_SIZE(l) \ + sizeof(upb_msg_internal) - (l->data.extendable * sizeof(void*)) + +static upb_msg_internal *upb_msg_getinternal(upb_msg *msg) { + return VOIDPTR_AT(msg, -sizeof(upb_msg_internal)); +} + +static const upb_msg_internal *upb_msg_getinternal_const(const upb_msg *msg) { + return VOIDPTR_AT(msg, -sizeof(upb_msg_internal)); +} + +static upb_msg_internal_withext *upb_msg_getinternalwithext( + upb_msg *msg, const upb_msglayout *l) { + UPB_ASSERT(l->data.extendable); + return VOIDPTR_AT(msg, -sizeof(upb_msg_internal_withext)); +} + +static const upb_msg_internal_withext *upb_msg_getinternalwithext_const( + const upb_msg *msg, const upb_msglayout *l) { + UPB_ASSERT(l->data.extendable); + return VOIDPTR_AT(msg, -sizeof(upb_msg_internal_withext)); +} static upb_inttable *upb_msg_trygetextdict(const upb_msg *msg, const upb_msglayout *l) { - return l->has_extdict ? 
DEREF(msg, l->extdict_offset, upb_inttable*) : NULL; + return upb_msg_getinternalwithext_const(msg, l)->extdict; } static upb_inttable *upb_msg_getextdict(upb_msg *msg, const upb_msglayout *l, upb_alloc *a) { upb_inttable *ext_dict; - UPB_ASSERT(l->has_extdict); - ext_dict = upb_msg_trygetextdict(msg, l); + ext_dict = upb_msg_getinternalwithext(msg, l)->extdict; if (!ext_dict) { ext_dict = upb_malloc(a, sizeof(upb_inttable)); @@ -656,7 +699,7 @@ static upb_inttable *upb_msg_getextdict(upb_msg *msg, return NULL; } - DEREF(msg, l->extdict_offset, upb_inttable*) = ext_dict; + upb_msg_getinternalwithext(msg, l)->extdict = ext_dict; } return ext_dict; @@ -665,7 +708,7 @@ static upb_inttable *upb_msg_getextdict(upb_msg *msg, static uint32_t upb_msg_getoneofint(const upb_msg *msg, const upb_oneofdef *o, const upb_msglayout *l) { - size_t oneof_ofs = l->case_offsets[upb_oneofdef_index(o)]; + size_t oneof_ofs = l->data.case_offsets[upb_oneofdef_index(o)]; return DEREF(msg, oneof_ofs, uint8_t); } @@ -673,7 +716,7 @@ static void upb_msg_setoneofcase(const upb_msg *msg, const upb_oneofdef *o, const upb_msglayout *l, uint32_t val) { - size_t oneof_ofs = l->case_offsets[upb_oneofdef_index(o)]; + size_t oneof_ofs = l->data.case_offsets[upb_oneofdef_index(o)]; DEREF(msg, oneof_ofs, uint8_t) = val; } @@ -683,46 +726,49 @@ static bool upb_msg_oneofis(const upb_msg *msg, const upb_msglayout *l, return upb_msg_getoneofint(msg, o, l) == upb_fielddef_number(f); } -size_t upb_msg_sizeof(const upb_msglayout *l) { return l->size; } +size_t upb_msg_sizeof(const upb_msglayout *l) { + return l->data.size + INTERNAL_MEMBERS_SIZE(l); +} -void upb_msg_init(upb_msg *msg, const upb_msglayout *l, upb_alloc *a) { - if (l->default_msg) { - memcpy(msg, l->default_msg, l->size); +upb_msg *upb_msg_init(void *mem, const upb_msglayout *l, upb_alloc *a) { + upb_msg *msg = VOIDPTR_AT(mem, INTERNAL_MEMBERS_SIZE(l)); + if (l->data.default_msg) { + memcpy(msg, l->data.default_msg, l->data.size); } else { - 
memset(msg, 0, l->size); + memset(msg, 0, l->data.size); + } + + upb_msg_getinternal(msg)->alloc = a; + if (l->data.extendable) { + upb_msg_getinternalwithext(msg, l)->extdict = NULL; } - /* Set arena pointer. */ - memcpy(msg, &a, sizeof(a)); + return msg; } -void upb_msg_uninit(upb_msg *msg, const upb_msglayout *l) { - upb_inttable *ext_dict = upb_msg_trygetextdict(msg, l); - if (ext_dict) { - upb_inttable_uninit2(ext_dict, upb_msg_alloc(msg, l)); +void *upb_msg_uninit(upb_msg *msg, const upb_msglayout *l) { + if (l->data.extendable) { + upb_inttable *ext_dict = upb_msg_getinternalwithext(msg, l)->extdict; + if (ext_dict) { + upb_inttable_uninit2(ext_dict, upb_msg_alloc(msg)); + upb_free(upb_msg_alloc(msg), ext_dict); + } } + + return VOIDPTR_AT(msg, -INTERNAL_MEMBERS_SIZE(l)); } upb_msg *upb_msg_new(const upb_msglayout *l, upb_alloc *a) { - upb_msg *msg = upb_malloc(a, upb_msg_sizeof(l)); - - if (msg) { - upb_msg_init(msg, l, a); - } - - return msg; + void *mem = upb_malloc(a, upb_msg_sizeof(l)); + return mem ? upb_msg_init(mem, l, a) : NULL; } void upb_msg_free(upb_msg *msg, const upb_msglayout *l) { - upb_msg_uninit(msg, l); - upb_free(upb_msg_alloc(msg, l), msg); + upb_free(upb_msg_alloc(msg), upb_msg_uninit(msg, l)); } -upb_alloc *upb_msg_alloc(const upb_msg *msg, const upb_msglayout *l) { - upb_alloc *alloc; - UPB_UNUSED(l); - memcpy(&alloc, msg, sizeof(alloc)); - return alloc; +upb_alloc *upb_msg_alloc(const upb_msg *msg) { + return upb_msg_getinternal_const(msg)->alloc; } bool upb_msg_has(const upb_msg *msg, @@ -743,7 +789,7 @@ bool upb_msg_has(const upb_msg *msg, return upb_msg_getoneofint(msg, o, l) == upb_fielddef_number(f); } else { /* Other fields are set when their hasbit is set. 
*/ - uint32_t hasbit = l->hasbits[upb_fielddef_index(f)]; + uint32_t hasbit = l->data.fields[upb_fielddef_index(f)].hasbit; return DEREF(msg, hasbit / 8, char) | (1 << (hasbit % 8)); } } @@ -761,7 +807,7 @@ upb_msgval upb_msg_get(const upb_msg *msg, const upb_fielddef *f, return upb_msgval_fromdefault(f); } } else { - size_t ofs = l->field_offsets[upb_fielddef_index(f)]; + size_t ofs = l->data.fields[upb_fielddef_index(f)].offset; const upb_oneofdef *o = upb_fielddef_containingoneof(f); upb_msgval ret; @@ -780,10 +826,10 @@ bool upb_msg_set(upb_msg *msg, const upb_fielddef *f, upb_msgval val, const upb_msglayout *l) { - upb_alloc *a = upb_msg_alloc(msg, l); upb_msg_checkfield(l, f); if (upb_fielddef_isextension(f)) { + upb_alloc *a = upb_msg_alloc(msg); /* TODO(haberman): introduce table API that can do this in one call. */ upb_inttable *ext = upb_msg_getextdict(msg, l, a); upb_value val2 = upb_toval(val); @@ -792,7 +838,7 @@ bool upb_msg_set(upb_msg *msg, return false; } } else { - size_t ofs = l->field_offsets[upb_fielddef_index(f)]; + size_t ofs = l->data.fields[upb_fielddef_index(f)].offset; const upb_oneofdef *o = upb_fielddef_containingoneof(f); if (o) { diff --git a/upb/msg.h b/upb/msg.h index 36470ae58a..c2b5cf0063 100644 --- a/upb/msg.h +++ b/upb/msg.h @@ -65,18 +65,6 @@ typedef void upb_msg; * instances of this from a upb_msgfactory, and the factory always owns the * msglayout. */ -/* Gets the factory for this layout */ -upb_msgfactory *upb_msglayout_factory(const upb_msglayout *l); - -/* Get the msglayout for a submessage. This requires that this field is a - * submessage, ie. upb_fielddef_issubmsg(upb_msglayout_msgdef(l)) == true. - * - * Since map entry messages don't have layouts, if upb_fielddef_ismap(f) == true - * then this function will return the layout for the map's value. It requires - * that the value type of the map field is a submessage. 
*/ -const upb_msglayout *upb_msglayout_sublayout(const upb_msglayout *l, - const upb_fielddef *f); - /* Returns the msgdef for this msglayout. */ const upb_msgdef *upb_msglayout_msgdef(const upb_msglayout *l); @@ -212,19 +200,29 @@ size_t upb_msg_sizeof(const upb_msglayout *l); * upb_msg_uninit() must be called to release internally-allocated memory * unless the allocator is an arena that does not require freeing. * + * Please note that upb_msg_init() may return a value that is different than + * |msg|, so you must assign the return value and not cast your memory block + * to upb_msg* directly! + * * Please note that upb_msg_uninit() does *not* free any submessages, maps, * or arrays referred to by this message's fields. You must free them manually - * yourself. */ -void upb_msg_init(upb_msg *msg, const upb_msglayout *l, upb_alloc *a); -void upb_msg_uninit(upb_msg *msg, const upb_msglayout *l); + * yourself. + * + * upb_msg_uninit returns the original memory block, which may be useful if + * you dynamically allocated it (though upb_msg_new() would normally be more + * appropriate in this case). */ +upb_msg *upb_msg_init(void *msg, const upb_msglayout *l, upb_alloc *a); +void *upb_msg_uninit(upb_msg *msg, const upb_msglayout *l); /* Like upb_msg_init() / upb_msg_uninit(), except the message's memory is * allocated / freed from the given upb_alloc. */ upb_msg *upb_msg_new(const upb_msglayout *l, upb_alloc *a); void upb_msg_free(upb_msg *msg, const upb_msglayout *l); -/* Returns the upb_alloc for the given message. */ -upb_alloc *upb_msg_alloc(const upb_msg *msg, const upb_msglayout *l); +/* Returns the upb_alloc for the given message. + * TODO(haberman): get rid of this? Not sure we want to be storing this + * for every message. */ +upb_alloc *upb_msg_alloc(const upb_msg *msg); /* Packs the tree of messages rooted at "msg" into a single hunk of memory, * allocated from the given allocator. 
*/ @@ -400,6 +398,43 @@ bool upb_msg_getscalarhandlerdata(const upb_handlers *h, size_t *offset, int32_t *hasbit); + +/** Interfaces for generated code *********************************************/ + +struct upb_msglayout_strinit_v1 { + const char *ptr; + uint32_t length; +}; + +struct upb_msglayout_fieldinit_v1 { + uint32_t number; + uint32_t offset; + uint16_t hasbit; + uint16_t oneof_index; + uint16_t submsg_index; + uint8_t type; + uint8_t label; +}; + +struct upb_msglayout_msginit_v1 { + struct upb_msglayout_fieldinit_v1 *fields; + struct upb_msglayout_msginit_v1 **submsgs; + uint32_t *case_offsets; + void *default_msg; + /* Must be aligned to 8. Doesn't include internal members like unknown + * fields, extension dict, pointer to msglayout, etc. */ + uint32_t size; + bool extendable; + char align; +}; + +/* Initialize/uninitialize a msglayout from a msginit. If upb uses v1 + * internally, this will not allocate any memory. Should only be used by + * generated code. */ +upb_msglayout *upb_msglayout_frominit_v1( + const struct upb_msglayout_msginit_v1 *init, upb_alloc *a); +void upb_msglayout_uninit_v1(upb_msglayout *layout, upb_alloc *a); + UPB_END_EXTERN_C #endif /* UPB_MSG_H_ */