From c486da3970cd26d66c6aff8b541446388821cab0 Mon Sep 17 00:00:00 2001 From: Joshua Haberman Date: Mon, 11 Nov 2019 16:59:24 -0800 Subject: [PATCH] WIP. --- BUILD | 1 + upb/bindings/lua/msg.c | 85 ++++++++------ upb/generated_util.h | 41 ++----- upb/legacy_msg_reflection.c | 56 ++++------ upb/legacy_msg_reflection.h | 217 ++++++++++++++---------------------- upb/msg.c | 99 ++++++++++++---- upb/msg.h | 63 ++++++++--- upbc/generator.cc | 19 ++-- 8 files changed, 302 insertions(+), 279 deletions(-) diff --git a/BUILD b/BUILD index ad85b202a9..a9a421fe15 100644 --- a/BUILD +++ b/BUILD @@ -156,6 +156,7 @@ cc_library( deps = [ ":table", ":upb", + ":reflection", ], ) diff --git a/upb/bindings/lua/msg.c b/upb/bindings/lua/msg.c index be3ae7df16..8d319bc905 100644 --- a/upb/bindings/lua/msg.c +++ b/upb/bindings/lua/msg.c @@ -385,33 +385,43 @@ static bool lupb_istypewrapped(upb_fieldtype_t type) { static upb_msgval lupb_tomsgval(lua_State *L, upb_fieldtype_t type, int narg, const lupb_msgclass *lmsgclass) { + upb_msgval ret; switch (type) { case UPB_TYPE_INT32: case UPB_TYPE_ENUM: - return upb_msgval_int32(lupb_checkint32(L, narg)); + ret.int32_val = lupb_checkint32(L, narg); + break; case UPB_TYPE_INT64: - return upb_msgval_int64(lupb_checkint64(L, narg)); + ret.int64_val = lupb_checkint64(L, narg); + break; case UPB_TYPE_UINT32: - return upb_msgval_uint32(lupb_checkuint32(L, narg)); + ret.uint32_val = lupb_checkuint32(L, narg); + break; case UPB_TYPE_UINT64: - return upb_msgval_uint64(lupb_checkuint64(L, narg)); + ret.uint64_val = lupb_checkuint64(L, narg); + break; case UPB_TYPE_DOUBLE: - return upb_msgval_double(lupb_checkdouble(L, narg)); + ret.double_val = lupb_checkdouble(L, narg); + break; case UPB_TYPE_FLOAT: - return upb_msgval_float(lupb_checkfloat(L, narg)); + ret.float_val = lupb_checkfloat(L, narg); + break; case UPB_TYPE_BOOL: - return upb_msgval_bool(lupb_checkbool(L, narg)); + ret.bool_val = lupb_checkbool(L, narg); + break; case UPB_TYPE_STRING: case UPB_TYPE_BYTES: { size_t len; const char *ptr = lupb_checkstring(L, narg, &len); - return upb_msgval_makestr(ptr, len); + ret.str_val = upb_strview_make(ptr, len); + break; } case UPB_TYPE_MESSAGE: UPB_ASSERT(lmsgclass); - return upb_msgval_msg(lupb_msg_checkmsg(L, narg, lmsgclass)); + ret.msg_val = lupb_msg_checkmsg(L, narg, lmsgclass); + break; } - UPB_UNREACHABLE(); + return ret; } static void lupb_pushmsgval(lua_State *L, upb_fieldtype_t type, @@ -419,25 +429,25 @@ static void lupb_pushmsgval(lua_State *L, upb_fieldtype_t type, switch (type) { case UPB_TYPE_INT32: case UPB_TYPE_ENUM: - lupb_pushint32(L, upb_msgval_getint32(val)); + lupb_pushint32(L, val.int32_val); return; case UPB_TYPE_INT64: - lupb_pushint64(L, upb_msgval_getint64(val)); + lupb_pushint64(L, val.int64_val); return; case UPB_TYPE_UINT32: - lupb_pushuint32(L, upb_msgval_getuint32(val)); + lupb_pushuint32(L, val.uint32_val); return; case UPB_TYPE_UINT64: - lupb_pushuint64(L, upb_msgval_getuint64(val)); + lupb_pushuint64(L, val.uint64_val); return; case UPB_TYPE_DOUBLE: - lupb_pushdouble(L, upb_msgval_getdouble(val)); + lupb_pushdouble(L, val.double_val); return; case UPB_TYPE_FLOAT: - lupb_pushfloat(L, upb_msgval_getfloat(val)); + lupb_pushfloat(L, val.float_val); return; case UPB_TYPE_BOOL: - lua_pushboolean(L, upb_msgval_getbool(val)); + lua_pushboolean(L, val.bool_val); return; case UPB_TYPE_STRING: case UPB_TYPE_BYTES: @@ -486,6 +496,7 @@ static lupb_array *lupb_array_check(lua_State *L, int narg) { static upb_msgval lupb_array_typecheck(lua_State *L, int narg, int msg, const upb_fielddef *f) { lupb_array *larray = lupb_array_check(L, narg); + upb_msgval val; if (larray->type != upb_fielddef_type(f) || lupb_msg_getsubmsgclass(L, msg, f) != larray->lmsgclass) { @@ -498,7 +509,8 @@ static upb_msgval lupb_array_typecheck(lua_State *L, int narg, int msg, larray->lmsgclass); } - return upb_msgval_arr(larray->arr); + val.array_val = larray->arr; + return val; } /** @@ -599,6 +611,8 @@ static const struct luaL_Reg lupb_array_mm[] = { typedef struct { const lupb_msgclass *value_lmsgclass; + upb_fieldtype_t key_type; + upb_fieldtype_t value_type; upb_map *map; } lupb_map; @@ -627,16 +641,17 @@ static upb_msgval lupb_map_typecheck(lua_State *L, int narg, int msg, UPB_ASSERT(entry && key_field && value_field); - if (upb_map_keytype(map) != upb_fielddef_type(key_field)) { - luaL_error(L, "Map key type invalid"); + if (lmap->key_type != upb_fielddef_type(key_field)) { + luaL_error(L, "Map had incorrect field type (expected: %s, got: %s)", + upb_fielddef_type(key_field), lmap->key_type); } - if (upb_map_valuetype(map) != upb_fielddef_type(value_field)) { + if (lmap->value_type != upb_fielddef_type(value_field)) { luaL_error(L, "Map had incorrect value type (expected: %s, got: %s)", - upb_fielddef_type(value_field), upb_map_valuetype(map)); + upb_fielddef_type(value_field), lmap->value_type); } - if (upb_map_valuetype(map) == UPB_TYPE_MESSAGE) { + if (lmap->value_type == UPB_TYPE_MESSAGE) { lupb_msgclass_typecheck( L, lupb_msg_msgclassfor(L, msg, upb_fielddef_msgsubdef(value_field)), lmap->value_lmsgclass); @@ -672,6 +687,8 @@ static int lupb_map_new(lua_State *L) { lupb_uservalseti(L, -1, MAP_MSGCLASS_INDEX, 2); /* GC-root lmsgclass. */ } + lmap->key_type = key_type; + lmap->value_type = value_type; lmap->value_lmsgclass = value_lmsgclass; lmap->map = upb_map_new(key_type, value_type, lupb_arena_get(L)); @@ -687,9 +704,9 @@ static int lupb_map_new(lua_State *L) { static int lupb_map_index(lua_State *L) { lupb_map *lmap = lupb_map_check(L, 1); upb_map *map = lmap->map; - upb_fieldtype_t valtype = upb_map_valuetype(map); + upb_fieldtype_t valtype = lmap->value_type; /* We don't always use "key", but this call checks the key type. */ - upb_msgval key = lupb_tomsgval(L, upb_map_keytype(map), 2, NULL); + upb_msgval key = lupb_tomsgval(L, lmap->key_type, 2, NULL); if (lupb_istypewrapped(valtype)) { /* Userval contains the full map, lookup there by key. */ @@ -704,7 +721,7 @@ static int lupb_map_index(lua_State *L) { /* Lookup in upb_map. */ upb_msgval val; if (upb_map_get(map, key, &val)) { - lupb_pushmsgval(L, upb_map_valuetype(map), val); + lupb_pushmsgval(L, lmap->value_type, val); } else { lua_pushnil(L); } @@ -735,13 +752,13 @@ static int lupb_map_len(lua_State *L) { static int lupb_map_newindex(lua_State *L) { lupb_map *lmap = lupb_map_check(L, 1); upb_map *map = lmap->map; - upb_msgval key = lupb_tomsgval(L, upb_map_keytype(map), 2, NULL); + upb_msgval key = lupb_tomsgval(L, lmap->key_type, 2, NULL); if (lua_isnil(L, 3)) { /* Delete from map. */ upb_map_del(map, key); - if (lupb_istypewrapped(upb_map_valuetype(map))) { + if (lupb_istypewrapped(lmap->value_type)) { /* Delete in userval. */ lupb_getuservalue(L, 1); lua_pushvalue(L, 2); @@ -752,11 +769,11 @@ static int lupb_map_newindex(lua_State *L) { } else { /* Set in map. */ upb_msgval val = - lupb_tomsgval(L, upb_map_valuetype(map), 3, lmap->value_lmsgclass); + lupb_tomsgval(L, lmap->value_type, 3, lmap->value_lmsgclass); upb_map_set(map, key, val, NULL); - if (lupb_istypewrapped(upb_map_valuetype(map))) { + if (lupb_istypewrapped(lmap->value_type)) { /* Set in userval. */ lupb_getuservalue(L, 1); lua_pushvalue(L, 2); @@ -780,8 +797,8 @@ static int lupb_mapiter_next(lua_State *L) { return 0; } - lupb_pushmsgval(L, upb_map_keytype(map), upb_mapiter_key(i)); - lupb_pushmsgval(L, upb_map_valuetype(map), upb_mapiter_value(i)); + lupb_pushmsgval(L, lmap->key_type, upb_mapiter_key(i)); + lupb_pushmsgval(L, lmap->value_type, upb_mapiter_value(i)); upb_mapiter_next(i); return 2; @@ -790,8 +807,8 @@ static int lupb_mapiter_next(lua_State *L) { static int lupb_map_pairs(lua_State *L) { lupb_map *lmap = lupb_map_check(L, 1); - if (lupb_istypewrapped(upb_map_keytype(lmap->map)) || - lupb_istypewrapped(upb_map_valuetype(lmap->map))) { + if (lupb_istypewrapped(lmap->key_type) || + lupb_istypewrapped(lmap->value_type)) { /* Complex key or value type. * Sync upb_map to userval if necessary, then iterate over userval. */ diff --git a/upb/generated_util.h b/upb/generated_util.h index 234bcdad3c..8151e45583 100644 --- a/upb/generated_util.h +++ b/upb/generated_util.h @@ -37,48 +37,29 @@ UPB_INLINE void *_upb_array_mutable_accessor(void *msg, size_t ofs, } } -/* TODO(haberman): this is a mess. It will improve when upb_array no longer - * carries reflective state (type, elem_size). */ UPB_INLINE void *_upb_array_resize_accessor(void *msg, size_t ofs, size_t size, size_t elem_size, - upb_fieldtype_t type, upb_arena *arena) { - upb_array *arr = *PTR_AT(msg, ofs, upb_array*); - - if (!arr) { - arr = upb_array_new(arena); - if (!arr) return NULL; - *PTR_AT(msg, ofs, upb_array*) = arr; - } - - if (size > arr->size) { - size_t new_size = UPB_MAX(arr->size, 4); - size_t old_bytes = arr->size * elem_size; - size_t new_bytes; - while (new_size < size) new_size *= 2; - new_bytes = new_size * elem_size; - arr->data = upb_arena_realloc(arena, arr->data, old_bytes, new_bytes); - if (!arr->data) { - return NULL; - } - arr->size = new_size; + upb_array **arr_ptr = PTR_AT(msg, ofs, upb_array*); + upb_array *arr = *arr_ptr; + if (!arr || arr->size < size) { + return _upb_array_resize_fallback(arr_ptr, size, elem_size, arena); } - arr->len = size; return arr->data; } UPB_INLINE bool _upb_array_append_accessor(void *msg, size_t ofs, size_t elem_size, - upb_fieldtype_t type, const void *value, upb_arena *arena) { - upb_array *arr = *PTR_AT(msg, ofs, upb_array*); - size_t i = arr ? arr->len : 0; - void *data = - _upb_array_resize_accessor(msg, ofs, i + 1, elem_size, type, arena); - if (!data) return false; - memcpy(PTR_AT(data, i * elem_size, char), value, elem_size); + upb_array **arr_ptr = PTR_AT(msg, ofs, upb_array*); + upb_array *arr = *arr_ptr; + if (!arr || arr->len == arr->size) { + return _upb_array_append_fallback(arr_ptr, elem_size, value, arena); + } + memcpy(PTR_AT(arr->data, arr->len * elem_size, char), value, elem_size); + arr->len++; return true; } diff --git a/upb/legacy_msg_reflection.c b/upb/legacy_msg_reflection.c index 7abb368a76..ab0ad8e8e0 100644 --- a/upb/legacy_msg_reflection.c +++ b/upb/legacy_msg_reflection.c @@ -20,6 +20,7 @@ bool upb_fieldtype_mapkeyok(upb_fieldtype_t type) { /** upb_msgval ****************************************************************/ +#if 0 /* These functions will generate real memcpy() calls on ARM sadly, because * the compiler assumes they might not be aligned. */ @@ -163,53 +164,38 @@ void upb_msg_set(upb_msg *msg, int field_index, upb_msgval val, upb_msgval_write(msg, field->offset, val, size); } +#endif /** upb_array *****************************************************************/ -#define DEREF_ARR(arr, i, type) ((type*)arr->data)[i] - size_t upb_array_size(const upb_array *arr) { return arr->len; } -upb_msgval upb_array_get(const upb_array *arr, upb_fieldtype_t type, size_t i) { - size_t element_size = upb_msgval_sizeof2(type); - UPB_ASSERT(i < arr->len); - return upb_msgval_read(arr->data, i * element_size, element_size); +const void *upb_array_get(const upb_array *arr, size_t *size) { + if (size) *size = arr->len; + return arr->data; } -bool upb_array_set(upb_array *arr, upb_fieldtype_t type, size_t i, - upb_msgval val, upb_arena *arena) { - size_t element_size = upb_msgval_sizeof2(type); - UPB_ASSERT(i <= arr->len); - - if (i == arr->len) { - /* Extending the array. */ - - if (i == arr->size) { - /* Need to reallocate. */ - size_t new_size = UPB_MAX(arr->size * 2, 8); - size_t new_bytes = new_size * element_size; - size_t old_bytes = arr->size * element_size; - upb_alloc *alloc = upb_arena_alloc(arena); - upb_msgval *new_data = - upb_realloc(alloc, arr->data, old_bytes, new_bytes); - - if (!new_data) { - return false; - } - - arr->data = new_data; - arr->size = new_size; - } +void* upb_array_getmutable(upb_array *arr, size_t *size) { + if (size) *size = arr->len; + return arr->data; +} - arr->len = i + 1; +/* Resizes the array to the given size, reallocating if necessary, and returns a + * pointer to the new array elements. */ +void *upb_array_resize(upb_array *arr, size_t size, upb_fieldtype_t type, + upb_arena *arena) { + int elem_size = _upb_fieldtype_to_size[type]; + if (size > arr->size && !_upb_array_realloc(arr, size, elem_size, arena)) { + return NULL; } - - upb_msgval_write(arr->data, i * element_size, val, element_size); - return true; + arr->len = size; + return arr->data; } +#if 0 + /** upb_map *******************************************************************/ static void upb_map_tokey(upb_descriptortype_t type, upb_msgval *key, @@ -375,3 +361,5 @@ void upb_mapiter_setdone(upb_mapiter *i) { bool upb_mapiter_isequal(const upb_mapiter *i1, const upb_mapiter *i2) { return upb_strtable_iter_isequal(&i1->iter, &i2->iter); } + +#endif diff --git a/upb/legacy_msg_reflection.h b/upb/legacy_msg_reflection.h index 62b05fa976..cd6db7c1ec 100644 --- a/upb/legacy_msg_reflection.h +++ b/upb/legacy_msg_reflection.h @@ -2,181 +2,132 @@ #ifndef UPB_LEGACY_MSG_REFLECTION_H_ #define UPB_LEGACY_MSG_REFLECTION_H_ -#include "upb/upb.h" +#include "upb/def.h" #include "upb/msg.h" +#include "upb/upb.h" #include "upb/port_def.inc" -struct upb_mapiter; -typedef struct upb_mapiter upb_mapiter; - -/** upb_msgval ****************************************************************/ - -/* A union representing all possible protobuf values. Used for generic get/set - * operations. */ - typedef union { - bool b; - float flt; - double dbl; - int32_t i32; - int64_t i64; - uint32_t u32; - uint64_t u64; - const upb_map* map; - const upb_msg* msg; - const upb_array* arr; - const void* ptr; - upb_strview str; + bool bool_val; + float float_val; + double double_val; + int32_t int32_val; + int64_t int64_val; + uint32_t uint32_val; + uint64_t uint64_val; + const upb_map* map_val; + const upb_msg* msg_val; + const upb_array* array_val; + upb_strview str_val; } upb_msgval; -#define ACCESSORS(name, membername, ctype) \ - UPB_INLINE ctype upb_msgval_get ## name(upb_msgval v) { \ - return v.membername; \ - } \ - UPB_INLINE void upb_msgval_set ## name(upb_msgval *v, ctype cval) { \ - v->membername = cval; \ - } \ - UPB_INLINE upb_msgval upb_msgval_ ## name(ctype v) { \ - upb_msgval ret; \ - ret.membername = v; \ - return ret; \ - } - -ACCESSORS(bool, b, bool) -ACCESSORS(float, flt, float) -ACCESSORS(double, dbl, double) -ACCESSORS(int32, i32, int32_t) -ACCESSORS(int64, i64, int64_t) -ACCESSORS(uint32, u32, uint32_t) -ACCESSORS(uint64, u64, uint64_t) -ACCESSORS(map, map, const upb_map*) -ACCESSORS(msg, msg, const upb_msg*) -ACCESSORS(ptr, ptr, const void*) -ACCESSORS(arr, arr, const upb_array*) -ACCESSORS(str, str, upb_strview) - -#undef ACCESSORS - -UPB_INLINE upb_msgval upb_msgval_makestr(const char *data, size_t size) { - return upb_msgval_str(upb_strview_make(data, size)); -} +typedef union { + upb_map* map; + upb_msg* msg; + upb_array* array; +} upb_mutmsgval; /** upb_msg *******************************************************************/ -/* A upb_msg represents a protobuf message. It always corresponds to a specific - * upb_msglayout, which describes how it is laid out in memory. */ - -/* Read-only message API. Can be safely called by anyone. */ - -/* Returns the value associated with this field: - * - for scalar fields (including strings), the value directly. - * - return upb_msg*, or upb_map* for msg/map. - * If the field is unset for these field types, returns NULL. - * - * TODO(haberman): should we let users store cached array/map/msg - * pointers here for fields that are unset? Could be useful for the - * strongly-owned submessage model (ie. generated C API that doesn't use - * arenas). - */ -upb_msgval upb_msg_get(const upb_msg *msg, - int field_index, +/* Returns the value associated with this field. */ +upb_msgval upb_msg_get(const upb_msg *msg, const upb_fielddef *f, const upb_msglayout *l); +/* Returns a mutable pointer to a map, array, or submessage value, constructing + * a new object if it was not previously present. May not be called for + * primitive fields. */ +upb_mutmsgval upb_msg_mutable(upb_msg *msg, const upb_fielddef *f, + const upb_msglayout *l, upb_arena *a); + /* May only be called for fields where upb_fielddef_haspresence(f) == true. */ -bool upb_msg_has(const upb_msg *msg, - int field_index, +bool upb_msg_has(const upb_msg *msg, const upb_fielddef *f, const upb_msglayout *l); -/* Mutable message API. May only be called by the owner of the message who - * knows its ownership scheme and how to keep it consistent. */ - -/* Sets the given field to the given value. Does not perform any memory - * management: if you overwrite a pointer to a msg/array/map/string without - * cleaning it up (or using an arena) it will leak. - */ -void upb_msg_set(upb_msg *msg, - int field_index, - upb_msgval val, - const upb_msglayout *l); +/* Sets the given field to the given value. For a msg/array/map/string, the + * value must be in the same arena. */ +void upb_msg_set(upb_msg *msg, const upb_fielddef *f, upb_msgval val, + const upb_msglayout *l, upb_arena *a); -/* For a primitive field, set it back to its default. For repeated, string, and - * submessage fields set it back to NULL. This could involve releasing some - * internal memory (for example, from an extension dictionary), but it is not - * recursive in any way and will not recover any memory that may be used by - * arrays/maps/strings/msgs that this field may have pointed to. - */ -bool upb_msg_clearfield(upb_msg *msg, - int field_index, +/* Clears any field presence and sets the value back to its default. */ +void upb_msg_clearfield(upb_msg *msg, const upb_fielddef *f, const upb_msglayout *l); -/* TODO(haberman): copyfrom()/mergefrom()? */ - /** upb_array *****************************************************************/ -/* A upb_array stores data for a repeated field. The memory management - * semantics are the same as upb_msg. A upb_array allocates dynamic - * memory internally for the array elements. */ - -/* Read-only interface. Safe for anyone to call. */ - +/* Returns the size of the array. */ size_t upb_array_size(const upb_array *arr); -upb_msgval upb_array_get(const upb_array *arr, upb_fieldtype_t type, size_t i); -/* Write interface. May only be called by the message's owner who can enforce - * its memory management invariants. */ +/* Returns the given element. */ +upb_msgval upb_array_get(const upb_array *arr, size_t i); -bool upb_array_set(upb_array *arr, upb_fieldtype_t type, size_t i, - upb_msgval val, upb_arena *arena); +/* Sets the given element, which must be within the array's current size. */ +void upb_array_set(upb_array *arr, size_t i, upb_msgval val); -/** upb_map *******************************************************************/ +/* Appends an element to the array. Returns false on allocation failure. */ +bool upb_array_append(upb_array *array, upb_msgval val); -/* A upb_map stores data for a map field. The memory management semantics are - * the same as upb_msg, with one notable exception. upb_map will internally - * store a copy of all string keys, but *not* any string values or submessages. - * So you must ensure that any string or message values outlive the map, and you - * must delete them manually when they are no longer required. */ +/* Changes the size of a vector. New elements are initialized to empty/0. + * Returns false on allocation failure. */ +bool upb_array_resize(upb_array *array, size_t size); -/* Read-only interface. Safe for anyone to call. */ +/** upb_map *******************************************************************/ +/* Returns the number of entries in the map. */ size_t upb_map_size(const upb_map *map); -bool upb_map_get(const upb_map *map, upb_msgval key, upb_msgval *val, - const upb_msglayout *l); -/* Write interface. May only be called by the message's owner who can enforce - * its memory management invariants. */ +/* Stores a value for the given key into |*val| (or the zero value if the key is + * not present). Returns whether the key was present. The |val| pointer may be + * NULL, in which case the function tests whether the given key is present. */ +bool upb_map_get(const upb_map *map, upb_msgval key, upb_msgval *val); -/* Sets or overwrites an entry in the map. Return value indicates whether - * the operation succeeded or failed with OOM, and also whether an existing - * key was replaced or not. */ -bool upb_map_set(upb_map *map, upb_msgval key, upb_msgval val, - const upb_msglayout *l, upb_arena *arena); +/* Removes all entries in the map. */ +void upb_map_clear(upb_map *map); -/* Deletes an entry in the map. Returns true if the key was present. */ -bool upb_map_del(upb_map *map, upb_msgval key, const upb_msglayout *l, - upb_arena *arena); +/* Sets the given key to the given value. Returns true if this was a new key in + * the map, or false if an existing key was replaced. */ +bool upb_map_set(upb_map *map, upb_msgval key, upb_msgval val); + +/* Deletes this key from the table. Returns true if the key was present. */ +bool upb_map_delete(upb_map *map, upb_msgval key); /** upb_mapiter ***************************************************************/ /* For iterating over a map. Map iterators are invalidated by mutations to the - * map, but an invalidated iterator will never return junk or crash the process. - * An invalidated iterator may return entries that were already returned though, - * and if you keep invalidating the iterator during iteration, the program may - * enter an infinite loop. */ + * map, but an invalidated iterator will never return junk or crash the process + * (this is an important property when exposing iterators to interpreted + * languages like Ruby, PHP, etc). An invalidated iterator may return entries + * that were already returned though, and if you keep invalidating the iterator + * during iteration, the program may enter an infinite loop. */ +struct upb_mapiter; +typedef struct upb_mapiter upb_mapiter; size_t upb_mapiter_sizeof(void); -void upb_mapiter_begin(upb_mapiter *i, const upb_msglayout *layout, - const upb_map *t); -upb_mapiter *upb_mapiter_new(const upb_map *t, const upb_msglayout *layout, - upb_alloc *a); -void upb_mapiter_free(upb_mapiter *i, upb_alloc *a); +/* Starts iteration. If the map is mutable then we can modify entries while + * iterating. */ +void upb_mapiter_constbegin(upb_mapiter *i, const upb_map *t); +void upb_mapiter_begin(upb_mapiter *i, upb_map *t); + +/* Sets the iterator to "done" state. This will return "true" from + * upb_mapiter_done() and will compare equal to other "done" iterators. */ +void upb_mapiter_setdone(upb_mapiter *i); + +/* Advances to the next entry. The iterator must not be done. */ void upb_mapiter_next(upb_mapiter *i); -bool upb_mapiter_done(const upb_mapiter *i); +/* Returns the key and value for this entry of the map. */ upb_msgval upb_mapiter_key(const upb_mapiter *i); upb_msgval upb_mapiter_value(const upb_mapiter *i); -void upb_mapiter_setdone(upb_mapiter *i); + +/* Sets the value for this entry. The iterator must not be done, and the + * iterator must not have been initialized const. */ +void upb_mapiter_setvalue(const upb_mapiter *i, upb_msgval value); + +/* Returns true if the iterator is done. */ +bool upb_mapiter_done(const upb_mapiter *i); + +/* Compares two iterators for equality. */ bool upb_mapiter_isequal(const upb_mapiter *i1, const upb_mapiter *i2); #include "upb/port_undef.inc" diff --git a/upb/msg.c b/upb/msg.c index 41bacd861c..80bd7c1392 100644 --- a/upb/msg.c +++ b/upb/msg.c @@ -7,6 +7,8 @@ #define VOIDPTR_AT(msg, ofs) (void*)((char*)msg + (int)ofs) +/** upb_msg *******************************************************************/ + /* Internal members of a upb_msg. We can change this without breaking binary * compatibility. We put these before the user's data. The user's upb_msg* * points after the upb_msg_internal. */ @@ -24,6 +26,21 @@ typedef struct { upb_msg_internal base; } upb_msg_internal_withext; +char _upb_fieldtype_to_size[12] = { + 0, + 1, /* UPB_TYPE_BOOL */ + 4, /* UPB_TYPE_FLOAT */ + 4, /* UPB_TYPE_INT32 */ + 4, /* UPB_TYPE_UINT32 */ + 4, /* UPB_TYPE_ENUM */ + sizeof(void*), /* UPB_TYPE_STRING */ + sizeof(void*), /* UPB_TYPE_BYTES */ + sizeof(void*), /* UPB_TYPE_MESSAGE */ + 8, /* UPB_TYPE_DOUBLE */ + 8, /* UPB_TYPE_INT64 */ + 8, /* UPB_TYPE_UINT64 */ +}; + static int upb_msg_internalsize(const upb_msglayout *l) { return sizeof(upb_msg_internal) - l->extendable * sizeof(void *); } @@ -74,6 +91,28 @@ upb_msg *upb_msg_new(const upb_msglayout *l, upb_arena *a) { return msg; } +void upb_msg_addunknown(upb_msg *msg, const char *data, size_t len, + upb_arena *arena) { + upb_msg_internal *in = upb_msg_getinternal(msg); + if (len > in->unknown_size - in->unknown_len) { + upb_alloc *alloc = upb_arena_alloc(arena); + size_t need = in->unknown_size + len; + size_t newsize = UPB_MAX(in->unknown_size * 2, need); + in->unknown = upb_realloc(alloc, in->unknown, in->unknown_size, newsize); + in->unknown_size = newsize; + } + memcpy(in->unknown + in->unknown_len, data, len); + in->unknown_len += len; +} + +const char *upb_msg_getunknown(const upb_msg *msg, size_t *len) { + const upb_msg_internal* in = upb_msg_getinternal_const(msg); + *len = in->unknown_len; + return in->unknown; +} + +/** upb_array *****************************************************************/ + static void upb_array_init(upb_array *arr) { arr->data = NULL; arr->len = 0; @@ -92,7 +131,44 @@ upb_array *upb_array_new(upb_arena *a) { return arr; } -upb_map *upb_map_new(upb_arena *a) { +bool _upb_array_realloc(upb_array *arr, size_t min_size, int elem_size, + upb_arena *arena) { + size_t new_size = UPB_MAX(arr->size, 4); + size_t old_bytes = arr->size * elem_size; + size_t new_bytes; + while (new_size < min_size) new_size *= 2; + new_bytes = new_size * elem_size; + arr->data = upb_arena_realloc(arena, arr->data, old_bytes, new_bytes); + if (!arr->data) { + arr->len = 0; + arr->size = 0; + return false; + } + arr->size = new_size; + return true; +} + +void *_upb_array_resize_fallback(upb_array **arr_ptr, size_t size, int elem_size, + upb_arena *arena) { + upb_array *arr = *arr_ptr; + if (!arr) { + arr = upb_array_new(arena); + if (!arr) return NULL; + *arr_ptr = arr; + } + + if (size > arr->size && !_upb_array_realloc(arr, size, elem_size, arena)) { + return NULL; + } + + arr->len = size; + return arr->data; +} + +/** upb_map *******************************************************************/ + +upb_map *upb_map_new(upb_arena *a, upb_fieldtype_t key_type, + upb_fieldtype_t value_type) { upb_map *map = upb_arena_malloc(a, sizeof(upb_map)); if (!map) { @@ -100,28 +176,11 @@ upb_map *upb_map_new(upb_arena *a) { } upb_strtable_init(&map->table, UPB_CTYPE_INT32); + map->key_type = key_type; + map->value_type = value_type; return map; } -void upb_msg_addunknown(upb_msg *msg, const char *data, size_t len, - upb_arena *arena) { - upb_msg_internal *in = upb_msg_getinternal(msg); - if (len > in->unknown_size - in->unknown_len) { - upb_alloc *alloc = upb_arena_alloc(arena); - size_t need = in->unknown_size + len; - size_t newsize = UPB_MAX(in->unknown_size * 2, need); - in->unknown = upb_realloc(alloc, in->unknown, in->unknown_size, newsize); - in->unknown_size = newsize; - } - memcpy(in->unknown + in->unknown_len, data, len); - in->unknown_len += len; -} - -const char *upb_msg_getunknown(const upb_msg *msg, size_t *len) { - const upb_msg_internal* in = upb_msg_getinternal_const(msg); - *len = in->unknown_len; - return in->unknown; -} #undef VOIDPTR_AT diff --git a/upb/msg.h b/upb/msg.h index d8a6defe8f..6c9d3eba62 100644 --- a/upb/msg.h +++ b/upb/msg.h @@ -1,7 +1,6 @@ /* -** Data structures for message tables, used for parsing and serialization. -** This are much lighter-weight than full reflection, but they are do not -** have enough information to convert to text format, JSON, etc. +** Our memory representation for parsing tables and messages themselves. +** Functions in this file are used by generated code and possible reflection. ** ** The definitions in this file are internal to upb. **/ @@ -46,29 +45,61 @@ typedef struct upb_msglayout { bool extendable; } upb_msglayout; -/** Message internal representation *******************************************/ +/** upb_msg *******************************************************************/ -/* Our internal representation for repeated fields. */ +/* Representation is in msg.c for now. */ + +/* Maps upb_fieldtype_t -> memory size. */ +extern char _upb_fieldtype_to_size[12]; + +/* Creates a new messages with the given layout on the given arena. */ +upb_msg *upb_msg_new(const upb_msglayout *l, upb_arena *a); + +/* Adds unknown data (serialized protobuf data) to the given message. The data + * is copied into the message instance. */ +void upb_msg_addunknown(upb_msg *msg, const char *data, size_t len, + upb_arena *arena); + +/* Returns a reference to the message's unknown data. */ +const char *upb_msg_getunknown(const upb_msg *msg, size_t *len); + +/** upb_array *****************************************************************/ + +/* Our internal representation for repeated fields. */ typedef struct { - void *data; /* Each element is element_size. */ + void *data; /* Tagged: low 2 bits of ptr are lg2(elem size). */ size_t len; /* Measured in elements. */ size_t size; /* Measured in elements. */ } upb_array; -/* Our internal representation for maps. Right now we use strmaps for - * everything. We'll likely want to use integer-specific maps for - * integer-keyed maps.*/ +/* Creates a new array on the given arena. */ +upb_array *upb_array_new(upb_arena *a, upb_fieldtype_t type); + +/* Resizes the capacity of the array to be at least min_size. */ +bool _upb_array_realloc(upb_array *arr, size_t min_size, int elem_size, + upb_arena *arena); + +/* Fallback functions for when the accessors require a resize. */ +void *_upb_array_resize_fallback(upb_array **arr_ptr, size_t size, int elem_size, + upb_arena *arena); +bool _upb_array_append_fallback(upb_array **arr_ptr, int elem_size, + const void *value, upb_arena *arena); + +/** upb_map *******************************************************************/ + +/* Right now we use strmaps for everything. We'll likely want to use + * integer-specific maps for integer-keyed maps.*/ typedef struct { + /* We should pack these better and move them into table to avoid padding. */ + upb_fieldtype_t key_type; + upb_fieldtype_t value_type; + upb_strtable table; } upb_map; -upb_msg *upb_msg_new(const upb_msglayout *l, upb_arena *a); -upb_array *upb_array_new(upb_arena *a); -upb_map *upb_map_new(upb_arena *a); - -void upb_msg_addunknown(upb_msg *msg, const char *data, size_t len, - upb_arena *arena); -const char *upb_msg_getunknown(const upb_msg *msg, size_t *len); +/* Creates a new map on the given arena with this key/value type. */ +upb_map *upb_map_new(upb_arena *a, upb_fieldtype_t key_type, + upb_fieldtype_t value_type); #ifdef __cplusplus } /* extern "C" */ diff --git a/upbc/generator.cc b/upbc/generator.cc index 4d87a4f0f4..4419cc834c 100644 --- a/upbc/generator.cc +++ b/upbc/generator.cc @@ -429,37 +429,32 @@ void GenerateMessageInHeader(const protobuf::Descriptor* message, Output& output output( "UPB_INLINE $0* $1_resize_$2($1 *msg, size_t len, " "upb_arena *arena) {\n" - " return ($0*)_upb_array_resize_accessor(msg, $3, len, $4, $5, " - "arena);\n" + " return ($0*)_upb_array_resize_accessor(msg, $3, len, $4, arena);\n" "}\n", CType(field), msgname, field->name(), GetSizeInit(layout.GetFieldOffset(field)), - GetSizeInit(MessageLayout::SizeOfUnwrapped(field).size), - UpbType(field)); + GetSizeInit(MessageLayout::SizeOfUnwrapped(field).size)); if (field->cpp_type() == protobuf::FieldDescriptor::CPPTYPE_MESSAGE) { output( "UPB_INLINE struct $0* $1_add_$2($1 *msg, upb_arena *arena) {\n" " struct $0* sub = (struct $0*)upb_msg_new(&$3, arena);\n" - " bool ok = _upb_array_append_accessor(\n" - " msg, $4, $5, $6, &sub, arena);\n" + " bool ok = _upb_array_append_accessor(" + " msg, $4, $5, &sub, arena);\n" " if (!ok) return NULL;\n" " return sub;\n" "}\n", MessageName(field->message_type()), msgname, field->name(), MessageInit(field->message_type()), GetSizeInit(layout.GetFieldOffset(field)), - GetSizeInit(MessageLayout::SizeOfUnwrapped(field).size), - UpbType(field)); + GetSizeInit(MessageLayout::SizeOfUnwrapped(field).size)); } else { output( "UPB_INLINE bool $1_add_$2($1 *msg, $0 val, upb_arena *arena) {\n" - " return _upb_array_append_accessor(\n" - " msg, $3, $4, $5, &val, arena);\n" + " return _upb_array_append_accessor(msg, $3, $4, &val, arena);\n" "}\n", CType(field), msgname, field->name(), GetSizeInit(layout.GetFieldOffset(field)), - GetSizeInit(MessageLayout::SizeOfUnwrapped(field).size), - UpbType(field)); + GetSizeInit(MessageLayout::SizeOfUnwrapped(field).size)); } } else { // Non-repeated field.