diff --git a/benchmarks/compare.py b/benchmarks/compare.py index 9bdf787138..8d62d94272 100755 --- a/benchmarks/compare.py +++ b/benchmarks/compare.py @@ -58,7 +58,7 @@ def Benchmark(outbase, bench_cpu=True, runs=12, fasttable=False): baseline = "master" -bench_cpu = True +bench_cpu = False fasttable = False if len(sys.argv) > 1: diff --git a/tests/bindings/lua/test_upb.lua b/tests/bindings/lua/test_upb.lua index 84178aa79c..0f4a3d2725 100644 --- a/tests/bindings/lua/test_upb.lua +++ b/tests/bindings/lua/test_upb.lua @@ -91,6 +91,69 @@ function test_msg_map() assert_equal(12, msg2.map_int32_int32[6]) end +function test_map_sorting() + function msg_with_int32_entries(start, expand) + local msg = test_messages_proto3.TestAllTypesProto3() + for i=start,start + 8 do + msg.map_int32_int32[i] = i * 2 + end + + if expand then + for i=start+20,200 do + msg.map_int32_int32[i] = i + end + for i=start+20,200 do + msg.map_int32_int32[i] = nil + end + end + return msg + end + + function msg_with_msg_entries(expand) + local msg = test_messages_proto3.TestAllTypesProto3() + -- 8! = 40320 possible orderings makes it overwhelmingly likely that two + -- random orderings will be different. + for i=1,8 do + local submsg = test_messages_proto3.TestAllTypesProto3.NestedMessage() + submsg.corecursive = msg_with_int32_entries(i, expand) + msg.map_string_nested_message[tostring(i)] = submsg + end + + expand = false + if expand then + for i=21,2000 do + local submsg = test_messages_proto3.TestAllTypesProto3.NestedMessage() + submsg.corecursive = msg_with_int32_entries(i, expand) + msg.map_string_nested_message[tostring(i)] = submsg + end + for i=21,2000 do + msg.map_string_nested_message[tostring(i)] = nil + end + end + return msg + end + + -- Create two messages with the same contents but (hopefully) different + -- map table orderings. + local msg = msg_with_msg_entries(false) + local msg2 = msg_with_msg_entries(true) + + local text1 = upb.text_encode(msg) + local text2 = upb.text_encode(msg2) + assert_equal(text1, text2) + + local binary1 = upb.encode(msg, {upb.ENCODE_DETERMINISTIC}) + local binary2 = upb.encode(msg2, {upb.ENCODE_DETERMINISTIC}) + assert_equal(binary1, binary2) + + -- Non-sorted map should compare different. + local text3 = upb.text_encode(msg, {upb.TXTENC_NOSORT}) + assert_not_equal(text1, text3) + + local binary3 = upb.encode(msg) + assert_not_equal(binary1, binary3) +end + function test_utf8() local proto2_msg = test_messages_proto2.TestAllTypesProto2() proto2_msg.optional_string = "\xff" diff --git a/upb/bindings/lua/def.c b/upb/bindings/lua/def.c index 3bf1d9654f..ca277c8354 100644 --- a/upb/bindings/lua/def.c +++ b/upb/bindings/lua/def.c @@ -53,12 +53,12 @@ static void lupb_wrapper_pushwrapper(lua_State *L, int narg, const void *def, /* lupb_msgdef_pushsubmsgdef() * * Pops the msgdef wrapper at the top of the stack and replaces it with a msgdef - * wrapper for field |f| of this msgdef. + * wrapper for field |f| of this msgdef (submsg may not be direct, for example it + * may be the submessage of the map value). */ void lupb_msgdef_pushsubmsgdef(lua_State *L, const upb_fielddef *f) { const upb_msgdef *m = upb_fielddef_msgsubdef(f); assert(m); - assert(upb_fielddef_containingtype(f) == lupb_msgdef_check(L, -1)); lupb_wrapper_pushwrapper(L, -1, m, LUPB_MSGDEF); lua_replace(L, -2); /* Replace msgdef with submsgdef. */ } @@ -337,6 +337,26 @@ static int lupb_msgdef_oneofcount(lua_State *L) { return 1; } +static bool lupb_msgdef_pushnested(lua_State *L, int msgdef, int name) { + const upb_msgdef *m = lupb_msgdef_check(L, msgdef); + lupb_wrapper_pushsymtab(L, msgdef); + upb_symtab *symtab = lupb_symtab_check(L, -1); + lua_pop(L, 1); + + /* Construct full package.Message.SubMessage name. */ + lua_pushstring(L, upb_msgdef_fullname(m)); + lua_pushstring(L, "."); + lua_pushvalue(L, name); + lua_concat(L, 3); + const char *nested_name = lua_tostring(L, -1); + + /* Try lookup. */ + const upb_msgdef *nested = upb_symtab_lookupmsg(symtab, nested_name); + if (!nested) return false; + lupb_wrapper_pushwrapper(L, msgdef, nested, LUPB_MSGDEF); + return true; +} + /* lupb_msgdef_field() * * Handles: @@ -430,6 +450,13 @@ static int lupb_msgdef_fullname(lua_State *L) { return 1; } +static int lupb_msgdef_index(lua_State *L) { + if (!lupb_msgdef_pushnested(L, 1, 2)) { + luaL_error(L, "No such nested message"); + } + return 1; +} + static int lupb_msgoneofiter_next(lua_State *L) { const upb_msgdef *m = lupb_msgdef_check(L, lua_upvalueindex(1)); int *index = lua_touserdata(L, lua_upvalueindex(2)); @@ -471,6 +498,7 @@ static int lupb_msgdef_tostring(lua_State *L) { static const struct luaL_Reg lupb_msgdef_mm[] = { {"__call", lupb_msg_pushnew}, + {"__index", lupb_msgdef_index}, {"__len", lupb_msgdef_fieldcount}, {"__tostring", lupb_msgdef_tostring}, {NULL, NULL} diff --git a/upb/bindings/lua/msg.c b/upb/bindings/lua/msg.c index 6a2ecb36c7..0c4c35f2c4 100644 --- a/upb/bindings/lua/msg.c +++ b/upb/bindings/lua/msg.c @@ -187,6 +187,13 @@ static void lupb_arena_fuse(lua_State *L, int to, int from) { upb_arena_fuse(to_arena, from_arena); } +static void lupb_arena_fuseobjs(lua_State *L, int to, int from) { + lua_getiuservalue(L, to, LUPB_ARENA_INDEX); + lua_getiuservalue(L, from, LUPB_ARENA_INDEX); + lupb_arena_fuse(L, lua_absindex(L, -2), lua_absindex(L, -1)); + lua_pop(L, 2); +} + static int lupb_arena_gc(lua_State *L) { upb_arena *a = lupb_arena_check(L, 1); upb_arena_free(a); @@ -398,6 +405,10 @@ static int lupb_array_newindex(lua_State *L) { upb_array_set(larray->arr, n, msgval); } + if (larray->type == UPB_TYPE_MESSAGE) { + lupb_arena_fuseobjs(L, 1, 3); + } + return 0; /* 1 for chained assignments? */ } @@ -535,6 +546,9 @@ static int lupb_map_newindex(lua_State *L) { } else { upb_msgval val = lupb_tomsgval(L, lmap->value_type, 3, 1, LUPB_COPY); upb_map_set(map, key, val, lupb_arenaget(L, 1)); + if (lmap->value_type == UPB_TYPE_MESSAGE) { + lupb_arena_fuseobjs(L, 1, 3); + } } return 0; @@ -600,21 +614,26 @@ static upb_msg *lupb_msg_check(lua_State *L, int narg) { return msg->msg; } -static const upb_fielddef *lupb_msg_checkfield(lua_State *L, int msg, - int field) { +static const upb_msgdef *lupb_msg_getmsgdef(lua_State *L, int msg) { + lua_getiuservalue(L, msg, LUPB_MSGDEF_INDEX); + const upb_msgdef *m = lupb_msgdef_check(L, -1); + lua_pop(L, 1); + return m; +} + +static const upb_fielddef *lupb_msg_tofield(lua_State *L, int msg, int field) { size_t len; const char *fieldname = luaL_checklstring(L, field, &len); - const upb_msgdef *m; - const upb_fielddef *f; + const upb_msgdef *m = lupb_msg_getmsgdef(L, msg); + return upb_msgdef_ntof(m, fieldname, len); +} - lua_getiuservalue(L, msg, LUPB_MSGDEF_INDEX); - m = lupb_msgdef_check(L, -1); - f = upb_msgdef_ntof(m, fieldname, len); +static const upb_fielddef *lupb_msg_checkfield(lua_State *L, int msg, + int field) { + const upb_fielddef *f = lupb_msg_tofield(L, msg, field); if (f == NULL) { - luaL_error(L, "no such field '%s'", fieldname); + luaL_error(L, "no such field '%s'", lua_tostring(L, field)); } - lua_pop(L, 1); - return f; } @@ -813,10 +832,7 @@ static int lupb_msg_newindex(lua_State *L) { } if (merge_arenas) { - lua_getiuservalue(L, 1, LUPB_ARENA_INDEX); - lua_getiuservalue(L, 3, LUPB_ARENA_INDEX); - lupb_arena_fuse(L, lua_absindex(L, -2), lua_absindex(L, -1)); - lua_pop(L, 2); + lupb_arena_fuseobjs(L, 1, 3); } upb_msg_set(msg, f, msgval, lupb_arenaget(L, 1)); @@ -907,6 +923,46 @@ static int lupb_decode(lua_State *L) { return 1; } +/** + * lupb_msg_textencode() + * + * Handles: + * text_string = upb.text_encode(msg, {upb.TXTENC_SINGLELINE}) + */ +static int lupb_textencode(lua_State *L) { + int argcount = lua_gettop(L); + upb_msg *msg = lupb_msg_check(L, 1); + const upb_msgdef *m; + char buf[1024]; + size_t size; + int options = 0; + + lua_getiuservalue(L, 1, LUPB_MSGDEF_INDEX); + m = lupb_msgdef_check(L, -1); + + if (argcount > 1) { + size_t len = lua_rawlen(L, 2); + for (size_t i = 1; i <= len; i++) { + lua_rawgeti(L, 2, i); + options |= lupb_checkuint32(L, -1); + lua_pop(L, 1); + } + } + + size = upb_text_encode(msg, m, NULL, options, buf, sizeof(buf)); + + if (size < sizeof(buf)) { + lua_pushlstring(L, buf, size); + } else { + char *ptr = malloc(size + 1); + upb_text_encode(msg, m, NULL, options, ptr, size + 1); + lua_pushlstring(L, ptr, size); + free(ptr); + } + + return 1; +} + /** * lupb_encode() * @@ -914,17 +970,28 @@ static int lupb_decode(lua_State *L) { * bin_string = upb.encode(msg) */ static int lupb_encode(lua_State *L) { + int argcount = lua_gettop(L); const upb_msg *msg = lupb_msg_check(L, 1); const upb_msglayout *layout; upb_arena *arena = lupb_arena_pushnew(L); size_t size; char *result; + int options = 0; + + if (argcount > 1) { + size_t len = lua_rawlen(L, 2); + for (size_t i = 1; i <= len; i++) { + lua_rawgeti(L, 2, i); + options |= lupb_checkuint32(L, -1); + lua_pop(L, 1); + } + } lua_getiuservalue(L, 1, LUPB_MSGDEF_INDEX); layout = upb_msgdef_layout(lupb_msgdef_check(L, -1)); lua_pop(L, 1); - result = upb_encode(msg, (const void*)layout, arena, &size); + result = upb_encode_ex(msg, (const void*)layout, options, arena, &size); if (!result) { lua_pushstring(L, "Error encoding protobuf."); @@ -936,11 +1003,17 @@ static int lupb_encode(lua_State *L) { return 1; } +static void lupb_setfieldi(lua_State *L, const char *field, int i) { + lua_pushinteger(L, i); + lua_setfield(L, -2, field); +} + static const struct luaL_Reg lupb_msg_toplevel_m[] = { {"Array", lupb_array_new}, {"Map", lupb_map_new}, {"decode", lupb_decode}, {"encode", lupb_encode}, + {"text_encode", lupb_textencode}, {NULL, NULL} }; @@ -952,5 +1025,11 @@ void lupb_msg_registertypes(lua_State *L) { lupb_register_type(L, LUPB_MAP, NULL, lupb_map_mm); lupb_register_type(L, LUPB_MSG, NULL, lupb_msg_mm); + lupb_setfieldi(L, "TXTENC_SINGLELINE", UPB_TXTENC_SINGLELINE); + lupb_setfieldi(L, "TXTENC_SKIPUNKNOWN", UPB_TXTENC_SKIPUNKNOWN); + lupb_setfieldi(L, "TXTENC_NOSORT", UPB_TXTENC_NOSORT); + + lupb_setfieldi(L, "ENCODE_DETERMINISTIC", UPB_ENCODE_DETERMINISTIC); + lupb_cacheinit(L); } diff --git a/upb/bindings/lua/upb.c b/upb/bindings/lua/upb.c index 3630ae2ef3..c247c03746 100644 --- a/upb/bindings/lua/upb.c +++ b/upb/bindings/lua/upb.c @@ -84,6 +84,24 @@ int lua_getiuservalue(lua_State *L, int index, int n) { } #endif +/* We use this function as the __index metamethod when a type has both methods + * and an __index metamethod. */ +int lupb_indexmm(lua_State *L) { + /* Look up in __index table (which is a closure param). */ + lua_pushvalue(L, 2); + lua_rawget(L, lua_upvalueindex(1)); + if (!lua_isnil(L, -1)) { + return 1; + } + + /* Not found, chain to user __index metamethod. */ + lua_pushvalue(L, lua_upvalueindex(2)); + lua_pushvalue(L, 1); + lua_pushvalue(L, 2); + lua_call(L, 2, 1); + return 1; +} + void lupb_register_type(lua_State *L, const char *name, const luaL_Reg *m, const luaL_Reg *mm) { luaL_newmetatable(L, name); @@ -93,14 +111,17 @@ void lupb_register_type(lua_State *L, const char *name, const luaL_Reg *m, } if (m) { - /* Methods go in the mt's __index method. This implies that you can' - * implement __index and also have methods. */ - lua_getfield(L, -1, "__index"); - lupb_assert(L, lua_isnil(L, -1)); - lua_pop(L, 1); - - lua_createtable(L, 0, 0); + lua_createtable(L, 0, 0); /* __index table */ lupb_setfuncs(L, m); + + /* Methods go in the mt's __index slot. If the user also specified an + * __index metamethod, use our custom lupb_indexmm() that can check both. */ + lua_getfield(L, -2, "__index"); + if (lua_isnil(L, -1)) { + lua_pop(L, 1); + } else { + lua_pushcclosure(L, &lupb_indexmm, 2); + } lua_setfield(L, -2, "__index"); } diff --git a/upb/decode.h b/upb/decode.h index 00419ab373..eff4b8bd3d 100644 --- a/upb/decode.h +++ b/upb/decode.h @@ -15,6 +15,8 @@ extern "C" { #endif enum { + /* If set, strings will alias the input buffer instead of copying into the + * arena. */ UPB_DECODE_ALIAS = 1, }; diff --git a/upb/encode.c b/upb/encode.c index 6d36619d62..c1fe02decb 100644 --- a/upb/encode.c +++ b/upb/encode.c @@ -32,6 +32,8 @@ typedef struct { jmp_buf err; upb_alloc *alloc; char *buf, *ptr, *limit; + int options; + _upb_mapsorter sorter; } upb_encstate; static size_t upb_roundup_pow2(size_t bytes) { @@ -341,31 +343,48 @@ static void encode_array(upb_encstate *e, const char *field_mem, } } +static void encode_mapentry(upb_encstate *e, uint32_t number, + const upb_msglayout *layout, + const upb_map_entry *ent) { + const upb_msglayout_field *key_field = &layout->fields[0]; + const upb_msglayout_field *val_field = &layout->fields[1]; + size_t pre_len = e->limit - e->ptr; + size_t size; + encode_scalar(e, &ent->v, layout, val_field, false); + encode_scalar(e, &ent->k, layout, key_field, false); + size = (e->limit - e->ptr) - pre_len; + encode_varint(e, size); + encode_tag(e, number, UPB_WIRE_TYPE_DELIMITED); +} + static void encode_map(upb_encstate *e, const char *field_mem, const upb_msglayout *m, const upb_msglayout_field *f) { const upb_map *map = *(const upb_map**)field_mem; - const upb_msglayout *entry = m->submsgs[f->submsg_index]; - const upb_msglayout_field *key_field = &entry->fields[0]; - const upb_msglayout_field *val_field = &entry->fields[1]; - upb_strtable_iter i; - if (map == NULL) { - return; - } + const upb_msglayout *layout = m->submsgs[f->submsg_index]; + UPB_ASSERT(layout->field_count == 2); - upb_strtable_begin(&i, &map->table); - for(; !upb_strtable_done(&i); upb_strtable_next(&i)) { - size_t pre_len = e->limit - e->ptr; - size_t size; - upb_strview key = upb_strtable_iter_key(&i); - const upb_value val = upb_strtable_iter_value(&i); + if (map == NULL) return; + + if (e->options & UPB_ENCODE_DETERMINISTIC) { + _upb_sortedmap sorted; + _upb_mapsorter_pushmap(&e->sorter, layout->fields[0].descriptortype, map, + &sorted); upb_map_entry ent; - _upb_map_fromkey(key, &ent.k, map->key_size); - _upb_map_fromvalue(val, &ent.v, map->val_size); - encode_scalar(e, &ent.v, entry, val_field, false); - encode_scalar(e, &ent.k, entry, key_field, false); - size = (e->limit - e->ptr) - pre_len; - encode_varint(e, size); - encode_tag(e, f->number, UPB_WIRE_TYPE_DELIMITED); + while (_upb_sortedmap_next(&e->sorter, map, &sorted, &ent)) { + encode_mapentry(e, f->number, layout, &ent); + } + _upb_mapsorter_popmap(&e->sorter, &sorted); + } else { + upb_strtable_iter i; + upb_strtable_begin(&i, &map->table); + for(; !upb_strtable_done(&i); upb_strtable_next(&i)) { + upb_strview key = upb_strtable_iter_key(&i); + const upb_value val = upb_strtable_iter_value(&i); + upb_map_entry ent; + _upb_map_fromkey(key, &ent.k, map->key_size); + _upb_map_fromvalue(val, &ent.v, map->val_size); + encode_mapentry(e, f->number, layout, &ent); + } } } @@ -414,28 +433,32 @@ static void encode_message(upb_encstate *e, const char *msg, *size = (e->limit - e->ptr) - pre_len; } -char *upb_encode(const void *msg, const upb_msglayout *m, upb_arena *arena, - size_t *size) { +char *upb_encode_ex(const void *msg, const upb_msglayout *m, int options, + upb_arena *arena, size_t *size) { upb_encstate e; e.alloc = upb_arena_alloc(arena); e.buf = NULL; e.limit = NULL; e.ptr = NULL; + e.options = options; + _upb_mapsorter_init(&e.sorter); + char *ret = NULL; if (UPB_SETJMP(e.err)) { *size = 0; - return NULL; - } - - encode_message(&e, msg, m, size); - - *size = e.limit - e.ptr; - - if (*size == 0) { - static char ch; - return &ch; + ret = NULL; } else { - UPB_ASSERT(e.ptr); - return e.ptr; + encode_message(&e, msg, m, size); + *size = e.limit - e.ptr; + if (*size == 0) { + static char ch; + ret = &ch; + } else { + UPB_ASSERT(e.ptr); + ret = e.ptr; + } } + + _upb_mapsorter_destroy(&e.sorter); + return ret; } diff --git a/upb/encode.h b/upb/encode.h index 6842777058..8ff51ab626 100644 --- a/upb/encode.h +++ b/upb/encode.h @@ -7,12 +7,32 @@ #include "upb/msg.h" +/* Must be last. */ +#include "upb/port_def.inc" + #ifdef __cplusplus extern "C" { #endif -char *upb_encode(const void *msg, const upb_msglayout *l, upb_arena *arena, - size_t *size); +enum { + /* If set, the results of serializing will be deterministic across all + * instances of this binary. There are no guarantees across different + * binary builds. + * + * If your proto contains maps, the encoder will need to malloc()/free() + * memory during encode. */ + UPB_ENCODE_DETERMINISTIC = 1, +}; + +char *upb_encode_ex(const void *msg, const upb_msglayout *l, int options, + upb_arena *arena, size_t *size); + +UPB_INLINE char *upb_encode(const void *msg, const upb_msglayout *l, + upb_arena *arena, size_t *size) { + return upb_encode_ex(msg, l, 0, arena, size); +} + +#include "upb/port_undef.inc" #ifdef __cplusplus } /* extern "C" */ diff --git a/upb/msg.c b/upb/msg.c index 1dbb8b0fab..876a06d6fa 100644 --- a/upb/msg.c +++ b/upb/msg.c @@ -139,3 +139,118 @@ upb_map *_upb_map_new(upb_arena *a, size_t key_size, size_t value_size) { return map; } + +static void _upb_mapsorter_getkeys(const void *_a, const void *_b, void *a_key, + void *b_key, size_t size) { + const upb_tabent *const*a = _a; + const upb_tabent *const*b = _b; + upb_strview a_tabkey = upb_tabstrview((*a)->key); + upb_strview b_tabkey = upb_tabstrview((*b)->key); + _upb_map_fromkey(a_tabkey, a_key, size); + _upb_map_fromkey(b_tabkey, b_key, size); +} + +static int _upb_mapsorter_cmpi64(const void *_a, const void *_b) { + int64_t a, b; + _upb_mapsorter_getkeys(_a, _b, &a, &b, 8); + return a - b; +} + +static int _upb_mapsorter_cmpu64(const void *_a, const void *_b) { + uint64_t a, b; + _upb_mapsorter_getkeys(_a, _b, &a, &b, 8); + return a - b; +} + +static int _upb_mapsorter_cmpi32(const void *_a, const void *_b) { + int32_t a, b; + _upb_mapsorter_getkeys(_a, _b, &a, &b, 4); + return a - b; +} + +static int _upb_mapsorter_cmpu32(const void *_a, const void *_b) { + uint32_t a, b; + _upb_mapsorter_getkeys(_a, _b, &a, &b, 4); + return a - b; +} + +static int _upb_mapsorter_cmpbool(const void *_a, const void *_b) { + bool a, b; + _upb_mapsorter_getkeys(_a, _b, &a, &b, 1); + return a - b; +} + +static int _upb_mapsorter_cmpstr(const void *_a, const void *_b) { + upb_strview a, b; + _upb_mapsorter_getkeys(_a, _b, &a, &b, UPB_MAPTYPE_STRING); + size_t common_size = UPB_MIN(a.size, b.size); + int cmp = memcmp(a.data, b.data, common_size); + if (cmp) return cmp; + return a.size - b.size; +} + +bool _upb_mapsorter_pushmap(_upb_mapsorter *s, upb_descriptortype_t key_type, + const upb_map *map, _upb_sortedmap *sorted) { + int map_size = _upb_map_size(map); + sorted->start = s->size; + sorted->pos = sorted->start; + sorted->end = sorted->start + map_size; + + /* Grow s->entries if necessary. */ + if (sorted->end > s->cap) { + s->cap = _upb_lg2ceilsize(sorted->end); + s->entries = realloc(s->entries, s->cap * sizeof(*s->entries)); + if (!s->entries) return false; + } + + s->size = sorted->end; + + /* Copy non-empty entries from the table to s->entries. */ + upb_tabent const**dst = &s->entries[sorted->start]; + const upb_tabent *src = map->table.t.entries; + const upb_tabent *end = src + upb_table_size(&map->table.t); + for (; src < end; src++) { + if (!upb_tabent_isempty(src)) { + *dst = src; + dst++; + } + } + UPB_ASSERT(dst == &s->entries[sorted->end]); + + /* Sort entries according to the key type. */ + + int (*compar)(const void *, const void *); + + switch (key_type) { + case UPB_DESCRIPTOR_TYPE_INT64: + case UPB_DESCRIPTOR_TYPE_SFIXED64: + case UPB_DESCRIPTOR_TYPE_SINT64: + compar = _upb_mapsorter_cmpi64; + break; + case UPB_DESCRIPTOR_TYPE_UINT64: + case UPB_DESCRIPTOR_TYPE_FIXED64: + compar = _upb_mapsorter_cmpu64; + break; + case UPB_DESCRIPTOR_TYPE_INT32: + case UPB_DESCRIPTOR_TYPE_SINT32: + case UPB_DESCRIPTOR_TYPE_SFIXED32: + case UPB_DESCRIPTOR_TYPE_ENUM: + compar = _upb_mapsorter_cmpi32; + break; + case UPB_DESCRIPTOR_TYPE_UINT32: + case UPB_DESCRIPTOR_TYPE_FIXED32: + compar = _upb_mapsorter_cmpu32; + break; + case UPB_DESCRIPTOR_TYPE_BOOL: + compar = _upb_mapsorter_cmpbool; + break; + case UPB_DESCRIPTOR_TYPE_STRING: + compar = _upb_mapsorter_cmpstr; + break; + default: + UPB_UNREACHABLE(); + } + + qsort(&s->entries[sorted->start], map_size, sizeof(*s->entries), compar); + return true; +} diff --git a/upb/msg.h b/upb/msg.h index 36c57b32cd..9b4557ac18 100644 --- a/upb/msg.h +++ b/upb/msg.h @@ -9,11 +9,13 @@ #define UPB_MSG_H_ #include +#include #include #include "upb/table.int.h" #include "upb/upb.h" +/* Must be last. */ #include "upb/port_def.inc" #ifdef __cplusplus @@ -553,6 +555,53 @@ UPB_INLINE void _upb_msg_map_set_value(void* msg, const void* val, size_t size) } } +/** _upb_mapsorter *************************************************************/ + +/* _upb_mapsorter sorts maps and provides ordered iteration over the entries. + * Since maps can be recursive (map values can be messages which contain other maps). + * _upb_mapsorter can contain a stack of maps. */ + +typedef struct { + upb_tabent const**entries; + int size; + int cap; +} _upb_mapsorter; + +typedef struct { + int start; + int pos; + int end; +} _upb_sortedmap; + +UPB_INLINE void _upb_mapsorter_init(_upb_mapsorter *s) { + s->entries = NULL; + s->size = 0; + s->cap = 0; +} + +UPB_INLINE void _upb_mapsorter_destroy(_upb_mapsorter *s) { + if (s->entries) free(s->entries); +} + +bool _upb_mapsorter_pushmap(_upb_mapsorter *s, upb_descriptortype_t key_type, + const upb_map *map, _upb_sortedmap *sorted); + +UPB_INLINE void _upb_mapsorter_popmap(_upb_mapsorter *s, _upb_sortedmap *sorted) { + s->size = sorted->start; +} + +UPB_INLINE bool _upb_sortedmap_next(_upb_mapsorter *s, const upb_map *map, + _upb_sortedmap *sorted, + upb_map_entry *ent) { + if (sorted->pos == sorted->end) return false; + const upb_tabent *tabent = s->entries[sorted->pos++]; + upb_strview key = upb_tabstrview(tabent->key); + _upb_map_fromkey(key, &ent->k, map->key_size); + upb_value val = {tabent->val.val}; + _upb_map_fromvalue(val, &ent->v, map->val_size); + return true; +} + #undef PTR_AT #ifdef __cplusplus diff --git a/upb/table.int.h b/upb/table.int.h index ab67cc34d6..49caac463e 100644 --- a/upb/table.int.h +++ b/upb/table.int.h @@ -147,10 +147,17 @@ UPB_INLINE char *upb_tabstr(upb_tabkey key, uint32_t *len) { return mem + sizeof(*len); } +UPB_INLINE upb_strview upb_tabstrview(upb_tabkey key) { + upb_strview ret; + uint32_t len; + ret.data = upb_tabstr(key, &len); + ret.size = len; + return ret; +} /* upb_tabval *****************************************************************/ -typedef struct { +typedef struct upb_tabval { uint64_t val; } upb_tabval; diff --git a/upb/text_encode.c b/upb/text_encode.c index 3e9630d8cd..028cc29e09 100644 --- a/upb/text_encode.c +++ b/upb/text_encode.c @@ -17,6 +17,7 @@ typedef struct { int indent_depth; int options; const upb_symtab *ext_pool; + _upb_mapsorter sorter; } txtenc; static void txtenc_msg(txtenc *e, const upb_msg *msg, const upb_msgdef *m); @@ -187,6 +188,25 @@ static void txtenc_array(txtenc *e, const upb_array *arr, } } +static void txtenc_mapentry(txtenc *e, upb_msgval key, upb_msgval val, + const upb_fielddef *f) { + const upb_msgdef *entry = upb_fielddef_msgsubdef(f); + const upb_fielddef *key_f = upb_msgdef_field(entry, 0); + const upb_fielddef *val_f = upb_msgdef_field(entry, 1); + txtenc_indent(e); + txtenc_printf(e, "%s: {", upb_fielddef_name(f)); + txtenc_endfield(e); + e->indent_depth++; + + txtenc_field(e, key, key_f); + txtenc_field(e, val, val_f); + + e->indent_depth--; + txtenc_indent(e); + txtenc_putstr(e, "}"); + txtenc_endfield(e); +} + /* * Maps print as messages of key/value, etc. * @@ -200,27 +220,28 @@ static void txtenc_array(txtenc *e, const upb_array *arr, * } */ static void txtenc_map(txtenc *e, const upb_map *map, const upb_fielddef *f) { - const upb_msgdef *entry = upb_fielddef_msgsubdef(f); - const upb_fielddef *key_f = upb_msgdef_itof(entry, 1); - const upb_fielddef *val_f = upb_msgdef_itof(entry, 2); - size_t iter = UPB_MAP_BEGIN; - - while (upb_mapiter_next(map, &iter)) { - upb_msgval key = upb_mapiter_key(map, iter); - upb_msgval val = upb_mapiter_value(map, iter); - - txtenc_indent(e); - txtenc_printf(e, "%s: {", upb_fielddef_name(f)); - txtenc_endfield(e); - e->indent_depth++; - - txtenc_field(e, key, key_f); - txtenc_field(e, val, val_f); - - e->indent_depth--; - txtenc_indent(e); - txtenc_putstr(e, "}"); - txtenc_endfield(e); + if (e->options & UPB_TXTENC_NOSORT) { + size_t iter = UPB_MAP_BEGIN; + while (upb_mapiter_next(map, &iter)) { + upb_msgval key = upb_mapiter_key(map, iter); + upb_msgval val = upb_mapiter_value(map, iter); + txtenc_mapentry(e, key, val, f); + } + } else { + const upb_msgdef *entry = upb_fielddef_msgsubdef(f); + const upb_fielddef *key_f = upb_msgdef_field(entry, 0); + _upb_sortedmap sorted; + upb_map_entry ent; + + _upb_mapsorter_pushmap(&e->sorter, upb_fielddef_descriptortype(key_f), map, + &sorted); + while (_upb_sortedmap_next(&e->sorter, map, &sorted, &ent)) { + upb_msgval key, val; + memcpy(&key, &ent.k, sizeof(key)); + memcpy(&val, &ent.v, sizeof(val)); + txtenc_mapentry(e, key, val, f); + } + _upb_mapsorter_popmap(&e->sorter, &sorted); } } @@ -392,7 +413,9 @@ size_t upb_text_encode(const upb_msg *msg, const upb_msgdef *m, e.indent_depth = 0; e.options = options; e.ext_pool = ext_pool; + _upb_mapsorter_init(&e.sorter); txtenc_msg(&e, msg, m); + _upb_mapsorter_destroy(&e.sorter); return txtenc_nullz(&e, size); } diff --git a/upb/text_encode.h b/upb/text_encode.h index 1a0003cb31..4ad3d1c4d7 100644 --- a/upb/text_encode.h +++ b/upb/text_encode.h @@ -13,7 +13,10 @@ enum { UPB_TXTENC_SINGLELINE = 1, /* When set, unknown fields are not printed. */ - UPB_TXTENC_SKIPUNKNOWN = 2 + UPB_TXTENC_SKIPUNKNOWN = 2, + + /* When set, maps are *not* sorted (this avoids allocating tmp mem). */ + UPB_TXTENC_NOSORT = 4 }; /* Encodes the given |msg| to text format. The message's reflection is given in diff --git a/upb/upb.h b/upb/upb.h index 7e7309dc45..11c0e4dc99 100644 --- a/upb/upb.h +++ b/upb/upb.h @@ -324,6 +324,10 @@ UPB_INLINE int _upb_lg2ceil(int x) { #endif } +UPB_INLINE int _upb_lg2ceilsize(int x) { + return 1 << _upb_lg2ceil(x); +} + #include "upb/port_undef.inc" #ifdef __cplusplus