From a04627abc8b364a3a141204686673054e1d88dca Mon Sep 17 00:00:00 2001 From: Joshua Haberman Date: Wed, 25 Nov 2020 16:15:01 -0800 Subject: [PATCH] Added map sorting to binary and text encoders. For the binary encoder, sorting is off by default. For the text encoder, sorting is on by default. Both defaults can be explicitly overridden. This grows code size a bit. I think we could potentially shave this (and other map-related code size) by having the generated code inject a function pointer to the map-related parsing/serialization code if maps are present. FILE SIZE VM SIZE -------------- -------------- +86% +1.07Ki +71% +768 upb/msg.c [NEW] +391 [NEW] +344 _upb_mapsorter_pushmap [NEW] +158 [NEW] +112 _upb_mapsorter_cmpstr [NEW] +111 [NEW] +64 _upb_mapsorter_cmpbool [NEW] +110 [NEW] +64 _upb_mapsorter_cmpi32 [NEW] +110 [NEW] +64 _upb_mapsorter_cmpi64 [NEW] +110 [NEW] +64 _upb_mapsorter_cmpu32 [NEW] +110 [NEW] +64 _upb_mapsorter_cmpu64 -3.6% -8 -4.3% -8 _upb_map_new +9.5% +464 +9.2% +424 upb/text_encode.c [NEW] +656 [NEW] +616 txtenc_mapentry +15% +32 +20% +32 upb_text_encode -20.1% -224 -20.7% -224 txtenc_msg +5.7% +342 +5.3% +296 upb/encode.c [NEW] +344 [NEW] +304 encode_mapentry [NEW] +246 [NEW] +208 upb_encode_ex [NEW] +41 [NEW] +16 upb_encode_ex.ch +0.7% +8 +0.7% +8 encode_scalar -1.0% -32 -1.0% -32 encode_message [DEL] -38 [DEL] -16 upb_encode.ch [DEL] -227 [DEL] -192 upb_encode +2.0% +152 +2.2% +152 upb/decode.c +44% +128 +44% +128 [section .rodata] +3.4% +24 +3.4% +24 _GLOBAL_OFFSET_TABLE_ +0.6% +107 +0.3% +48 upb/def.c [NEW] +100 [NEW] +48 upb_fielddef_descriptortype +7.1% +7 [ = ] 0 upb_fielddef_defaultint32 +2.9% +24 +2.9% +24 [section .dynsym] +1.2% +24 [ = ] 0 [section .symtab] +3.2% +16 +3.2% +16 [section .plt] [NEW] +16 [NEW] +16 memcmp@plt +0.5% +16 +0.6% +16 tests/conformance_upb.c +1.5% +16 +1.6% +16 DoTestIo +0.1% +16 +0.1% +16 upb/json_decode.c +0.4% +16 +0.4% +16 jsondec_wellknown +3.0% +8 +3.0% +8 [section .got.plt] +3.0% +8 +3.0% +8 _GLOBAL_OFFSET_TABLE_ +1.6% +7 +1.6% +7 [section .dynstr] +1.8% +4 +1.8% +4 [section .hash] +0.5% +3 +0.5% +3 [LOAD #2 [RX]] +2.8% +2 +2.8% +2 [section .gnu.version] -60.0% -1.74Ki [ = ] 0 [Unmapped] +0.3% +496 +1.4% +1.74Ki TOTAL --- benchmarks/compare.py | 2 +- tests/bindings/lua/test_upb.lua | 63 +++++++++++++++++ upb/bindings/lua/def.c | 32 ++++++++- upb/bindings/lua/msg.c | 109 +++++++++++++++++++++++++----- upb/bindings/lua/upb.c | 35 ++++++++-- upb/decode.h | 2 + upb/encode.c | 91 +++++++++++++++---------- upb/encode.h | 24 ++++++- upb/msg.c | 115 ++++++++++++++++++++++++++++++++ upb/msg.h | 49 ++++++++++++++ upb/table.int.h | 9 ++- upb/text_encode.c | 65 ++++++++++++------ upb/text_encode.h | 5 +- upb/upb.h | 4 ++ 14 files changed, 521 insertions(+), 84 deletions(-) diff --git a/benchmarks/compare.py b/benchmarks/compare.py index 9bdf787138..8d62d94272 100755 --- a/benchmarks/compare.py +++ b/benchmarks/compare.py @@ -58,7 +58,7 @@ def Benchmark(outbase, bench_cpu=True, runs=12, fasttable=False): baseline = "master" -bench_cpu = True +bench_cpu = False fasttable = False if len(sys.argv) > 1: diff --git a/tests/bindings/lua/test_upb.lua b/tests/bindings/lua/test_upb.lua index 84178aa79c..0f4a3d2725 100644 --- a/tests/bindings/lua/test_upb.lua +++ b/tests/bindings/lua/test_upb.lua @@ -91,6 +91,69 @@ function test_msg_map() assert_equal(12, msg2.map_int32_int32[6]) end +function test_map_sorting() + function msg_with_int32_entries(start, expand) + local msg = test_messages_proto3.TestAllTypesProto3() + for i=start,start + 8 do + msg.map_int32_int32[i] = i * 2 + end + + if expand then + for i=start+20,200 do + msg.map_int32_int32[i] = i + end + for i=start+20,200 do + msg.map_int32_int32[i] = nil + end + end + return msg + end + + function msg_with_msg_entries(expand) + local msg = test_messages_proto3.TestAllTypesProto3() + -- 8! = 40320 possible orderings makes it overwhelmingly likely that two + -- random orderings will be different. + for i=1,8 do + local submsg = test_messages_proto3.TestAllTypesProto3.NestedMessage() + submsg.corecursive = msg_with_int32_entries(i, expand) + msg.map_string_nested_message[tostring(i)] = submsg + end + + expand = false + if expand then + for i=21,2000 do + local submsg = test_messages_proto3.TestAllTypesProto3.NestedMessage() + submsg.corecursive = msg_with_int32_entries(i, expand) + msg.map_string_nested_message[tostring(i)] = submsg + end + for i=21,2000 do + msg.map_string_nested_message[tostring(i)] = nil + end + end + return msg + end + + -- Create two messages with the same contents but (hopefully) different + -- map table orderings. + local msg = msg_with_msg_entries(false) + local msg2 = msg_with_msg_entries(true) + + local text1 = upb.text_encode(msg) + local text2 = upb.text_encode(msg2) + assert_equal(text1, text2) + + local binary1 = upb.encode(msg, {upb.ENCODE_DETERMINISTIC}) + local binary2 = upb.encode(msg2, {upb.ENCODE_DETERMINISTIC}) + assert_equal(binary1, binary2) + + -- Non-sorted map should compare different. + local text3 = upb.text_encode(msg, {upb.TXTENC_NOSORT}) + assert_not_equal(text1, text3) + + local binary3 = upb.encode(msg) + assert_not_equal(binary1, binary3) +end + function test_utf8() local proto2_msg = test_messages_proto2.TestAllTypesProto2() proto2_msg.optional_string = "\xff" diff --git a/upb/bindings/lua/def.c b/upb/bindings/lua/def.c index 3bf1d9654f..ca277c8354 100644 --- a/upb/bindings/lua/def.c +++ b/upb/bindings/lua/def.c @@ -53,12 +53,12 @@ static void lupb_wrapper_pushwrapper(lua_State *L, int narg, const void *def, /* lupb_msgdef_pushsubmsgdef() * * Pops the msgdef wrapper at the top of the stack and replaces it with a msgdef - * wrapper for field |f| of this msgdef. + * wrapper for field |f| of this msgdef (submsg may not be direct, for example it + * may be the submessage of the map value). */ void lupb_msgdef_pushsubmsgdef(lua_State *L, const upb_fielddef *f) { const upb_msgdef *m = upb_fielddef_msgsubdef(f); assert(m); - assert(upb_fielddef_containingtype(f) == lupb_msgdef_check(L, -1)); lupb_wrapper_pushwrapper(L, -1, m, LUPB_MSGDEF); lua_replace(L, -2); /* Replace msgdef with submsgdef. */ } @@ -337,6 +337,26 @@ static int lupb_msgdef_oneofcount(lua_State *L) { return 1; } +static bool lupb_msgdef_pushnested(lua_State *L, int msgdef, int name) { + const upb_msgdef *m = lupb_msgdef_check(L, msgdef); + lupb_wrapper_pushsymtab(L, msgdef); + upb_symtab *symtab = lupb_symtab_check(L, -1); + lua_pop(L, 1); + + /* Construct full package.Message.SubMessage name. */ + lua_pushstring(L, upb_msgdef_fullname(m)); + lua_pushstring(L, "."); + lua_pushvalue(L, name); + lua_concat(L, 3); + const char *nested_name = lua_tostring(L, -1); + + /* Try lookup. */ + const upb_msgdef *nested = upb_symtab_lookupmsg(symtab, nested_name); + if (!nested) return false; + lupb_wrapper_pushwrapper(L, msgdef, nested, LUPB_MSGDEF); + return true; +} + /* lupb_msgdef_field() * * Handles: @@ -430,6 +450,13 @@ static int lupb_msgdef_fullname(lua_State *L) { return 1; } +static int lupb_msgdef_index(lua_State *L) { + if (!lupb_msgdef_pushnested(L, 1, 2)) { + luaL_error(L, "No such nested message"); + } + return 1; +} + static int lupb_msgoneofiter_next(lua_State *L) { const upb_msgdef *m = lupb_msgdef_check(L, lua_upvalueindex(1)); int *index = lua_touserdata(L, lua_upvalueindex(2)); @@ -471,6 +498,7 @@ static int lupb_msgdef_tostring(lua_State *L) { static const struct luaL_Reg lupb_msgdef_mm[] = { {"__call", lupb_msg_pushnew}, + {"__index", lupb_msgdef_index}, {"__len", lupb_msgdef_fieldcount}, {"__tostring", lupb_msgdef_tostring}, {NULL, NULL} diff --git a/upb/bindings/lua/msg.c b/upb/bindings/lua/msg.c index 6a2ecb36c7..0c4c35f2c4 100644 --- a/upb/bindings/lua/msg.c +++ b/upb/bindings/lua/msg.c @@ -187,6 +187,13 @@ static void lupb_arena_fuse(lua_State *L, int to, int from) { upb_arena_fuse(to_arena, from_arena); } +static void lupb_arena_fuseobjs(lua_State *L, int to, int from) { + lua_getiuservalue(L, to, LUPB_ARENA_INDEX); + lua_getiuservalue(L, from, LUPB_ARENA_INDEX); + lupb_arena_fuse(L, lua_absindex(L, -2), lua_absindex(L, -1)); + lua_pop(L, 2); +} + static int lupb_arena_gc(lua_State *L) { upb_arena *a = lupb_arena_check(L, 1); upb_arena_free(a); @@ -398,6 +405,10 @@ static int lupb_array_newindex(lua_State *L) { upb_array_set(larray->arr, n, msgval); } + if (larray->type == UPB_TYPE_MESSAGE) { + lupb_arena_fuseobjs(L, 1, 3); + } + return 0; /* 1 for chained assignments? */ } @@ -535,6 +546,9 @@ static int lupb_map_newindex(lua_State *L) { } else { upb_msgval val = lupb_tomsgval(L, lmap->value_type, 3, 1, LUPB_COPY); upb_map_set(map, key, val, lupb_arenaget(L, 1)); + if (lmap->value_type == UPB_TYPE_MESSAGE) { + lupb_arena_fuseobjs(L, 1, 3); + } } return 0; @@ -600,21 +614,26 @@ static upb_msg *lupb_msg_check(lua_State *L, int narg) { return msg->msg; } -static const upb_fielddef *lupb_msg_checkfield(lua_State *L, int msg, - int field) { +static const upb_msgdef *lupb_msg_getmsgdef(lua_State *L, int msg) { + lua_getiuservalue(L, msg, LUPB_MSGDEF_INDEX); + const upb_msgdef *m = lupb_msgdef_check(L, -1); + lua_pop(L, 1); + return m; +} + +static const upb_fielddef *lupb_msg_tofield(lua_State *L, int msg, int field) { size_t len; const char *fieldname = luaL_checklstring(L, field, &len); - const upb_msgdef *m; - const upb_fielddef *f; + const upb_msgdef *m = lupb_msg_getmsgdef(L, msg); + return upb_msgdef_ntof(m, fieldname, len); +} - lua_getiuservalue(L, msg, LUPB_MSGDEF_INDEX); - m = lupb_msgdef_check(L, -1); - f = upb_msgdef_ntof(m, fieldname, len); +static const upb_fielddef *lupb_msg_checkfield(lua_State *L, int msg, + int field) { + const upb_fielddef *f = lupb_msg_tofield(L, msg, field); if (f == NULL) { - luaL_error(L, "no such field '%s'", fieldname); + luaL_error(L, "no such field '%s'", lua_tostring(L, field)); } - lua_pop(L, 1); - return f; } @@ -813,10 +832,7 @@ static int lupb_msg_newindex(lua_State *L) { } if (merge_arenas) { - lua_getiuservalue(L, 1, LUPB_ARENA_INDEX); - lua_getiuservalue(L, 3, LUPB_ARENA_INDEX); - lupb_arena_fuse(L, lua_absindex(L, -2), lua_absindex(L, -1)); - lua_pop(L, 2); + lupb_arena_fuseobjs(L, 1, 3); } upb_msg_set(msg, f, msgval, lupb_arenaget(L, 1)); @@ -907,6 +923,46 @@ static int lupb_decode(lua_State *L) { return 1; } +/** + * lupb_msg_textencode() + * + * Handles: + * text_string = upb.text_encode(msg, {upb.TXTENC_SINGLELINE}) + */ +static int lupb_textencode(lua_State *L) { + int argcount = lua_gettop(L); + upb_msg *msg = lupb_msg_check(L, 1); + const upb_msgdef *m; + char buf[1024]; + size_t size; + int options = 0; + + lua_getiuservalue(L, 1, LUPB_MSGDEF_INDEX); + m = lupb_msgdef_check(L, -1); + + if (argcount > 1) { + size_t len = lua_rawlen(L, 2); + for (size_t i = 1; i <= len; i++) { + lua_rawgeti(L, 2, i); + options |= lupb_checkuint32(L, -1); + lua_pop(L, 1); + } + } + + size = upb_text_encode(msg, m, NULL, options, buf, sizeof(buf)); + + if (size < sizeof(buf)) { + lua_pushlstring(L, buf, size); + } else { + char *ptr = malloc(size + 1); + upb_text_encode(msg, m, NULL, options, ptr, size + 1); + lua_pushlstring(L, ptr, size); + free(ptr); + } + + return 1; +} + /** * lupb_encode() * @@ -914,17 +970,28 @@ static int lupb_decode(lua_State *L) { * bin_string = upb.encode(msg) */ static int lupb_encode(lua_State *L) { + int argcount = lua_gettop(L); const upb_msg *msg = lupb_msg_check(L, 1); const upb_msglayout *layout; upb_arena *arena = lupb_arena_pushnew(L); size_t size; char *result; + int options = 0; + + if (argcount > 1) { + size_t len = lua_rawlen(L, 2); + for (size_t i = 1; i <= len; i++) { + lua_rawgeti(L, 2, i); + options |= lupb_checkuint32(L, -1); + lua_pop(L, 1); + } + } lua_getiuservalue(L, 1, LUPB_MSGDEF_INDEX); layout = upb_msgdef_layout(lupb_msgdef_check(L, -1)); lua_pop(L, 1); - result = upb_encode(msg, (const void*)layout, arena, &size); + result = upb_encode_ex(msg, (const void*)layout, options, arena, &size); if (!result) { lua_pushstring(L, "Error encoding protobuf."); @@ -936,11 +1003,17 @@ static int lupb_encode(lua_State *L) { return 1; } +static void lupb_setfieldi(lua_State *L, const char *field, int i) { + lua_pushinteger(L, i); + lua_setfield(L, -2, field); +} + static const struct luaL_Reg lupb_msg_toplevel_m[] = { {"Array", lupb_array_new}, {"Map", lupb_map_new}, {"decode", lupb_decode}, {"encode", lupb_encode}, + {"text_encode", lupb_textencode}, {NULL, NULL} }; @@ -952,5 +1025,11 @@ void lupb_msg_registertypes(lua_State *L) { lupb_register_type(L, LUPB_MAP, NULL, lupb_map_mm); lupb_register_type(L, LUPB_MSG, NULL, lupb_msg_mm); + lupb_setfieldi(L, "TXTENC_SINGLELINE", UPB_TXTENC_SINGLELINE); + lupb_setfieldi(L, "TXTENC_SKIPUNKNOWN", UPB_TXTENC_SKIPUNKNOWN); + lupb_setfieldi(L, "TXTENC_NOSORT", UPB_TXTENC_NOSORT); + + lupb_setfieldi(L, "ENCODE_DETERMINISTIC", UPB_ENCODE_DETERMINISTIC); + lupb_cacheinit(L); } diff --git a/upb/bindings/lua/upb.c b/upb/bindings/lua/upb.c index 3630ae2ef3..c247c03746 100644 --- a/upb/bindings/lua/upb.c +++ b/upb/bindings/lua/upb.c @@ -84,6 +84,24 @@ int lua_getiuservalue(lua_State *L, int index, int n) { } #endif +/* We use this function as the __index metamethod when a type has both methods + * and an __index metamethod. */ +int lupb_indexmm(lua_State *L) { + /* Look up in __index table (which is a closure param). */ + lua_pushvalue(L, 2); + lua_rawget(L, lua_upvalueindex(1)); + if (!lua_isnil(L, -1)) { + return 1; + } + + /* Not found, chain to user __index metamethod. */ + lua_pushvalue(L, lua_upvalueindex(2)); + lua_pushvalue(L, 1); + lua_pushvalue(L, 2); + lua_call(L, 2, 1); + return 1; +} + void lupb_register_type(lua_State *L, const char *name, const luaL_Reg *m, const luaL_Reg *mm) { luaL_newmetatable(L, name); @@ -93,14 +111,17 @@ void lupb_register_type(lua_State *L, const char *name, const luaL_Reg *m, } if (m) { - /* Methods go in the mt's __index method. This implies that you can' - * implement __index and also have methods. */ - lua_getfield(L, -1, "__index"); - lupb_assert(L, lua_isnil(L, -1)); - lua_pop(L, 1); - - lua_createtable(L, 0, 0); + lua_createtable(L, 0, 0); /* __index table */ lupb_setfuncs(L, m); + + /* Methods go in the mt's __index slot. If the user also specified an + * __index metamethod, use our custom lupb_indexmm() that can check both. */ + lua_getfield(L, -2, "__index"); + if (lua_isnil(L, -1)) { + lua_pop(L, 1); + } else { + lua_pushcclosure(L, &lupb_indexmm, 2); + } lua_setfield(L, -2, "__index"); } diff --git a/upb/decode.h b/upb/decode.h index 00419ab373..eff4b8bd3d 100644 --- a/upb/decode.h +++ b/upb/decode.h @@ -15,6 +15,8 @@ extern "C" { #endif enum { + /* If set, strings will alias the input buffer instead of copying into the + * arena. */ UPB_DECODE_ALIAS = 1, }; diff --git a/upb/encode.c b/upb/encode.c index 6d36619d62..c1fe02decb 100644 --- a/upb/encode.c +++ b/upb/encode.c @@ -32,6 +32,8 @@ typedef struct { jmp_buf err; upb_alloc *alloc; char *buf, *ptr, *limit; + int options; + _upb_mapsorter sorter; } upb_encstate; static size_t upb_roundup_pow2(size_t bytes) { @@ -341,31 +343,48 @@ static void encode_array(upb_encstate *e, const char *field_mem, } } +static void encode_mapentry(upb_encstate *e, uint32_t number, + const upb_msglayout *layout, + const upb_map_entry *ent) { + const upb_msglayout_field *key_field = &layout->fields[0]; + const upb_msglayout_field *val_field = &layout->fields[1]; + size_t pre_len = e->limit - e->ptr; + size_t size; + encode_scalar(e, &ent->v, layout, val_field, false); + encode_scalar(e, &ent->k, layout, key_field, false); + size = (e->limit - e->ptr) - pre_len; + encode_varint(e, size); + encode_tag(e, number, UPB_WIRE_TYPE_DELIMITED); +} + static void encode_map(upb_encstate *e, const char *field_mem, const upb_msglayout *m, const upb_msglayout_field *f) { const upb_map *map = *(const upb_map**)field_mem; - const upb_msglayout *entry = m->submsgs[f->submsg_index]; - const upb_msglayout_field *key_field = &entry->fields[0]; - const upb_msglayout_field *val_field = &entry->fields[1]; - upb_strtable_iter i; - if (map == NULL) { - return; - } + const upb_msglayout *layout = m->submsgs[f->submsg_index]; + UPB_ASSERT(layout->field_count == 2); - upb_strtable_begin(&i, &map->table); - for(; !upb_strtable_done(&i); upb_strtable_next(&i)) { - size_t pre_len = e->limit - e->ptr; - size_t size; - upb_strview key = upb_strtable_iter_key(&i); - const upb_value val = upb_strtable_iter_value(&i); + if (map == NULL) return; + + if (e->options & UPB_ENCODE_DETERMINISTIC) { + _upb_sortedmap sorted; + _upb_mapsorter_pushmap(&e->sorter, layout->fields[0].descriptortype, map, + &sorted); upb_map_entry ent; - _upb_map_fromkey(key, &ent.k, map->key_size); - _upb_map_fromvalue(val, &ent.v, map->val_size); - encode_scalar(e, &ent.v, entry, val_field, false); - encode_scalar(e, &ent.k, entry, key_field, false); - size = (e->limit - e->ptr) - pre_len; - encode_varint(e, size); - encode_tag(e, f->number, UPB_WIRE_TYPE_DELIMITED); + while (_upb_sortedmap_next(&e->sorter, map, &sorted, &ent)) { + encode_mapentry(e, f->number, layout, &ent); + } + _upb_mapsorter_popmap(&e->sorter, &sorted); + } else { + upb_strtable_iter i; + upb_strtable_begin(&i, &map->table); + for(; !upb_strtable_done(&i); upb_strtable_next(&i)) { + upb_strview key = upb_strtable_iter_key(&i); + const upb_value val = upb_strtable_iter_value(&i); + upb_map_entry ent; + _upb_map_fromkey(key, &ent.k, map->key_size); + _upb_map_fromvalue(val, &ent.v, map->val_size); + encode_mapentry(e, f->number, layout, &ent); + } } } @@ -414,28 +433,32 @@ static void encode_message(upb_encstate *e, const char *msg, *size = (e->limit - e->ptr) - pre_len; } -char *upb_encode(const void *msg, const upb_msglayout *m, upb_arena *arena, - size_t *size) { +char *upb_encode_ex(const void *msg, const upb_msglayout *m, int options, + upb_arena *arena, size_t *size) { upb_encstate e; e.alloc = upb_arena_alloc(arena); e.buf = NULL; e.limit = NULL; e.ptr = NULL; + e.options = options; + _upb_mapsorter_init(&e.sorter); + char *ret = NULL; if (UPB_SETJMP(e.err)) { *size = 0; - return NULL; - } - - encode_message(&e, msg, m, size); - - *size = e.limit - e.ptr; - - if (*size == 0) { - static char ch; - return &ch; + ret = NULL; } else { - UPB_ASSERT(e.ptr); - return e.ptr; + encode_message(&e, msg, m, size); + *size = e.limit - e.ptr; + if (*size == 0) { + static char ch; + ret = &ch; + } else { + UPB_ASSERT(e.ptr); + ret = e.ptr; + } } + + _upb_mapsorter_destroy(&e.sorter); + return ret; } diff --git a/upb/encode.h b/upb/encode.h index 6842777058..8ff51ab626 100644 --- a/upb/encode.h +++ b/upb/encode.h @@ -7,12 +7,32 @@ #include "upb/msg.h" +/* Must be last. */ +#include "upb/port_def.inc" + #ifdef __cplusplus extern "C" { #endif -char *upb_encode(const void *msg, const upb_msglayout *l, upb_arena *arena, - size_t *size); +enum { + /* If set, the results of serializing will be deterministic across all + * instances of this binary. There are no guarantees across different + * binary builds. + * + * If your proto contains maps, the encoder will need to malloc()/free() + * memory during encode. */ + UPB_ENCODE_DETERMINISTIC = 1, +}; + +char *upb_encode_ex(const void *msg, const upb_msglayout *l, int options, + upb_arena *arena, size_t *size); + +UPB_INLINE char *upb_encode(const void *msg, const upb_msglayout *l, + upb_arena *arena, size_t *size) { + return upb_encode_ex(msg, l, 0, arena, size); +} + +#include "upb/port_undef.inc" #ifdef __cplusplus } /* extern "C" */ diff --git a/upb/msg.c b/upb/msg.c index 1dbb8b0fab..876a06d6fa 100644 --- a/upb/msg.c +++ b/upb/msg.c @@ -139,3 +139,118 @@ upb_map *_upb_map_new(upb_arena *a, size_t key_size, size_t value_size) { return map; } + +static void _upb_mapsorter_getkeys(const void *_a, const void *_b, void *a_key, + void *b_key, size_t size) { + const upb_tabent *const*a = _a; + const upb_tabent *const*b = _b; + upb_strview a_tabkey = upb_tabstrview((*a)->key); + upb_strview b_tabkey = upb_tabstrview((*b)->key); + _upb_map_fromkey(a_tabkey, a_key, size); + _upb_map_fromkey(b_tabkey, b_key, size); +} + +static int _upb_mapsorter_cmpi64(const void *_a, const void *_b) { + int64_t a, b; + _upb_mapsorter_getkeys(_a, _b, &a, &b, 8); + return a - b; +} + +static int _upb_mapsorter_cmpu64(const void *_a, const void *_b) { + uint64_t a, b; + _upb_mapsorter_getkeys(_a, _b, &a, &b, 8); + return a - b; +} + +static int _upb_mapsorter_cmpi32(const void *_a, const void *_b) { + int32_t a, b; + _upb_mapsorter_getkeys(_a, _b, &a, &b, 4); + return a - b; +} + +static int _upb_mapsorter_cmpu32(const void *_a, const void *_b) { + uint32_t a, b; + _upb_mapsorter_getkeys(_a, _b, &a, &b, 4); + return a - b; +} + +static int _upb_mapsorter_cmpbool(const void *_a, const void *_b) { + bool a, b; + _upb_mapsorter_getkeys(_a, _b, &a, &b, 1); + return a - b; +} + +static int _upb_mapsorter_cmpstr(const void *_a, const void *_b) { + upb_strview a, b; + _upb_mapsorter_getkeys(_a, _b, &a, &b, UPB_MAPTYPE_STRING); + size_t common_size = UPB_MIN(a.size, b.size); + int cmp = memcmp(a.data, b.data, common_size); + if (cmp) return cmp; + return a.size - b.size; +} + +bool _upb_mapsorter_pushmap(_upb_mapsorter *s, upb_descriptortype_t key_type, + const upb_map *map, _upb_sortedmap *sorted) { + int map_size = _upb_map_size(map); + sorted->start = s->size; + sorted->pos = sorted->start; + sorted->end = sorted->start + map_size; + + /* Grow s->entries if necessary. */ + if (sorted->end > s->cap) { + s->cap = _upb_lg2ceilsize(sorted->end); + s->entries = realloc(s->entries, s->cap * sizeof(*s->entries)); + if (!s->entries) return false; + } + + s->size = sorted->end; + + /* Copy non-empty entries from the table to s->entries. */ + upb_tabent const**dst = &s->entries[sorted->start]; + const upb_tabent *src = map->table.t.entries; + const upb_tabent *end = src + upb_table_size(&map->table.t); + for (; src < end; src++) { + if (!upb_tabent_isempty(src)) { + *dst = src; + dst++; + } + } + UPB_ASSERT(dst == &s->entries[sorted->end]); + + /* Sort entries according to the key type. */ + + int (*compar)(const void *, const void *); + + switch (key_type) { + case UPB_DESCRIPTOR_TYPE_INT64: + case UPB_DESCRIPTOR_TYPE_SFIXED64: + case UPB_DESCRIPTOR_TYPE_SINT64: + compar = _upb_mapsorter_cmpi64; + break; + case UPB_DESCRIPTOR_TYPE_UINT64: + case UPB_DESCRIPTOR_TYPE_FIXED64: + compar = _upb_mapsorter_cmpu64; + break; + case UPB_DESCRIPTOR_TYPE_INT32: + case UPB_DESCRIPTOR_TYPE_SINT32: + case UPB_DESCRIPTOR_TYPE_SFIXED32: + case UPB_DESCRIPTOR_TYPE_ENUM: + compar = _upb_mapsorter_cmpi32; + break; + case UPB_DESCRIPTOR_TYPE_UINT32: + case UPB_DESCRIPTOR_TYPE_FIXED32: + compar = _upb_mapsorter_cmpu32; + break; + case UPB_DESCRIPTOR_TYPE_BOOL: + compar = _upb_mapsorter_cmpbool; + break; + case UPB_DESCRIPTOR_TYPE_STRING: + compar = _upb_mapsorter_cmpstr; + break; + default: + UPB_UNREACHABLE(); + } + + qsort(&s->entries[sorted->start], map_size, sizeof(*s->entries), compar); + return true; +} diff --git a/upb/msg.h b/upb/msg.h index 36c57b32cd..9b4557ac18 100644 --- a/upb/msg.h +++ b/upb/msg.h @@ -9,11 +9,13 @@ #define UPB_MSG_H_ #include +#include #include #include "upb/table.int.h" #include "upb/upb.h" +/* Must be last. */ #include "upb/port_def.inc" #ifdef __cplusplus @@ -553,6 +555,53 @@ UPB_INLINE void _upb_msg_map_set_value(void* msg, const void* val, size_t size) } } +/** _upb_mapsorter *************************************************************/ + +/* _upb_mapsorter sorts maps and provides ordered iteration over the entries. + * Since maps can be recursive (map values can be messages which contain other maps). + * _upb_mapsorter can contain a stack of maps. */ + +typedef struct { + upb_tabent const**entries; + int size; + int cap; +} _upb_mapsorter; + +typedef struct { + int start; + int pos; + int end; +} _upb_sortedmap; + +UPB_INLINE void _upb_mapsorter_init(_upb_mapsorter *s) { + s->entries = NULL; + s->size = 0; + s->cap = 0; +} + +UPB_INLINE void _upb_mapsorter_destroy(_upb_mapsorter *s) { + if (s->entries) free(s->entries); +} + +bool _upb_mapsorter_pushmap(_upb_mapsorter *s, upb_descriptortype_t key_type, + const upb_map *map, _upb_sortedmap *sorted); + +UPB_INLINE void _upb_mapsorter_popmap(_upb_mapsorter *s, _upb_sortedmap *sorted) { + s->size = sorted->start; +} + +UPB_INLINE bool _upb_sortedmap_next(_upb_mapsorter *s, const upb_map *map, + _upb_sortedmap *sorted, + upb_map_entry *ent) { + if (sorted->pos == sorted->end) return false; + const upb_tabent *tabent = s->entries[sorted->pos++]; + upb_strview key = upb_tabstrview(tabent->key); + _upb_map_fromkey(key, &ent->k, map->key_size); + upb_value val = {tabent->val.val}; + _upb_map_fromvalue(val, &ent->v, map->val_size); + return true; +} + #undef PTR_AT #ifdef __cplusplus diff --git a/upb/table.int.h b/upb/table.int.h index ab67cc34d6..49caac463e 100644 --- a/upb/table.int.h +++ b/upb/table.int.h @@ -147,10 +147,17 @@ UPB_INLINE char *upb_tabstr(upb_tabkey key, uint32_t *len) { return mem + sizeof(*len); } +UPB_INLINE upb_strview upb_tabstrview(upb_tabkey key) { + upb_strview ret; + uint32_t len; + ret.data = upb_tabstr(key, &len); + ret.size = len; + return ret; +} /* upb_tabval *****************************************************************/ -typedef struct { +typedef struct upb_tabval { uint64_t val; } upb_tabval; diff --git a/upb/text_encode.c b/upb/text_encode.c index 3e9630d8cd..028cc29e09 100644 --- a/upb/text_encode.c +++ b/upb/text_encode.c @@ -17,6 +17,7 @@ typedef struct { int indent_depth; int options; const upb_symtab *ext_pool; + _upb_mapsorter sorter; } txtenc; static void txtenc_msg(txtenc *e, const upb_msg *msg, const upb_msgdef *m); @@ -187,6 +188,25 @@ static void txtenc_array(txtenc *e, const upb_array *arr, } } +static void txtenc_mapentry(txtenc *e, upb_msgval key, upb_msgval val, + const upb_fielddef *f) { + const upb_msgdef *entry = upb_fielddef_msgsubdef(f); + const upb_fielddef *key_f = upb_msgdef_field(entry, 0); + const upb_fielddef *val_f = upb_msgdef_field(entry, 1); + txtenc_indent(e); + txtenc_printf(e, "%s: {", upb_fielddef_name(f)); + txtenc_endfield(e); + e->indent_depth++; + + txtenc_field(e, key, key_f); + txtenc_field(e, val, val_f); + + e->indent_depth--; + txtenc_indent(e); + txtenc_putstr(e, "}"); + txtenc_endfield(e); +} + /* * Maps print as messages of key/value, etc. * @@ -200,27 +220,28 @@ static void txtenc_array(txtenc *e, const upb_array *arr, * } */ static void txtenc_map(txtenc *e, const upb_map *map, const upb_fielddef *f) { - const upb_msgdef *entry = upb_fielddef_msgsubdef(f); - const upb_fielddef *key_f = upb_msgdef_itof(entry, 1); - const upb_fielddef *val_f = upb_msgdef_itof(entry, 2); - size_t iter = UPB_MAP_BEGIN; - - while (upb_mapiter_next(map, &iter)) { - upb_msgval key = upb_mapiter_key(map, iter); - upb_msgval val = upb_mapiter_value(map, iter); - - txtenc_indent(e); - txtenc_printf(e, "%s: {", upb_fielddef_name(f)); - txtenc_endfield(e); - e->indent_depth++; - - txtenc_field(e, key, key_f); - txtenc_field(e, val, val_f); - - e->indent_depth--; - txtenc_indent(e); - txtenc_putstr(e, "}"); - txtenc_endfield(e); + if (e->options & UPB_TXTENC_NOSORT) { + size_t iter = UPB_MAP_BEGIN; + while (upb_mapiter_next(map, &iter)) { + upb_msgval key = upb_mapiter_key(map, iter); + upb_msgval val = upb_mapiter_value(map, iter); + txtenc_mapentry(e, key, val, f); + } + } else { + const upb_msgdef *entry = upb_fielddef_msgsubdef(f); + const upb_fielddef *key_f = upb_msgdef_field(entry, 0); + _upb_sortedmap sorted; + upb_map_entry ent; + + _upb_mapsorter_pushmap(&e->sorter, upb_fielddef_descriptortype(key_f), map, + &sorted); + while (_upb_sortedmap_next(&e->sorter, map, &sorted, &ent)) { + upb_msgval key, val; + memcpy(&key, &ent.k, sizeof(key)); + memcpy(&val, &ent.v, sizeof(val)); + txtenc_mapentry(e, key, val, f); + } + _upb_mapsorter_popmap(&e->sorter, &sorted); } } @@ -392,7 +413,9 @@ size_t upb_text_encode(const upb_msg *msg, const upb_msgdef *m, e.indent_depth = 0; e.options = options; e.ext_pool = ext_pool; + _upb_mapsorter_init(&e.sorter); txtenc_msg(&e, msg, m); + _upb_mapsorter_destroy(&e.sorter); return txtenc_nullz(&e, size); } diff --git a/upb/text_encode.h b/upb/text_encode.h index 1a0003cb31..4ad3d1c4d7 100644 --- a/upb/text_encode.h +++ b/upb/text_encode.h @@ -13,7 +13,10 @@ enum { UPB_TXTENC_SINGLELINE = 1, /* When set, unknown fields are not printed. */ - UPB_TXTENC_SKIPUNKNOWN = 2 + UPB_TXTENC_SKIPUNKNOWN = 2, + + /* When set, maps are *not* sorted (this avoids allocating tmp mem). */ + UPB_TXTENC_NOSORT = 4 }; /* Encodes the given |msg| to text format. The message's reflection is given in diff --git a/upb/upb.h b/upb/upb.h index 7e7309dc45..11c0e4dc99 100644 --- a/upb/upb.h +++ b/upb/upb.h @@ -324,6 +324,10 @@ UPB_INLINE int _upb_lg2ceil(int x) { #endif } +UPB_INLINE int _upb_lg2ceilsize(int x) { + return 1 << _upb_lg2ceil(x); +} + #include "upb/port_undef.inc" #ifdef __cplusplus