Added map sorting to binary and text encoders.

For the binary encoder, sorting is off by default.
For the text encoder, sorting is on by default.
Both defaults can be explicitly overridden.

This grows code size a bit. I think we could potentially
shave this (and other map-related code size) by having
the generated code inject a function pointer to the map-related
parsing/serialization code if maps are present.

    FILE SIZE        VM SIZE
 --------------  --------------
   +86% +1.07Ki   +71%    +768    upb/msg.c
    [NEW]    +391  [NEW]    +344    _upb_mapsorter_pushmap
    [NEW]    +158  [NEW]    +112    _upb_mapsorter_cmpstr
    [NEW]    +111  [NEW]     +64    _upb_mapsorter_cmpbool
    [NEW]    +110  [NEW]     +64    _upb_mapsorter_cmpi32
    [NEW]    +110  [NEW]     +64    _upb_mapsorter_cmpi64
    [NEW]    +110  [NEW]     +64    _upb_mapsorter_cmpu32
    [NEW]    +110  [NEW]     +64    _upb_mapsorter_cmpu64
    -3.6%      -8  -4.3%      -8    _upb_map_new
  +9.5%    +464  +9.2%    +424    upb/text_encode.c
    [NEW]    +656  [NEW]    +616    txtenc_mapentry
     +15%     +32   +20%     +32    upb_text_encode
   -20.1%    -224 -20.7%    -224    txtenc_msg
  +5.7%    +342  +5.3%    +296    upb/encode.c
    [NEW]    +344  [NEW]    +304    encode_mapentry
    [NEW]    +246  [NEW]    +208    upb_encode_ex
    [NEW]     +41  [NEW]     +16    upb_encode_ex.ch
    +0.7%      +8  +0.7%      +8    encode_scalar
    -1.0%     -32  -1.0%     -32    encode_message
    [DEL]     -38  [DEL]     -16    upb_encode.ch
    [DEL]    -227  [DEL]    -192    upb_encode
  +2.0%    +152  +2.2%    +152    upb/decode.c
     +44%    +128   +44%    +128    [section .rodata]
    +3.4%     +24  +3.4%     +24    _GLOBAL_OFFSET_TABLE_
  +0.6%    +107  +0.3%     +48    upb/def.c
    [NEW]    +100  [NEW]     +48    upb_fielddef_descriptortype
    +7.1%      +7  [ = ]       0    upb_fielddef_defaultint32
  +2.9%     +24  +2.9%     +24    [section .dynsym]
  +1.2%     +24  [ = ]       0    [section .symtab]
  +3.2%     +16  +3.2%     +16    [section .plt]
    [NEW]     +16  [NEW]     +16    memcmp@plt
  +0.5%     +16  +0.6%     +16    tests/conformance_upb.c
    +1.5%     +16  +1.6%     +16    DoTestIo
  +0.1%     +16  +0.1%     +16    upb/json_decode.c
    +0.4%     +16  +0.4%     +16    jsondec_wellknown
  +3.0%      +8  +3.0%      +8    [section .got.plt]
    +3.0%      +8  +3.0%      +8    _GLOBAL_OFFSET_TABLE_
  +1.6%      +7  +1.6%      +7    [section .dynstr]
  +1.8%      +4  +1.8%      +4    [section .hash]
  +0.5%      +3  +0.5%      +3    [LOAD #2 [RX]]
  +2.8%      +2  +2.8%      +2    [section .gnu.version]
 -60.0% -1.74Ki  [ = ]       0    [Unmapped]
  +0.3%    +496  +1.4% +1.74Ki    TOTAL
pull/13171/head
Joshua Haberman 4 years ago
parent 3e071ea91a
commit a04627abc8
  1. 2
      benchmarks/compare.py
  2. 63
      tests/bindings/lua/test_upb.lua
  3. 32
      upb/bindings/lua/def.c
  4. 109
      upb/bindings/lua/msg.c
  5. 35
      upb/bindings/lua/upb.c
  6. 2
      upb/decode.h
  7. 91
      upb/encode.c
  8. 24
      upb/encode.h
  9. 115
      upb/msg.c
  10. 49
      upb/msg.h
  11. 9
      upb/table.int.h
  12. 65
      upb/text_encode.c
  13. 5
      upb/text_encode.h
  14. 4
      upb/upb.h

@ -58,7 +58,7 @@ def Benchmark(outbase, bench_cpu=True, runs=12, fasttable=False):
baseline = "master"
bench_cpu = True
bench_cpu = False
fasttable = False
if len(sys.argv) > 1:

@ -91,6 +91,69 @@ function test_msg_map()
assert_equal(12, msg2.map_int32_int32[6])
end
function test_map_sorting()
function msg_with_int32_entries(start, expand)
local msg = test_messages_proto3.TestAllTypesProto3()
for i=start,start + 8 do
msg.map_int32_int32[i] = i * 2
end
if expand then
for i=start+20,200 do
msg.map_int32_int32[i] = i
end
for i=start+20,200 do
msg.map_int32_int32[i] = nil
end
end
return msg
end
function msg_with_msg_entries(expand)
local msg = test_messages_proto3.TestAllTypesProto3()
-- 8! = 40320 possible orderings makes it overwhelmingly likely that two
-- random orderings will be different.
for i=1,8 do
local submsg = test_messages_proto3.TestAllTypesProto3.NestedMessage()
submsg.corecursive = msg_with_int32_entries(i, expand)
msg.map_string_nested_message[tostring(i)] = submsg
end
expand = false
if expand then
for i=21,2000 do
local submsg = test_messages_proto3.TestAllTypesProto3.NestedMessage()
submsg.corecursive = msg_with_int32_entries(i, expand)
msg.map_string_nested_message[tostring(i)] = submsg
end
for i=21,2000 do
msg.map_string_nested_message[tostring(i)] = nil
end
end
return msg
end
-- Create two messages with the same contents but (hopefully) different
-- map table orderings.
local msg = msg_with_msg_entries(false)
local msg2 = msg_with_msg_entries(true)
local text1 = upb.text_encode(msg)
local text2 = upb.text_encode(msg2)
assert_equal(text1, text2)
local binary1 = upb.encode(msg, {upb.ENCODE_DETERMINISTIC})
local binary2 = upb.encode(msg2, {upb.ENCODE_DETERMINISTIC})
assert_equal(binary1, binary2)
-- Non-sorted map should compare different.
local text3 = upb.text_encode(msg, {upb.TXTENC_NOSORT})
assert_not_equal(text1, text3)
local binary3 = upb.encode(msg)
assert_not_equal(binary1, binary3)
end
function test_utf8()
local proto2_msg = test_messages_proto2.TestAllTypesProto2()
proto2_msg.optional_string = "\xff"

@ -53,12 +53,12 @@ static void lupb_wrapper_pushwrapper(lua_State *L, int narg, const void *def,
/* lupb_msgdef_pushsubmsgdef()
*
* Pops the msgdef wrapper at the top of the stack and replaces it with a msgdef
* wrapper for field |f| of this msgdef.
* wrapper for field |f| of this msgdef (submsg may not be direct, for example it
* may be the submessage of the map value).
*/
void lupb_msgdef_pushsubmsgdef(lua_State *L, const upb_fielddef *f) {
const upb_msgdef *m = upb_fielddef_msgsubdef(f);
assert(m);
assert(upb_fielddef_containingtype(f) == lupb_msgdef_check(L, -1));
lupb_wrapper_pushwrapper(L, -1, m, LUPB_MSGDEF);
lua_replace(L, -2); /* Replace msgdef with submsgdef. */
}
@ -337,6 +337,26 @@ static int lupb_msgdef_oneofcount(lua_State *L) {
return 1;
}
static bool lupb_msgdef_pushnested(lua_State *L, int msgdef, int name) {
const upb_msgdef *m = lupb_msgdef_check(L, msgdef);
lupb_wrapper_pushsymtab(L, msgdef);
upb_symtab *symtab = lupb_symtab_check(L, -1);
lua_pop(L, 1);
/* Construct full package.Message.SubMessage name. */
lua_pushstring(L, upb_msgdef_fullname(m));
lua_pushstring(L, ".");
lua_pushvalue(L, name);
lua_concat(L, 3);
const char *nested_name = lua_tostring(L, -1);
/* Try lookup. */
const upb_msgdef *nested = upb_symtab_lookupmsg(symtab, nested_name);
if (!nested) return false;
lupb_wrapper_pushwrapper(L, msgdef, nested, LUPB_MSGDEF);
return true;
}
/* lupb_msgdef_field()
*
* Handles:
@ -430,6 +450,13 @@ static int lupb_msgdef_fullname(lua_State *L) {
return 1;
}
static int lupb_msgdef_index(lua_State *L) {
if (!lupb_msgdef_pushnested(L, 1, 2)) {
luaL_error(L, "No such nested message");
}
return 1;
}
static int lupb_msgoneofiter_next(lua_State *L) {
const upb_msgdef *m = lupb_msgdef_check(L, lua_upvalueindex(1));
int *index = lua_touserdata(L, lua_upvalueindex(2));
@ -471,6 +498,7 @@ static int lupb_msgdef_tostring(lua_State *L) {
static const struct luaL_Reg lupb_msgdef_mm[] = {
{"__call", lupb_msg_pushnew},
{"__index", lupb_msgdef_index},
{"__len", lupb_msgdef_fieldcount},
{"__tostring", lupb_msgdef_tostring},
{NULL, NULL}

@ -187,6 +187,13 @@ static void lupb_arena_fuse(lua_State *L, int to, int from) {
upb_arena_fuse(to_arena, from_arena);
}
static void lupb_arena_fuseobjs(lua_State *L, int to, int from) {
lua_getiuservalue(L, to, LUPB_ARENA_INDEX);
lua_getiuservalue(L, from, LUPB_ARENA_INDEX);
lupb_arena_fuse(L, lua_absindex(L, -2), lua_absindex(L, -1));
lua_pop(L, 2);
}
static int lupb_arena_gc(lua_State *L) {
upb_arena *a = lupb_arena_check(L, 1);
upb_arena_free(a);
@ -398,6 +405,10 @@ static int lupb_array_newindex(lua_State *L) {
upb_array_set(larray->arr, n, msgval);
}
if (larray->type == UPB_TYPE_MESSAGE) {
lupb_arena_fuseobjs(L, 1, 3);
}
return 0; /* 1 for chained assignments? */
}
@ -535,6 +546,9 @@ static int lupb_map_newindex(lua_State *L) {
} else {
upb_msgval val = lupb_tomsgval(L, lmap->value_type, 3, 1, LUPB_COPY);
upb_map_set(map, key, val, lupb_arenaget(L, 1));
if (lmap->value_type == UPB_TYPE_MESSAGE) {
lupb_arena_fuseobjs(L, 1, 3);
}
}
return 0;
@ -600,21 +614,26 @@ static upb_msg *lupb_msg_check(lua_State *L, int narg) {
return msg->msg;
}
static const upb_fielddef *lupb_msg_checkfield(lua_State *L, int msg,
int field) {
static const upb_msgdef *lupb_msg_getmsgdef(lua_State *L, int msg) {
lua_getiuservalue(L, msg, LUPB_MSGDEF_INDEX);
const upb_msgdef *m = lupb_msgdef_check(L, -1);
lua_pop(L, 1);
return m;
}
static const upb_fielddef *lupb_msg_tofield(lua_State *L, int msg, int field) {
size_t len;
const char *fieldname = luaL_checklstring(L, field, &len);
const upb_msgdef *m;
const upb_fielddef *f;
const upb_msgdef *m = lupb_msg_getmsgdef(L, msg);
return upb_msgdef_ntof(m, fieldname, len);
}
lua_getiuservalue(L, msg, LUPB_MSGDEF_INDEX);
m = lupb_msgdef_check(L, -1);
f = upb_msgdef_ntof(m, fieldname, len);
static const upb_fielddef *lupb_msg_checkfield(lua_State *L, int msg,
int field) {
const upb_fielddef *f = lupb_msg_tofield(L, msg, field);
if (f == NULL) {
luaL_error(L, "no such field '%s'", fieldname);
luaL_error(L, "no such field '%s'", lua_tostring(L, field));
}
lua_pop(L, 1);
return f;
}
@ -813,10 +832,7 @@ static int lupb_msg_newindex(lua_State *L) {
}
if (merge_arenas) {
lua_getiuservalue(L, 1, LUPB_ARENA_INDEX);
lua_getiuservalue(L, 3, LUPB_ARENA_INDEX);
lupb_arena_fuse(L, lua_absindex(L, -2), lua_absindex(L, -1));
lua_pop(L, 2);
lupb_arena_fuseobjs(L, 1, 3);
}
upb_msg_set(msg, f, msgval, lupb_arenaget(L, 1));
@ -907,6 +923,46 @@ static int lupb_decode(lua_State *L) {
return 1;
}
/**
* lupb_msg_textencode()
*
* Handles:
* text_string = upb.text_encode(msg, {upb.TXTENC_SINGLELINE})
*/
static int lupb_textencode(lua_State *L) {
int argcount = lua_gettop(L);
upb_msg *msg = lupb_msg_check(L, 1);
const upb_msgdef *m;
char buf[1024];
size_t size;
int options = 0;
lua_getiuservalue(L, 1, LUPB_MSGDEF_INDEX);
m = lupb_msgdef_check(L, -1);
if (argcount > 1) {
size_t len = lua_rawlen(L, 2);
for (size_t i = 1; i <= len; i++) {
lua_rawgeti(L, 2, i);
options |= lupb_checkuint32(L, -1);
lua_pop(L, 1);
}
}
size = upb_text_encode(msg, m, NULL, options, buf, sizeof(buf));
if (size < sizeof(buf)) {
lua_pushlstring(L, buf, size);
} else {
char *ptr = malloc(size + 1);
upb_text_encode(msg, m, NULL, options, ptr, size + 1);
lua_pushlstring(L, ptr, size);
free(ptr);
}
return 1;
}
/**
* lupb_encode()
*
@ -914,17 +970,28 @@ static int lupb_decode(lua_State *L) {
* bin_string = upb.encode(msg)
*/
static int lupb_encode(lua_State *L) {
int argcount = lua_gettop(L);
const upb_msg *msg = lupb_msg_check(L, 1);
const upb_msglayout *layout;
upb_arena *arena = lupb_arena_pushnew(L);
size_t size;
char *result;
int options = 0;
if (argcount > 1) {
size_t len = lua_rawlen(L, 2);
for (size_t i = 1; i <= len; i++) {
lua_rawgeti(L, 2, i);
options |= lupb_checkuint32(L, -1);
lua_pop(L, 1);
}
}
lua_getiuservalue(L, 1, LUPB_MSGDEF_INDEX);
layout = upb_msgdef_layout(lupb_msgdef_check(L, -1));
lua_pop(L, 1);
result = upb_encode(msg, (const void*)layout, arena, &size);
result = upb_encode_ex(msg, (const void*)layout, options, arena, &size);
if (!result) {
lua_pushstring(L, "Error encoding protobuf.");
@ -936,11 +1003,17 @@ static int lupb_encode(lua_State *L) {
return 1;
}
static void lupb_setfieldi(lua_State *L, const char *field, int i) {
lua_pushinteger(L, i);
lua_setfield(L, -2, field);
}
static const struct luaL_Reg lupb_msg_toplevel_m[] = {
{"Array", lupb_array_new},
{"Map", lupb_map_new},
{"decode", lupb_decode},
{"encode", lupb_encode},
{"text_encode", lupb_textencode},
{NULL, NULL}
};
@ -952,5 +1025,11 @@ void lupb_msg_registertypes(lua_State *L) {
lupb_register_type(L, LUPB_MAP, NULL, lupb_map_mm);
lupb_register_type(L, LUPB_MSG, NULL, lupb_msg_mm);
lupb_setfieldi(L, "TXTENC_SINGLELINE", UPB_TXTENC_SINGLELINE);
lupb_setfieldi(L, "TXTENC_SKIPUNKNOWN", UPB_TXTENC_SKIPUNKNOWN);
lupb_setfieldi(L, "TXTENC_NOSORT", UPB_TXTENC_NOSORT);
lupb_setfieldi(L, "ENCODE_DETERMINISTIC", UPB_ENCODE_DETERMINISTIC);
lupb_cacheinit(L);
}

@ -84,6 +84,24 @@ int lua_getiuservalue(lua_State *L, int index, int n) {
}
#endif
/* We use this function as the __index metamethod when a type has both methods
* and an __index metamethod. */
int lupb_indexmm(lua_State *L) {
/* Look up in __index table (which is a closure param). */
lua_pushvalue(L, 2);
lua_rawget(L, lua_upvalueindex(1));
if (!lua_isnil(L, -1)) {
return 1;
}
/* Not found, chain to user __index metamethod. */
lua_pushvalue(L, lua_upvalueindex(2));
lua_pushvalue(L, 1);
lua_pushvalue(L, 2);
lua_call(L, 2, 1);
return 1;
}
void lupb_register_type(lua_State *L, const char *name, const luaL_Reg *m,
const luaL_Reg *mm) {
luaL_newmetatable(L, name);
@ -93,14 +111,17 @@ void lupb_register_type(lua_State *L, const char *name, const luaL_Reg *m,
}
if (m) {
/* Methods go in the mt's __index method. This implies that you can'
* implement __index and also have methods. */
lua_getfield(L, -1, "__index");
lupb_assert(L, lua_isnil(L, -1));
lua_pop(L, 1);
lua_createtable(L, 0, 0);
lua_createtable(L, 0, 0); /* __index table */
lupb_setfuncs(L, m);
/* Methods go in the mt's __index slot. If the user also specified an
* __index metamethod, use our custom lupb_indexmm() that can check both. */
lua_getfield(L, -2, "__index");
if (lua_isnil(L, -1)) {
lua_pop(L, 1);
} else {
lua_pushcclosure(L, &lupb_indexmm, 2);
}
lua_setfield(L, -2, "__index");
}

@ -15,6 +15,8 @@ extern "C" {
#endif
enum {
/* If set, strings will alias the input buffer instead of copying into the
* arena. */
UPB_DECODE_ALIAS = 1,
};

@ -32,6 +32,8 @@ typedef struct {
jmp_buf err;
upb_alloc *alloc;
char *buf, *ptr, *limit;
int options;
_upb_mapsorter sorter;
} upb_encstate;
static size_t upb_roundup_pow2(size_t bytes) {
@ -341,31 +343,48 @@ static void encode_array(upb_encstate *e, const char *field_mem,
}
}
static void encode_mapentry(upb_encstate *e, uint32_t number,
const upb_msglayout *layout,
const upb_map_entry *ent) {
const upb_msglayout_field *key_field = &layout->fields[0];
const upb_msglayout_field *val_field = &layout->fields[1];
size_t pre_len = e->limit - e->ptr;
size_t size;
encode_scalar(e, &ent->v, layout, val_field, false);
encode_scalar(e, &ent->k, layout, key_field, false);
size = (e->limit - e->ptr) - pre_len;
encode_varint(e, size);
encode_tag(e, number, UPB_WIRE_TYPE_DELIMITED);
}
static void encode_map(upb_encstate *e, const char *field_mem,
const upb_msglayout *m, const upb_msglayout_field *f) {
const upb_map *map = *(const upb_map**)field_mem;
const upb_msglayout *entry = m->submsgs[f->submsg_index];
const upb_msglayout_field *key_field = &entry->fields[0];
const upb_msglayout_field *val_field = &entry->fields[1];
upb_strtable_iter i;
if (map == NULL) {
return;
}
const upb_msglayout *layout = m->submsgs[f->submsg_index];
UPB_ASSERT(layout->field_count == 2);
upb_strtable_begin(&i, &map->table);
for(; !upb_strtable_done(&i); upb_strtable_next(&i)) {
size_t pre_len = e->limit - e->ptr;
size_t size;
upb_strview key = upb_strtable_iter_key(&i);
const upb_value val = upb_strtable_iter_value(&i);
if (map == NULL) return;
if (e->options & UPB_ENCODE_DETERMINISTIC) {
_upb_sortedmap sorted;
_upb_mapsorter_pushmap(&e->sorter, layout->fields[0].descriptortype, map,
&sorted);
upb_map_entry ent;
_upb_map_fromkey(key, &ent.k, map->key_size);
_upb_map_fromvalue(val, &ent.v, map->val_size);
encode_scalar(e, &ent.v, entry, val_field, false);
encode_scalar(e, &ent.k, entry, key_field, false);
size = (e->limit - e->ptr) - pre_len;
encode_varint(e, size);
encode_tag(e, f->number, UPB_WIRE_TYPE_DELIMITED);
while (_upb_sortedmap_next(&e->sorter, map, &sorted, &ent)) {
encode_mapentry(e, f->number, layout, &ent);
}
_upb_mapsorter_popmap(&e->sorter, &sorted);
} else {
upb_strtable_iter i;
upb_strtable_begin(&i, &map->table);
for(; !upb_strtable_done(&i); upb_strtable_next(&i)) {
upb_strview key = upb_strtable_iter_key(&i);
const upb_value val = upb_strtable_iter_value(&i);
upb_map_entry ent;
_upb_map_fromkey(key, &ent.k, map->key_size);
_upb_map_fromvalue(val, &ent.v, map->val_size);
encode_mapentry(e, f->number, layout, &ent);
}
}
}
@ -414,28 +433,32 @@ static void encode_message(upb_encstate *e, const char *msg,
*size = (e->limit - e->ptr) - pre_len;
}
char *upb_encode(const void *msg, const upb_msglayout *m, upb_arena *arena,
size_t *size) {
char *upb_encode_ex(const void *msg, const upb_msglayout *m, int options,
upb_arena *arena, size_t *size) {
upb_encstate e;
e.alloc = upb_arena_alloc(arena);
e.buf = NULL;
e.limit = NULL;
e.ptr = NULL;
e.options = options;
_upb_mapsorter_init(&e.sorter);
char *ret = NULL;
if (UPB_SETJMP(e.err)) {
*size = 0;
return NULL;
}
encode_message(&e, msg, m, size);
*size = e.limit - e.ptr;
if (*size == 0) {
static char ch;
return &ch;
ret = NULL;
} else {
UPB_ASSERT(e.ptr);
return e.ptr;
encode_message(&e, msg, m, size);
*size = e.limit - e.ptr;
if (*size == 0) {
static char ch;
ret = &ch;
} else {
UPB_ASSERT(e.ptr);
ret = e.ptr;
}
}
_upb_mapsorter_destroy(&e.sorter);
return ret;
}

@ -7,12 +7,32 @@
#include "upb/msg.h"
/* Must be last. */
#include "upb/port_def.inc"
#ifdef __cplusplus
extern "C" {
#endif
char *upb_encode(const void *msg, const upb_msglayout *l, upb_arena *arena,
size_t *size);
enum {
/* If set, the results of serializing will be deterministic across all
* instances of this binary. There are no guarantees across different
* binary builds.
*
* If your proto contains maps, the encoder will need to malloc()/free()
* memory during encode. */
UPB_ENCODE_DETERMINISTIC = 1,
};
char *upb_encode_ex(const void *msg, const upb_msglayout *l, int options,
upb_arena *arena, size_t *size);
UPB_INLINE char *upb_encode(const void *msg, const upb_msglayout *l,
upb_arena *arena, size_t *size) {
return upb_encode_ex(msg, l, 0, arena, size);
}
#include "upb/port_undef.inc"
#ifdef __cplusplus
} /* extern "C" */

@ -139,3 +139,118 @@ upb_map *_upb_map_new(upb_arena *a, size_t key_size, size_t value_size) {
return map;
}
static void _upb_mapsorter_getkeys(const void *_a, const void *_b, void *a_key,
void *b_key, size_t size) {
const upb_tabent *const*a = _a;
const upb_tabent *const*b = _b;
upb_strview a_tabkey = upb_tabstrview((*a)->key);
upb_strview b_tabkey = upb_tabstrview((*b)->key);
_upb_map_fromkey(a_tabkey, a_key, size);
_upb_map_fromkey(b_tabkey, b_key, size);
}
static int _upb_mapsorter_cmpi64(const void *_a, const void *_b) {
int64_t a, b;
_upb_mapsorter_getkeys(_a, _b, &a, &b, 8);
return a - b;
}
static int _upb_mapsorter_cmpu64(const void *_a, const void *_b) {
uint64_t a, b;
_upb_mapsorter_getkeys(_a, _b, &a, &b, 8);
return a - b;
}
static int _upb_mapsorter_cmpi32(const void *_a, const void *_b) {
int32_t a, b;
_upb_mapsorter_getkeys(_a, _b, &a, &b, 4);
return a - b;
}
static int _upb_mapsorter_cmpu32(const void *_a, const void *_b) {
uint32_t a, b;
_upb_mapsorter_getkeys(_a, _b, &a, &b, 4);
return a - b;
}
static int _upb_mapsorter_cmpbool(const void *_a, const void *_b) {
bool a, b;
_upb_mapsorter_getkeys(_a, _b, &a, &b, 1);
return a - b;
}
static int _upb_mapsorter_cmpstr(const void *_a, const void *_b) {
upb_strview a, b;
_upb_mapsorter_getkeys(_a, _b, &a, &b, UPB_MAPTYPE_STRING);
size_t common_size = UPB_MIN(a.size, b.size);
int cmp = memcmp(a.data, b.data, common_size);
if (cmp) return cmp;
return a.size - b.size;
}
bool _upb_mapsorter_pushmap(_upb_mapsorter *s, upb_descriptortype_t key_type,
const upb_map *map, _upb_sortedmap *sorted) {
int map_size = _upb_map_size(map);
sorted->start = s->size;
sorted->pos = sorted->start;
sorted->end = sorted->start + map_size;
/* Grow s->entries if necessary. */
if (sorted->end > s->cap) {
s->cap = _upb_lg2ceilsize(sorted->end);
s->entries = realloc(s->entries, s->cap * sizeof(*s->entries));
if (!s->entries) return false;
}
s->size = sorted->end;
/* Copy non-empty entries from the table to s->entries. */
upb_tabent const**dst = &s->entries[sorted->start];
const upb_tabent *src = map->table.t.entries;
const upb_tabent *end = src + upb_table_size(&map->table.t);
for (; src < end; src++) {
if (!upb_tabent_isempty(src)) {
*dst = src;
dst++;
}
}
UPB_ASSERT(dst == &s->entries[sorted->end]);
/* Sort entries according to the key type. */
int (*compar)(const void *, const void *);
switch (key_type) {
case UPB_DESCRIPTOR_TYPE_INT64:
case UPB_DESCRIPTOR_TYPE_SFIXED64:
case UPB_DESCRIPTOR_TYPE_SINT64:
compar = _upb_mapsorter_cmpi64;
break;
case UPB_DESCRIPTOR_TYPE_UINT64:
case UPB_DESCRIPTOR_TYPE_FIXED64:
compar = _upb_mapsorter_cmpu64;
break;
case UPB_DESCRIPTOR_TYPE_INT32:
case UPB_DESCRIPTOR_TYPE_SINT32:
case UPB_DESCRIPTOR_TYPE_SFIXED32:
case UPB_DESCRIPTOR_TYPE_ENUM:
compar = _upb_mapsorter_cmpi32;
break;
case UPB_DESCRIPTOR_TYPE_UINT32:
case UPB_DESCRIPTOR_TYPE_FIXED32:
compar = _upb_mapsorter_cmpu32;
break;
case UPB_DESCRIPTOR_TYPE_BOOL:
compar = _upb_mapsorter_cmpbool;
break;
case UPB_DESCRIPTOR_TYPE_STRING:
compar = _upb_mapsorter_cmpstr;
break;
default:
UPB_UNREACHABLE();
}
qsort(&s->entries[sorted->start], map_size, sizeof(*s->entries), compar);
return true;
}

@ -9,11 +9,13 @@
#define UPB_MSG_H_
#include <stdint.h>
#include <stdlib.h>
#include <string.h>
#include "upb/table.int.h"
#include "upb/upb.h"
/* Must be last. */
#include "upb/port_def.inc"
#ifdef __cplusplus
@ -553,6 +555,53 @@ UPB_INLINE void _upb_msg_map_set_value(void* msg, const void* val, size_t size)
}
}
/** _upb_mapsorter *************************************************************/
/* _upb_mapsorter sorts maps and provides ordered iteration over the entries.
* Since maps can be recursive (map values can be messages which contain other maps).
* _upb_mapsorter can contain a stack of maps. */
typedef struct {
upb_tabent const**entries;
int size;
int cap;
} _upb_mapsorter;
typedef struct {
int start;
int pos;
int end;
} _upb_sortedmap;
UPB_INLINE void _upb_mapsorter_init(_upb_mapsorter *s) {
s->entries = NULL;
s->size = 0;
s->cap = 0;
}
UPB_INLINE void _upb_mapsorter_destroy(_upb_mapsorter *s) {
if (s->entries) free(s->entries);
}
bool _upb_mapsorter_pushmap(_upb_mapsorter *s, upb_descriptortype_t key_type,
const upb_map *map, _upb_sortedmap *sorted);
UPB_INLINE void _upb_mapsorter_popmap(_upb_mapsorter *s, _upb_sortedmap *sorted) {
s->size = sorted->start;
}
UPB_INLINE bool _upb_sortedmap_next(_upb_mapsorter *s, const upb_map *map,
_upb_sortedmap *sorted,
upb_map_entry *ent) {
if (sorted->pos == sorted->end) return false;
const upb_tabent *tabent = s->entries[sorted->pos++];
upb_strview key = upb_tabstrview(tabent->key);
_upb_map_fromkey(key, &ent->k, map->key_size);
upb_value val = {tabent->val.val};
_upb_map_fromvalue(val, &ent->v, map->val_size);
return true;
}
#undef PTR_AT
#ifdef __cplusplus

@ -147,10 +147,17 @@ UPB_INLINE char *upb_tabstr(upb_tabkey key, uint32_t *len) {
return mem + sizeof(*len);
}
UPB_INLINE upb_strview upb_tabstrview(upb_tabkey key) {
upb_strview ret;
uint32_t len;
ret.data = upb_tabstr(key, &len);
ret.size = len;
return ret;
}
/* upb_tabval *****************************************************************/
typedef struct {
typedef struct upb_tabval {
uint64_t val;
} upb_tabval;

@ -17,6 +17,7 @@ typedef struct {
int indent_depth;
int options;
const upb_symtab *ext_pool;
_upb_mapsorter sorter;
} txtenc;
static void txtenc_msg(txtenc *e, const upb_msg *msg, const upb_msgdef *m);
@ -187,6 +188,25 @@ static void txtenc_array(txtenc *e, const upb_array *arr,
}
}
static void txtenc_mapentry(txtenc *e, upb_msgval key, upb_msgval val,
const upb_fielddef *f) {
const upb_msgdef *entry = upb_fielddef_msgsubdef(f);
const upb_fielddef *key_f = upb_msgdef_field(entry, 0);
const upb_fielddef *val_f = upb_msgdef_field(entry, 1);
txtenc_indent(e);
txtenc_printf(e, "%s: {", upb_fielddef_name(f));
txtenc_endfield(e);
e->indent_depth++;
txtenc_field(e, key, key_f);
txtenc_field(e, val, val_f);
e->indent_depth--;
txtenc_indent(e);
txtenc_putstr(e, "}");
txtenc_endfield(e);
}
/*
* Maps print as messages of key/value, etc.
*
@ -200,27 +220,28 @@ static void txtenc_array(txtenc *e, const upb_array *arr,
* }
*/
static void txtenc_map(txtenc *e, const upb_map *map, const upb_fielddef *f) {
const upb_msgdef *entry = upb_fielddef_msgsubdef(f);
const upb_fielddef *key_f = upb_msgdef_itof(entry, 1);
const upb_fielddef *val_f = upb_msgdef_itof(entry, 2);
size_t iter = UPB_MAP_BEGIN;
while (upb_mapiter_next(map, &iter)) {
upb_msgval key = upb_mapiter_key(map, iter);
upb_msgval val = upb_mapiter_value(map, iter);
txtenc_indent(e);
txtenc_printf(e, "%s: {", upb_fielddef_name(f));
txtenc_endfield(e);
e->indent_depth++;
txtenc_field(e, key, key_f);
txtenc_field(e, val, val_f);
e->indent_depth--;
txtenc_indent(e);
txtenc_putstr(e, "}");
txtenc_endfield(e);
if (e->options & UPB_TXTENC_NOSORT) {
size_t iter = UPB_MAP_BEGIN;
while (upb_mapiter_next(map, &iter)) {
upb_msgval key = upb_mapiter_key(map, iter);
upb_msgval val = upb_mapiter_value(map, iter);
txtenc_mapentry(e, key, val, f);
}
} else {
const upb_msgdef *entry = upb_fielddef_msgsubdef(f);
const upb_fielddef *key_f = upb_msgdef_field(entry, 0);
_upb_sortedmap sorted;
upb_map_entry ent;
_upb_mapsorter_pushmap(&e->sorter, upb_fielddef_descriptortype(key_f), map,
&sorted);
while (_upb_sortedmap_next(&e->sorter, map, &sorted, &ent)) {
upb_msgval key, val;
memcpy(&key, &ent.k, sizeof(key));
memcpy(&val, &ent.v, sizeof(val));
txtenc_mapentry(e, key, val, f);
}
_upb_mapsorter_popmap(&e->sorter, &sorted);
}
}
@ -392,7 +413,9 @@ size_t upb_text_encode(const upb_msg *msg, const upb_msgdef *m,
e.indent_depth = 0;
e.options = options;
e.ext_pool = ext_pool;
_upb_mapsorter_init(&e.sorter);
txtenc_msg(&e, msg, m);
_upb_mapsorter_destroy(&e.sorter);
return txtenc_nullz(&e, size);
}

@ -13,7 +13,10 @@ enum {
UPB_TXTENC_SINGLELINE = 1,
/* When set, unknown fields are not printed. */
UPB_TXTENC_SKIPUNKNOWN = 2
UPB_TXTENC_SKIPUNKNOWN = 2,
/* When set, maps are *not* sorted (this avoids allocating tmp mem). */
UPB_TXTENC_NOSORT = 4
};
/* Encodes the given |msg| to text format. The message's reflection is given in

@ -324,6 +324,10 @@ UPB_INLINE int _upb_lg2ceil(int x) {
#endif
}
UPB_INLINE int _upb_lg2ceilsize(int x) {
return 1 << _upb_lg2ceil(x);
}
#include "upb/port_undef.inc"
#ifdef __cplusplus

Loading…
Cancel
Save