Merge pull request #344 from haberman/deterministic-serialize

Added map sorting to binary and text encoders.
pull/13171/head
Joshua Haberman 4 years ago committed by GitHub
commit 794ce6d061
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
  1. 2
      benchmarks/compare.py
  2. 63
      tests/bindings/lua/test_upb.lua
  3. 32
      upb/bindings/lua/def.c
  4. 109
      upb/bindings/lua/msg.c
  5. 35
      upb/bindings/lua/upb.c
  6. 2
      upb/decode.h
  7. 91
      upb/encode.c
  8. 24
      upb/encode.h
  9. 115
      upb/msg.c
  10. 49
      upb/msg.h
  11. 9
      upb/table.int.h
  12. 65
      upb/text_encode.c
  13. 5
      upb/text_encode.h
  14. 4
      upb/upb.h

@ -58,7 +58,7 @@ def Benchmark(outbase, bench_cpu=True, runs=12, fasttable=False):
baseline = "master"
bench_cpu = True
bench_cpu = False
fasttable = False
if len(sys.argv) > 1:

@ -91,6 +91,69 @@ function test_msg_map()
assert_equal(12, msg2.map_int32_int32[6])
end
-- Verifies that the default text encoding and ENCODE_DETERMINISTIC binary
-- encoding of two messages with equal map contents are identical, and that the
-- unsorted encodings of one of them differ from its sorted encodings.
function test_map_sorting()
  -- Returns a message whose map_int32_int32 holds the keys start..start+8.
  -- When `expand` is true, extra keys up to 200 are inserted and then removed
  -- again, so the final contents are the same as without `expand`.
  -- Declared `local` so the helper does not leak into the global environment.
  local function msg_with_int32_entries(start, expand)
    local msg = test_messages_proto3.TestAllTypesProto3()
    for i=start,start + 8 do
      msg.map_int32_int32[i] = i * 2
    end

    if expand then
      for i=start+20,200 do
        msg.map_int32_int32[i] = i
      end
      for i=start+20,200 do
        msg.map_int32_int32[i] = nil
      end
    end
    return msg
  end

  -- Returns a message whose map_string_nested_message has keys "1".."8", each
  -- holding a NestedMessage whose `corecursive` field is built by
  -- msg_with_int32_entries(i, expand).
  local function msg_with_msg_entries(expand)
    local msg = test_messages_proto3.TestAllTypesProto3()
    -- 8! = 40320 possible orderings makes it overwhelmingly likely that two
    -- random orderings will be different.
    for i=1,8 do
      local submsg = test_messages_proto3.TestAllTypesProto3.NestedMessage()
      submsg.corecursive = msg_with_int32_entries(i, expand)
      msg.map_string_nested_message[tostring(i)] = submsg
    end

    -- NOTE(review): `expand` is forced to false here, so the block below never
    -- runs and only the nested int32 maps above are ever expanded.  This looks
    -- like a leftover from debugging -- confirm before removing it.
    expand = false
    if expand then
      for i=21,2000 do
        local submsg = test_messages_proto3.TestAllTypesProto3.NestedMessage()
        submsg.corecursive = msg_with_int32_entries(i, expand)
        msg.map_string_nested_message[tostring(i)] = submsg
      end
      for i=21,2000 do
        msg.map_string_nested_message[tostring(i)] = nil
      end
    end
    return msg
  end

  -- Create two messages with the same contents but (hopefully) different
  -- map table orderings.
  local msg = msg_with_msg_entries(false)
  local msg2 = msg_with_msg_entries(true)

  local text1 = upb.text_encode(msg)
  local text2 = upb.text_encode(msg2)
  assert_equal(text1, text2)

  local binary1 = upb.encode(msg, {upb.ENCODE_DETERMINISTIC})
  local binary2 = upb.encode(msg2, {upb.ENCODE_DETERMINISTIC})
  assert_equal(binary1, binary2)

  -- Non-sorted map should compare different.
  local text3 = upb.text_encode(msg, {upb.TXTENC_NOSORT})
  assert_not_equal(text1, text3)

  local binary3 = upb.encode(msg)
  assert_not_equal(binary1, binary3)
end
function test_utf8()
local proto2_msg = test_messages_proto2.TestAllTypesProto2()
proto2_msg.optional_string = "\xff"

@ -53,12 +53,12 @@ static void lupb_wrapper_pushwrapper(lua_State *L, int narg, const void *def,
/* lupb_msgdef_pushsubmsgdef()
*
* Pops the msgdef wrapper at the top of the stack and replaces it with a msgdef
* wrapper for field |f| of this msgdef.
* wrapper for field |f| of this msgdef (submsg may not be direct, for example it
* may be the submessage of the map value).
*/
void lupb_msgdef_pushsubmsgdef(lua_State *L, const upb_fielddef *f) {
const upb_msgdef *m = upb_fielddef_msgsubdef(f);
assert(m);
assert(upb_fielddef_containingtype(f) == lupb_msgdef_check(L, -1));
lupb_wrapper_pushwrapper(L, -1, m, LUPB_MSGDEF);
lua_replace(L, -2); /* Replace msgdef with submsgdef. */
}
@ -337,6 +337,26 @@ static int lupb_msgdef_oneofcount(lua_State *L) {
return 1;
}
/* Looks up the message nested inside the msgdef at stack index |msgdef| whose
 * short name is the Lua value at stack index |name|.
 *
 * The qualified name "<msgdef full name>.<name>" is built on the Lua stack and
 * is left there.  If the symtab knows a message by that name, a msgdef wrapper
 * for it is pushed on top and true is returned; otherwise false is returned. */
static bool lupb_msgdef_pushnested(lua_State *L, int msgdef, int name) {
  const upb_msgdef *parent = lupb_msgdef_check(L, msgdef);
  upb_symtab *symtab;
  const char *qualified;
  const upb_msgdef *child;

  /* Borrow the symtab pointer from the wrapper, then drop the wrapper. */
  lupb_wrapper_pushsymtab(L, msgdef);
  symtab = lupb_symtab_check(L, -1);
  lua_pop(L, 1);

  /* Build "package.Message.SubMessage" as a single Lua string. */
  lua_pushstring(L, upb_msgdef_fullname(parent));
  lua_pushstring(L, ".");
  lua_pushvalue(L, name);
  lua_concat(L, 3);
  qualified = lua_tostring(L, -1);

  child = upb_symtab_lookupmsg(symtab, qualified);
  if (child) {
    lupb_wrapper_pushwrapper(L, msgdef, child, LUPB_MSGDEF);
  }
  return child != NULL;
}
/* lupb_msgdef_field()
*
* Handles:
@ -430,6 +450,13 @@ static int lupb_msgdef_fullname(lua_State *L) {
return 1;
}
/* __index metamethod for msgdef objects: indexing a msgdef (argument 1) with a
 * name (argument 2) yields the nested message def of that name, or raises a
 * Lua error when lupb_msgdef_pushnested() finds none. */
static int lupb_msgdef_index(lua_State *L) {
  bool found = lupb_msgdef_pushnested(L, 1, 2);
  if (found) return 1;
  luaL_error(L, "No such nested message");
  return 1;
}
static int lupb_msgoneofiter_next(lua_State *L) {
const upb_msgdef *m = lupb_msgdef_check(L, lua_upvalueindex(1));
int *index = lua_touserdata(L, lua_upvalueindex(2));
@ -471,6 +498,7 @@ static int lupb_msgdef_tostring(lua_State *L) {
static const struct luaL_Reg lupb_msgdef_mm[] = {
{"__call", lupb_msg_pushnew},
{"__index", lupb_msgdef_index},
{"__len", lupb_msgdef_fieldcount},
{"__tostring", lupb_msgdef_tostring},
{NULL, NULL}

@ -187,6 +187,13 @@ static void lupb_arena_fuse(lua_State *L, int to, int from) {
upb_arena_fuse(to_arena, from_arena);
}
/* Fuses the arenas of two objects.  |to| and |from| are stack indexes of
 * objects that keep their arena in the LUPB_ARENA_INDEX user value; both
 * arenas are pushed temporarily, handed to lupb_arena_fuse(), and popped. */
static void lupb_arena_fuseobjs(lua_State *L, int to, int from) {
  int to_arena;
  int from_arena;

  lua_getiuservalue(L, to, LUPB_ARENA_INDEX);
  to_arena = lua_gettop(L);
  lua_getiuservalue(L, from, LUPB_ARENA_INDEX);
  from_arena = lua_gettop(L);

  lupb_arena_fuse(L, to_arena, from_arena);
  lua_pop(L, 2);
}
static int lupb_arena_gc(lua_State *L) {
upb_arena *a = lupb_arena_check(L, 1);
upb_arena_free(a);
@ -398,6 +405,10 @@ static int lupb_array_newindex(lua_State *L) {
upb_array_set(larray->arr, n, msgval);
}
if (larray->type == UPB_TYPE_MESSAGE) {
lupb_arena_fuseobjs(L, 1, 3);
}
return 0; /* 1 for chained assignments? */
}
@ -535,6 +546,9 @@ static int lupb_map_newindex(lua_State *L) {
} else {
upb_msgval val = lupb_tomsgval(L, lmap->value_type, 3, 1, LUPB_COPY);
upb_map_set(map, key, val, lupb_arenaget(L, 1));
if (lmap->value_type == UPB_TYPE_MESSAGE) {
lupb_arena_fuseobjs(L, 1, 3);
}
}
return 0;
@ -600,21 +614,26 @@ static upb_msg *lupb_msg_check(lua_State *L, int narg) {
return msg->msg;
}
static const upb_fielddef *lupb_msg_checkfield(lua_State *L, int msg,
int field) {
static const upb_msgdef *lupb_msg_getmsgdef(lua_State *L, int msg) {
lua_getiuservalue(L, msg, LUPB_MSGDEF_INDEX);
const upb_msgdef *m = lupb_msgdef_check(L, -1);
lua_pop(L, 1);
return m;
}
static const upb_fielddef *lupb_msg_tofield(lua_State *L, int msg, int field) {
size_t len;
const char *fieldname = luaL_checklstring(L, field, &len);
const upb_msgdef *m;
const upb_fielddef *f;
const upb_msgdef *m = lupb_msg_getmsgdef(L, msg);
return upb_msgdef_ntof(m, fieldname, len);
}
lua_getiuservalue(L, msg, LUPB_MSGDEF_INDEX);
m = lupb_msgdef_check(L, -1);
f = upb_msgdef_ntof(m, fieldname, len);
static const upb_fielddef *lupb_msg_checkfield(lua_State *L, int msg,
int field) {
const upb_fielddef *f = lupb_msg_tofield(L, msg, field);
if (f == NULL) {
luaL_error(L, "no such field '%s'", fieldname);
luaL_error(L, "no such field '%s'", lua_tostring(L, field));
}
lua_pop(L, 1);
return f;
}
@ -813,10 +832,7 @@ static int lupb_msg_newindex(lua_State *L) {
}
if (merge_arenas) {
lua_getiuservalue(L, 1, LUPB_ARENA_INDEX);
lua_getiuservalue(L, 3, LUPB_ARENA_INDEX);
lupb_arena_fuse(L, lua_absindex(L, -2), lua_absindex(L, -1));
lua_pop(L, 2);
lupb_arena_fuseobjs(L, 1, 3);
}
upb_msg_set(msg, f, msgval, lupb_arenaget(L, 1));
@ -907,6 +923,46 @@ static int lupb_decode(lua_State *L) {
return 1;
}
/**
 * lupb_textencode()
 *
 * Handles:
 *   text_string = upb.text_encode(msg, {upb.TXTENC_SINGLELINE})
 *
 * The optional second argument is an array of option flags, which are OR'ed
 * together and passed to upb_text_encode().
 */
static int lupb_textencode(lua_State *L) {
  int argcount = lua_gettop(L);
  upb_msg *msg = lupb_msg_check(L, 1);
  const upb_msgdef *m;
  char buf[1024];
  size_t size;
  int options = 0;

  lua_getiuservalue(L, 1, LUPB_MSGDEF_INDEX);
  m = lupb_msgdef_check(L, -1);

  if (argcount > 1) {
    size_t len = lua_rawlen(L, 2);
    for (size_t i = 1; i <= len; i++) {
      lua_rawgeti(L, 2, i);
      options |= lupb_checkuint32(L, -1);
      lua_pop(L, 1);
    }
  }

  /* First attempt: encode into the fixed-size stack buffer. */
  size = upb_text_encode(msg, m, NULL, options, buf, sizeof(buf));

  if (size < sizeof(buf)) {
    lua_pushlstring(L, buf, size);
  } else {
    /* The output did not fit; encode again into a buffer of the reported
     * size.  The buffer is a Lua userdata rather than malloc() memory: an
     * allocation failure raises a Lua error instead of yielding NULL, and the
     * buffer is collected by Lua even if lua_pushlstring() raises.  It stays
     * on the stack below the result, which is harmless since only the top
     * value is returned. */
    char *ptr = lua_newuserdata(L, size + 1);
    upb_text_encode(msg, m, NULL, options, ptr, size + 1);
    lua_pushlstring(L, ptr, size);
  }

  return 1;
}
/**
* lupb_encode()
*
@ -914,17 +970,28 @@ static int lupb_decode(lua_State *L) {
* bin_string = upb.encode(msg)
*/
static int lupb_encode(lua_State *L) {
int argcount = lua_gettop(L);
const upb_msg *msg = lupb_msg_check(L, 1);
const upb_msglayout *layout;
upb_arena *arena = lupb_arena_pushnew(L);
size_t size;
char *result;
int options = 0;
if (argcount > 1) {
size_t len = lua_rawlen(L, 2);
for (size_t i = 1; i <= len; i++) {
lua_rawgeti(L, 2, i);
options |= lupb_checkuint32(L, -1);
lua_pop(L, 1);
}
}
lua_getiuservalue(L, 1, LUPB_MSGDEF_INDEX);
layout = upb_msgdef_layout(lupb_msgdef_check(L, -1));
lua_pop(L, 1);
result = upb_encode(msg, (const void*)layout, arena, &size);
result = upb_encode_ex(msg, (const void*)layout, options, arena, &size);
if (!result) {
lua_pushstring(L, "Error encoding protobuf.");
@ -936,11 +1003,17 @@ static int lupb_encode(lua_State *L) {
return 1;
}
/* Stores the integer |i| under key |field| in the table that is currently at
 * the top of the Lua stack.  The stack is left unchanged. */
static void lupb_setfieldi(lua_State *L, const char *field, int i) {
  const int table_index = -2; /* The table sits just below the pushed value. */
  lua_pushinteger(L, i);
  lua_setfield(L, table_index, field);
}
static const struct luaL_Reg lupb_msg_toplevel_m[] = {
{"Array", lupb_array_new},
{"Map", lupb_map_new},
{"decode", lupb_decode},
{"encode", lupb_encode},
{"text_encode", lupb_textencode},
{NULL, NULL}
};
@ -952,5 +1025,11 @@ void lupb_msg_registertypes(lua_State *L) {
lupb_register_type(L, LUPB_MAP, NULL, lupb_map_mm);
lupb_register_type(L, LUPB_MSG, NULL, lupb_msg_mm);
lupb_setfieldi(L, "TXTENC_SINGLELINE", UPB_TXTENC_SINGLELINE);
lupb_setfieldi(L, "TXTENC_SKIPUNKNOWN", UPB_TXTENC_SKIPUNKNOWN);
lupb_setfieldi(L, "TXTENC_NOSORT", UPB_TXTENC_NOSORT);
lupb_setfieldi(L, "ENCODE_DETERMINISTIC", UPB_ENCODE_DETERMINISTIC);
lupb_cacheinit(L);
}

@ -84,6 +84,24 @@ int lua_getiuservalue(lua_State *L, int index, int n) {
}
#endif
/* __index handler used when a type has both a table of methods and its own
 * __index metamethod.  Upvalue 1 is the table that is searched first; upvalue 2
 * is the function called as fn(obj, key) when the table has no such key. */
int lupb_indexmm(lua_State *L) {
  const int methods = lua_upvalueindex(1);
  const int fallback = lua_upvalueindex(2);

  /* Raw lookup of the key (argument 2) in the method table. */
  lua_pushvalue(L, 2);
  lua_rawget(L, methods);

  if (lua_isnil(L, -1)) {
    /* No entry for that key: defer to the user's __index metamethod. */
    lua_pushvalue(L, fallback);
    lua_pushvalue(L, 1);
    lua_pushvalue(L, 2);
    lua_call(L, 2, 1);
  }

  return 1;
}
void lupb_register_type(lua_State *L, const char *name, const luaL_Reg *m,
const luaL_Reg *mm) {
luaL_newmetatable(L, name);
@ -93,14 +111,17 @@ void lupb_register_type(lua_State *L, const char *name, const luaL_Reg *m,
}
if (m) {
/* Methods go in the mt's __index method. This implies that you can'
* implement __index and also have methods. */
lua_getfield(L, -1, "__index");
lupb_assert(L, lua_isnil(L, -1));
lua_pop(L, 1);
lua_createtable(L, 0, 0);
lua_createtable(L, 0, 0); /* __index table */
lupb_setfuncs(L, m);
/* Methods go in the mt's __index slot. If the user also specified an
* __index metamethod, use our custom lupb_indexmm() that can check both. */
lua_getfield(L, -2, "__index");
if (lua_isnil(L, -1)) {
lua_pop(L, 1);
} else {
lua_pushcclosure(L, &lupb_indexmm, 2);
}
lua_setfield(L, -2, "__index");
}

@ -15,6 +15,8 @@ extern "C" {
#endif
enum {
/* If set, strings will alias the input buffer instead of copying into the
* arena. */
UPB_DECODE_ALIAS = 1,
};

@ -32,6 +32,8 @@ typedef struct {
jmp_buf err;
upb_alloc *alloc;
char *buf, *ptr, *limit;
int options;
_upb_mapsorter sorter;
} upb_encstate;
static size_t upb_roundup_pow2(size_t bytes) {
@ -341,31 +343,48 @@ static void encode_array(upb_encstate *e, const char *field_mem,
}
}
/* Encodes a single map entry as a length-delimited field numbered |number|.
 *
 * |layout| is the layout of the map-entry message: fields[0] describes the key
 * and fields[1] the value.  The pieces are emitted as value, key, length, tag;
 * the entry's size is measured as the growth of (e->limit - e->ptr), which
 * suggests the encoder fills its buffer back to front. */
static void encode_mapentry(upb_encstate *e, uint32_t number,
                            const upb_msglayout *layout,
                            const upb_map_entry *ent) {
  const upb_msglayout_field *key_f = &layout->fields[0];
  const upb_msglayout_field *val_f = &layout->fields[1];
  const size_t len_before = e->limit - e->ptr;
  size_t payload_len;

  encode_scalar(e, &ent->v, layout, val_f, false);
  encode_scalar(e, &ent->k, layout, key_f, false);
  payload_len = (e->limit - e->ptr) - len_before;

  encode_varint(e, payload_len);
  encode_tag(e, number, UPB_WIRE_TYPE_DELIMITED);
}
static void encode_map(upb_encstate *e, const char *field_mem,
const upb_msglayout *m, const upb_msglayout_field *f) {
const upb_map *map = *(const upb_map**)field_mem;
const upb_msglayout *entry = m->submsgs[f->submsg_index];
const upb_msglayout_field *key_field = &entry->fields[0];
const upb_msglayout_field *val_field = &entry->fields[1];
upb_strtable_iter i;
if (map == NULL) {
return;
}
const upb_msglayout *layout = m->submsgs[f->submsg_index];
UPB_ASSERT(layout->field_count == 2);
upb_strtable_begin(&i, &map->table);
for(; !upb_strtable_done(&i); upb_strtable_next(&i)) {
size_t pre_len = e->limit - e->ptr;
size_t size;
upb_strview key = upb_strtable_iter_key(&i);
const upb_value val = upb_strtable_iter_value(&i);
if (map == NULL) return;
if (e->options & UPB_ENCODE_DETERMINISTIC) {
_upb_sortedmap sorted;
_upb_mapsorter_pushmap(&e->sorter, layout->fields[0].descriptortype, map,
&sorted);
upb_map_entry ent;
_upb_map_fromkey(key, &ent.k, map->key_size);
_upb_map_fromvalue(val, &ent.v, map->val_size);
encode_scalar(e, &ent.v, entry, val_field, false);
encode_scalar(e, &ent.k, entry, key_field, false);
size = (e->limit - e->ptr) - pre_len;
encode_varint(e, size);
encode_tag(e, f->number, UPB_WIRE_TYPE_DELIMITED);
while (_upb_sortedmap_next(&e->sorter, map, &sorted, &ent)) {
encode_mapentry(e, f->number, layout, &ent);
}
_upb_mapsorter_popmap(&e->sorter, &sorted);
} else {
upb_strtable_iter i;
upb_strtable_begin(&i, &map->table);
for(; !upb_strtable_done(&i); upb_strtable_next(&i)) {
upb_strview key = upb_strtable_iter_key(&i);
const upb_value val = upb_strtable_iter_value(&i);
upb_map_entry ent;
_upb_map_fromkey(key, &ent.k, map->key_size);
_upb_map_fromvalue(val, &ent.v, map->val_size);
encode_mapentry(e, f->number, layout, &ent);
}
}
}
@ -414,28 +433,32 @@ static void encode_message(upb_encstate *e, const char *msg,
*size = (e->limit - e->ptr) - pre_len;
}
char *upb_encode(const void *msg, const upb_msglayout *m, upb_arena *arena,
size_t *size) {
char *upb_encode_ex(const void *msg, const upb_msglayout *m, int options,
upb_arena *arena, size_t *size) {
upb_encstate e;
e.alloc = upb_arena_alloc(arena);
e.buf = NULL;
e.limit = NULL;
e.ptr = NULL;
e.options = options;
_upb_mapsorter_init(&e.sorter);
char *ret = NULL;
if (UPB_SETJMP(e.err)) {
*size = 0;
return NULL;
}
encode_message(&e, msg, m, size);
*size = e.limit - e.ptr;
if (*size == 0) {
static char ch;
return &ch;
ret = NULL;
} else {
UPB_ASSERT(e.ptr);
return e.ptr;
encode_message(&e, msg, m, size);
*size = e.limit - e.ptr;
if (*size == 0) {
static char ch;
ret = &ch;
} else {
UPB_ASSERT(e.ptr);
ret = e.ptr;
}
}
_upb_mapsorter_destroy(&e.sorter);
return ret;
}

@ -7,12 +7,32 @@
#include "upb/msg.h"
/* Must be last. */
#include "upb/port_def.inc"
#ifdef __cplusplus
extern "C" {
#endif
char *upb_encode(const void *msg, const upb_msglayout *l, upb_arena *arena,
size_t *size);
enum {
/* If set, the results of serializing will be deterministic across all
* instances of this binary. There are no guarantees across different
* binary builds.
*
* If your proto contains maps, the encoder will need to malloc()/free()
* memory during encode. */
UPB_ENCODE_DETERMINISTIC = 1,
};
char *upb_encode_ex(const void *msg, const upb_msglayout *l, int options,
upb_arena *arena, size_t *size);
/* Convenience wrapper: encodes |msg| through upb_encode_ex() with no options
 * set (options == 0). */
UPB_INLINE char *upb_encode(const void *msg, const upb_msglayout *l,
                            upb_arena *arena, size_t *size) {
  const int no_options = 0;
  return upb_encode_ex(msg, l, no_options, arena, size);
}
#include "upb/port_undef.inc"
#ifdef __cplusplus
} /* extern "C" */

@ -139,3 +139,118 @@ upb_map *_upb_map_new(upb_arena *a, size_t key_size, size_t value_size) {
return map;
}
/* Shared helper for the comparators below.  |_a| and |_b| each point to a
 * `const upb_tabent *` (one element of the sorter's array).  The table key of
 * each entry is converted by _upb_map_fromkey() into |a_key| and |b_key|,
 * using |size| as the key size. */
static void _upb_mapsorter_getkeys(const void *_a, const void *_b, void *a_key,
                                   void *b_key, size_t size) {
  const upb_tabent *a_ent = *(const upb_tabent *const *)_a;
  const upb_tabent *b_ent = *(const upb_tabent *const *)_b;
  _upb_map_fromkey(upb_tabstrview(a_ent->key), a_key, size);
  _upb_map_fromkey(upb_tabstrview(b_ent->key), b_key, size);
}
/* qsort() comparator ordering entries by their signed 64-bit key.
 * Returns -1, 0 or 1. */
static int _upb_mapsorter_cmpi64(const void *_a, const void *_b) {
  int64_t a, b;
  _upb_mapsorter_getkeys(_a, _b, &a, &b, 8);
  /* Compare explicitly: "a - b" can overflow, and narrowing the 64-bit
   * difference to int can flip its sign. */
  return a < b ? -1 : a > b;
}
/* qsort() comparator ordering entries by their unsigned 64-bit key.
 * Returns -1, 0 or 1. */
static int _upb_mapsorter_cmpu64(const void *_a, const void *_b) {
  uint64_t a, b;
  _upb_mapsorter_getkeys(_a, _b, &a, &b, 8);
  /* Compare explicitly: unsigned "a - b" wraps around when a < b and is then
   * narrowed to int, so its sign says nothing about the ordering. */
  return a < b ? -1 : a > b;
}
/* qsort() comparator ordering entries by their signed 32-bit key.
 * Returns -1, 0 or 1. */
static int _upb_mapsorter_cmpi32(const void *_a, const void *_b) {
  int32_t a, b;
  _upb_mapsorter_getkeys(_a, _b, &a, &b, 4);
  /* Compare explicitly: "a - b" overflows for keys that are far apart
   * (e.g. INT32_MIN and 1). */
  return a < b ? -1 : a > b;
}
/* qsort() comparator ordering entries by their unsigned 32-bit key.
 * Returns -1, 0 or 1. */
static int _upb_mapsorter_cmpu32(const void *_a, const void *_b) {
  uint32_t a, b;
  _upb_mapsorter_getkeys(_a, _b, &a, &b, 4);
  /* Compare explicitly: unsigned "a - b" wraps around, so converting it to
   * int gives the wrong sign whenever the keys differ by more than INT_MAX. */
  return a < b ? -1 : a > b;
}
/* qsort() comparator ordering entries by their bool key: false sorts before
 * true.  Returns -1, 0 or 1. */
static int _upb_mapsorter_cmpbool(const void *_a, const void *_b) {
  bool lhs, rhs;
  _upb_mapsorter_getkeys(_a, _b, &lhs, &rhs, 1);
  return (int)lhs - (int)rhs;
}
/* qsort() comparator ordering entries by their string key: bytewise over the
 * common prefix, and when the prefixes are equal the shorter key sorts
 * first. */
static int _upb_mapsorter_cmpstr(const void *_a, const void *_b) {
  upb_strview a, b;
  _upb_mapsorter_getkeys(_a, _b, &a, &b, UPB_MAPTYPE_STRING);
  size_t common_size = UPB_MIN(a.size, b.size);
  int cmp = memcmp(a.data, b.data, common_size);
  if (cmp) return cmp;
  /* Compare the lengths explicitly instead of narrowing the unsigned
   * difference "a.size - b.size" to int. */
  return a.size < b.size ? -1 : a.size > b.size;
}
/* Pushes a sorted view of |map| onto the sorter |s|.
 *
 * Pointers to all non-empty table entries of |map| are appended to s->entries
 * and sorted by key according to |key_type|, which must be one of the integer,
 * enum, bool or string descriptor types handled below.  |sorted| receives the
 * [start, end) range of s->entries that belongs to this map, with pos == start.
 *
 * Returns false if the entries array could not be grown.  In that case the
 * sorter keeps its previous buffer and |sorted| describes an empty range, so
 * iterating it or popping it is still well-defined; callers should
 * nevertheless check the result, since the map's entries are not available. */
bool _upb_mapsorter_pushmap(_upb_mapsorter *s, upb_descriptortype_t key_type,
                            const upb_map *map, _upb_sortedmap *sorted) {
  int map_size = _upb_map_size(map);
  sorted->start = s->size;
  sorted->pos = sorted->start;
  sorted->end = sorted->start + map_size;

  /* An empty map needs no storage and no sorting.  Returning here also avoids
   * handing qsort() a NULL base when nothing has been allocated yet. */
  if (map_size == 0) return true;

  /* Grow s->entries if necessary. */
  if (sorted->end > s->cap) {
    int new_cap = _upb_lg2ceilsize(sorted->end);
    /* Keep the old pointer until realloc() succeeds: on failure the old block
     * is still valid and must remain owned by the sorter. */
    upb_tabent const **new_entries =
        realloc(s->entries, new_cap * sizeof(*s->entries));
    if (!new_entries) {
      sorted->end = sorted->start;
      return false;
    }
    s->entries = new_entries;
    s->cap = new_cap;
  }

  s->size = sorted->end;

  /* Copy non-empty entries from the table to s->entries. */
  upb_tabent const **dst = &s->entries[sorted->start];
  const upb_tabent *src = map->table.t.entries;
  const upb_tabent *end = src + upb_table_size(&map->table.t);
  for (; src < end; src++) {
    if (!upb_tabent_isempty(src)) {
      *dst = src;
      dst++;
    }
  }
  UPB_ASSERT(dst == &s->entries[sorted->end]);

  /* Sort entries according to the key type. */
  int (*compar)(const void *, const void *);

  switch (key_type) {
    case UPB_DESCRIPTOR_TYPE_INT64:
    case UPB_DESCRIPTOR_TYPE_SFIXED64:
    case UPB_DESCRIPTOR_TYPE_SINT64:
      compar = _upb_mapsorter_cmpi64;
      break;
    case UPB_DESCRIPTOR_TYPE_UINT64:
    case UPB_DESCRIPTOR_TYPE_FIXED64:
      compar = _upb_mapsorter_cmpu64;
      break;
    case UPB_DESCRIPTOR_TYPE_INT32:
    case UPB_DESCRIPTOR_TYPE_SINT32:
    case UPB_DESCRIPTOR_TYPE_SFIXED32:
    case UPB_DESCRIPTOR_TYPE_ENUM:
      compar = _upb_mapsorter_cmpi32;
      break;
    case UPB_DESCRIPTOR_TYPE_UINT32:
    case UPB_DESCRIPTOR_TYPE_FIXED32:
      compar = _upb_mapsorter_cmpu32;
      break;
    case UPB_DESCRIPTOR_TYPE_BOOL:
      compar = _upb_mapsorter_cmpbool;
      break;
    case UPB_DESCRIPTOR_TYPE_STRING:
      compar = _upb_mapsorter_cmpstr;
      break;
    default:
      UPB_UNREACHABLE();
  }

  qsort(&s->entries[sorted->start], map_size, sizeof(*s->entries), compar);
  return true;
}

@ -9,11 +9,13 @@
#define UPB_MSG_H_
#include <stdint.h>
#include <stdlib.h>
#include <string.h>
#include "upb/table.int.h"
#include "upb/upb.h"
/* Must be last. */
#include "upb/port_def.inc"
#ifdef __cplusplus
@ -553,6 +555,53 @@ UPB_INLINE void _upb_msg_map_set_value(void* msg, const void* val, size_t size)
}
}
/** _upb_mapsorter *************************************************************/
/* _upb_mapsorter sorts maps and provides ordered iteration over the entries.
 * Since maps can be recursive (map values can be messages which contain other
 * maps), _upb_mapsorter can contain a stack of maps. */
/* Scratch state shared by all maps sorted during one encode.  Initialized by
 * _upb_mapsorter_init() and released by _upb_mapsorter_destroy(). */
typedef struct {
/* Pointers to map table entries; grown with realloc() by
 * _upb_mapsorter_pushmap() and released with free(). */
upb_tabent const**entries;
/* Number of slots of |entries| currently in use by pushed maps. */
int size;
/* Number of slots allocated in |entries|. */
int cap;
} _upb_mapsorter;
/* Cursor over the slice of _upb_mapsorter.entries that belongs to one pushed
 * map.  Filled in by _upb_mapsorter_pushmap(). */
typedef struct {
/* Index of the map's first entry (the sorter's size when it was pushed). */
int start;
/* Index of the next entry _upb_sortedmap_next() will return. */
int pos;
/* One past the index of the map's last entry. */
int end;
} _upb_sortedmap;
/* Puts |s| into its empty state: no buffer, zero size, zero capacity. */
UPB_INLINE void _upb_mapsorter_init(_upb_mapsorter *s) {
  s->cap = 0;
  s->size = 0;
  s->entries = NULL;
}
/* Releases the sorter's entries buffer, if one was ever allocated. */
UPB_INLINE void _upb_mapsorter_destroy(_upb_mapsorter *s) {
  /* free(NULL) is a no-op, so no explicit NULL test is needed. */
  free(s->entries);
}
bool _upb_mapsorter_pushmap(_upb_mapsorter *s, upb_descriptortype_t key_type,
const upb_map *map, _upb_sortedmap *sorted);
/* Pops the map described by |sorted| off the sorter's stack by resetting the
 * sorter's size to where that map's entries began.  The buffer itself is kept
 * for reuse. */
UPB_INLINE void _upb_mapsorter_popmap(_upb_mapsorter *s, _upb_sortedmap *sorted) {
s->size = sorted->start;
}
/* Advances the cursor |sorted| by one entry.  Returns false when the cursor
 * has reached the end of its range; otherwise converts the next table entry's
 * key and value into |ent| (using |map|'s key and value sizes) and returns
 * true. */
UPB_INLINE bool _upb_sortedmap_next(_upb_mapsorter *s, const upb_map *map,
                                    _upb_sortedmap *sorted,
                                    upb_map_entry *ent) {
  if (sorted->pos == sorted->end) return false;

  const upb_tabent *cur = s->entries[sorted->pos];
  sorted->pos++;

  _upb_map_fromkey(upb_tabstrview(cur->key), &ent->k, map->key_size);

  upb_value raw_val = {cur->val.val};
  _upb_map_fromvalue(raw_val, &ent->v, map->val_size);
  return true;
}
#undef PTR_AT
#ifdef __cplusplus

@ -147,10 +147,17 @@ UPB_INLINE char *upb_tabstr(upb_tabkey key, uint32_t *len) {
return mem + sizeof(*len);
}
/* Returns the string stored in table key |key| as a upb_strview, using
 * upb_tabstr() to obtain the data pointer and length. */
UPB_INLINE upb_strview upb_tabstrview(upb_tabkey key) {
  uint32_t size;
  upb_strview view;
  view.data = upb_tabstr(key, &size);
  view.size = size;
  return view;
}
/* upb_tabval *****************************************************************/
typedef struct {
typedef struct upb_tabval {
uint64_t val;
} upb_tabval;

@ -17,6 +17,7 @@ typedef struct {
int indent_depth;
int options;
const upb_symtab *ext_pool;
_upb_mapsorter sorter;
} txtenc;
static void txtenc_msg(txtenc *e, const upb_msg *msg, const upb_msgdef *m);
@ -187,6 +188,25 @@ static void txtenc_array(txtenc *e, const upb_array *arr,
}
}
/* Prints one map entry of map field |f| in the form
 *
 *   <field name>: {
 *     <key field>
 *     <value field>
 *   }
 *
 * The key and value are printed with txtenc_field() using fields 0 and 1 of
 * the field's map-entry message def, one indent level deeper. */
static void txtenc_mapentry(txtenc *e, upb_msgval key, upb_msgval val,
                            const upb_fielddef *f) {
  const upb_msgdef *entry_def = upb_fielddef_msgsubdef(f);
  const upb_fielddef *key_def = upb_msgdef_field(entry_def, 0);
  const upb_fielddef *val_def = upb_msgdef_field(entry_def, 1);

  /* Opening line. */
  txtenc_indent(e);
  txtenc_printf(e, "%s: {", upb_fielddef_name(f));
  txtenc_endfield(e);

  /* Entry body. */
  e->indent_depth++;
  txtenc_field(e, key, key_def);
  txtenc_field(e, val, val_def);
  e->indent_depth--;

  /* Closing brace. */
  txtenc_indent(e);
  txtenc_putstr(e, "}");
  txtenc_endfield(e);
}
/*
* Maps print as messages of key/value, etc.
*
@ -200,27 +220,28 @@ static void txtenc_array(txtenc *e, const upb_array *arr,
* }
*/
static void txtenc_map(txtenc *e, const upb_map *map, const upb_fielddef *f) {
const upb_msgdef *entry = upb_fielddef_msgsubdef(f);
const upb_fielddef *key_f = upb_msgdef_itof(entry, 1);
const upb_fielddef *val_f = upb_msgdef_itof(entry, 2);
size_t iter = UPB_MAP_BEGIN;
while (upb_mapiter_next(map, &iter)) {
upb_msgval key = upb_mapiter_key(map, iter);
upb_msgval val = upb_mapiter_value(map, iter);
txtenc_indent(e);
txtenc_printf(e, "%s: {", upb_fielddef_name(f));
txtenc_endfield(e);
e->indent_depth++;
txtenc_field(e, key, key_f);
txtenc_field(e, val, val_f);
e->indent_depth--;
txtenc_indent(e);
txtenc_putstr(e, "}");
txtenc_endfield(e);
if (e->options & UPB_TXTENC_NOSORT) {
size_t iter = UPB_MAP_BEGIN;
while (upb_mapiter_next(map, &iter)) {
upb_msgval key = upb_mapiter_key(map, iter);
upb_msgval val = upb_mapiter_value(map, iter);
txtenc_mapentry(e, key, val, f);
}
} else {
const upb_msgdef *entry = upb_fielddef_msgsubdef(f);
const upb_fielddef *key_f = upb_msgdef_field(entry, 0);
_upb_sortedmap sorted;
upb_map_entry ent;
_upb_mapsorter_pushmap(&e->sorter, upb_fielddef_descriptortype(key_f), map,
&sorted);
while (_upb_sortedmap_next(&e->sorter, map, &sorted, &ent)) {
upb_msgval key, val;
memcpy(&key, &ent.k, sizeof(key));
memcpy(&val, &ent.v, sizeof(val));
txtenc_mapentry(e, key, val, f);
}
_upb_mapsorter_popmap(&e->sorter, &sorted);
}
}
@ -392,7 +413,9 @@ size_t upb_text_encode(const upb_msg *msg, const upb_msgdef *m,
e.indent_depth = 0;
e.options = options;
e.ext_pool = ext_pool;
_upb_mapsorter_init(&e.sorter);
txtenc_msg(&e, msg, m);
_upb_mapsorter_destroy(&e.sorter);
return txtenc_nullz(&e, size);
}

@ -13,7 +13,10 @@ enum {
UPB_TXTENC_SINGLELINE = 1,
/* When set, unknown fields are not printed. */
UPB_TXTENC_SKIPUNKNOWN = 2
UPB_TXTENC_SKIPUNKNOWN = 2,
/* When set, maps are *not* sorted (this avoids allocating tmp mem). */
UPB_TXTENC_NOSORT = 4
};
/* Encodes the given |msg| to text format. The message's reflection is given in

@ -324,6 +324,10 @@ UPB_INLINE int _upb_lg2ceil(int x) {
#endif
}
/* Returns 1 << _upb_lg2ceil(x) -- presumably the smallest power of two that
 * is >= x (confirm against _upb_lg2ceil()). */
UPB_INLINE int _upb_lg2ceilsize(int x) {
  const int shift = _upb_lg2ceil(x);
  return 1 << shift;
}
#include "upb/port_undef.inc"
#ifdef __cplusplus

Loading…
Cancel
Save