Major work on Lua extension and default values.

Default values are now supported, and the Lua extension
can now create and modify individual protobuf objects.
pull/13171/head
Joshua Haberman 14 years ago
parent 0c6786c6fa
commit fd184f0df2
  1. 8
      Makefile
  2. 17
      lang_ext/lua/test.lua
  3. 330
      lang_ext/lua/upb.c
  4. 26
      src/descriptor.h
  5. 126
      src/upb_decoder.c
  6. 4
      src/upb_decoder_x64.asm
  7. 166
      src/upb_def.c
  8. 16
      src/upb_def.h
  9. 16
      src/upb_msg.c
  10. 47
      src/upb_msg.h
  11. 13
      src/upb_string.c
  12. 10
      src/upb_string.h
  13. 1
      src/upbc.c
  14. 18
      tests/test_vs_proto2.cc
  15. 208
      tests/tests.c

@ -94,6 +94,7 @@ TESTS_SRC= \
tests/test_stream.c \
tests/test_string.c \
tests/tests.c \
tests/tests_varint.c \
tests/test_vs_proto2.cc
ALLSRC=$(CORE) $(STREAM) $(BENCHMARKS_SRC) $(TESTS_SRC)
@ -138,11 +139,11 @@ $(LIBUPB_PIC): $(PICOBJ)
# critical path but gets very large when -O3 is used.
src/upb_def.o: src/upb_def.c
$(E) CC $<
$(Q) $(CC) $(CFLAGS) $(CPPFLAGS) -Os -c -o $@ $<
$(Q) $(CC) $(CFLAGS) $(CPPFLAGS) -O0 -c -o $@ $<
src/upb_def.lo: src/upb_def.c
$(E) 'CC -fPIC' $<
$(Q) $(CC) $(CFLAGS) $(CPPFLAGS) -Os -c -o $@ $< -fPIC
$(Q) $(CC) $(CFLAGS) $(CPPFLAGS) -O0 -c -o $@ $< -fPIC
src/upb_decoder_x64.o: src/upb_decoder_x64.asm
$(E) NASM $<
@ -183,6 +184,7 @@ SIMPLE_TESTS= \
tests/test_string \
tests/test_def \
tests/test_stream \
tests/test_varint \
tests/tests
# tests/test_decoder \
@ -202,7 +204,7 @@ tests/tests: tests/test.proto.pb
$(SIMPLE_TESTS): % : %.c
$(E) CC $<
$(Q) $(CC) $(CFLAGS) $(CPPFLAGS) -c -o $@ $<
$(Q) $(CC) $(CFLAGS) $(CPPFLAGS) -o $@ $< $(LIBUPB)
VALGRIND=valgrind --leak-check=full --error-exitcode=1
test: tests

@ -18,3 +18,20 @@ symtab:parsedesc(f:read("*all"))
for _, def in ipairs(symtab:getdefs(-1)) do
print(def:name())
end
SpeedMessage1 = symtab:lookup("benchmarks.SpeedMessage1")
print(SpeedMessage1:name())
msg = SpeedMessage1()
-- print(msg.field1)
-- print(msg.field129)
-- print(msg.field271)
-- print(msg.field15.field15)
-- print(msg.field1)
-- print(msg.field1)
-- msg.field1 = "YEAH BABY!"
-- print(msg.field1)
print(msg.field129)
msg.field129 = 5
print(msg.field129)

@ -7,9 +7,20 @@
*/
#include <stdlib.h>
#include <math.h>
#include <float.h>
#include "lauxlib.h"
#include "upb_def.h"
#include "upb_glue.h"
#include "upb_msg.h"
static void lupb_msg_getorcreate(lua_State *L, upb_msg *msg, upb_msgdef *md);
// All the def types share the same C layout, even though they are different Lua
// types with different metatables.
typedef struct {
upb_def *def;
} lupb_def;
void lupb_pushstring(lua_State *L, upb_string *str) {
lua_pushlstring(L, upb_string_getrobuf(str), upb_string_len(str));
@ -30,21 +41,17 @@ void lupb_checkstatus(lua_State *L, upb_status *s) {
upb_status_uninit(s);
}
/* object cache ***************************************************************/
// We cache all the lua objects (userdata) we vend in a weak table, indexed by
// the C pointer of the object they are caching.
typedef void (*lupb_cb)(void *cobj);
static void lupb_nop(void *foo) {
(void)foo;
}
static void lupb_cache_getorcreate(lua_State *L, void *cobj, const char *type,
lupb_cb ref, lupb_cb unref) {
static void *lupb_cache_getorcreate_size(
lua_State *L, void *cobj, const char *type, size_t size) {
// Lookup our cache in the registry (we don't put our objects in the registry
// directly because we need our cache to be a weak table).
void **obj = NULL;
lua_getfield(L, LUA_REGISTRYINDEX, "upb.objcache");
assert(!lua_isnil(L, -1)); // Should have been created by luaopen_upb.
lua_pushlightuserdata(L, cobj);
@ -55,7 +62,7 @@ static void lupb_cache_getorcreate(lua_State *L, void *cobj, const char *type,
lua_pop(L, 1);
// We take advantage of the fact that all of our objects are currently a
// single pointer, and thus have the same layout.
void **obj = lua_newuserdata(L, sizeof(void*));
obj = lua_newuserdata(L, size);
*obj = cobj;
luaL_getmetatable(L, type);
assert(!lua_isnil(L, -1)); // Should have been created by luaopen_upb.
@ -65,44 +72,235 @@ static void lupb_cache_getorcreate(lua_State *L, void *cobj, const char *type,
lua_pushlightuserdata(L, cobj);
lua_pushvalue(L, -2);
lua_rawset(L, -4);
ref(cobj);
} else {
unref(cobj);
}
lua_insert(L, -2);
lua_pop(L, 1);
return obj;
}
// Most types are just 1 pointer and can use this helper.
// Pointer-sized front end to lupb_cache_getorcreate_size(): requests a
// userdata of sizeof(void*) bytes and reports whether a non-NULL userdata
// pointer came back (callers use this as the "newly created" signal).
static bool lupb_cache_getorcreate(lua_State *L, void *cobj, const char *type) {
  void *userdata = lupb_cache_getorcreate_size(L, cobj, type, sizeof(void*));
  return userdata != NULL;
}
/* lupb_def *******************************************************************/
// All the def types share the same C layout, even though they are different Lua
// types with different metatables.
/* lupb_msg********************************************************************/
// We prefer field access syntax (foo.bar, foo.bar = 5) over method syntax
// (foo:bar(), foo:set_bar(5)) to make messages behave more like regular tables.
// However, there are methods also, like foo:CopyFrom(other_foo) or foo:Clear().
typedef struct {
upb_def *def;
} lupb_def;
upb_msg *msg;
upb_msgdef *msgdef;
} lupb_msg;
static void lupb_def_unref(void *cobj) {
upb_def_unref((upb_def*)cobj);
// Returns Lua argument "narg" as a lupb_msg.  luaL_checkudata() verifies that
// the argument is a userdata whose metatable is the registered "upb.msg" type
// and raises a Lua error otherwise.
static lupb_msg *lupb_msg_check(lua_State *L, int narg) {
return luaL_checkudata(L, narg, "upb.msg");
}
static void lupb_def_getorcreate(lua_State *L, upb_def *def) {
const char *type_name;
switch(def->type) {
case UPB_DEF_MSG:
type_name = "upb.msgdef";
// Allocates a brand-new upb_msg of type "md" and wraps it in an "upb.msg" Lua
// userdata obtained from the object cache.
static void lupb_msg_pushnew(lua_State *L, upb_msgdef *md) {
  upb_msg *fresh = upb_msg_new(md);
  lupb_msg *wrapper =
      lupb_cache_getorcreate_size(L, fresh, "upb.msg", sizeof(lupb_msg));
  // The message was just allocated, so the cache must have built a new wrapper.
  assert(wrapper);
  wrapper->msgdef = md;
  // The msgdef has to outlive every message of its type, so the wrapper holds
  // its own (atomic) ref.  Should that prove too costly, an alternative is a
  // per-msgdef metatable that itself references the msgdef.
  upb_msgdef_ref(md);
}
// Caller does *not* pass a ref.
// Fetches (or builds) the "upb.msg" Lua wrapper for an existing message "msg"
// of type "md".  When the cache hands back a new wrapper, the wrapper takes its
// own refs on both the message and its msgdef.
static void lupb_msg_getorcreate(lua_State *L, upb_msg *msg, upb_msgdef *md) {
  lupb_msg *wrapper =
      lupb_cache_getorcreate_size(L, msg, "upb.msg", sizeof(lupb_msg));
  if (!wrapper) return;  // Nothing new was created: no refs to take.

  // New Lua object: ref the message on its behalf.
  wrapper->msg = upb_msg_getref(msg);
  wrapper->msgdef = md;
  // The msgdef must outlive the message, so ref it as well.
  upb_msgdef_ref(md);
}
// __gc metamethod for "upb.msg": drops the wrapper's ref on the message first,
// then its ref on the msgdef.
static int lupb_msg_gc(lua_State *L) {
  lupb_msg *self = lupb_msg_check(L, 1);
  upb_msgdef *owner = self->msgdef;
  upb_msg_unref(self->msg, owner);
  upb_msgdef_unref(owner);
  return 0;
}
// Pushes "val", a value belonging to field "f", onto the Lua stack.  Every
// numeric field type becomes a Lua number, bools become Lua booleans,
// string/bytes become Lua strings and submessages become "upb.msg" wrappers.
// A field type not listed below pushes nothing.
static void lupb_pushvalue(lua_State *L, upb_value val, upb_fielddef *f) {
  switch (f->type) {
    case UPB_TYPE(ENUM):
    case UPB_TYPE(INT32):
    case UPB_TYPE(SINT32):
    case UPB_TYPE(SFIXED32):
      lua_pushnumber(L, upb_value_getint32(val));
      return;

    case UPB_TYPE(INT64):
    case UPB_TYPE(SINT64):
    case UPB_TYPE(SFIXED64):
      lua_pushnumber(L, upb_value_getint64(val));
      return;

    case UPB_TYPE(UINT32):
    case UPB_TYPE(FIXED32):
      lua_pushnumber(L, upb_value_getuint32(val));
      return;

    case UPB_TYPE(UINT64):
    case UPB_TYPE(FIXED64):
      lua_pushnumber(L, upb_value_getuint64(val));
      return;

    case UPB_TYPE(DOUBLE):
      lua_pushnumber(L, upb_value_getdouble(val));
      return;

    case UPB_TYPE(FLOAT):
      lua_pushnumber(L, upb_value_getfloat(val));
      return;

    case UPB_TYPE(BOOL):
      lua_pushboolean(L, upb_value_getbool(val));
      return;

    case UPB_TYPE(STRING):
    case UPB_TYPE(BYTES): {
      upb_string *s = upb_value_getstr(val);
      assert(s);
      lua_pushlstring(L, upb_string_getrobuf(s), upb_string_len(s));
      return;
    }

    case UPB_TYPE(MESSAGE):
    case UPB_TYPE(GROUP): {
      upb_msg *submsg = upb_value_getmsg(val);
      assert(submsg);
      lupb_msg_getorcreate(L, submsg, upb_downcast_msgdef(f->def));
      return;
    }
  }
}
static upb_value lupb_getvalue(lua_State *L, int narg, upb_fielddef *f) {
upb_value val;
lua_Number num;
if (!upb_issubmsg(f) && !upb_isstring(f) && f->type != UPB_TYPE(BOOL)) {
num = luaL_checknumber(L, narg);
if (f->type != UPB_TYPE(DOUBLE) && f->type != UPB_TYPE(FLOAT) &&
num != rint(num)) {
luaL_error(L, "Cannot assign non-integer number %f to integer field", num);
}
}
switch (f->type) {
case UPB_TYPE(INT32):
case UPB_TYPE(SINT32):
case UPB_TYPE(SFIXED32):
case UPB_TYPE(ENUM):
if (num > INT32_MAX || num < INT32_MIN)
luaL_error(L, "Number %f is out-of-range for 32-bit integer field.", num);
upb_value_setint32(&val, num);
break;
case UPB_DEF_ENUM:
type_name = "upb.enumdef";
case UPB_TYPE(INT64):
case UPB_TYPE(SINT64):
case UPB_TYPE(SFIXED64):
if (num > INT64_MAX || num < INT64_MIN)
luaL_error(L, "Number %f is out-of-range for 64-bit integer field.", num);
upb_value_setint64(&val, num);
break;
default:
luaL_error(L, "unknown deftype %d", def->type);
type_name = NULL; // Placate the compiler.
case UPB_TYPE(UINT32):
case UPB_TYPE(FIXED32):
if (num > UINT32_MAX || num < 0)
luaL_error(L, "Number %f is out-of-range for unsigned 32-bit integer field.", num);
upb_value_setuint32(&val, num);
break;
case UPB_TYPE(UINT64):
case UPB_TYPE(FIXED64):
if (num > UINT64_MAX || num < 0)
luaL_error(L, "Number %f is out-of-range for unsigned 64-bit integer field.", num);
upb_value_setuint64(&val, num);
break;
case UPB_TYPE(DOUBLE):
if (num > DBL_MAX || num < -DBL_MAX) {
// This could happen if lua_Number was long double.
luaL_error(L, "Number %f is out-of-range for double field.", num);
}
upb_value_setdouble(&val, num);
break;
case UPB_TYPE(FLOAT):
if (num > FLT_MAX || num < -FLT_MAX)
luaL_error(L, "Number %f is out-of-range for float field.", num);
upb_value_setfloat(&val, num);
break;
case UPB_TYPE(BOOL):
if (!lua_isboolean(L, narg))
luaL_error(L, "Must explicitly pass true or false for boolean fields");
upb_value_setbool(&val, lua_toboolean(L, narg));
break;
case UPB_TYPE(STRING):
case UPB_TYPE(BYTES): {
// TODO: is there any reasonable way to avoid a copy here?
size_t len;
const char *str = luaL_checklstring(L, narg, &len);
upb_value_setstr(&val, upb_strduplen(str, len));
break;
}
case UPB_TYPE(MESSAGE):
case UPB_TYPE(GROUP): {
lupb_msg *m = lupb_msg_check(L, narg);
if (m->msgdef != upb_downcast_msgdef(f->def))
luaL_error(L, "Tried to assign a message of the wrong type.");
upb_value_setmsg(&val, m->msg);
break;
}
}
return val;
}
// __index metamethod for "upb.msg".  A key naming a field of the message's
// msgdef yields that field's value; any other key is looked up (raw) in the
// metatable so that methods such as Clear resolve.  A key matching neither
// raises a Lua error.
static int lupb_msg_index(lua_State *L) {
  assert(lua_gettop(L) == 2);  // __index should always be called with 2 args.
  lupb_msg *self = lupb_msg_check(L, 1);
  size_t keylen;
  const char *key = luaL_checklstring(L, 2, &keylen);
  upb_string keystr = UPB_STACK_STRING_LEN(key, keylen);
  upb_fielddef *field = upb_msgdef_ntof(self->msgdef, &keystr);

  if (field) {
    lupb_pushvalue(L, upb_msg_get(self->msg, field), field);
    return 1;
  }

  // Not a field; try the metatable for a method of that name.
  lua_getmetatable(L, 1);
  lua_pushvalue(L, 2);
  lua_rawget(L, -2);
  if (lua_isnil(L, -1)) {
    luaL_error(L, "%s is not a field name or a method name", key);
  }
  return 1;
}
static int lupb_msg_newindex(lua_State *L) {
assert(lua_gettop(L) == 3); // __newindex should always be called with 3 args.
lupb_msg *m = lupb_msg_check(L, 1);
size_t len;
const char *name = luaL_checklstring(L, 2, &len);
upb_string namestr = UPB_STACK_STRING_LEN(name, len);
upb_fielddef *f = upb_msgdef_ntof(m->msgdef, &namestr);
if (f) {
upb_value val = lupb_getvalue(L, 3, f);
upb_msg_set(m->msg, f, val);
if (upb_isstring(f)) {
upb_string_unref(upb_value_getstr(val));
}
} else {
luaL_error(L, "%s is not a field name", name);
}
lupb_cache_getorcreate(L, def, type_name, lupb_nop, lupb_def_unref);
return 0;
}
// Lua method msg:Clear(): forwards to upb_msg_clear() for the wrapped message
// and returns nothing to Lua.
static int lupb_msg_clear(lua_State *L) {
  lupb_msg *self = lupb_msg_check(L, 1);
  upb_msg *target = self->msg;
  upb_msg_clear(target, self->msgdef);
  return 0;
}
// msgdef
// Metatable entries for the "upb.msg" type: the metamethods proper plus the
// ordinary methods, which live in the same table because lupb_msg_index()
// falls back to a raw metatable lookup.  The {NULL, NULL} entry terminates
// the list.
static const struct luaL_Reg lupb_msg_mm[] = {
{"__gc", lupb_msg_gc},
{"__index", lupb_msg_index},
{"__newindex", lupb_msg_newindex},
// Our __index mm will look up methods if the index isn't a field name.
{"Clear", lupb_msg_clear},
{NULL, NULL}
};
/* lupb_msgdef ****************************************************************/
static upb_msgdef *lupb_msgdef_check(lua_State *L, int narg) {
lupb_def *ldef = luaL_checkudata(L, narg, "upb.msgdef");
@ -115,6 +313,12 @@ static int lupb_msgdef_gc(lua_State *L) {
return 0;
}
// __call metamethod for "upb.msgdef": calling a msgdef from Lua constructs a
// new message of that type and returns it.
static int lupb_msgdef_call(lua_State *L) {
  lupb_msg_pushnew(L, lupb_msgdef_check(L, 1));
  return 1;
}
static void lupb_fielddef_getorcreate(lua_State *L, upb_fielddef *f);
static int lupb_msgdef_name(lua_State *L) {
@ -150,6 +354,7 @@ static int lupb_msgdef_fieldbynum(lua_State *L) {
}
// Metamethods for the "upb.msgdef" type.  __call lets Lua code construct a
// message by calling the msgdef; __gc runs when the Lua wrapper is collected.
// The {NULL, NULL} entry terminates the list.
static const struct luaL_Reg lupb_msgdef_mm[] = {
{"__call", lupb_msgdef_call},
{"__gc", lupb_msgdef_gc},
{NULL, NULL}
};
@ -161,7 +366,8 @@ static const struct luaL_Reg lupb_msgdef_m[] = {
{NULL, NULL}
};
// enumdef
/* lupb_enumdef ***************************************************************/
static upb_enumdef *lupb_enumdef_check(lua_State *L, int narg) {
lupb_def *ldef = luaL_checkudata(L, narg, "upb.enumdef");
@ -191,18 +397,41 @@ static const struct luaL_Reg lupb_enumdef_m[] = {
};
/* lupb_def *******************************************************************/
// Fetches (or builds) the Lua wrapper for "def", choosing the metatable from
// the def's type.  "owned" is non-zero when the caller is handing over a ref
// on def.  Afterwards the ref count is balanced against what the cache did:
//   - new wrapper, caller passed no ref  -> take one for the wrapper;
//   - existing wrapper, caller passed a ref -> release the surplus ref.
static void lupb_def_getorcreate(lua_State *L, upb_def *def, int owned) {
  bool is_new = false;
  switch (def->type) {
    case UPB_DEF_MSG:
      is_new = lupb_cache_getorcreate(L, def, "upb.msgdef");
      break;
    case UPB_DEF_ENUM:
      is_new = lupb_cache_getorcreate(L, def, "upb.enumdef");
      break;
    default:
      luaL_error(L, "unknown deftype %d", def->type);
  }

  if (is_new) {
    if (!owned) upb_def_ref(def);
  } else {
    if (owned) upb_def_unref(def);
  }
}
/* lupb_fielddef **************************************************************/
typedef struct {
upb_fielddef *field;
} lupb_fielddef;
static void lupb_fielddef_ref(void *cobj) {
upb_def_ref(UPB_UPCAST(((upb_fielddef*)cobj)->msgdef));
}
static void lupb_fielddef_getorcreate(lua_State *L, upb_fielddef *f) {
lupb_cache_getorcreate(L, f, "upb.fielddef", lupb_fielddef_ref, lupb_nop);
bool created = lupb_cache_getorcreate(L, f, "upb.fielddef");
if (created) {
// Need to obtain a ref on this field's msgdef (fielddefs themselves aren't
// refcounted, but they're kept alive by their owning msgdef).
upb_def_ref(UPB_UPCAST(f->msgdef));
}
}
static lupb_fielddef *lupb_fielddef_check(lua_State *L, int narg) {
@ -221,11 +450,9 @@ static int lupb_fielddef_index(lua_State *L) {
} else if (strcmp(str, "label") == 0) {
lua_pushinteger(L, f->field->label);
} else if (strcmp(str, "def") == 0) {
upb_def_ref(f->field->def);
lupb_def_getorcreate(L, f->field->def);
lupb_def_getorcreate(L, f->field->def, false);
} else if (strcmp(str, "msgdef") == 0) {
upb_def_ref(UPB_UPCAST(f->field->msgdef));
lupb_def_getorcreate(L, UPB_UPCAST(f->field->msgdef));
lupb_def_getorcreate(L, UPB_UPCAST(f->field->msgdef), false);
} else {
lua_pushnil(L);
}
@ -264,10 +491,6 @@ static int lupb_symtab_gc(lua_State *L) {
return 0;
}
static void lupb_symtab_unref(void *cobj) {
upb_symtab_unref((upb_symtab*)cobj);
}
static int lupb_symtab_lookup(lua_State *L) {
lupb_symtab *s = lupb_symtab_check(L, 1);
size_t len;
@ -275,7 +498,7 @@ static int lupb_symtab_lookup(lua_State *L) {
upb_string namestr = UPB_STACK_STRING_LEN(name, len);
upb_def *def = upb_symtab_lookup(s->symtab, &namestr);
if (def) {
lupb_def_getorcreate(L, def);
lupb_def_getorcreate(L, def, true);
} else {
lua_pushnil(L);
}
@ -293,7 +516,7 @@ static int lupb_symtab_getdefs(lua_State *L) {
for (int i = 0; i < count; i++) {
upb_def *def = defs[i];
lua_pushnumber(L, i + 1); // 1-based array.
lupb_def_getorcreate(L, def);
lupb_def_getorcreate(L, def, true);
// Add it to our return table.
lua_settable(L, -3);
}
@ -331,13 +554,15 @@ static const struct luaL_Reg lupb_symtab_mm[] = {
static int lupb_symtab_new(lua_State *L) {
upb_symtab *s = upb_symtab_new();
lupb_cache_getorcreate(L, s, "upb.symtab", lupb_nop, lupb_symtab_unref);
bool created = lupb_cache_getorcreate(L, s, "upb.symtab");
(void)created; // For NDEBUG
assert(created); // It's new, there shouldn't be an obj for it already.
return 1;
}
static int lupb_getfdsdef(lua_State *L) {
lupb_cache_getorcreate(
L, upb_getfdsdef(), "upb.msgdef", lupb_nop, lupb_def_unref);
upb_msgdef *fdsdef = upb_getfdsdef(); // Gets a ref on fdsdef.
lupb_def_getorcreate(L, UPB_UPCAST(fdsdef), true);
return 1;
}
@ -357,7 +582,7 @@ static void lupb_register_type(lua_State *L, const char *name,
// Methods go in the mt's __index method. This implies that you can't
// implement __index and also set methods yourself.
luaL_register(L, NULL, m);
lua_setfield(L, -2, "__index");
lua_setfield(L, -2, "__index");
}
lua_pop(L, 1); // The mt.
}
@ -367,8 +592,9 @@ int luaopen_upb(lua_State *L) {
lupb_register_type(L, "upb.enumdef", lupb_enumdef_m, lupb_enumdef_mm);
lupb_register_type(L, "upb.fielddef", NULL, lupb_fielddef_mm);
lupb_register_type(L, "upb.symtab", lupb_symtab_m, lupb_symtab_mm);
lupb_register_type(L, "upb.msg", NULL, lupb_msg_mm);
// Create our object cache. TODO: need to make this table weak!
// Create our object cache.
lua_createtable(L, 0, 0);
lua_createtable(L, 0, 1); // Cache metatable.
lua_pushstring(L, "v"); // Values are weak.

@ -1,26 +0,0 @@
/*
* upb - a minimalist implementation of protocol buffers.
*
* Copyright (c) 2009 Joshua Haberman. See LICENSE for details.
*
* This file contains declarations for an array that contains the contents
* of descriptor.proto, serialized as a protobuf. xxd is used to create
* the actual definition.
*/
#ifndef UPB_DESCRIPTOR_H_
#define UPB_DESCRIPTOR_H_
#include "upb_string.h"
#ifdef __cplusplus
extern "C" {
#endif
extern upb_string descriptor_str;
#ifdef __cplusplus
} /* extern "C" */
#endif
#endif /* UPB_DESCRIPTOR_H_ */

@ -1,10 +1,11 @@
/*
* upb - a minimalist implementation of protocol buffers.
*
* Copyright (c) 2008-2009 Joshua Haberman. See LICENSE for details.
* Copyright (c) 2008-2011 Joshua Haberman. See LICENSE for details.
*/
#include "upb_decoder.h"
#include "upb_varint_decoder.h"
#include <inttypes.h>
#include <stddef.h>
@ -21,105 +22,6 @@ extern fastdecode_ret upb_fastdecode(const char *p, const char *end,
upb_value_handler_t value_cb, void *closure,
void *table, int table_size);
/* Pure Decoding **************************************************************/
// The key fast-path varint-decoding routine. Here we can assume we have at
// least UPB_MAX_VARINT_ENCODED_SIZE bytes available. There are a lot of
// possibilities for optimization/experimentation here.
#ifdef USE_SSE_VARINT_DECODING
#include <emmintrin.h>
// This works, but is empirically slower than the branchy version below. Why?
// Most varints are very short. Next step: use branches for 1/2-byte varints,
// but use the SSE version for 3-10 byte varints.
// SSE2 variant of the fast varint decoder.  Decodes one varint starting at
// *ptr; on success stores the value in *val, advances *ptr past the varint and
// returns true.  On failure sets an error on "s" and returns false, leaving
// *ptr and *val untouched.  Reads 16 bytes from *ptr regardless of the
// varint's actual length.
INLINE bool upb_decode_varint_fast(const char **ptr, uint64_t *val, upb_status *s) {
const char *p = *ptr;
// Load 16 bytes (unaligned) and gather the top bit of each byte, i.e. each
// byte's varint continuation flag, into a bitmask.
__m128i val128 = _mm_loadu_si128((void*)p);
unsigned int continuation_bits = _mm_movemask_epi8(val128);
// The lowest *clear* continuation bit marks the final byte of the varint;
// ffs() is 1-based, so the result is the encoded length in bytes.
unsigned int bsr_val = ~continuation_bits;
int varint_length = __builtin_ffs(bsr_val);
if (varint_length > 10) {
upb_seterr(s, UPB_ERROR, "Unterminated varint");
return false;
}
// First two bytes: strip the continuation bits and close the 1-bit gap so
// they form 14 contiguous payload bits.
uint16_t twob;
memcpy(&twob, p, 2);
twob &= 0x7f7f;
twob = ((twob & 0xff00) >> 1) | (twob & 0xff);
// Next eight bytes: strip the continuation bits, then close the gaps in
// three rounds (pairs of bytes, then of 16-bit halves, then of 32-bit halves)
// to form 56 contiguous payload bits.
uint64_t eightb;
memcpy(&eightb, p + 2, 8);
eightb &= 0x7f7f7f7f7f7f7f7f;
eightb = ((eightb & 0xff00ff00ff00ff00) >> 1) | (eightb & 0x00ff00ff00ff00ff);
eightb = ((eightb & 0xffff0000ffff0000) >> 2) | (eightb & 0x0000ffff0000ffff);
eightb = ((eightb & 0xffffffff00000000) >> 4) | (eightb & 0x00000000ffffffff);
uint64_t all_bits = twob | (eightb << 14);
// Keep only the payload bits that belong to this varint (7 per encoded byte);
// a 10-byte varint keeps all 64 bits.
int varint_bits = varint_length * 7;
uint64_t mask = varint_bits == 70 ? (uint64_t)-1 : (1ULL << (varint_bits)) - 1;
*val = all_bits & mask;
*ptr = p + varint_length;
return true;
}
#else
// Branch-per-byte variant of the fast varint decoder.  Decodes one varint
// starting at *ptr; on success stores the value in *val, advances *ptr past
// the varint and returns true.  If all ten bytes have their continuation bit
// set, sets an error on "s" and returns false without touching *ptr or *val.
// The 64-bit result is accumulated in two 32-bit halves.
INLINE bool upb_decode_varint_fast(const char **ptr, uint64_t *val, upb_status *s) {
const char *p = *ptr;
uint32_t low, high = 0;
uint32_t b;
// Bytes 1-4 contribute bits 0-27, all of which land in "low".
b = *(p++); low = (b & 0x7f) ; if(!(b & 0x80)) goto done;
b = *(p++); low |= (b & 0x7f) << 7; if(!(b & 0x80)) goto done;
b = *(p++); low |= (b & 0x7f) << 14; if(!(b & 0x80)) goto done;
b = *(p++); low |= (b & 0x7f) << 21; if(!(b & 0x80)) goto done;
// Byte 5 straddles the halves: its low 4 payload bits finish "low" and its
// high 3 payload bits start "high".
b = *(p++); low |= (b & 0x7f) << 28;
high = (b & 0x7f) >> 4; if(!(b & 0x80)) goto done;
// Bytes 6-10 contribute the remaining bits of "high".
b = *(p++); high |= (b & 0x7f) << 3; if(!(b & 0x80)) goto done;
b = *(p++); high |= (b & 0x7f) << 10; if(!(b & 0x80)) goto done;
b = *(p++); high |= (b & 0x7f) << 17; if(!(b & 0x80)) goto done;
b = *(p++); high |= (b & 0x7f) << 24; if(!(b & 0x80)) goto done;
b = *(p++); high |= (b & 0x7f) << 31; if(!(b & 0x80)) goto done;
upb_seterr(s, UPB_ERROR, "Unterminated varint");
return false;
done:
*val = ((uint64_t)high << 32) | low;
*ptr = p;
return true;
}
// Result of upb_decode_varint_fast64(): the position just past the decoded
// varint (NULL when decoding failed) together with the decoded value.
typedef struct {
  const char *newbuf;
  uint64_t val;
} retval;

// Decodes one varint starting at "p", accumulating directly into a 64-bit
// value.  Reads at most 10 bytes.  Returns {NULL, 0} if all ten bytes carry a
// continuation bit; otherwise returns the decoded value and the address of the
// byte following the varint.
retval upb_decode_varint_fast64(const char *p) {
  retval result = {(void*)0, 0};
  uint64_t acc = 0;
  // Ten bytes, seven payload bits each: shifts 0, 7, ..., 63.
  for (int shift = 0; shift < 70; shift += 7) {
    uint64_t byte = *(p++);
    acc |= (byte & 0x7f) << shift;
    if (!(byte & 0x80)) {
      // Continuation bit clear: this was the last byte.
      result.val = acc;
      result.newbuf = p;
      return result;
    }
  }
  return result;
}
#endif
/* Decoding/Buffering of individual values ************************************/
@ -233,11 +135,13 @@ done:
INLINE bool upb_decode_varint(upb_decoder *d, upb_value *val) {
if (upb_decoder_bufleft(d) >= 16) {
// Common (fast) case.
uint64_t val64;
const char *p = d->ptr;
if (!upb_decode_varint_fast(&p, &val64, d->status)) return false;
upb_decoder_advance(d, p - d->ptr);
upb_value_setraw(val, val64);
upb_decoderet r = upb_decode_varint_fast(d->ptr);
if (r.p == NULL) {
upb_seterr(d->status, UPB_ERROR, "Unterminated varint.\n");
return false;
}
upb_value_setraw(val, r.val);
upb_decoder_advance(d, r.p - d->ptr);
return true;
} else {
return upb_decode_varint_slow(d, val);
@ -352,11 +256,19 @@ void upb_decoder_run(upb_src *src, upb_status *status) {
d->dispatcher.top->handlers.set->value,
d->dispatcher.top->handlers.closure,
d->top->msgdef->itof.array,
d->top->msgdef->itof.array_size);
d->top->msgdef->itof.array_size,
d->tmp);
CHECK_FLOW(ret.flow);
if (ret.ptr - d->ptr > 0) {
DEBUGPRINTF("Fast path parsed %d bytes of data!\n", ret.ptr - d->ptr);
}
d->ptr = ret.ptr;
if (end - d->ptr < 12) {
DEBUGPRINTF("Off the fast path because <12 bytes of data\n");
if (end == d->submsg_end && end != d->end) {
DEBUGPRINTF("Off the fast path because <12 bytes of data, but ONLY because of submsg end.\n");
} else {
DEBUGPRINTF("Off the fast path because <12 bytes of data, NOT because of submsg end.\n");
}
} else {
DEBUGPRINTF("Off the fast path for some other reason.\n");
}

@ -33,7 +33,7 @@ SECTION .text
; Register allocation.
%define BUF rbx ; const char *p, current buf position.
%define END rbp ; const char *end, where the buf ends (either submsg end or buf end)
%define FREE r12 ; unused
%define STRING r12 ; unused
%define FIELDDEF r13 ; upb_fielddef *f, needs to be preserved across varint decoding call.
%define CALLBACK r14
%define CLOSURE r15
@ -143,6 +143,7 @@ _upb_fastdecode:
; Parse arguments into reg vals and stack.
mov BUF, rdi
mov COMMITTED_BUF_SPILL, rdi
mov END, rsi
mov CALLBACK, rdx
mov CLOSURE, rcx
@ -210,7 +211,6 @@ align 16
align 16
.string:
.cant_fast_path:
mov rax, 0 ; UPB_CONTINUE -- continue as before.
.done:

@ -6,9 +6,11 @@
#include <stdlib.h>
#include <stddef.h>
#include <errno.h>
#include "descriptor.c"
#include "descriptor_const.h"
#include "upb_def.h"
#include "upb_msg.h"
#define alignof(t) offsetof(struct { char c; t x; }, x)
@ -261,6 +263,8 @@ struct _upb_defbuilder {
bool saw_number;
bool saw_name;
upb_string *default_string;
upb_fielddef *f;
};
typedef struct _upb_defbuilder upb_defbuilder;
@ -276,12 +280,18 @@ static void upb_defbuilder_init(upb_defbuilder *b) {
upb_status_init(&b->status);
b->stack_len = 0;
b->name = NULL;
b->default_string = NULL;
}
// Releases everything the builder still holds: its current name, its status,
// its list of defs, any pending default-value string, and the name stored in
// each frame left on the builder's stack (popped top-down).
static void upb_defbuilder_uninit(upb_defbuilder *b) {
  upb_string_unref(b->name);
  upb_status_uninit(&b->status);
  upb_deflist_uninit(&b->defs);
  upb_string_unref(b->default_string);
  while (b->stack_len > 0) {
    b->stack_len--;
    upb_defbuilder_frame *frame = &b->stack[b->stack_len];
    upb_string_unref(frame->name);
  }
}
static upb_msgdef *upb_defbuilder_top(upb_defbuilder *b) {
@ -587,6 +597,19 @@ upb_string *upb_enumdef_iton(upb_enumdef *def, upb_enumval_t num) {
/* upb_fielddef ***************************************************************/
static void upb_fielddef_free(upb_fielddef *f) {
if (upb_isstring(f) || f->type == UPB_TYPE(ENUM)) {
upb_string_unref(upb_value_getstr(f->default_value));
} else if (upb_issubmsg(f)) {
upb_msg *m = upb_value_getmsg(f->default_value);
assert(m);
// We cheat a bit here. We need to unref msg, but we don't have a reliable
// way of accessing the msgdef (which is required by upb_msg_unref()),
// because f->def may have already been collected as part of a cycle if
// this is an unowned ref. But we know that default messages never contain
// references to other messages, and their only string references are to
// the singleton empty string, so we can safely unref+free msg directly.
if (upb_atomic_unref(&m->refcount)) free(m);
}
upb_string_unref(f->name);
if(f->owned) {
upb_def_unref(f->def);
@ -606,6 +629,109 @@ static upb_flow_t upb_fielddef_startmsg(void *_b) {
return UPB_CONTINUE;
}
// Converts the textual default value "dstr" into a typed value stored in "*d",
// according to the field type "type".
//
// "dstr" may be NULL when the field declares no explicit default; in that case
// the type's zero value (0, 0.0, false, or the empty string) is stored.
//
// Ownership: the caller passes its ref on dstr.  For string/bytes/enum fields
// that ref is kept inside *d (enum defaults stay as a name because the enumdef
// may not be available yet); on every other path the ref is released here.
//
// Returns true on success.  Returns false if a default was supplied for a
// message/group field, or if the text cannot be parsed as (or does not fit in)
// the requested scalar type.
static bool upb_fielddef_setdefault(upb_string *dstr, upb_value *d, int type) {
  bool success = true;
  if (type == UPB_TYPE(STRING) || type == UPB_TYPE(BYTES) || type == UPB_TYPE(ENUM)) {
    // We'll keep the ref we had on it.  We include enums in this case because
    // we need the enumdef to resolve the name, but we may not have it yet.
    // We'll resolve it later.
    if (dstr) {
      upb_value_setstr(d, dstr);
    } else {
      upb_value_setstr(d, upb_emptystring());
    }
  } else if (type == UPB_TYPE(MESSAGE) || type == UPB_TYPE(GROUP)) {
    // We don't expect to get a default value.  Note whether one was supplied
    // *before* dropping our ref so the pointer is never inspected after the
    // string may have been freed.
    bool had_default = (dstr != NULL);
    upb_string_unref(dstr);
    if (had_default) {
      printf("Returning false because I got a default string for a message!\n");
      success = false;
    }
  } else {
    // The strto* functions need the string to be NULL-terminated.
    char *strz = upb_string_isempty(dstr) ? NULL : upb_string_newcstr(dstr);
    char *end;
    upb_string_unref(dstr);
    // The strto* functions only ever *set* errno on overflow; they never clear
    // it.  Reset it here so a stale ERANGE left over from earlier code is not
    // mistaken for a range error in this conversion.
    errno = 0;
    switch (type) {
      case UPB_TYPE(INT32):
      case UPB_TYPE(SINT32):
      case UPB_TYPE(SFIXED32):
        if (strz) {
          long val = strtol(strz, &end, 0);
          if (val > INT32_MAX || val < INT32_MIN || errno == ERANGE || *end)
            success = false;
          else
            upb_value_setint32(d, val);
        } else {
          upb_value_setint32(d, 0);
        }
        break;
      case UPB_TYPE(INT64):
      case UPB_TYPE(SINT64):
      case UPB_TYPE(SFIXED64):
        if (strz) {
          upb_value_setint64(d, strtoll(strz, &end, 0));
          if (errno == ERANGE || *end) success = false;
        } else {
          upb_value_setint64(d, 0);
        }
        break;
      case UPB_TYPE(UINT32):
      case UPB_TYPE(FIXED32):
        if (strz) {
          // Keep the result unsigned: storing it in a signed long would turn
          // large values negative and let them slip past the range check.
          unsigned long val = strtoul(strz, &end, 0);
          if (val > UINT32_MAX || errno == ERANGE || *end)
            success = false;
          else
            upb_value_setuint32(d, val);
        } else {
          upb_value_setuint32(d, 0);
        }
        break;
      case UPB_TYPE(UINT64):
      case UPB_TYPE(FIXED64):
        if (strz) {
          upb_value_setuint64(d, strtoull(strz, &end, 0));
          if (errno == ERANGE || *end) success = false;
        } else {
          upb_value_setuint64(d, 0);
        }
        break;
      case UPB_TYPE(DOUBLE):
        if (strz) {
          upb_value_setdouble(d, strtod(strz, &end));
          if (errno == ERANGE || *end) success = false;
        } else {
          upb_value_setdouble(d, 0.0);
        }
        break;
      case UPB_TYPE(FLOAT):
        if (strz) {
          upb_value_setfloat(d, strtof(strz, &end));
          if (errno == ERANGE || *end) success = false;
        } else {
          upb_value_setfloat(d, 0.0);
        }
        break;
      case UPB_TYPE(BOOL):
        if (!strz || strcmp(strz, "false") == 0)
          upb_value_setbool(d, false);
        else if (strcmp(strz, "true") == 0)
          upb_value_setbool(d, true);
        else
          success = false;
        break;
    }
    if (!success) {
      printf("Returning false on the int conversion path, was trying to convert: %s, type=%d\n", strz, type);
    }
    free(strz);
  }
  return success;
}
static upb_flow_t upb_fielddef_endmsg(void *_b) {
upb_defbuilder *b = _b;
upb_fielddef *f = b->f;
@ -619,6 +745,15 @@ static upb_flow_t upb_fielddef_endmsg(void *_b) {
upb_ntof_ent ntof_ent = {{f->name, 0}, f};
upb_inttable_insert(&m->itof, f->number, &itof_ent);
upb_strtable_insert(&m->ntof, &ntof_ent.e);
upb_string *dstr = b->default_string;
b->default_string = NULL;
if (!upb_fielddef_setdefault(dstr, &f->default_value, f->type)) {
// We don't worry too much about giving a great error message since the
// compiler should have ensured this was correct.
upb_seterr(&b->status, UPB_ERROR, "Error converting default value.");
return UPB_BREAK;
}
return UPB_CONTINUE;
}
@ -644,6 +779,12 @@ static upb_flow_t upb_fielddef_value(void *_b, upb_fielddef *f, upb_value val) {
b->f->owned = true;
break;
}
case GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_DEFAULT_VALUE_FIELDNUM:
// Have to convert from string to the correct type, but we might not know
// the type yet.
upb_string_unref(b->default_string);
b->default_string = upb_string_getref(upb_value_getstr(val));
break;
}
return UPB_CONTINUE;
}
@ -683,6 +824,7 @@ static upb_flow_t upb_msgdef_startmsg(void *_b) {
upb_atomic_refcount_init(&m->cycle_refcount, 0);
upb_inttable_init(&m->itof, 4, sizeof(upb_itof_ent));
upb_strtable_init(&m->ntof, 4, sizeof(upb_ntof_ent));
m->default_message = NULL;
upb_deflist_push(&b->defs, UPB_UPCAST(m));
upb_defbuilder_startcontainer(b);
return UPB_CONTINUE;
@ -703,7 +845,7 @@ static upb_flow_t upb_msgdef_endmsg(void *_b) {
upb_field_count_t field = 0;
upb_msg_iter i;
for (i = upb_msg_begin(m); !upb_msg_done(i); i = upb_msg_next(m, i)) {
sorted_fields[field++]= upb_msg_iter_field(i);
sorted_fields[field++] = upb_msg_iter_field(i);
}
qsort(sorted_fields, n, sizeof(*sorted_fields), upb_compare_fields);
@ -745,6 +887,18 @@ static upb_flow_t upb_msgdef_endmsg(void *_b) {
if (max_align > 0) m->size = upb_align_up(m->size, max_align);
// Create default message instance, an immutable message with all default
// values set (except submessages, which are simply marked as unset). We
// could alternatively leave all set bits unset, but this would make
// upb_msg_get() take its unexpected branch more often for no good reason.
m->default_message = upb_msg_new(m);
for (i = upb_msg_begin(m); !upb_msg_done(i); i = upb_msg_next(m, i)) {
  upb_fielddef *f = upb_msg_iter_field(i);
  // Store the default for every field except submessages (left unset) and
  // enums (whose default is presumably still an unresolved name at this
  // point -- TODO confirm).  The previous test, `!f->type == UPB_TYPE(ENUM)`,
  // parsed as `(!f->type) == UPB_TYPE(ENUM)`; that can only hold if the
  // constant is 0 or 1, so unless ENUM is one of those (assumed not) no
  // default was ever stored.
  // NOTE(review): upb_fielddef_free() assumes default messages reference only
  // the empty string; re-check that assumption now that non-empty string
  // defaults are stored here.
  if (!upb_issubmsg(f) && f->type != UPB_TYPE(ENUM)) {
    upb_msg_set(m->default_message, f, f->default_value);
  }
}
upb_defbuilder_endcontainer(b);
return UPB_CONTINUE;
}
@ -802,6 +956,7 @@ static void upb_msgdef_register_DescriptorProto(upb_defbuilder *b,
static void upb_msgdef_free(upb_msgdef *m)
{
upb_msg_unref(m->default_message, m);
upb_msg_iter i;
for(i = upb_msg_begin(m); !upb_msg_done(i); i = upb_msg_next(m, i))
upb_fielddef_free(upb_msg_iter_field(i));
@ -818,6 +973,10 @@ static void upb_msgdef_resolve(upb_msgdef *m, upb_fielddef *f, upb_def *def) {
// We will later make the ref unowned if it is a part of a cycle.
f->owned = true;
upb_def_ref(def);
if (upb_issubmsg(f)) {
upb_msgdef *md = upb_downcast_msgdef(def);
upb_value_setmsg(&f->default_value, upb_msg_getref(md->default_message));
}
}
upb_msg_iter upb_msg_begin(upb_msgdef *m) {
@ -937,7 +1096,8 @@ static bool upb_symtab_findcycles(upb_msgdef *m, int depth, upb_status *status)
}
// Given a table of pending defs "tmptab" and a table of existing defs "symtab",
// resolves all of the unresolved refs for the defs in tmptab.
// resolves all of the unresolved refs for the defs in tmptab. Also resolves
// default values for enumerations and submessages.
bool upb_resolverefs(upb_strtable *tmptab, upb_strtable *symtab,
upb_status *status)
{
@ -1352,7 +1512,7 @@ upb_def *upb_getdescriptordef(upb_string *str) {
// upb itself is corrupt.
abort();
}
upb_def_unref(UPB_UPCAST(def)); // The symtab already holds a ref on it.
upb_msgdef_unref(def); // The symtab already holds a ref on it.
atexit(upb_free_descriptor_symtab);
}
return upb_symtab_resolve(

@ -81,6 +81,9 @@ INLINE void upb_def_unref(upb_def *def) {
if(def && upb_atomic_unref(&def->refcount)) _upb_def_reftozero(def);
}
#define UPB_UPCAST(ptr) (&(ptr)->base)
/* upb_fielddef ***************************************************************/
// A upb_fielddef describes a single field in a message. It isn't a full def
@ -158,6 +161,10 @@ typedef struct _upb_msgdef {
// Tables for looking up fields by number and name.
upb_inttable itof; // int to field
upb_strtable ntof; // name to field
// Immutable msg instance that has all default values set.
// TODO: need a way of making this immutable!
struct _upb_msg *default_message;
} upb_msgdef;
// Hash table entries for looking up fields by name or number.
@ -172,6 +179,13 @@ typedef struct {
upb_fielddef *f;
} upb_ntof_ent;
// Releases one ref on the msgdef "md". Convenience wrapper: upcasts "md" to
// its upb_def base with UPB_UPCAST() and forwards to upb_def_unref().
INLINE void upb_msgdef_unref(upb_msgdef *md) {
upb_def_unref(UPB_UPCAST(md));
}
// Takes one ref on the msgdef "md". Convenience wrapper: upcasts "md" to its
// upb_def base with UPB_UPCAST() and forwards to upb_def_ref().
INLINE void upb_msgdef_ref(upb_msgdef *md) {
upb_def_ref(UPB_UPCAST(md));
}
// Looks up a field by name or number. While these are written to be as fast
// as possible, it will still be faster to cache the results of this lookup if
// possible. These return NULL if no such field is found.
@ -361,8 +375,6 @@ UPB_DOWNCAST_DEF(extdef, EXT);
UPB_DOWNCAST_DEF(unresolveddef, UNRESOLVED);
#undef UPB_DOWNCAST_DEF
#define UPB_UPCAST(ptr) (&(ptr)->base)
#ifdef __cplusplus
} /* extern "C" */
#endif

@ -145,6 +145,22 @@ INLINE void upb_msg_sethas(upb_msg *msg, upb_fielddef *f) {
msg->data[f->set_bit_offset] |= f->set_bit_mask;
}
// Stores "val" into field "f" of "msg" and marks the field as present.
//
// "val" must carry the value type that "f" expects (checked with assert()).
// For fields where upb_field_ismm(f) is true (presumably the refcounted,
// memory-managed types -- confirm against upb_field_ismm), the message takes
// its own ref on the new value and drops the ref it held on the value
// previously stored in the slot.
void upb_msg_set(upb_msg *msg, upb_fielddef *f, upb_value val) {
  assert(val.type == upb_field_valuetype(f));
  upb_valueptr ptr = _upb_msg_getptr(msg, f);
  if (upb_field_ismm(f)) {
    // Ref the new value *before* releasing the old one.  If both name the same
    // object and the message holds its only ref, unref-first would destroy the
    // object we are about to store.
    upb_atomic_refcount_t *refcount = upb_value_getrefcount(val);
    if (refcount) upb_atomic_ref(refcount);
    // Unref any previous value we may have had there.
    upb_value oldval = upb_value_read(ptr, upb_field_valuetype(f));
    upb_field_unref(oldval, f);
  }
  upb_msg_sethas(msg, f);
  // This function returns void, so the write is a plain statement rather than
  // the operand of a return.
  upb_value_write(ptr, val, upb_field_valuetype(f));
}
static upb_valueptr upb_msg_getappendptr(upb_msg *msg, upb_fielddef *f) {
upb_valueptr p = _upb_msg_getptr(msg, f);
if (upb_isarray(f)) {

@ -135,6 +135,7 @@ INLINE void upb_value_write(upb_valueptr ptr, upb_value val,
#undef CASE
}
/* upb_array ******************************************************************/
typedef uint32_t upb_arraylen_t;
@ -172,8 +173,17 @@ INLINE upb_value upb_array_get(upb_array *arr, upb_fielddef *f,
return upb_value_read(_upb_array_getptr(arr, f, i), f->type);
}
/* upb_msg ********************************************************************/
// upb_msg is not self-describing; the upb_msg does not contain a pointer to the
// upb_msgdef. While this makes the API a bit more cumbersome to use, this
// choice was made for a few important reasons:
//
// 1. it would make every message 8 bytes larger on 64-bit platforms. This is
// a high overhead for small messages.
// 2. you would want the msg to own a ref on its msgdef, but this would require
// an atomic operation for every message create or destroy!
struct _upb_msg {
upb_atomic_refcount_t refcount;
uint8_t data[4]; // We allocate the appropriate amount per message.
@ -194,6 +204,11 @@ upb_msg *upb_msg_new(upb_msgdef *md);
// Releases one ref on "msg"; a NULL "msg" is ignored.  When
// upb_atomic_unref() reports that this was the last ref, the message is
// freed via _upb_msg_free(), which needs the msgdef "md" describing it.
INLINE void upb_msg_unref(upb_msg *msg, upb_msgdef *md) {
  if (!msg) return;
  if (upb_atomic_unref(&msg->refcount)) {
    _upb_msg_free(msg, md);
  }
}
// Takes a new ref on "msg" and returns the same pointer, so the call can be
// used inline where a ref'd message is needed. "msg" must not be NULL
// (checked with assert()).
INLINE upb_msg *upb_msg_getref(upb_msg *msg) {
assert(msg);
upb_atomic_ref(&msg->refcount);
return msg;
}
void upb_msg_recycle(upb_msg **msg, upb_msgdef *msgdef);
@ -203,10 +218,40 @@ INLINE bool upb_msg_has(upb_msg *msg, upb_fielddef *f) {
return (msg->data[f->set_bit_offset] & f->set_bit_mask) != 0;
}
// We have several options for handling default values:
// 1. inside upb_msg_clear(), overwrite all values to be their defaults,
// overwriting submessage pointers to point to the default instance again.
// 2. inside upb_msg_get(), test upb_msg_has() and return md->default_value
// if it is not set. upb_msg_clear() only clears the set bits.
// We lazily clear objects if/when we reuse them.
// 3. inside upb_msg_clear(), overwrite all values to be their default,
// and recurse into submessages to set all their values to defaults also.
// 4. as a hybrid of (1) and (3), make each "set bit" tri-state, where it
// can have a value of "unset, but cached sub-message needs to be cleared."
// Like (2) we can cache sub-messages and lazily clear, but primitive values
// can always be returned straight from the message.
//
// (1) is undesirable, because it prevents us from caching sub-objects.
// (2) makes clear() cheaper, but makes get() branchier.
// (3) makes get() less branchy, but makes clear() have worse cache behavior.
// (4) makes get() differently branchy (only returns default from msgdef if
// NON-primitive value is unset), but uses more set bits. It's questionable
// whether it would be a performance improvement.
//
// For the moment we go with (2). Google's protobuf does (3), which is likely
// part of the reason we beat it in some benchmarks.
// Returns the value of field "f" in "msg".  If the field's set bit is not on
// (see upb_msg_has()), the field's default value from the fielddef is returned
// instead of whatever bytes happen to be stored in the message.
//
// For submessages and strings, the returned value is not owned.
INLINE upb_value upb_msg_get(upb_msg *msg, upb_fielddef *f) {
  // Unset field: the stored bytes are not meaningful, so report the default.
  if (!upb_msg_has(msg, f)) return f->default_value;
  return upb_value_read(_upb_msg_getptr(msg, f), upb_field_valuetype(f));
}
void upb_msg_set(upb_msg *msg, upb_fielddef *f, upb_value val);
// Unsets all field values back to their defaults.
INLINE void upb_msg_clear(upb_msg *msg, upb_msgdef *md) {
memset(msg->data, 0, md->set_flags_bytes);

@ -147,4 +147,15 @@ error:
return NULL;
}
// Out-of-line entry point that simply forwards to upb_string_recycle().
// Written as a plain call: a void function may not return an expression.
void upb_string_noninlinerecycle(upb_string **_str) {
  upb_string_recycle(_str);
}
// Returns a pointer to a single function-local static empty string; every
// call yields the same object.  Declared with (void) so the definition is a
// proper prototype rather than an old-style unspecified-argument function.
upb_string *upb_emptystring(void) {
  static upb_string empty = UPB_STATIC_STRING("");
  return &empty;
}
// Returns a newly malloc()ed, NUL-terminated copy of the bytes of "str", or
// NULL if the allocation fails.  The caller owns the returned buffer and
// releases it with free().
char *upb_string_newcstr(upb_string *str) {
  upb_strlen_t len = upb_string_len(str);
  char *ret = malloc(len + 1);  // +1 for the terminating NUL.
  if (!ret) return NULL;        // Out of memory: never write through NULL.
  memcpy(ret, upb_string_getrobuf(str), len);
  ret[len] = '\0';
  return ret;
}

@ -134,6 +134,9 @@ INLINE upb_string *upb_string_getref(upb_string *str) {
// Returns the length of the string.
INLINE upb_strlen_t upb_string_len(upb_string *str) { return str->len; }
// Reports whether "str" holds no bytes.  A NULL "str" counts as empty.
INLINE bool upb_string_isempty(upb_string *str) {
  if (!str) return true;
  return upb_string_len(str) == 0;
}
// Use to read the bytes of the string. The caller *must* call
// upb_string_endread() after the data has been read. The window between
@ -273,6 +276,10 @@ void upb_string_substr(upb_string *str, upb_string *target_str,
//#endif
#define UPB_STRLIT(str) &(upb_string)UPB_STATIC_STRING(str)
// Returns a singleton empty string.
upb_string *upb_emptystring();
/* upb_string library functions ***********************************************/
// Named like their <string.h> counterparts, these are all safe against buffer
@ -339,6 +346,9 @@ INLINE upb_string *upb_strdupc(const char *src) {
return upb_strduplen(src, strlen(src));
}
// Returns a newly-allocated (malloc), NUL-terminated copy of str.
char *upb_string_newcstr(upb_string *str);
// Appends 'append' to 's' in-place, resizing s if necessary.
void upb_strcat(upb_string *s, upb_string *append);

@ -12,7 +12,6 @@
#include <inttypes.h>
#include <stdarg.h>
#include <stdlib.h>
#include "descriptor.h"
#include "upb_def.h"
#include "upb_msg.h"
#include "upb_glue.h"

@ -1,20 +1,20 @@
/*
* upb - a minimalist implementation of protocol buffers.
*
* A test that verifies that our results are identical to proto2 for a
* given proto type and input protobuf.
*
* Copyright (c) 2011 Joshua Haberman. See LICENSE for details.
*/
#undef NDEBUG /* ensure tests always assert. */
#include <assert.h>
#include <stdio.h>
#include <stdlib.h>
#include <google/protobuf/descriptor.h>
#include "upb_decoder.h"
#include "upb_test.h"
#include "upb_def.h"
#include "upb_glue.h"
#include "upb_msg.h"
#include "upb_strstream.h"
// Running count of ASSERT() evaluations; incremented by the macro below.
int num_assertions = 0;
// Counts the assertion, then checks "expr" with assert().  Wrapped in
// do { } while(0) so that it expands to a single statement.
#define ASSERT(expr) do { \
++num_assertions; \
assert(expr); \
} while(0)
#include MESSAGE_HFILE

@ -1,212 +1,13 @@
#undef NDEBUG /* ensure tests always assert. */
#include <assert.h>
#include <stdio.h>
#include <stdlib.h>
#include "upb_decoder.c"
#include "upb_def.h"
#include "upb_glue.h"
// Running count of ASSERT() evaluations; incremented by the macro below.
int num_assertions = 0;
// Counts the assertion, then checks "expr" with assert().  Wrapped in
// do { } while(0) so that it expands to a single statement.
#define ASSERT(expr) do { \
++num_assertions; \
assert(expr); \
} while(0)
// Exercises upb_decode_varint_fast() on 64-bit varints.
//
// Each TEST() case builds a buffer holding the varint bytes followed by 0xff
// padding, decodes it, and asserts that the status is ok, that the decoded
// value matches, and that the buffer pointer stopped right after the varint
// (i.e. 16 bytes before the end of the array: the padding plus the string
// literal's terminating NUL).  A final case feeds a varint that is too long
// and expects UPB_ERROR.
static void test_get_v_uint64_t()
{
#define TEST(name, bytes, val) {\
upb_status status = UPB_STATUS_INIT; \
const char name[] = bytes "\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff" ; \
const char *name ## _buf = name; \
uint64_t name ## _val = 0; \
upb_decode_varint_fast(&name ## _buf, &name ## _val, &status); \
ASSERT(upb_ok(&status)); \
ASSERT(name ## _val == val); \
ASSERT(name ## _buf == name + sizeof(name) - 16); /* - 16: 0xff padding plus the terminating NUL */ \
}
TEST(zero, "\x00", 0ULL);
TEST(one, "\x01", 1ULL);
TEST(twob, "\x81\x14", 0xa01ULL);
TEST(twob, "\x81\x03", 0x181ULL);
TEST(threeb, "\x81\x83\x07", 0x1c181ULL);
TEST(fourb, "\x81\x83\x87\x0f", 0x1e1c181ULL);
TEST(fiveb, "\x81\x83\x87\x8f\x1f", 0x1f1e1c181ULL);
TEST(sixb, "\x81\x83\x87\x8f\x9f\x3f", 0x1f9f1e1c181ULL);
TEST(sevenb, "\x81\x83\x87\x8f\x9f\xbf\x7f", 0x1fdf9f1e1c181ULL);
TEST(eightb, "\x81\x83\x87\x8f\x9f\xbf\xff\x01", 0x3fdf9f1e1c181ULL);
TEST(nineb, "\x81\x83\x87\x8f\x9f\xbf\xff\x81\x03", 0x303fdf9f1e1c181ULL);
TEST(tenb, "\x81\x83\x87\x8f\x9f\xbf\xff\x81\x83\x07", 0x8303fdf9f1e1c181ULL);
#undef TEST
// Ten continuation bytes (0x80) followed by a terminating 0x01: an 11-byte
// varint, one byte longer than the 10 bytes a 64-bit varint may occupy.
char twelvebyte[16] = {0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x01, 0x01};
const char *twelvebyte_buf = twelvebyte;
uint64_t twelvebyte_val = 0;
upb_status status = UPB_STATUS_INIT;
/* A varint that terminates before hitting the end of the provided buffer,
* but in too many bytes (11 instead of 10). */
upb_decode_varint_fast(&twelvebyte_buf, &twelvebyte_val, &status);
ASSERT(status.code == UPB_ERROR);
upb_status_uninit(&status);
}
#if 0
static void test_get_v_uint32_t()
{
#define TEST(name, bytes, val) {\
upb_status status = UPB_STATUS_INIT; \
const uint8_t name[] = bytes; \
const uint8_t *name ## _buf = name; \
uint32_t name ## _val = 0; \
name ## _buf = upb_get_v_uint32_t(name, name + sizeof(name), &name ## _val, &status); \
ASSERT(upb_ok(&status)); \
ASSERT(name ## _val == val); \
ASSERT(name ## _buf == name + sizeof(name) - 1); /* - 1 for NULL */ \
/* Test NEED_MORE_DATA. */ \
if(sizeof(name) > 2) { \
name ## _buf = upb_get_v_uint32_t(name, name + sizeof(name) - 2, &name ## _val, &status); \
ASSERT(status.code == UPB_STATUS_NEED_MORE_DATA); \
} \
}
TEST(zero, "\x00", 0UL);
TEST(one, "\x01", 1UL);
TEST(twob, "\x81\x03", 0x181UL);
TEST(threeb, "\x81\x83\x07", 0x1c181UL);
TEST(fourb, "\x81\x83\x87\x0f", 0x1e1c181UL);
/* get_v_uint32_t truncates, so all the rest return the same thing. */
TEST(fiveb, "\x81\x83\x87\x8f\x1f", 0xf1e1c181UL);
TEST(sixb, "\x81\x83\x87\x8f\x9f\x3f", 0xf1e1c181UL);
TEST(sevenb, "\x81\x83\x87\x8f\x9f\xbf\x7f", 0xf1e1c181UL);
TEST(eightb, "\x81\x83\x87\x8f\x9f\xbf\xff\x01", 0xf1e1c181UL);
TEST(nineb, "\x81\x83\x87\x8f\x9f\xbf\xff\x81\x03", 0xf1e1c181UL);
TEST(tenb, "\x81\x83\x87\x8f\x9f\xbf\xff\x81\x83\x07", 0xf1e1c181UL);
#undef TEST
uint8_t twelvebyte[] = {0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x01, 0x01};
uint32_t twelvebyte_val = 0;
upb_status status = UPB_STATUS_INIT;
/* A varint that terminates before hitting the end of the provided buffer,
* but in too many bytes (11 instead of 10). */
upb_get_v_uint32_t(twelvebyte, twelvebyte + 12, &twelvebyte_val, &status);
ASSERT(status.code == UPB_ERROR_UNTERMINATED_VARINT);
/* A varint that terminates simultaneously with the end of the provided
* buffer, but in too many bytes (11 instead of 10). */
upb_reset(&status);
upb_get_v_uint32_t(twelvebyte, twelvebyte + 11, &twelvebyte_val, &status);
ASSERT(status.code == UPB_ERROR_UNTERMINATED_VARINT);
/* A varint whose buffer ends on exactly the byte where the varint must
* terminate, but the final byte does not terminate. The absolutely most
* correct return code here is UPB_ERROR_UNTERMINATED_VARINT, because we know
* by this point that the varint does not properly terminate. But we also
* allow a return value of UPB_STATUS_NEED_MORE_DATA here, because it does not
* compromise overall correctness -- clients who supply more data later will
* then receive a UPB_ERROR_UNTERMINATED_VARINT error; clients who have no
* more data to supply will (rightly) conclude that their protobuf is corrupt.
*/
upb_reset(&status);
upb_get_v_uint32_t(twelvebyte, twelvebyte + 10, &twelvebyte_val, &status);
ASSERT(status.code == UPB_ERROR_UNTERMINATED_VARINT ||
status.code == UPB_STATUS_NEED_MORE_DATA);
upb_reset(&status);
upb_get_v_uint32_t(twelvebyte, twelvebyte + 9, &twelvebyte_val, &status);
ASSERT(status.code == UPB_STATUS_NEED_MORE_DATA);
}
static void test_skip_v_uint64_t()
{
#define TEST(name, bytes) {\
upb_status status = UPB_STATUS_INIT; \
const uint8_t name[] = bytes; \
const uint8_t *name ## _buf = name; \
name ## _buf = upb_skip_v_uint64_t(name ## _buf, name + sizeof(name), &status); \
ASSERT(upb_ok(&status)); \
ASSERT(name ## _buf == name + sizeof(name) - 1); /* - 1 for NULL */ \
/* Test NEED_MORE_DATA. */ \
if(sizeof(name) > 2) { \
name ## _buf = upb_skip_v_uint64_t(name, name + sizeof(name) - 2, &status); \
ASSERT(status.code == UPB_STATUS_NEED_MORE_DATA); \
} \
}
TEST(zero, "\x00");
TEST(one, "\x01");
TEST(twob, "\x81\x03");
TEST(threeb, "\x81\x83\x07");
TEST(fourb, "\x81\x83\x87\x0f");
TEST(fiveb, "\x81\x83\x87\x8f\x1f");
TEST(sixb, "\x81\x83\x87\x8f\x9f\x3f");
TEST(sevenb, "\x81\x83\x87\x8f\x9f\xbf\x7f");
TEST(eightb, "\x81\x83\x87\x8f\x9f\xbf\xff\x01");
TEST(nineb, "\x81\x83\x87\x8f\x9f\xbf\xff\x81\x03");
TEST(tenb, "\x81\x83\x87\x8f\x9f\xbf\xff\x81\x83\x07");
#undef TEST
uint8_t twelvebyte[] = {0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x01, 0x01};
upb_status status = UPB_STATUS_INIT;
/* A varint that terminates before hitting the end of the provided buffer,
* but in too many bytes (11 instead of 10). */
upb_skip_v_uint64_t(twelvebyte, twelvebyte + 12, &status);
ASSERT(status.code == UPB_ERROR_UNTERMINATED_VARINT);
/* A varint that terminates simultaneously with the end of the provided
* buffer, but in too many bytes (11 instead of 10). */
upb_reset(&status);
upb_skip_v_uint64_t(twelvebyte, twelvebyte + 11, &status);
ASSERT(status.code == UPB_ERROR_UNTERMINATED_VARINT);
/* A varint whose buffer ends on exactly the byte where the varint must
* terminate, but the final byte does not terminate. The absolutely most
* correct return code here is UPB_ERROR_UNTERMINATED_VARINT, because we know
* by this point that the varint does not properly terminate. But we also
* allow a return value of UPB_STATUS_NEED_MORE_DATA here, because it does not
* compromise overall correctness -- clients who supply more data later will
* then receive a UPB_ERROR_UNTERMINATED_VARINT error; clients who have no
* more data to supply will (rightly) conclude that their protobuf is corrupt.
*/
upb_reset(&status);
upb_skip_v_uint64_t(twelvebyte, twelvebyte + 10, &status);
ASSERT(status.code == UPB_ERROR_UNTERMINATED_VARINT ||
status.code == UPB_STATUS_NEED_MORE_DATA);
upb_reset(&status);
upb_skip_v_uint64_t(twelvebyte, twelvebyte + 9, &status);
ASSERT(status.code == UPB_STATUS_NEED_MORE_DATA);
}
static void test_get_f_uint32_t()
{
#define TEST(name, bytes, val) {\
upb_status status = UPB_STATUS_INIT; \
const uint8_t name[] = bytes; \
const uint8_t *name ## _buf = name; \
uint32_t name ## _val = 0; \
name ## _buf = upb_get_f_uint32_t(name ## _buf, name + sizeof(name), &name ## _val, &status); \
ASSERT(upb_ok(&status)); \
ASSERT(name ## _val == val); \
ASSERT(name ## _buf == name + sizeof(name) - 1); /* - 1 for NULL */ \
}
TEST(zero, "\x00\x00\x00\x00", 0x0UL);
TEST(one, "\x01\x00\x00\x00", 0x1UL);
uint8_t threeb[] = {0x00, 0x00, 0x00};
uint32_t threeb_val;
upb_status status = UPB_STATUS_INIT;
upb_get_f_uint32_t(threeb, threeb + sizeof(threeb), &threeb_val, &status);
ASSERT(status.code == UPB_STATUS_NEED_MORE_DATA);
#undef TEST
}
#endif
#include "upb_test.h"
static void test_upb_symtab() {
upb_symtab *s = upb_symtab_new();
upb_symtab_add_descriptorproto(s);
ASSERT(s);
upb_string *descriptor = upb_strreadfile("tests/test.proto.pb");
if(!descriptor) {
@ -240,11 +41,8 @@ static void test_upb_symtab() {
upb_def_ref(def2);
upb_def_unref(def);
upb_def_unref(def2);
}
int main()
{
#define TEST(func) do { \
@ -254,10 +52,6 @@ int main()
printf("ok (%d assertions).\n", num_assertions - assertions_before); \
} while (0)
TEST(test_get_v_uint64_t);
//TEST(test_get_v_uint32_t);
//TEST(test_skip_v_uint64_t);
//TEST(test_get_f_uint32_t);
TEST(test_upb_symtab);
printf("All tests passed (%d assertions).\n", num_assertions);
return 0;

Loading…
Cancel
Save