Tons of work: we're close to passing test_vs_proto2 again.

pull/13171/head
Joshua Haberman 14 years ago
parent e170259e4a
commit 3affb31926
  1. 7
      Makefile
  2. 97
      core/upb.h
  3. 2
      core/upb_def.c
  4. 38
      core/upb_glue.c
  5. 42
      core/upb_glue.h
  6. 192
      core/upb_msg.c
  7. 133
      core/upb_msg.h
  8. 15
      core/upb_stream_vtbl.h
  9. 2
      core/upb_string.c
  10. 42
      stream/upb_decoder.c
  11. 38
      stream/upb_decoder.h
  12. 44
      stream/upb_strstream.c
  13. 12
      stream/upb_strstream.h
  14. 9
      tests/test_decoder.c
  15. 79
      tests/test_vs_proto2.cc

@ -72,6 +72,7 @@ STREAM= \
stream/upb_textprinter.c \
stream/upb_strstream.c \
core/upb_msg.c \
core/upb_glue.c \
SRC=$(CORE) $(STREAM)
@ -121,9 +122,9 @@ TESTS=tests/test_string \
tests/test_table \
tests/test_def \
tests/test_stream \
tests/test_decoder \
# tests/t.test_vs_proto2.googlemessage1 \
# tests/t.test_vs_proto2.googlemessage2 \
tests/t.test_vs_proto2.googlemessage1 \
tests/t.test_vs_proto2.googlemessage2 \
# tests/test_decoder \
# tests/test.proto.pb
tests: $(LIBUPB) $(TESTS)

@ -204,97 +204,6 @@ INLINE upb_atomic_refcount_t *upb_value_getrefcount(upb_value val) {
return val.val.refcount;
}
// A pointer to a .proto value. The owner must have an out-of-band way of
// knowing the type, so it knows which union member to use.
typedef union {
double *_double;
float *_float;
int32_t *int32;
int64_t *int64;
uint8_t *uint8;
uint32_t *uint32;
uint64_t *uint64;
bool *_bool;
upb_string **str;
upb_msg **msg;
upb_array **arr;
void *_void;
} upb_valueptr;
INLINE upb_valueptr upb_value_addrof(upb_value *val) {
upb_valueptr ptr = {&val->val._double};
return ptr;
}
// Reads or writes a upb_value from an address represented by a upb_value_ptr.
// We need to know the value type to perform this operation, because we need to
// know how much memory to copy (and for big-endian machines, we need to know
// where in the upb_value the data goes).
//
// For little endian-machines where we didn't mind overreading, we could make
// upb_value_read simply use memcpy().
INLINE upb_value upb_value_read(upb_valueptr ptr, upb_fieldtype_t ft) {
upb_value val;
#define CASE(t, member_name) \
case UPB_TYPE(t): val.val.member_name = *ptr.member_name; break;
switch(ft) {
CASE(DOUBLE, _double)
CASE(FLOAT, _float)
CASE(INT32, int32)
CASE(INT64, int64)
CASE(UINT32, uint32)
CASE(UINT64, uint64)
CASE(SINT32, int32)
CASE(SINT64, int64)
CASE(FIXED32, uint32)
CASE(FIXED64, uint64)
CASE(SFIXED32, int32)
CASE(SFIXED64, int64)
CASE(BOOL, _bool)
CASE(ENUM, int32)
CASE(STRING, str)
CASE(BYTES, str)
CASE(MESSAGE, msg)
CASE(GROUP, msg)
default: break;
}
return val;
#undef CASE
}
INLINE void upb_value_write(upb_valueptr ptr, upb_value val,
upb_fieldtype_t ft) {
#define CASE(t, member_name) \
case UPB_TYPE(t): *ptr.member_name = val.val.member_name; break;
switch(ft) {
CASE(DOUBLE, _double)
CASE(FLOAT, _float)
CASE(INT32, int32)
CASE(INT64, int64)
CASE(UINT32, uint32)
CASE(UINT64, uint64)
CASE(SINT32, int32)
CASE(SINT64, int64)
CASE(FIXED32, uint32)
CASE(FIXED64, uint64)
CASE(SFIXED32, int32)
CASE(SFIXED64, int64)
CASE(BOOL, _bool)
CASE(ENUM, int32)
CASE(STRING, str)
CASE(BYTES, str)
CASE(MESSAGE, msg)
CASE(GROUP, msg)
default: break;
}
#undef CASE
}
// Status codes used as a return value. Codes >0 are not fatal and can be
// resumed.
enum upb_status_code {
@ -317,10 +226,12 @@ enum upb_status_code {
// TODO: consider adding error space and code, to let ie. errno be stored
// as a proper code, or application-specific error codes.
typedef struct {
struct _upb_status {
char code;
upb_string *str;
} upb_status;
};
typedef struct _upb_status upb_status;
#define UPB_STATUS_INIT {UPB_OK, NULL}
#define UPB_ERRORMSG_MAXLEN 256

@ -1019,6 +1019,7 @@ bool upb_symtab_add_defs(upb_symtab *s, upb_def **defs, int num_defs,
upb_symtab_ent *tmptab_e;
for(tmptab_e = upb_strtable_begin(&tmptab); tmptab_e;
tmptab_e = upb_strtable_next(&tmptab, &tmptab_e->e)) {
printf("Inserting def for " UPB_STRFMT "\n", UPB_STRARG(tmptab_e->def->fqname));
upb_symtab_ent *symtab_e =
upb_strtable_lookup(&s->symtab, tmptab_e->def->fqname);
if(symtab_e) {
@ -1132,6 +1133,7 @@ void upb_symtab_addfds(upb_symtab *s, upb_src *src, upb_status *status)
if (upb_ok(status))
upb_symtab_add_defs(s, b.defs.defs, b.defs.len, false, status);
upb_defbuilder_uninit(&b);
upb_handlers_uninit(&handlers);
}

@ -0,0 +1,38 @@
/*
* upb - a minimalist implementation of protocol buffers.
*
* Copyright (c) 2010 Joshua Haberman. See LICENSE for details.
*/
#include "upb_glue.h"
#include "upb_msg.h"
#include "upb_decoder.h"
#include "upb_strstream.h"
void upb_strtomsg(upb_string *str, upb_msg *msg, upb_msgdef *md,
upb_status *status) {
upb_stringsrc strsrc;
upb_stringsrc_init(&strsrc);
upb_stringsrc_reset(&strsrc, str);
upb_decoder d;
upb_decoder_init(&d, md);
upb_decoder_reset(&d, upb_stringsrc_bytesrc(&strsrc));
upb_src *src = upb_decoder_src(&d);
upb_msgpopulator p;
upb_msgpopulator_init(&p);
upb_msgpopulator_reset(&p, msg, md);
upb_handlers h;
upb_handlers_init(&h);
upb_msgpopulator_register_handlers(&p, &h);
upb_src_sethandlers(src, &h);
upb_src_run(src, status);
upb_stringsrc_uninit(&strsrc);
upb_decoder_uninit(&d);
upb_msgpopulator_uninit(&p);
upb_handlers_uninit(&h);
}

@ -0,0 +1,42 @@
/*
* upb - a minimalist implementation of protocol buffers.
*
* upb's core components like upb_decoder and upb_msg are carefully designed to
* avoid depending on each other for maximum orthogonality. In other words,
* you can use a upb_decoder to decode into *any* kind of structure; upb_msg is
* just one such structure. You can use upb_decoder without having to link in
* upb_msg.
*
* However, for convenience we provide functions here for doing common
* operations like deserializing protobuf binary format into a upb_msg. The
* compromise is that this file drags in almost all of upb as a dependency,
* which could be undesirable if you're trying to use a trimmed-down build of
* upb.
*
* Copyright (c) 2011 Joshua Haberman. See LICENSE for details.
*/
#ifndef UPB_GLUE_H
#define UPB_GLUE_H
#ifdef __cplusplus
extern "C" {
#endif
// Forward-declares so we don't have to include everything in this .h file.
// Clients should use the regular, typedef'd names (eg. upb_string).
struct _upb_string;
struct _upb_msg;
struct _upb_msgdef;
struct _upb_status;
// Decodes the given string, which must be in protobuf binary format, to the
// given upb_msg with msgdef "md", storing the status of the operation in "s".
void upb_strtomsg(struct _upb_string *str, struct _upb_msg *msg,
struct _upb_msgdef *md, struct _upb_status *s);
#ifdef __cplusplus
} /* extern "C" */
#endif
#endif

@ -10,6 +10,18 @@
#include "upb_decoder.h"
#include "upb_strstream.h"
static uint32_t upb_round_up_pow2(uint32_t v) {
// http://graphics.stanford.edu/~seander/bithacks.html#RoundUpPowerOf2
v--;
v |= v >> 1;
v |= v >> 2;
v |= v >> 4;
v |= v >> 8;
v |= v >> 16;
v++;
return v;
}
static void upb_elem_free(upb_value v, upb_fielddef *f) {
switch(f->type) {
case UPB_TYPE(MESSAGE):
@ -47,6 +59,58 @@ static void upb_field_unref(upb_value v, upb_fielddef *f) {
upb_field_free(v, f);
}
/* upb_array ******************************************************************/
upb_array *upb_array_new(void) {
upb_array *arr = malloc(sizeof(*arr));
upb_atomic_refcount_init(&arr->refcount, 1);
arr->size = 0;
arr->len = 0;
arr->ptr = NULL;
return arr;
}
void upb_array_recycle(upb_array **_arr, upb_fielddef *f) {
upb_array *arr = *_arr;
if(arr && upb_atomic_read(&arr->refcount) == 1) {
arr->len = 0;
} else {
upb_array_unref(arr, f);
*_arr = upb_array_new();
}
}
void _upb_array_free(upb_array *arr, upb_fielddef *f) {
if (upb_elem_ismm(f)) {
// Need to release refs on sub-objects.
upb_valuetype_t type = upb_elem_valuetype(f);
for (upb_arraylen_t i = 0; i < arr->size; i++) {
upb_valueptr p = _upb_array_getptr(arr, f, i);
upb_elem_unref(upb_value_read(p, type), f);
}
}
free(arr->ptr);
free(arr);
}
void upb_array_resize(upb_array *arr, upb_fielddef *f, upb_arraylen_t len) {
size_t type_size = upb_types[f->type].size;
upb_arraylen_t old_size = arr->size;
if (old_size < len) {
// Need to resize.
size_t new_size = upb_round_up_pow2(len);
arr->ptr = realloc(arr->ptr, new_size * type_size);
arr->size = new_size;
memset(arr->ptr + (old_size * type_size), 0,
(new_size - old_size) * type_size);
}
arr->len = len;
}
/* upb_msg ********************************************************************/
upb_msg *upb_msg_new(upb_msgdef *md) {
upb_msg *msg = malloc(md->size);
// Clear all set bits and cached pointers.
@ -67,35 +131,123 @@ void _upb_msg_free(upb_msg *msg, upb_msgdef *md) {
free(msg);
}
void upb_msg_recycle(upb_msg **_msg, upb_msgdef *msgdef) {
upb_msg *msg = *_msg;
if(msg && upb_atomic_read(&msg->refcount) == 1) {
upb_msg_clear(msg, msgdef);
} else {
upb_msg_unref(msg, msgdef);
*_msg = upb_msg_new(msgdef);
}
}
INLINE void upb_msg_sethas(upb_msg *msg, upb_fielddef *f) {
msg->data[f->field_index/8] |= (1 << (f->field_index % 8));
}
static upb_valueptr upb_msg_getappendptr(upb_msg *msg, upb_fielddef *f) {
upb_valueptr p = _upb_msg_getptr(msg, f);
if (upb_isarray(f)) {
// Create/recycle/resize the array if necessary, and find a pointer to
// a newly-appended element.
if (!upb_msg_has(msg, f)) {
upb_array_recycle(p.arr, f);
upb_msg_sethas(msg, f);
}
assert(*p.arr != NULL);
upb_arraylen_t oldlen = upb_array_len(*p.arr);
upb_array_resize(*p.arr, f, oldlen + 1);
p = _upb_array_getptr(*p.arr, f, oldlen);
}
return p;
}
upb_array *upb_array_new(void) {
upb_array *arr = malloc(sizeof(*arr));
upb_atomic_refcount_init(&arr->refcount, 1);
arr->size = 0;
arr->len = 0;
arr->elements._void = NULL;
return arr;
static void upb_msg_appendval(upb_msg *msg, upb_fielddef *f, upb_value val) {
upb_valueptr p = upb_msg_getappendptr(msg, f);
if (upb_isstring(f)) {
// We do:
// - upb_string_recycle(), upb_string_substr() instead of
// - upb_string_unref(), upb_string_getref()
// because we have a convenient place of caching these string objects for
// reuse, where as the upb_src who is sending us these strings may not.
// This saves the upb_src from allocating new upb_strings all the time to
// give us.
//
// If you were using this to copy one upb_msg to another this would
// allocate string objects whereas a upb_string_getref could have avoided
// those allocations completely; if this is an issue, we could make it an
// option of the upb_msgpopulator which behavior is desired.
upb_string *src = upb_value_getstr(val);
upb_string_recycle(p.str);
upb_string_substr(*p.str, src, 0, upb_string_len(src));
} else {
upb_value_write(p, val, f->type);
}
}
void _upb_array_free(upb_array *arr, upb_fielddef *f) {
if (upb_elem_ismm(f)) {
// Need to release refs on sub-objects.
upb_valuetype_t type = upb_elem_valuetype(f);
for (upb_arraylen_t i = 0; i < arr->size; i++) {
upb_valueptr p = _upb_array_getptr(arr, f, i);
upb_elem_unref(upb_value_read(p, type), f);
}
upb_msg *upb_msg_appendmsg(upb_msg *msg, upb_fielddef *f, upb_msgdef *msgdef) {
upb_valueptr p = upb_msg_getappendptr(msg, f);
if (upb_isarray(f) || !upb_msg_has(msg, f)) {
upb_msg_recycle(p.msg, msgdef);
upb_msg_clear(*p.msg, msgdef);
upb_msg_sethas(msg, f);
}
if (arr->elements._void) free(arr->elements._void);
free(arr);
return *p.msg;
}
void upb_msg_register_handlers(upb_msg *msg, upb_msgdef *md,
upb_handlers *handlers, bool merge) {
static upb_handlerset handlerset = {
/* upb_msgpopulator ***********************************************************/
void upb_msgpopulator_init(upb_msgpopulator *p) {
upb_status_init(&p->status);
}
void upb_msgpopulator_reset(upb_msgpopulator *p, upb_msg *m, upb_msgdef *md) {
p->top = p->stack;
p->limit = p->stack + sizeof(p->stack);
p->top->msg = m;
p->top->msgdef = md;
}
void upb_msgpopulator_uninit(upb_msgpopulator *p) {
upb_status_uninit(&p->status);
}
static upb_flow_t upb_msgpopulator_value(void *_p, upb_fielddef *f, upb_value val) {
upb_msgpopulator *p = _p;
upb_msg_appendval(p->top->msg, f, val);
return UPB_CONTINUE;
}
static upb_flow_t upb_msgpopulator_startsubmsg(void *_p, upb_fielddef *f,
upb_handlers *delegate_to) {
upb_msgpopulator *p = _p;
(void)delegate_to;
upb_msg *oldmsg = p->top->msg;
if (++p->top == p->limit) {
upb_seterr(&p->status, UPB_ERROR, "Exceeded maximum nesting");
return UPB_BREAK;
}
upb_msgdef *msgdef = upb_downcast_msgdef(f->def);
p->top->msgdef = msgdef;
p->top->msg = upb_msg_appendmsg(oldmsg, f, msgdef);
return UPB_CONTINUE;
}
static upb_flow_t upb_msgpopulator_endsubmsg(void *_p) {
upb_msgpopulator *p = _p;
--p->top;
return UPB_CONTINUE;
}
void upb_msgpopulator_register_handlers(upb_msgpopulator *p, upb_handlers *h) {
static upb_handlerset handlerset = {
NULL, // startmsg
NULL, // endmsg
&upb_msgpopulator_value,
&upb_msgpopulator_startsubmsg,
&upb_msgpopulator_endsubmsg,
};
upb_register_handlerset(h, &handlerset);
upb_set_handler_closure(h, p, &p->status);
}

@ -23,21 +23,119 @@
extern "C" {
#endif
// A pointer to a .proto value. The owner must have an out-of-band way of
// knowing the type, so it knows which union member to use.
typedef union {
double *_double;
float *_float;
int32_t *int32;
int64_t *int64;
uint8_t *uint8;
uint32_t *uint32;
uint64_t *uint64;
bool *_bool;
upb_string **str;
upb_msg **msg;
upb_array **arr;
void *_void;
} upb_valueptr;
INLINE upb_valueptr upb_value_addrof(upb_value *val) {
upb_valueptr ptr = {&val->val._double};
return ptr;
}
// Reads or writes a upb_value from an address represented by a upb_value_ptr.
// We need to know the value type to perform this operation, because we need to
// know how much memory to copy (and for big-endian machines, we need to know
// where in the upb_value the data goes).
//
// For little endian-machines where we didn't mind overreading, we could make
// upb_value_read simply use memcpy().
INLINE upb_value upb_value_read(upb_valueptr ptr, upb_fieldtype_t ft) {
upb_value val;
#ifdef NDEBUG
#define CASE(t, member_name) \
case UPB_TYPE(t): val.val.member_name = *ptr.member_name; break;
#else
#define CASE(t, member_name) \
case UPB_TYPE(t): val.val.member_name = *ptr.member_name; val.type = ft; break;
#endif
switch(ft) {
CASE(DOUBLE, _double)
CASE(FLOAT, _float)
CASE(INT32, int32)
CASE(INT64, int64)
CASE(UINT32, uint32)
CASE(UINT64, uint64)
CASE(SINT32, int32)
CASE(SINT64, int64)
CASE(FIXED32, uint32)
CASE(FIXED64, uint64)
CASE(SFIXED32, int32)
CASE(SFIXED64, int64)
CASE(BOOL, _bool)
CASE(ENUM, int32)
CASE(STRING, str)
CASE(BYTES, str)
CASE(MESSAGE, msg)
CASE(GROUP, msg)
default: assert(false);
}
return val;
#undef CASE
}
INLINE void upb_value_write(upb_valueptr ptr, upb_value val,
upb_fieldtype_t ft) {
assert(val.type == ft);
#define CASE(t, member_name) \
case UPB_TYPE(t): *ptr.member_name = val.val.member_name; break;
switch(ft) {
CASE(DOUBLE, _double)
CASE(FLOAT, _float)
CASE(INT32, int32)
CASE(INT64, int64)
CASE(UINT32, uint32)
CASE(UINT64, uint64)
CASE(SINT32, int32)
CASE(SINT64, int64)
CASE(FIXED32, uint32)
CASE(FIXED64, uint64)
CASE(SFIXED32, int32)
CASE(SFIXED64, int64)
CASE(BOOL, _bool)
CASE(ENUM, int32)
CASE(STRING, str)
CASE(BYTES, str)
CASE(MESSAGE, msg)
CASE(GROUP, msg)
default: assert(false);
}
#undef CASE
}
/* upb_array ******************************************************************/
typedef uint32_t upb_arraylen_t;
struct _upb_array {
upb_atomic_refcount_t refcount;
// "len" and "size" are measured in elements, not bytes.
upb_arraylen_t len;
upb_arraylen_t size;
upb_valueptr elements;
char *ptr;
};
void _upb_array_free(upb_array *a, upb_fielddef *f);
INLINE upb_valueptr _upb_array_getptr(upb_array *a, upb_fielddef *f,
uint32_t elem) {
upb_valueptr p;
p._void = &a->elements.uint8[elem * upb_types[f->type].size];
p._void = &a->ptr[elem * upb_types[f->type].size];
return p;
}
@ -47,10 +145,18 @@ INLINE void upb_array_unref(upb_array *a, upb_fielddef *f) {
if (upb_atomic_unref(&a->refcount)) _upb_array_free(a, f);
}
void upb_array_recycle(upb_array **arr, upb_fielddef *f);
INLINE uint32_t upb_array_len(upb_array *a) {
return a->len;
}
INLINE upb_value upb_array_get(upb_array *arr, upb_fielddef *f,
upb_arraylen_t i) {
assert(i < upb_array_len(arr));
return upb_value_read(_upb_array_getptr(arr, f, i), f->type);
}
/* upb_msg ********************************************************************/
struct _upb_msg {
@ -74,20 +180,37 @@ INLINE void upb_msg_unref(upb_msg *msg, upb_msgdef *md) {
if (msg && upb_atomic_unref(&msg->refcount)) _upb_msg_free(msg, md);
}
void upb_msg_recycle(upb_msg **msg, upb_msgdef *msgdef);
// Tests whether the given field is explicitly set, or whether it will return a
// default.
INLINE bool upb_msg_has(upb_msg *msg, upb_fielddef *f) {
return (msg->data[f->field_index/8] & (1 << (f->field_index % 8))) != 0;
}
INLINE upb_value upb_msg_get(upb_msg *msg, upb_fielddef *f) {
return upb_value_read(_upb_msg_getptr(msg, f), f->type);
}
// Unsets all field values back to their defaults.
INLINE void upb_msg_clear(upb_msg *msg, upb_msgdef *md) {
memset(msg->data, 0, md->set_flags_bytes);
}
// Registers a set of handlers that will populate this msgdef.
void upb_msg_register_handlers(upb_msg *msg, upb_msgdef *md,
upb_handlers *handlers);
typedef struct {
upb_msg *msg;
upb_msgdef *msgdef;
} upb_msgpopulator_frame;
typedef struct {
upb_msgpopulator_frame stack[UPB_MAX_NESTING], *top, *limit;
upb_status status;
} upb_msgpopulator;
void upb_msgpopulator_init(upb_msgpopulator *p);
void upb_msgpopulator_uninit(upb_msgpopulator *p);
void upb_msgpopulator_reset(upb_msgpopulator *p, upb_msg *m, upb_msgdef *md);
void upb_msgpopulator_register_handlers(upb_msgpopulator *p, upb_handlers *h);
#ifdef __cplusplus
} /* extern "C" */

@ -62,14 +62,10 @@ typedef struct {
struct _upb_bytesrc {
upb_bytesrc_vtbl *vtbl;
upb_status status;
bool eof;
};
struct _upb_bytesink {
upb_bytesink_vtbl *vtbl;
upb_status status;
bool eof;
};
struct _upb_src {
@ -78,14 +74,10 @@ struct _upb_src {
INLINE void upb_bytesrc_init(upb_bytesrc *s, upb_bytesrc_vtbl *vtbl) {
s->vtbl = vtbl;
s->eof = false;
upb_status_init(&s->status);
}
INLINE void upb_bytesink_init(upb_bytesink *s, upb_bytesink_vtbl *vtbl) {
s->vtbl = vtbl;
s->eof = false;
upb_status_init(&s->status);
}
INLINE void upb_src_init(upb_src *s, upb_src_vtbl *vtbl) {
@ -132,9 +124,6 @@ INLINE bool upb_bytesrc_getfullstr(upb_bytesrc *src, upb_string *str,
return true;
}
INLINE upb_status *upb_bytesrc_status(upb_bytesrc *src) { return &src->status; }
INLINE bool upb_bytesrc_eof(upb_bytesrc *src) { return src->eof; }
// upb_bytesink
INLINE upb_strlen_t upb_bytesink_write(upb_bytesink *sink, void *buf,
@ -146,10 +135,6 @@ INLINE upb_strlen_t upb_bytesink_putstr(upb_bytesink *sink, upb_string *str, upb
return sink->vtbl->putstr(sink, str, status);
}
INLINE upb_status *upb_bytesink_status(upb_bytesink *sink) {
return &sink->status;
}
INLINE upb_strlen_t upb_bytesink_printf(upb_bytesink *sink, upb_status *status, const char *fmt, ...) {
va_list args;
va_start(args, fmt);

@ -56,7 +56,7 @@ static void upb_string_release(upb_string *str) {
}
void _upb_string_free(upb_string *str) {
if(str->cached_mem) free(str->cached_mem);
free(str->cached_mem);
upb_string_release(str);
free(str);
}

@ -47,36 +47,6 @@ done:
INLINE int32_t upb_zzdec_32(uint32_t n) { return (n >> 1) ^ -(int32_t)(n & 1); }
INLINE int64_t upb_zzdec_64(uint64_t n) { return (n >> 1) ^ -(int64_t)(n & 1); }
// The decoder keeps a stack with one entry per level of recursion.
// upb_decoder_frame is one frame of that stack.
typedef struct {
upb_msgdef *msgdef;
size_t end_offset; // For groups, 0.
} upb_decoder_frame;
struct upb_decoder {
// Immutable state of the decoder.
upb_src src;
upb_dispatcher dispatcher;
upb_bytesrc *bytesrc;
upb_msgdef *toplevel_msgdef;
upb_decoder_frame stack[UPB_MAX_NESTING];
// Mutable state of the decoder.
// Where we will store any errors that occur.
upb_status *status;
// Stack entries store the offset where the submsg ends (for groups, 0).
upb_decoder_frame *top, *limit;
// Current input buffer.
upb_string *buf;
// The offset within the overall stream represented by the *beginning* of buf.
size_t buf_stream_offset;
};
typedef struct {
// Our current position in the data buffer.
const char *ptr;
@ -279,8 +249,8 @@ void upb_decoder_run(upb_src *src, upb_status *status) {
upb_string *str = NULL;
// TODO: handle UPB_SKIPSUBMSG
#define CHECK_FLOW(expr) if ((expr) != UPB_CONTINUE) goto err
#define CHECK(expr) if (!expr) goto err;
#define CHECK_FLOW(expr) if ((expr) == UPB_BREAK) { assert(!upb_ok(status)); goto err; }
#define CHECK(expr) if (!expr) { assert(!upb_ok(status)); goto err; }
CHECK_FLOW(upb_dispatch_startmsg(&d->dispatcher));
@ -300,6 +270,7 @@ void upb_decoder_run(upb_src *src, upb_status *status) {
if (!upb_decode_tag(d, &state, &tag)) {
if (status->code == UPB_EOF && d->top == d->stack) {
// Normal end-of-file.
printf("Normal end-of-file!\n");
upb_clearerr(status);
CHECK_FLOW(upb_dispatch_endmsg(&d->dispatcher));
upb_string_unref(str);
@ -397,18 +368,16 @@ void upb_decoder_sethandlers(upb_src *src, upb_handlers *handlers) {
d->top->end_offset = 0;
}
upb_decoder *upb_decoder_new(upb_msgdef *msgdef) {
void upb_decoder_init(upb_decoder *d, upb_msgdef *msgdef) {
static upb_src_vtbl vtbl = {
&upb_decoder_sethandlers,
&upb_decoder_run,
};
upb_decoder *d = malloc(sizeof(*d));
upb_src_init(&d->src, &vtbl);
upb_dispatcher_init(&d->dispatcher);
d->toplevel_msgdef = msgdef;
d->limit = &d->stack[UPB_MAX_NESTING];
d->buf = NULL;
return d;
}
void upb_decoder_reset(upb_decoder *d, upb_bytesrc *bytesrc) {
@ -421,9 +390,8 @@ void upb_decoder_reset(upb_decoder *d, upb_bytesrc *bytesrc) {
d->buf = NULL;
}
void upb_decoder_free(upb_decoder *d) {
void upb_decoder_uninit(upb_decoder *d) {
upb_string_unref(d->buf);
free(d);
}
upb_src *upb_decoder_src(upb_decoder *d) { return &d->src; }

@ -27,14 +27,44 @@ extern "C" {
/* upb_decoder *****************************************************************/
// The decoder keeps a stack with one entry per level of recursion.
// upb_decoder_frame is one frame of that stack.
typedef struct {
upb_msgdef *msgdef;
size_t end_offset; // For groups, 0.
} upb_decoder_frame;
struct _upb_decoder {
// Immutable state of the decoder.
upb_src src;
upb_dispatcher dispatcher;
upb_bytesrc *bytesrc;
upb_msgdef *toplevel_msgdef;
upb_decoder_frame stack[UPB_MAX_NESTING];
// Mutable state of the decoder.
// Where we will store any errors that occur.
upb_status *status;
// Stack entries store the offset where the submsg ends (for groups, 0).
upb_decoder_frame *top, *limit;
// Current input buffer.
upb_string *buf;
// The offset within the overall stream represented by the *beginning* of buf.
size_t buf_stream_offset;
};
// A upb_decoder decodes the binary protocol buffer format, writing the data it
// decodes to a upb_sink.
struct upb_decoder;
typedef struct upb_decoder upb_decoder;
struct _upb_decoder;
typedef struct _upb_decoder upb_decoder;
// Allocates and frees a upb_decoder, respectively.
upb_decoder *upb_decoder_new(upb_msgdef *md);
void upb_decoder_free(upb_decoder *d);
void upb_decoder_init(upb_decoder *d, upb_msgdef *md);
void upb_decoder_uninit(upb_decoder *d);
// Resets the internal state of an already-allocated decoder. This puts it in a
// state where it has not seen any data, and expects the next data to be from

@ -9,25 +9,6 @@
#include <stdlib.h>
#include "upb_string.h"
struct upb_stringsrc {
upb_bytesrc bytesrc;
upb_string *str;
upb_strlen_t offset;
};
void upb_stringsrc_reset(upb_stringsrc *s, upb_string *str) {
if (str != s->str) {
if (s->str) upb_string_unref(s->str);
s->str = upb_string_getref(str);
}
s->bytesrc.eof = false;
}
void upb_stringsrc_free(upb_stringsrc *s) {
if (s->str) upb_string_unref(s->str);
free(s);
}
static upb_strlen_t upb_stringsrc_read(upb_bytesrc *_src, void *buf,
upb_strlen_t count, upb_status *status) {
upb_stringsrc *src = (upb_stringsrc*)_src;
@ -45,27 +26,40 @@ static upb_strlen_t upb_stringsrc_read(upb_bytesrc *_src, void *buf,
static bool upb_stringsrc_getstr(upb_bytesrc *_src, upb_string *str,
upb_status *status) {
upb_stringsrc *src = (upb_stringsrc*)_src;
if (src->offset == upb_string_len(str)) {
if (src->offset == upb_string_len(src->str)) {
upb_seterr(status, UPB_EOF, "");
return false;
} else {
upb_string_substr(str, src->str, 0, upb_string_len(src->str));
upb_strlen_t len = upb_string_len(src->str) - src->offset;
upb_string_substr(str, src->str, src->offset, len);
src->offset += len;
assert(src->offset == upb_string_len(src->str));
return true;
}
}
upb_stringsrc *upb_stringsrc_new() {
void upb_stringsrc_init(upb_stringsrc *s) {
static upb_bytesrc_vtbl bytesrc_vtbl = {
upb_stringsrc_read,
upb_stringsrc_getstr,
};
upb_stringsrc *s = malloc(sizeof(*s));
s->str = NULL;
upb_bytesrc_init(&s->bytesrc, &bytesrc_vtbl);
return s;
}
void upb_stringsrc_reset(upb_stringsrc *s, upb_string *str) {
if (str != s->str) {
upb_string_unref(s->str);
s->str = upb_string_getref(str);
}
s->offset = 0;
}
void upb_stringsrc_uninit(upb_stringsrc *s) {
upb_string_unref(s->str);
}
upb_bytesrc *upb_stringsrc_bytesrc(upb_stringsrc *s) {
return &s->bytesrc;
}

@ -18,12 +18,16 @@ extern "C" {
/* upb_stringsrc **************************************************************/
struct upb_stringsrc;
typedef struct upb_stringsrc upb_stringsrc;
struct _upb_stringsrc {
upb_bytesrc bytesrc;
upb_string *str;
upb_strlen_t offset;
};
typedef struct _upb_stringsrc upb_stringsrc;
// Create/free a stringsrc.
upb_stringsrc *upb_stringsrc_new();
void upb_stringsrc_free(upb_stringsrc *s);
void upb_stringsrc_init(upb_stringsrc *s);
void upb_stringsrc_uninit(upb_stringsrc *s);
// Resets the stringsrc to a state where it will vend the given string. The
// stringsrc will take a reference on the string, so the caller need not ensure

@ -13,13 +13,14 @@ int main() {
upb_stdio_reset(in, stdin);
upb_stdio *out = upb_stdio_new();
upb_stdio_reset(out, stdout);
upb_decoder *d = upb_decoder_new(upb_downcast_msgdef(fds));
upb_decoder_reset(d, upb_stdio_bytesrc(in));
upb_decoder d;
upb_decoder_init(&d, upb_downcast_msgdef(fds));
upb_decoder_reset(&d, upb_stdio_bytesrc(in));
upb_textprinter *p = upb_textprinter_new();
upb_handlers handlers;
upb_handlers_init(&handlers);
upb_textprinter_reset(p, &handlers, upb_stdio_bytesink(out), false);
upb_src *src = upb_decoder_src(d);
upb_src *src = upb_decoder_src(&d);
upb_src_sethandlers(src, &handlers);
upb_status status = UPB_STATUS_INIT;
@ -30,7 +31,7 @@ int main() {
upb_status_uninit(&status);
upb_stdio_free(in);
upb_stdio_free(out);
upb_decoder_free(d);
upb_decoder_uninit(&d);
upb_textprinter_free(p);
upb_def_unref(fds);
upb_symtab_unref(symtab);

@ -4,9 +4,10 @@
#include <stdio.h>
#include <stdlib.h>
#include <google/protobuf/descriptor.h>
#include "upb_msg.h"
#include "upb_def.h"
#include "upb_decoder.h"
#include "upb_def.h"
#include "upb_glue.h"
#include "upb_msg.h"
#include "upb_strstream.h"
int num_assertions = 0;
@ -26,7 +27,7 @@ void compare_arrays(const google::protobuf::Reflection *r,
upb_msg *upb_msg, upb_fielddef *upb_f)
{
ASSERT(upb_msg_has(upb_msg, upb_f));
upb_array *arr = upb_msg_get(upb_msg, upb_f).arr;
upb_array *arr = upb_value_getarr(upb_msg_get(upb_msg, upb_f));
ASSERT(upb_array_len(arr) == (upb_arraylen_t)r->FieldSize(proto2_msg, proto2_f));
for(upb_arraylen_t i = 0; i < upb_array_len(arr); i++) {
upb_value v = upb_array_get(arr, upb_f, i);
@ -34,37 +35,38 @@ void compare_arrays(const google::protobuf::Reflection *r,
default:
ASSERT(false);
case UPB_TYPE(DOUBLE):
ASSERT(r->GetRepeatedDouble(proto2_msg, proto2_f, i) == v._double);
ASSERT(r->GetRepeatedDouble(proto2_msg, proto2_f, i) == upb_value_getdouble(v));
break;
case UPB_TYPE(FLOAT):
ASSERT(r->GetRepeatedFloat(proto2_msg, proto2_f, i) == v._float);
ASSERT(r->GetRepeatedFloat(proto2_msg, proto2_f, i) == upb_value_getfloat(v));
break;
case UPB_TYPE(INT64):
case UPB_TYPE(SINT64):
case UPB_TYPE(SFIXED64):
ASSERT(r->GetRepeatedInt64(proto2_msg, proto2_f, i) == v.int64);
ASSERT(r->GetRepeatedInt64(proto2_msg, proto2_f, i) == upb_value_getint64(v));
break;
case UPB_TYPE(UINT64):
case UPB_TYPE(FIXED64):
ASSERT(r->GetRepeatedUInt64(proto2_msg, proto2_f, i) == v.uint64);
ASSERT(r->GetRepeatedUInt64(proto2_msg, proto2_f, i) == upb_value_getuint64(v));
break;
case UPB_TYPE(SFIXED32):
case UPB_TYPE(SINT32):
case UPB_TYPE(INT32):
case UPB_TYPE(ENUM):
ASSERT(r->GetRepeatedInt32(proto2_msg, proto2_f, i) == v.int32);
ASSERT(r->GetRepeatedInt32(proto2_msg, proto2_f, i) == upb_value_getint32(v));
break;
case UPB_TYPE(FIXED32):
case UPB_TYPE(UINT32):
ASSERT(r->GetRepeatedUInt32(proto2_msg, proto2_f, i) == v.uint32);
ASSERT(r->GetRepeatedUInt32(proto2_msg, proto2_f, i) == upb_value_getuint32(v));
break;
case UPB_TYPE(BOOL):
ASSERT(r->GetRepeatedBool(proto2_msg, proto2_f, i) == v._bool);
ASSERT(r->GetRepeatedBool(proto2_msg, proto2_f, i) == upb_value_getbool(v));
break;
case UPB_TYPE(STRING):
case UPB_TYPE(BYTES): {
std::string str = r->GetRepeatedString(proto2_msg, proto2_f, i);
std::string str2(upb_string_getrobuf(v.str), upb_string_len(v.str));
upb_string *upbstr = upb_value_getstr(v);
std::string str2(upb_string_getrobuf(upbstr), upb_string_len(upbstr));
ASSERT(str == str2);
break;
}
@ -72,7 +74,7 @@ void compare_arrays(const google::protobuf::Reflection *r,
case UPB_TYPE(MESSAGE):
ASSERT(upb_dyncast_msgdef(upb_f->def) != NULL);
compare(r->GetRepeatedMessage(proto2_msg, proto2_f, i),
v.msg, upb_downcast_msgdef(upb_f->def));
upb_value_getmsg(v), upb_downcast_msgdef(upb_f->def));
}
}
}
@ -87,44 +89,45 @@ void compare_values(const google::protobuf::Reflection *r,
default:
ASSERT(false);
case UPB_TYPE(DOUBLE):
ASSERT(r->GetDouble(proto2_msg, proto2_f) == v._double);
ASSERT(r->GetDouble(proto2_msg, proto2_f) == upb_value_getdouble(v));
break;
case UPB_TYPE(FLOAT):
ASSERT(r->GetFloat(proto2_msg, proto2_f) == v._float);
ASSERT(r->GetFloat(proto2_msg, proto2_f) == upb_value_getfloat(v));
break;
case UPB_TYPE(INT64):
case UPB_TYPE(SINT64):
case UPB_TYPE(SFIXED64):
ASSERT(r->GetInt64(proto2_msg, proto2_f) == v.int64);
ASSERT(r->GetInt64(proto2_msg, proto2_f) == upb_value_getint64(v));
break;
case UPB_TYPE(UINT64):
case UPB_TYPE(FIXED64):
ASSERT(r->GetUInt64(proto2_msg, proto2_f) == v.uint64);
ASSERT(r->GetUInt64(proto2_msg, proto2_f) == upb_value_getuint64(v));
break;
case UPB_TYPE(SFIXED32):
case UPB_TYPE(SINT32):
case UPB_TYPE(INT32):
case UPB_TYPE(ENUM):
ASSERT(r->GetInt32(proto2_msg, proto2_f) == v.int32);
ASSERT(r->GetInt32(proto2_msg, proto2_f) == upb_value_getint32(v));
break;
case UPB_TYPE(FIXED32):
case UPB_TYPE(UINT32):
ASSERT(r->GetUInt32(proto2_msg, proto2_f) == v.uint32);
ASSERT(r->GetUInt32(proto2_msg, proto2_f) == upb_value_getuint32(v));
break;
case UPB_TYPE(BOOL):
ASSERT(r->GetBool(proto2_msg, proto2_f) == v._bool);
ASSERT(r->GetBool(proto2_msg, proto2_f) == upb_value_getbool(v));
break;
case UPB_TYPE(STRING):
case UPB_TYPE(BYTES): {
std::string str = r->GetString(proto2_msg, proto2_f);
std::string str2(upb_string_getrobuf(v.str), upb_string_len(v.str));
upb_string *upbstr = upb_value_getstr(v);
std::string str2(upb_string_getrobuf(upbstr), upb_string_len(upbstr));
ASSERT(str == str2);
break;
}
case UPB_TYPE(GROUP):
case UPB_TYPE(MESSAGE):
compare(r->GetMessage(proto2_msg, proto2_f),
v.msg, upb_downcast_msgdef(upb_f->def));
upb_value_getmsg(v), upb_downcast_msgdef(upb_f->def));
}
}
@ -173,7 +176,7 @@ void parse_and_compare(MESSAGE_CIDENT *proto2_msg,
// Parse to both proto2 and upb.
ASSERT(proto2_msg->ParseFromArray(upb_string_getrobuf(str), upb_string_len(str)));
upb_status status = UPB_STATUS_INIT;
upb_msg_decodestr(upb_msg, upb_md, str, &status);
upb_strtomsg(str, upb_msg, upb_md, &status);
ASSERT(upb_ok(&status));
compare(*proto2_msg, upb_msg, upb_md);
}
@ -205,24 +208,28 @@ int main(int argc, char *argv[])
upb_symtab_add_descriptorproto(symtab);
upb_def *fds_msgdef = upb_symtab_lookup(
symtab, UPB_STRLIT("google.protobuf.FileDescriptorSet"));
assert(fds_msgdef);
upb_stringsrc *ssrc = upb_stringsrc_new();
upb_stringsrc_reset(ssrc, fds);
upb_decoder *decoder = upb_decoder_new(upb_downcast_msgdef(fds_msgdef));
upb_decoder_reset(decoder, upb_stringsrc_bytesrc(ssrc));
upb_symtab_addfds(symtab, upb_decoder_src(decoder), &status);
upb_stringsrc ssrc;
upb_stringsrc_init(&ssrc);
upb_stringsrc_reset(&ssrc, fds);
upb_decoder decoder;
upb_decoder_init(&decoder, upb_downcast_msgdef(fds_msgdef));
upb_decoder_reset(&decoder, upb_stringsrc_bytesrc(&ssrc));
upb_symtab_addfds(symtab, upb_decoder_src(&decoder), &status);
if(!upb_ok(&status)) {
fprintf(stderr, "Error importing " MESSAGE_DESCRIPTOR_FILE ": ");
upb_printerr(&status);
return 1;
}
upb_string_unref(fds);
upb_decoder_free(decoder);
upb_stringsrc_free(ssrc);
upb_decoder_uninit(&decoder);
upb_stringsrc_uninit(&ssrc);
upb_string *proto_name = upb_strdupc(MESSAGE_NAME);
upb_msgdef *def = upb_downcast_msgdef(upb_symtab_lookup(symtab, proto_name));
if(!def) {
upb_def *def = upb_symtab_lookup(symtab, proto_name);
upb_msgdef *msgdef;
if(!def || !(msgdef = upb_dyncast_msgdef(def))) {
fprintf(stderr, "Error finding symbol '" UPB_STRFMT "'.\n",
UPB_STRARG(proto_name));
return 1;
@ -238,13 +245,13 @@ int main(int argc, char *argv[])
// Run twice to test proper object reuse.
MESSAGE_CIDENT proto2_msg;
upb_msg *upb_msg = upb_msg_new(def);
parse_and_compare(&proto2_msg, upb_msg, def, str);
parse_and_compare(&proto2_msg, upb_msg, def, str);
upb_msg *upb_msg = upb_msg_new(msgdef);
parse_and_compare(&proto2_msg, upb_msg, msgdef, str);
parse_and_compare(&proto2_msg, upb_msg, msgdef, str);
printf("All tests passed, %d assertions.\n", num_assertions);
upb_msg_unref(upb_msg, def);
upb_def_unref(UPB_UPCAST(def));
upb_msg_unref(upb_msg, msgdef);
upb_def_unref(UPB_UPCAST(msgdef));
upb_string_unref(str);
upb_symtab_unref(symtab);

Loading…
Cancel
Save