Tweaks to upb_src/upb_sink interfaces.

pull/13171/head
Joshua Haberman 15 years ago
parent 209dce5eb0
commit be5ddd8a64
  1. 7
      Makefile
  2. 23
      src/upb_atomic.h
  3. 171
      src/upb_decoder.c
  4. 2
      src/upb_decoder.h
  5. 28
      src/upb_def.c
  6. 2
      src/upb_def.h
  7. 22
      src/upb_stream.h
  8. 0
      src/upb_stream_vtbl.h
  9. 29
      src/upb_string.h
  10. 8
      tests/test_table.cc

@ -38,7 +38,7 @@ ALL=deps $(OBJ) $(LIBUPB) $(LIBUPB_PIC)
all: $(ALL)
clean:
rm -rf $(LIBUPB) $(LIBUPB_PIC)
rm -rf $(call rwildcard,,*.o) $(call rwildcard,,*.lo)
rm -rf $(call rwildcard,,*.o) $(call rwildcard,,*.lo) $(call rwildcard,,*.gc*)
rm -rf benchmark/google_messages.proto.pb benchmark/google_messages.pb.* benchmarks/b.* benchmarks/*.pb*
rm -rf tests/tests tests/t.* tests/test_table
rm -rf descriptor/descriptor.pb
@ -46,7 +46,7 @@ clean:
cd lang_ext/python && python setup.py clean --all
# The core library (src/libupb.a)
SRC=src/upb.c src/upb_decoder.c src/upb_table.c src/upb_def.c \
SRC=src/upb.c src/upb_decoder.c src/upb_table.c src/upb_def.c src/upb_string.c \
descriptor/descriptor.c
# Parts of core that are yet to be converted.
OTHERSRC=src/upb_encoder.c src/upb_text.c
@ -86,11 +86,12 @@ tests/test.proto.pb: tests/test.proto
# TODO: replace with upbc
protoc tests/test.proto -otests/test.proto.pb
tests: tests/tests \
TESTS=tests/tests \
tests/test_table \
tests/t.test_vs_proto2.googlemessage1 \
tests/t.test_vs_proto2.googlemessage2 \
tests/test.proto.pb
$(TESTS): src/libupb.a
#VALGRIND=valgrind --leak-check=full --error-exitcode=1
VALGRIND=

@ -29,7 +29,6 @@ extern "C" {
#define INLINE static inline
#endif
#define UPB_THREAD_UNSAFE
#ifdef UPB_THREAD_UNSAFE
/* Non-thread-safe implementations. ******************************************/
@ -65,15 +64,6 @@ INLINE int upb_atomic_fetch_and_add(upb_atomic_refcount_t *a, int val) {
return ret;
}
typedef struct {
} upb_rwlock_t;
INLINE void upb_rwlock_init(upb_rwlock_t *l) { (void)l; }
INLINE void upb_rwlock_destroy(upb_rwlock_t *l) { (void)l; }
INLINE void upb_rwlock_rdlock(upb_rwlock_t *l) { (void)l; }
INLINE void upb_rwlock_wrlock(upb_rwlock_t *l) { (void)l; }
INLINE void upb_rwlock_unlock(upb_rwlock_t *l) { (void)l; }
#endif
/* Atomic refcount ************************************************************/
@ -111,10 +101,6 @@ INLINE bool upb_atomic_read(upb_atomic_refcount_t *a) {
return __sync_fetch_and_add(&a->v, 0);
}
/* Overwrites the refcount's value with "val" via a plain assignment to a->v.
 * Always returns true, so a caller that reads the result gets a defined value
 * (a bool function that falls off its end yields undefined behavior when the
 * result is used). */
INLINE bool upb_atomic_write(upb_atomic_refcount_t *a, int val) {
  a->v = val;
  return true;
}
#elif defined(WIN32)
/* Windows defines atomic increment/decrement. */
@ -145,7 +131,14 @@ INLINE bool upb_atomic_unref(upb_atomic_refcount_t *a) {
#ifdef UPB_THREAD_UNSAFE
/* Already defined. */
/* UPB_THREAD_UNSAFE build: the read/write lock holds no state and every
 * operation on it is a no-op. */
typedef struct {
} upb_rwlock_t;

/* Each stub only references its argument to silence unused-parameter
 * warnings; nothing is initialized, acquired, or released. */
INLINE void upb_rwlock_init(upb_rwlock_t *l) {
  (void)l;
}
INLINE void upb_rwlock_destroy(upb_rwlock_t *l) {
  (void)l;
}
INLINE void upb_rwlock_rdlock(upb_rwlock_t *l) {
  (void)l;
}
INLINE void upb_rwlock_wrlock(upb_rwlock_t *l) {
  (void)l;
}
INLINE void upb_rwlock_unlock(upb_rwlock_t *l) {
  (void)l;
}
#elif defined(UPB_USE_PTHREADS)

@ -68,9 +68,6 @@ struct upb_decoder {
upb_strlen_t packed_end_offset;
// String we return for string values. We try to recycle it if possible.
upb_string *str;
// We keep a stack of messages we have recursed into.
upb_decoder_frame *top, *limit, stack[UPB_MAX_NESTING];
};
@ -93,18 +90,19 @@ static bool upb_decoder_nextbuf(upb_decoder *d)
d->buf_bytesleft);
}
// Recycle old buffer, pull new one.
// Recycle old buffer.
if(d->buf) {
upb_bytesrc_recycle(d->bytesrc, d->buf);
d->buf = upb_string_tryrecycle(d->buf);
d->buf_offset -= upb_string_len(d->buf);
d->buf_stream_offset += upb_string_len(d->buf);
}
d->buf = upb_bytesrc_get(d->bytesrc, UPB_MAX_ENCODED_SIZE);
// Handle cases arising from error or EOF.
if(d->buf) {
// Pull next buffer.
if(upb_bytesrc_get(d->bytesrc, d->buf, UPB_MAX_ENCODED_SIZE)) {
d->buf_bytesleft += upb_string_len(d->buf);
return true;
} else {
// Error or EOF.
if(!upb_bytesrc_eof(d->bytesrc)) {
// Error from bytesrc.
upb_copyerr(&d->src.status, upb_bytesrc_status(d->bytesrc));
@ -113,9 +111,11 @@ static bool upb_decoder_nextbuf(upb_decoder *d)
// EOF from bytesrc and we don't have any residual bytes left.
d->src.eof = true;
return false;
} else {
// No more data left from the bytesrc, but we still have residual bytes.
return true;
}
}
return true;
}
static const uint8_t *upb_decoder_getbuf_full(upb_decoder *d, uint32_t *bytes)
@ -369,85 +369,86 @@ again:
bool upb_decoder_getval(upb_decoder *d, upb_valueptr val)
{
upb_wire_type_t native_wire_type = upb_types[d->field->type].native_wire_type;
if(native_wire_type == UPB_WIRE_TYPE_DELIMITED) {
// A string, bytes, or a length-delimited submessage. The latter isn't
// technically a string, but can be gotten as one to perform lazy parsing.
d->str = upb_string_tryrecycle(d->str);
const upb_strlen_t total_len = d->delimited_len;
if (d->buf_offset >= 0 && (int32_t)total_len <= d->buf_bytesleft) {
// The entire string is inside our current buffer, so we can just
// return a substring of the buffer without copying.
upb_string_substr(d->str, d->buf,
upb_string_len(d->buf) - d->buf_bytesleft,
total_len);
upb_decoder_skipbytes(d, total_len);
*val.str = d->str;
} else {
// The string spans buffers, so we must copy from the current buffer,
// the next buffer (if we have one), and finally from the bytesrc.
uint8_t *str = (uint8_t*)upb_string_getrwbuf(d->str, total_len);
upb_strlen_t len = 0;
if(d->buf_offset < 0) {
// Residual bytes we need to copy from tmpbuf.
memcpy(str, d->tmpbuf, -d->buf_offset);
len += -d->buf_offset;
}
if(d->buf) {
upb_strlen_t to_copy =
UPB_MIN(total_len - len, upb_string_len(d->buf) - d->buf_offset);
memcpy(str + len, upb_string_getrobuf(d->buf) + d->buf_offset, to_copy);
}
upb_decoder_skipbytes(d, len);
upb_string_getrwbuf(d->str, len); // Cheap resize.
if(len < total_len) {
if(!upb_bytesrc_append(d->bytesrc, d->str, total_len - len)) {
upb_copyerr(&d->src.status, upb_bytesrc_status(d->bytesrc));
return false;
}
d->buf_stream_offset += total_len - len;
}
switch(upb_types[d->field->type].native_wire_type) {
case UPB_WIRE_TYPE_VARINT: {
uint32_t low, high;
if(!upb_decoder_readv64(d, &low, &high)) return false;
uint64_t u64 = ((uint64_t)high << 32) | low;
if(d->field->type == UPB_TYPE(SINT64))
*val.int64 = upb_zzdec_64(u64);
else
*val.uint64 = u64;
break;
}
case UPB_WIRE_TYPE_32BIT_VARINT: {
uint32_t u32;
if(!upb_decoder_readv32(d, &u32)) return false;
if(d->field->type == UPB_TYPE(SINT32))
*val.int32 = upb_zzdec_32(u32);
else
*val.uint32 = u32;
break;
}
case UPB_WIRE_TYPE_64BIT:
if(!upb_decoder_readf64(d, val.uint64)) return false;
break;
case UPB_WIRE_TYPE_32BIT:
if(!upb_decoder_readf32(d, val.uint32)) return false;
break;
default:
upb_seterr(&d->src.status, UPB_STATUS_ERROR,
"Attempted to call getval on a group.");
return false;
}
// For a packed field where we have not reached the end, we leave the field
// in the decoder so we will return it again without parsing a key.
if(d->wire_type != UPB_WIRE_TYPE_DELIMITED ||
upb_decoder_offset(d) >= d->packed_end_offset) {
d->field = NULL;
}
return true;
}
bool upb_decoder_getstr(upb_decoder *d, upb_string *str) {
// A string, bytes, or a length-delimited submessage. The latter isn't
// technically a string, but can be gotten as one to perform lazy parsing.
const int32_t total_len = d->delimited_len;
if (d->buf_offset >= 0 && (int32_t)total_len <= d->buf_bytesleft) {
// The entire string is inside our current buffer, so we can just
// return a substring of the buffer without copying.
upb_string_substr(str, d->buf,
upb_string_len(d->buf) - d->buf_bytesleft,
total_len);
upb_decoder_skipbytes(d, total_len);
} else {
switch(native_wire_type) {
case UPB_WIRE_TYPE_VARINT: {
uint32_t low, high;
if(!upb_decoder_readv64(d, &low, &high)) return false;
uint64_t u64 = ((uint64_t)high << 32) | low;
if(d->field->type == UPB_TYPE(SINT64))
*val.int64 = upb_zzdec_64(u64);
else
*val.uint64 = u64;
break;
}
case UPB_WIRE_TYPE_32BIT_VARINT: {
uint32_t u32;
if(!upb_decoder_readv32(d, &u32)) return false;
if(d->field->type == UPB_TYPE(SINT32))
*val.int32 = upb_zzdec_32(u32);
else
*val.uint32 = u32;
break;
}
case UPB_WIRE_TYPE_64BIT:
if(!upb_decoder_readf64(d, val.uint64)) return false;
break;
case UPB_WIRE_TYPE_32BIT:
if(!upb_decoder_readf32(d, val.uint32)) return false;
break;
default:
upb_seterr(&d->src.status, UPB_STATUS_ERROR,
"Attempted to call getval on a group.");
return false;
// The string spans buffers, so we must copy from the residual buffer
// (if any bytes are there), then the buffer, and finally from the bytesrc.
uint8_t *ptr = (uint8_t*)upb_string_getrwbuf(
str, UPB_MIN(total_len, d->buf_bytesleft));
int32_t len = 0;
if(d->buf_offset < 0) {
// Residual bytes we need to copy from tmpbuf.
memcpy(ptr, d->tmpbuf, -d->buf_offset);
len += -d->buf_offset;
}
// For a packed field where we have not reached the end, we leave the field
// in the decoder so we will return it again without parsing a key.
if(d->wire_type != UPB_WIRE_TYPE_DELIMITED ||
upb_decoder_offset(d) >= d->packed_end_offset) {
d->field = NULL;
if(d->buf) {
// Bytes from the buffer.
memcpy(ptr + len, upb_string_getrobuf(d->buf) + d->buf_offset,
upb_string_len(str) - len);
}
upb_decoder_skipbytes(d, upb_string_len(str));
if(len < total_len) {
// Bytes from the bytesrc.
if(!upb_bytesrc_append(d->bytesrc, str, total_len - len)) {
upb_copyerr(&d->src.status, upb_bytesrc_status(d->bytesrc));
return false;
}
// Have to advance this since the buffering layer of the decoder will
// never see these bytes.
d->buf_stream_offset += total_len - len;
}
}
d->field = NULL;
return true;
}
@ -549,21 +550,19 @@ upb_decoder *upb_decoder_new(upb_msgdef *msgdef)
d->toplevel_msgdef = msgdef;
d->limit = &d->stack[UPB_MAX_NESTING];
d->buf = NULL;
d->str = upb_string_new();
upb_src_init(&d->src, &upb_decoder_src_vtbl);
return d;
}
void upb_decoder_free(upb_decoder *d)
{
upb_string_unref(d->str);
if(d->buf) upb_string_unref(d->buf);
upb_string_unref(d->buf);
free(d);
}
void upb_decoder_reset(upb_decoder *d, upb_bytesrc *bytesrc)
{
if(d->buf) upb_bytesrc_recycle(d->bytesrc, d->buf);
upb_string_unref(d->buf);
d->top = d->stack;
d->top->msgdef = d->toplevel_msgdef;
// The top-level message is not delimited (we can keep receiving data for it

@ -19,7 +19,7 @@
#include <stdbool.h>
#include <stdint.h>
#include "upb_def.h"
#include "upb_srcsink.h"
#include "upb_stream.h"
#ifdef __cplusplus
extern "C" {

@ -215,10 +215,11 @@ typedef struct _upb_unresolveddef {
upb_string *name;
} upb_unresolveddef;
// Is passed a ref on the string.
static upb_unresolveddef *upb_unresolveddef_new(upb_string *str) {
upb_unresolveddef *def = malloc(sizeof(*def));
upb_def_init(&def->base, UPB_DEF_UNRESOLVED);
def->name = upb_string_getref(str);
def->name = str;
return def;
}
@ -258,7 +259,8 @@ static bool upb_addenum_val(upb_src *src, upb_enumdef *e, upb_status *status)
CHECKSRC(upb_src_getint32(src, &number));
break;
case GOOGLE_PROTOBUF_ENUMVALUEDESCRIPTORPROTO_NAME_FIELDNUM:
CHECKSRC(upb_src_getstr(src, &name));
name = upb_string_tryrecycle(name);
CHECKSRC(upb_src_getstr(src, name));
break;
default:
CHECKSRC(upb_src_skipval(src));
@ -274,11 +276,15 @@ static bool upb_addenum_val(upb_src *src, upb_enumdef *e, upb_status *status)
iton_ent iton_ent = {{number, 0}, name};
upb_strtable_insert(&e->ntoi, &ntoi_ent.e);
upb_inttable_insert(&e->iton, &iton_ent.e);
// We don't unref "name" because we pass our ref to the iton entry of the
// table. strtables can ref their keys, but the inttable doesn't know that
// the value is a string.
return true;
src_err:
upb_copyerr(status, upb_src_status(src));
err:
upb_string_unref(name);
return false;
}
@ -368,12 +374,12 @@ static bool upb_addfield(upb_src *src, upb_msgdef *m, upb_status *status)
f->number = tmp;
break;
case GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_NAME_FIELDNUM:
CHECKSRC(upb_src_getstr(src, &f->name));
f->name = upb_string_getref(f->name);
f->name = upb_string_tryrecycle(f->name);
CHECKSRC(upb_src_getstr(src, f->name));
break;
case GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_NAME_FIELDNUM: {
upb_string *str;
CHECKSRC(upb_src_getstr(src, &str));
upb_string *str = upb_string_new();
CHECKSRC(upb_src_getstr(src, str));
if(f->def) upb_def_unref(f->def);
f->def = UPB_UPCAST(upb_unresolveddef_new(str));
f->owned = true;
@ -415,9 +421,8 @@ static bool upb_addmsg(upb_src *src, upb_deflist *defs, upb_status *status)
while((f = upb_src_getdef(src)) != NULL) {
switch(f->number) {
case GOOGLE_PROTOBUF_DESCRIPTORPROTO_NAME_FIELDNUM:
upb_string_unref(m->base.fqname);
CHECKSRC(upb_src_getstr(src, &m->base.fqname));
m->base.fqname = upb_string_getref(m->base.fqname);
m->base.fqname = upb_string_tryrecycle(m->base.fqname);
CHECKSRC(upb_src_getstr(src, m->base.fqname));
break;
case GOOGLE_PROTOBUF_DESCRIPTORPROTO_FIELD_FIELDNUM:
CHECKSRC(upb_src_startmsg(src));
@ -487,9 +492,8 @@ static bool upb_addfd(upb_src *src, upb_deflist *defs, upb_status *status)
while((f = upb_src_getdef(src)) != NULL) {
switch(f->number) {
case GOOGLE_PROTOBUF_FILEDESCRIPTORPROTO_NAME_FIELDNUM:
upb_string_unref(package);
CHECKSRC(upb_src_getstr(src, &package));
package = upb_string_getref(package);
package = upb_string_tryrecycle(package);
CHECKSRC(upb_src_getstr(src, package));
break;
case GOOGLE_PROTOBUF_FILEDESCRIPTORPROTO_MESSAGE_TYPE_FIELDNUM:
CHECKSRC(upb_src_startmsg(src));

@ -27,7 +27,7 @@
#define UPB_DEF_H_
#include "upb_atomic.h"
#include "upb_srcsink.h"
#include "upb_stream.h"
#include "upb_table.h"
#ifdef __cplusplus

@ -1,8 +1,8 @@
/*
* upb - a minimalist implementation of protocol buffers.
*
* This file defines four general-purpose interfaces for pulling/pushing either
* protobuf data or bytes:
* This file defines four general-purpose streaming interfaces for protobuf
* data or bytes:
*
* - upb_src: pull interface for protobuf data.
* - upb_sink: push interface for protobuf data.
@ -19,7 +19,7 @@
#ifndef UPB_SRCSINK_H
#define UPB_SRCSINK_H
#include "upb_srcsink_vtbl.h"
#include "upb_stream_vtbl.h"
#ifdef __cplusplus
extern "C" {
@ -41,10 +41,10 @@ struct _upb_fielddef;
// error or end-of-stream.
struct _upb_fielddef *upb_src_getdef(upb_src *src);
// Retrieves and stores the next value in "val". For string types the caller
// does not own a ref to the returned type; you must ref it yourself if you
// want one. Returns false on error.
// Retrieves and stores the next value in "val". For string types "val" must
// be a newly-recycled string. Returns false on error.
bool upb_src_getval(upb_src *src, upb_valueptr val);
bool upb_src_getstr(upb_src *src, upb_string *val);
// Like upb_src_getval() but skips the value.
bool upb_src_skipval(upb_src *src);
@ -72,7 +72,6 @@ bool upb_src_getuint32(upb_src *src, uint32_t *val);
bool upb_src_getuint64(upb_src *src, uint64_t *val);
bool upb_src_getfloat(upb_src *src, float *val);
bool upb_src_getdouble(upb_src *src, double *val);
bool upb_src_getstr(upb_src *src, upb_string **val);
/* upb_sink *******************************************************************/
@ -93,14 +92,9 @@ upb_status *upb_sink_status(upb_sink *sink);
/* upb_bytesrc ****************************************************************/
// Returns the next string in the stream. NULL is returned on error or eof.
// Returns the next string in the stream. false is returned on error or eof.
// The string must be at least "minlen" bytes long unless the stream is eof.
//
// A ref is passed to the caller, though the caller is encouraged to pass the
// ref back to the bytesrc with upb_bytesrc_recycle(). This can help reduce
// memory allocation/deallocation.
upb_string *upb_bytesrc_get(upb_bytesrc *src, upb_strlen_t minlen);
void upb_bytesrc_recycle(upb_bytesrc *src, upb_string *str);
bool upb_bytesrc_get(upb_bytesrc *src, upb_string *str, upb_strlen_t minlen);
// Appends the next "len" bytes in the stream in-place to "str". This should
// be used when the caller needs to build a contiguous string of the existing

@ -38,7 +38,7 @@ extern "C" {
// the associated functions. Also, strings may *only* be allocated on the heap.
struct _upb_string {
char *ptr;
uint32_t len;
int32_t len;
uint32_t size;
upb_atomic_refcount_t refcount;
union {
@ -53,12 +53,22 @@ struct _upb_string {
// longer needed, it should be unref'd, never freed directly.
upb_string *upb_string_new();
// Releases a ref on the given string, which may free the memory.
void upb_string_unref(upb_string *str);
void _upb_string_free(upb_string *str);
// Releases a ref on the given string, which may free the memory. "str"
// can be NULL, in which case this is a no-op.
INLINE void upb_string_unref(upb_string *str) {
  // A NULL string is accepted and ignored.
  if (!str) return;
  // Drop one ref; the string is freed only when upb_atomic_unref() says so.
  if (upb_atomic_unref(&str->refcount)) {
    _upb_string_free(str);
  }
}
// Returns a string with the same contents as "str". The caller owns a ref on
// the returned string, which may or may not be the same object as "str".
upb_string *upb_string_getref(upb_string *str);
INLINE upb_string *upb_string_getref(upb_string *str) {
  // Sharing is done by bumping the refcount and handing back the same object.
  // Should stack-allocated strings ever be supported, a string living on the
  // stack would instead have to be copied into a newly allocated one here.
  upb_string *shared = str;
  upb_atomic_ref(&shared->refcount);
  return shared;
}
// Returns the length of the string.
INLINE upb_strlen_t upb_string_len(upb_string *str) { return str->len; }
@ -75,6 +85,17 @@ INLINE void upb_string_endread(upb_string *str) { (void)str; }
// Attempts to recycle the string "str" so it may be reused and have different
// data written to it. The returned string is either "str" if it could be
// recycled or a newly created string if "str" has other references.
//
// As a special case, passing NULL will allocate a new string. This is
// convenient for the pattern:
//
// upb_string *str = NULL;
// while (x) {
// if (y) {
// str = upb_string_tryrecycle(str);
// upb_src_getstr(src, str);
// }
// }
upb_string *upb_string_tryrecycle(upb_string *str);
// The three options for setting the contents of a string. These may only be

@ -1,7 +1,7 @@
#undef NDEBUG /* ensure tests always assert. */
#include "upb_table.h"
#include "upb_data.h"
#include "upb_string.h"
#include "test_util.h"
#include <assert.h>
#include <map>
@ -45,7 +45,7 @@ void test_strtable(const vector<string>& keys, uint32_t num_to_insert)
all.insert(key);
strtable_entry e;
e.value = key[0];
upb_strptr str = upb_strduplen(key.c_str(), key.size());
upb_string *str = upb_strduplen(key.c_str(), key.size());
e.e.key = str;
upb_strtable_insert(&table, &e.e);
upb_string_unref(str); // The table still owns a ref.
@ -55,7 +55,7 @@ void test_strtable(const vector<string>& keys, uint32_t num_to_insert)
/* Test correctness. */
for(uint32_t i = 0; i < keys.size(); i++) {
const string& key = keys[i];
upb_strptr str = upb_strduplen(key.c_str(), key.size());
upb_string *str = upb_strduplen(key.c_str(), key.size());
strtable_entry *e = (strtable_entry*)upb_strtable_lookup(&table, str);
if(m.find(key) != m.end()) { /* Assume map implementation is correct. */
assert(e);
@ -71,7 +71,7 @@ void test_strtable(const vector<string>& keys, uint32_t num_to_insert)
strtable_entry *e;
for(e = (strtable_entry*)upb_strtable_begin(&table); e;
e = (strtable_entry*)upb_strtable_next(&table, &e->e)) {
string tmp(upb_string_getrobuf(e->e.key), upb_strlen(e->e.key));
string tmp(upb_string_getrobuf(e->e.key), upb_string_len(e->e.key));
std::set<string>::iterator i = all.find(tmp);
assert(i != all.end());
all.erase(i);

Loading…
Cancel
Save