Decoder redesign in preparation for packed fields and start/endseq.

pull/13171/head
Joshua Haberman 14 years ago
parent 4a99abba12
commit f74534b42a
  1. 13
      Makefile
  2. 1
      perf-regression-test.py
  3. 12
      perf-tests.sh
  4. 47
      src/upb.c
  5. 8
      src/upb.h
  6. 463
      src/upb_decoder.c
  7. 4
      src/upb_decoder.h
  8. 64
      src/upb_decoder_x86.dasc
  9. 10
      src/upb_def.h
  10. 9
      src/upb_msg.c
  11. 49
      src/upb_stream.c
  12. 46
      src/upb_stream.h
  13. 5
      src/upb_string.h
  14. 9
      src/upb_textprinter.c
  15. 54
      src/upb_varint.c
  16. 50
      src/upb_varint.h
  17. 67
      tests/test_decoder.c

@ -77,6 +77,7 @@ CORE= \
src/upb_string.c \
src/upb_def.c \
src/upb_msg.c \
src/upb_varint.c \
# Common encoders/decoders -- you're almost certain to want these.
STREAM= \
@ -211,9 +212,13 @@ SIMPLE_TESTS= \
tests/test_string \
tests/test_def \
tests/test_varint \
tests/tests
# tests/test_decoder \
tests/test_stream \
tests/tests \
INTERACTIVE_TESTS= \
tests/test_decoder \
# tests/test_stream \
SIMPLE_CXX_TESTS= \
tests/test_table
@ -225,7 +230,7 @@ VARIADIC_TESTS= \
TESTS=$(SIMPLE_TESTS) $(SIMPLE_CXX_TESTS) $(VARIADIC_TESTS)
tests: $(TESTS)
tests: $(TESTS) $(INTERACTIVE_TESTS)
$(TESTS): $(LIBUPB)
tests/tests: tests/test.proto.pb

@ -10,6 +10,7 @@ set -v
# Generate numbers for baseline.
rm -rf perf-tmp
git clone . perf-tmp
cp perf-tests.sh perf-tmp
(cd perf-tmp && ./perf-tests.sh upb)
cp perf-tmp/perf-tests.out perf-tests.baseline

@ -16,20 +16,16 @@ run_with_flags () {
NAME=$2
make clean
echo "$FLAGS -fprofile-generate" > perf-cppflags
make upb_benchmarks
make benchmark
make clean_leave_profile
echo "$FLAGS -fprofile-use" > perf-cppflags
echo "$FLAGS" > perf-cppflags
make upb_benchmarks
make benchmark | sed -e "s/^/$NAME./g" | tee -a perf-tests.out
}
if [ x`uname -m` = xx86_64 ]; then
#if [ x`uname -m` = xx86_64 ]; then
run_with_flags "-DNDEBUG -m32" "plain32"
run_with_flags "-DNDEBUG -fomit-frame-pointer -m32" "omitfp32"
fi
#fi
run_with_flags "-DNDEBUG " "plain"
run_with_flags "-DNDEBUG -fomit-frame-pointer" "omitfp"
run_with_flags "-DNDEBUG -DUPB_USE_JIT_X64" "jit"

@ -13,31 +13,30 @@
#include "upb_string.h"
#define alignof(t) offsetof(struct { char c; t x; }, x)
#define TYPE_INFO(wire_type, ctype, allows_delimited, inmemory_type) \
{alignof(ctype), sizeof(ctype), wire_type, \
(1 << wire_type) | (allows_delimited << UPB_WIRE_TYPE_DELIMITED), \
UPB_TYPE(inmemory_type), #ctype},
#define TYPE_INFO(wire_type, ctype, inmemory_type) \
{alignof(ctype), sizeof(ctype), wire_type, UPB_TYPE(inmemory_type), #ctype},
const upb_type_info upb_types[] = {
{0, 0, 0, 0, 0, ""}, // There is no type 0.
TYPE_INFO(UPB_WIRE_TYPE_64BIT, double, 1, DOUBLE) // DOUBLE
TYPE_INFO(UPB_WIRE_TYPE_32BIT, float, 1, FLOAT) // FLOAT
TYPE_INFO(UPB_WIRE_TYPE_VARINT, int64_t, 1, INT64) // INT64
TYPE_INFO(UPB_WIRE_TYPE_VARINT, uint64_t, 1, UINT64) // UINT64
TYPE_INFO(UPB_WIRE_TYPE_VARINT, int32_t, 1, INT32) // INT32
TYPE_INFO(UPB_WIRE_TYPE_64BIT, uint64_t, 1, UINT64) // FIXED64
TYPE_INFO(UPB_WIRE_TYPE_32BIT, uint32_t, 1, UINT32) // FIXED32
TYPE_INFO(UPB_WIRE_TYPE_VARINT, bool, 1, BOOL) // BOOL
TYPE_INFO(UPB_WIRE_TYPE_DELIMITED, void*, 1, STRING) // STRING
TYPE_INFO(UPB_WIRE_TYPE_START_GROUP, void*, 0, MESSAGE) // GROUP
TYPE_INFO(UPB_WIRE_TYPE_DELIMITED, void*, 1, MESSAGE) // MESSAGE
TYPE_INFO(UPB_WIRE_TYPE_DELIMITED, void*, 1, STRING) // BYTES
TYPE_INFO(UPB_WIRE_TYPE_VARINT, uint32_t, 1, UINT32) // UINT32
TYPE_INFO(UPB_WIRE_TYPE_VARINT, uint32_t, 1, INT32) // ENUM
TYPE_INFO(UPB_WIRE_TYPE_32BIT, int32_t, 1, INT32) // SFIXED32
TYPE_INFO(UPB_WIRE_TYPE_64BIT, int64_t, 1, INT64) // SFIXED64
TYPE_INFO(UPB_WIRE_TYPE_VARINT, int32_t, 1, INT32) // SINT32
TYPE_INFO(UPB_WIRE_TYPE_VARINT, int64_t, 1, INT64) // SINT64
{0, 0, 0, 0, ""}, // There is no type 0.
TYPE_INFO(UPB_WIRE_TYPE_64BIT, double, DOUBLE) // DOUBLE
TYPE_INFO(UPB_WIRE_TYPE_32BIT, float, FLOAT) // FLOAT
TYPE_INFO(UPB_WIRE_TYPE_VARINT, int64_t, INT64) // INT64
TYPE_INFO(UPB_WIRE_TYPE_VARINT, uint64_t, UINT64) // UINT64
TYPE_INFO(UPB_WIRE_TYPE_VARINT, int32_t, INT32) // INT32
TYPE_INFO(UPB_WIRE_TYPE_64BIT, uint64_t, UINT64) // FIXED64
TYPE_INFO(UPB_WIRE_TYPE_32BIT, uint32_t, UINT32) // FIXED32
TYPE_INFO(UPB_WIRE_TYPE_VARINT, bool, BOOL) // BOOL
TYPE_INFO(UPB_WIRE_TYPE_DELIMITED, void*, STRING) // STRING
TYPE_INFO(UPB_WIRE_TYPE_START_GROUP, void*, MESSAGE) // GROUP
TYPE_INFO(UPB_WIRE_TYPE_DELIMITED, void*, MESSAGE) // MESSAGE
TYPE_INFO(UPB_WIRE_TYPE_DELIMITED, void*, STRING) // BYTES
TYPE_INFO(UPB_WIRE_TYPE_VARINT, uint32_t, UINT32) // UINT32
TYPE_INFO(UPB_WIRE_TYPE_VARINT, uint32_t, INT32) // ENUM
TYPE_INFO(UPB_WIRE_TYPE_32BIT, int32_t, INT32) // SFIXED32
TYPE_INFO(UPB_WIRE_TYPE_64BIT, int64_t, INT64) // SFIXED64
TYPE_INFO(UPB_WIRE_TYPE_VARINT, int32_t, INT32) // SINT32
TYPE_INFO(UPB_WIRE_TYPE_VARINT, int64_t, INT64) // SINT64
TYPE_INFO(UPB_WIRE_TYPE_END_GROUP, void*, INT64) // ENDGROUP
};
#ifdef NDEBUG
@ -64,7 +63,7 @@ void upb_copyerr(upb_status *to, upb_status *from)
void upb_clearerr(upb_status *status) {
status->code = UPB_OK;
upb_string_recycle(&status->str);
if (status->str) upb_string_recycle(&status->str);
}
void upb_printerr(upb_status *status) {

@ -126,7 +126,6 @@ typedef struct {
uint8_t align;
uint8_t size;
upb_wire_type_t native_wire_type;
uint8_t allowed_wire_types; // For packable fields, also allows delimited.
uint8_t inmemory_type; // For example, INT32, SINT32, and SFIXED32 -> INT32
char *ctype;
} upb_type_info;
@ -168,11 +167,11 @@ typedef int32_t upb_strlen_t;
// The type of a upb_value. This is like a upb_fieldtype_t, but adds the
// constant UPB_VALUETYPE_ARRAY to represent an array.
typedef uint8_t upb_valuetype_t;
#define UPB_TYPE_ENDGROUP 19 // Need to increase if more real types are added!
#define UPB_VALUETYPE_ARRAY 32
#define UPB_VALUETYPE_BYTESRC 32
#define UPB_VALUETYPE_RAW 33
#define UPB_VALUETYPE_FIELDDEF 34
#define UPB_TYPE_ENDGROUP 35
// A single .proto value. The owner must have an out-of-band way of knowing
// the type, so that it knows which union member to use.
@ -231,11 +230,6 @@ UPB_VALUE_ACCESSORS(fielddef, fielddef, upb_fielddef*, UPB_VALUETYPE_FIELDDEF);
extern upb_value UPB_NO_VALUE;
INLINE void upb_value_setraw(upb_value *val, uint64_t cval) {
SET_TYPE(val->type, UPB_VALUETYPE_RAW);
val->val.uint64 = cval;
}
INLINE upb_atomic_refcount_t *upb_value_getrefcount(upb_value val) {
assert(val.type == UPB_TYPE(MESSAGE) ||
val.type == UPB_TYPE(STRING) ||

@ -19,44 +19,50 @@
#include "upb_decoder_x86.h"
#endif
/* Decoding/Buffering of individual values ************************************/
// A group continues until an END_GROUP tag is seen.
#define UPB_GROUPEND UINT32_MAX
// A non-packed repeated field ends when a diff. field is seen (or submsg end).
#define UPB_REPEATEDEND (UINT32_MAX-1)
// Performs zig-zag decoding, which is used by sint32 and sint64.
INLINE int32_t upb_zzdec_32(uint32_t n) { return (n >> 1) ^ -(int32_t)(n & 1); }
INLINE int64_t upb_zzdec_64(uint64_t n) { return (n >> 1) ^ -(int64_t)(n & 1); }
// It's unfortunate that we have to micro-manage the compiler this way,
// especially since this tuning is necessarily specific to one hardware
// configuration. But empirically on a Core i7, performance increases 30-50%
// with these annotations. Every instance where these appear, gcc 4.2.1 made
// the wrong decision and degraded performance in benchmarks.
#define FORCEINLINE static __attribute__((always_inline))
#define NOINLINE static __attribute__((noinline))
// Abandons decoding by jumping to the context saved in d->exitjmp.
// siglongjmp() does not return, so neither does this function.
static void upb_decoder_exit(upb_decoder *d) {
  siglongjmp(d->exitjmp, 1);
}
/* Decoding/Buffering of wire types *******************************************/
#define UPB_MAX_VARINT_ENCODED_SIZE 10
INLINE void upb_decoder_advance(upb_decoder *d, size_t len) {
d->ptr += len;
}
// Moves the read cursor d->ptr forward by len bytes. No bounds check is
// performed here.
static void upb_decoder_advance(upb_decoder *d, size_t len) {
  d->ptr = d->ptr + len;
}
// Returns the distance in bytes from the read cursor d->ptr to d->end.
static size_t upb_decoder_bufleft(upb_decoder *d) {
  return (size_t)(d->end - d->ptr);
}
INLINE size_t upb_decoder_offset(upb_decoder *d) {
size_t upb_decoder_offset(upb_decoder *d) {
size_t offset = d->buf_stream_offset;
if (d->buf) offset += (d->ptr - d->buf);
return offset;
}
INLINE size_t upb_decoder_bufleft(upb_decoder *d) {
return d->end - d->ptr;
}
INLINE void upb_dstate_setmsgend(upb_decoder *d) {
uint32_t end_offset = d->dispatcher.top->end_offset;
d->submsg_end = (end_offset == UINT32_MAX) ?
(void*)UINTPTR_MAX : d->buf + end_offset;
static void upb_decoder_setmsgend(upb_decoder *d) {
uint32_t end = d->dispatcher.top->end_offset;
d->submsg_end = (end == UINT32_MAX) ? (void*)UINTPTR_MAX : d->buf + end;
}
// Pulls the next buffer from the bytesrc. Should be called only when the
// current buffer is completely empty.
static bool upb_pullbuf(upb_decoder *d) {
static void upb_pullbuf(upb_decoder *d, bool need) {
assert(upb_decoder_bufleft(d) == 0);
int32_t last_buf_len = d->buf ? upb_string_len(d->bufstr) : -1;
upb_string_recycle(&d->bufstr);
if (!upb_bytesrc_getstr(d->bytesrc, d->bufstr, d->status)) {
d->buf = NULL;
d->end = NULL;
return false;
if (need) upb_seterr(d->status, UPB_ERROR, "Unexpected EOF.");
upb_decoder_exit(d);
}
if (last_buf_len != -1) {
d->buf_stream_offset += last_buf_len;
@ -70,290 +76,256 @@ static bool upb_pullbuf(upb_decoder *d) {
d->jit_end = d->end - 20;
upb_string_recycle(&d->tmp);
upb_string_substr(d->tmp, d->bufstr, 0, 0);
upb_dstate_setmsgend(d);
return true;
upb_decoder_setmsgend(d);
}
// Called only from the slow path, this function copies the next "len" bytes
// from the stream to "data", adjusting the dstate appropriately.
static bool upb_getbuf(upb_decoder *d, void *data, size_t bytes_wanted) {
// from the stream to "data", adjusting the decoder state appropriately.
static void upb_getbuf(upb_decoder *d, void *data, size_t bytes, bool need) {
while (1) {
size_t to_copy = UPB_MIN(bytes_wanted, upb_decoder_bufleft(d));
size_t to_copy = UPB_MIN(bytes, upb_decoder_bufleft(d));
memcpy(data, d->ptr, to_copy);
upb_decoder_advance(d, to_copy);
bytes_wanted -= to_copy;
if (bytes_wanted == 0) return true;
if (!upb_pullbuf(d)) return false;
bytes -= to_copy;
if (bytes == 0) return;
upb_pullbuf(d, need);
}
}
// We use this path when we don't have UPB_MAX_VARINT_ENCODED_SIZE contiguous
// bytes available in our current buffer. We don't inline this because we
// accept that it will be slow and we don't want to pay for two copies of it.
static bool upb_decode_varint_slow(upb_decoder *d, upb_value *val) {
char byte = 0x80;
uint64_t val64 = 0;
NOINLINE uint64_t upb_decode_varint_slow(upb_decoder *d, bool need) {
uint8_t byte = 0x80;
uint64_t u64 = 0;
int bitpos;
for(bitpos = 0;
bitpos < 70 && (byte & 0x80) && upb_getbuf(d, &byte, 1);
bitpos += 7)
val64 |= ((uint64_t)byte & 0x7F) << bitpos;
if(bitpos == 70) {
upb_seterr(d->status, UPB_ERROR,
"Varint was unterminated after 10 bytes.\n");
return false;
} else if (d->status->code == UPB_EOF && bitpos == 0) {
// Regular EOF.
return false;
} else if (d->status->code == UPB_EOF && (byte & 0x80)) {
upb_seterr(d->status, UPB_ERROR,
"Provided data ended in the middle of a varint.\n");
return false;
} else {
// Success.
upb_value_setraw(val, val64);
return true;
for(bitpos = 0; bitpos < 70 && (byte & 0x80); bitpos += 7) {
upb_getbuf(d, &byte, 1, need);
u64 |= ((uint64_t)byte & 0x7F) << bitpos;
}
}
typedef struct {
upb_wire_type_t wire_type;
upb_field_number_t field_number;
} upb_tag;
if(bitpos == 70 && (byte & 0x80)) {
upb_seterr(d->status, UPB_ERROR, "Unterminated varint.\n");
upb_decoder_exit(d);
}
return u64;
}
INLINE bool upb_decode_tag(upb_decoder *d, uint32_t *tag) {
// For tags and delimited lengths, which must be <=32bit and are usually small.
FORCEINLINE uint32_t upb_decode_varint32(upb_decoder *d, bool need) {
const char *p = d->ptr;
upb_value val;
// Nearly all tag varints will be either 1 byte (1-16) or 2 bytes (17-2048).
uint32_t ret;
uint64_t u64;
// Nearly all will be either 1 byte (1-16) or 2 bytes (17-2048).
if (upb_decoder_bufleft(d) < 2) goto slow; // unlikely.
*tag = *p & 0x7f;
ret = *p & 0x7f;
if ((*(p++) & 0x80) == 0) goto done; // predictable if fields are in order
*tag |= (*p & 0x7f) << 7;
ret |= (*p & 0x7f) << 7;
if ((*(p++) & 0x80) == 0) goto done; // likely
slow:
// Decode a full varint starting over from ptr.
if (!upb_decode_varint_slow(d, &val)) return false;
*tag = upb_value_getint64(val);
p = d->ptr; // Trick the next line into not overwriting us.
u64 = upb_decode_varint_slow(d, need);
if (u64 > 0xffffffff) {
upb_seterr(d->status, UPB_ERROR, "Unterminated 32-bit varint.\n");
upb_decoder_exit(d);
}
ret = (uint32_t)u64;
p = d->ptr; // Turn the next line into a nop.
done:
upb_decoder_advance(d, p - d->ptr);
return true;
return ret;
}
INLINE bool upb_decode_varint(upb_decoder *d, upb_value *val) {
FORCEINLINE uint64_t upb_decode_varint(upb_decoder *d) {
if (upb_decoder_bufleft(d) >= 16) {
// Common (fast) case.
upb_decoderet r = upb_vdecode_fast(d->ptr);
if (r.p == NULL) {
upb_seterr(d->status, UPB_ERROR, "Unterminated varint.\n");
return false;
upb_decoder_exit(d);
}
upb_value_setraw(val, r.val);
upb_decoder_advance(d, r.p - d->ptr);
return true;
return r.val;
} else {
return upb_decode_varint_slow(d, val);
return upb_decode_varint_slow(d, true);
}
}
INLINE bool upb_decode_fixed(upb_decoder *d, size_t bytes, upb_value *val) {
FORCEINLINE void upb_decode_fixed(upb_decoder *d, void *val, size_t bytes) {
if (upb_decoder_bufleft(d) >= bytes) {
// Common (fast) case.
memcpy(val, d->ptr, bytes);
upb_decoder_advance(d, bytes);
} else {
if (!upb_getbuf(d, val, bytes)) return false;
upb_getbuf(d, val, bytes, true);
}
return true;
}
// "val" initially holds the length of the string, this is replaced by the
// contents of the string.
INLINE bool upb_decode_string(upb_decoder *d, upb_value *val,
upb_string **str) {
upb_string_recycle(str);
uint32_t strlen = upb_value_getint32(*val);
// Reads a fixed-width 32-bit value from the input through upb_decode_fixed()
// and returns the bytes as they were copied (no byte-order conversion here).
FORCEINLINE uint32_t upb_decode_fixed32(upb_decoder *d) {
  uint32_t raw;
  upb_decode_fixed(d, &raw, sizeof(raw));
  return raw;
}
// Reads a fixed-width 64-bit value from the input through upb_decode_fixed()
// and returns the bytes as they were copied (no byte-order conversion here).
FORCEINLINE uint64_t upb_decode_fixed64(upb_decoder *d) {
  uint64_t raw;
  upb_decode_fixed(d, &raw, sizeof(raw));
  return raw;
}
INLINE upb_string *upb_decode_string(upb_decoder *d) {
upb_string_recycle(&d->tmp);
uint32_t strlen = upb_decode_varint32(d, true);
if (upb_decoder_bufleft(d) >= strlen) {
// Common (fast) case.
upb_string_substr(*str, d->bufstr, d->ptr - d->buf, strlen);
upb_string_substr(d->tmp, d->bufstr, d->ptr - d->buf, strlen);
upb_decoder_advance(d, strlen);
} else {
if (!upb_getbuf(d, upb_string_getrwbuf(*str, strlen), strlen))
return false;
upb_getbuf(d, upb_string_getrwbuf(d->tmp, strlen), strlen, true);
}
upb_value_setstr(val, *str);
return true;
return d->tmp;
}
/* The main decoding loop *****************************************************/
extern upb_wire_type_t upb_expected_wire_types[];
// Returns true if wt is the correct on-the-wire type for ft.
INLINE bool upb_check_type(upb_wire_type_t wt, upb_fieldtype_t ft) {
// This doesn't currently support packed arrays.
return upb_types[ft].native_wire_type == wt;
// Leaves the current submessage/group: remembers the top frame's field in
// d->f, tells the dispatcher the submessage has ended, and then recomputes
// d->submsg_end via upb_decoder_setmsgend().
INLINE void upb_pop(upb_decoder *d) {
// Disabled: end-of-sequence dispatch for frames marked UPB_REPEATEDEND.
//if (d->dispatcher.top->end_offset == UPB_REPEATEDEND)
// upb_dispatch_endseq(&d->dispatcher);
// Read top->f before upb_dispatch_endsubmsg(), which presumably changes
// dispatcher.top -- TODO confirm against upb_stream.
d->f = d->dispatcher.top->f;
upb_dispatch_endsubmsg(&d->dispatcher);
upb_decoder_setmsgend(d);
}
static upb_flow_t upb_pop(upb_decoder *d) {
upb_flow_t ret = upb_dispatch_endsubmsg(&d->dispatcher);
upb_dstate_setmsgend(d);
return ret;
// Enters a submessage or group for field f. "end" is forwarded to
// upb_dispatch_startsubmsg(); callers in this file pass either UPB_GROUPEND
// or a buffer-relative end offset. d->submsg_end is then recomputed via
// upb_decoder_setmsgend(), which reads dispatcher.top->end_offset.
INLINE void upb_push(upb_decoder *d, upb_fieldent *f, uint32_t end) {
upb_dispatch_startsubmsg(&d->dispatcher, f, end);
upb_decoder_setmsgend(d);
}
static upb_flow_t upb_decoder_skipsubmsg(upb_decoder *d) {
if (d->dispatcher.top->f->type == UPB_TYPE(GROUP)) {
fprintf(stderr, "upb_decoder: Can't skip groups yet.\n");
abort();
}
upb_decoder_advance(d, d->dispatcher.top->end_offset - (d->ptr - d->buf));
/* Decoding of .proto types ***************************************************/
// Technically, we are losing data if we see a 32-bit varint that is not
// properly sign-extended. We could detect this and error about the data loss,
// but proto2 does not do this, so we pass.
// Generates a per-type field decoder named upb_decode_<type>(d, f) that reads
// one value from the wire, converts it, and dispatches it for field f.
//   type     - suffix of the generated function name (e.g. INT32).
//   wt       - suffix of the reader that is called: upb_decode_<wt>(d).
//   valtype  - suffix of the setter that is used: upb_value_set<valtype>().
//   convfunc - applied as (convfunc)(raw); either a type name (a cast) or the
//              name of a conversion function.
#define T(type, wt, valtype, convfunc) \
INLINE void upb_decode_ ## type(upb_decoder *d, upb_fieldent *f) { \
upb_value val; \
upb_value_set ## valtype(&val, (convfunc)(upb_decode_ ## wt(d))); \
upb_dispatch_value(&d->dispatcher, f, val); \
} \
// Reinterprets the 64 bits of n as a double, bit for bit (no numeric
// conversion). memcpy is used instead of a pointer cast because reading a
// uint64_t object through a double* violates the strict-aliasing rule.
static double upb_asdouble(uint64_t n) {
  double d;
  memcpy(&d, &n, sizeof(d));
  return d;
}
// Reinterprets the 32 bits of n as a float, bit for bit (no numeric
// conversion). memcpy is used instead of a pointer cast because reading a
// uint32_t object through a float* violates the strict-aliasing rule.
static float upb_asfloat(uint32_t n) {
  float f;
  memcpy(&f, &n, sizeof(f));
  return f;
}
// Zig-zag decodes a 32-bit value (used by sint32): the low bit selects the
// sign and the remaining bits hold the magnitude.
static int32_t upb_zzdec_32(uint32_t n) {
  const uint32_t magnitude = n >> 1;
  // All ones when the low bit is set, all zeros otherwise.
  const uint32_t sign_mask = (uint32_t)0 - (n & 1);
  return (int32_t)(magnitude ^ sign_mask);
}
// Zig-zag decodes a 64-bit value (used by sint64): the low bit selects the
// sign and the remaining bits hold the magnitude.
static int64_t upb_zzdec_64(uint64_t n) {
  const uint64_t magnitude = n >> 1;
  // All ones when the low bit is set, all zeros otherwise.
  const uint64_t sign_mask = (uint64_t)0 - (n & 1);
  return (int64_t)(magnitude ^ sign_mask);
}
T(INT32, varint, int32, int32_t)
T(INT64, varint, int64, int64_t)
T(UINT32, varint, uint32, uint32_t)
T(UINT64, varint, uint64, uint64_t)
T(FIXED32, fixed32, uint32, uint32_t)
T(FIXED64, fixed64, uint64, uint64_t)
T(SFIXED32, fixed32, int32, int32_t)
T(SFIXED64, fixed64, int64, int64_t)
T(BOOL, varint, bool, bool)
T(ENUM, varint, int32, int32_t)
T(DOUBLE, fixed64, double, upb_asdouble)
T(FLOAT, fixed32, float, upb_asfloat)
T(SINT32, varint, int32, upb_zzdec_32)
T(SINT64, varint, int64, upb_zzdec_64)
T(STRING, string, str, upb_string*)
// Field decoder for GROUP fields: pushes a new frame for f. No length is read
// here; the frame is pushed with the UPB_GROUPEND sentinel because a group
// continues until an END_GROUP tag is seen.
static void upb_decode_GROUP(upb_decoder *d, upb_fieldent *f) {
upb_push(d, f, UPB_GROUPEND);
}
static void upb_endgroup(upb_decoder *d, upb_fieldent *f) {
(void)f;
upb_pop(d);
return UPB_CONTINUE;
}
static upb_flow_t upb_push(upb_decoder *d, upb_handlers_fieldent *f,
uint32_t end_offset) {
upb_flow_t flow = upb_dispatch_startsubmsg(&d->dispatcher, f, end_offset);
upb_dstate_setmsgend(d);
return flow;
// Field decoder for length-delimited submessages: reads the length prefix and
// pushes a frame for f whose end offset is relative to d->buf.
static void upb_decode_MESSAGE(upb_decoder *d, upb_fieldent *f) {
  // Read the length first, in its own statement. upb_decode_varint32()
  // advances d->ptr (and may replace the buffer), and C does not specify
  // whether the operands of "+" are evaluated before or after that call, so
  // the offset must be computed only once the length has been consumed.
  uint32_t len = upb_decode_varint32(d, true);
  upb_push(d, f, len + (d->ptr - d->buf));
}
void upb_decoder_decode(upb_decoder *d, upb_status *status) {
d->status = status;
#define CHECK_FLOW(expr) \
switch (expr) { \
case UPB_BREAK: goto callback_err; \
case UPB_SKIPSUBMSG: upb_decoder_skipsubmsg(d); continue; \
default: break; /* continue normally. */ \
}
#define CHECK(expr) if (!expr) { assert(!upb_ok(status)); goto err; }
CHECK(upb_pullbuf(d));
if (upb_dispatch_startmsg(&d->dispatcher) != UPB_CONTINUE) goto err;
// Main loop: executed once per tag/field pair.
while(1) {
// Check for end-of-submessage.
while (d->ptr >= d->submsg_end) {
if (d->ptr > d->submsg_end) {
upb_seterr(d->status, UPB_ERROR, "Bad submessage end.");
goto err;
}
CHECK_FLOW(upb_pop(d));
}
/* The main decoding loop *****************************************************/
// Placeholder: not implemented yet, currently a no-op.
static void upb_unwind(upb_decoder *d) {
// TODO.
(void)d;  // Silences the unused-parameter warning until this is implemented.
}
// Ends the current delimited submessage. If the read cursor has already moved
// past d->submsg_end, the input is malformed: an error is recorded and
// decoding is aborted. Otherwise the current frame is popped.
static void upb_delimend(upb_decoder *d) {
  const bool overshot = d->submsg_end < d->ptr;
  if (overshot) {
    upb_seterr(d->status, UPB_ERROR, "Bad submessage end.");
    upb_decoder_exit(d);
  }
  upb_pop(d);
}
static void upb_decoder_enterjit(upb_decoder *d) {
(void)d;
#ifdef UPB_USE_JIT_X64
if (d->jit_code && d->dispatcher.top == d->dispatcher.stack && d->ptr < d->jit_end) {
// Decodes as many fields as possible, updating d->ptr appropriately,
// before falling through to the slow(er) path.
#ifdef UPB_USE_JIT_X64
void (*upb_jit_decode)(upb_decoder *d) = (void*)d->jit_code;
if (d->jit_code && d->dispatcher.top == d->dispatcher.stack && d->ptr < d->jit_end) {
//const char *before = d->ptr;
//fprintf(stderr, "Entering JIT, JIT bytes left: %zd\n", d->jit_end - d->ptr);
upb_jit_decode(d);
//fprintf(stderr, "Exiting JIT, parsed %zd bytes\n", d->ptr - before);
//fprintf(stderr, "ptr: %p, effective_end: %p, jit_end: %p, effective_end-ptr=%d\n",
// d->ptr, d->effective_end, d->jit_end, d->effective_end - d->ptr);
}
upb_jit_decode(d);
}
#endif
}
// Parse/handle tag.
uint32_t tag;
if (!upb_decode_tag(d, &tag)) {
if (status->code == UPB_EOF && upb_dispatcher_stackempty(&d->dispatcher)) {
// Normal end-of-file.
upb_clearerr(status);
upb_dispatch_endmsg(&d->dispatcher, status);
return;
} else {
if (status->code == UPB_EOF) {
upb_seterr(status, UPB_ERROR,
"Input ended in the middle of a submessage.");
}
goto err;
}
INLINE upb_fieldent *upb_decode_tag(upb_decoder *d) {
while (1) {
uint32_t tag = upb_decode_varint32(d, false);
upb_fieldent *f = upb_dispatcher_lookup(&d->dispatcher, tag);
if (f) {
d->f = f;
return f;
}
// Decode wire data. Hopefully this branch will predict pretty well
// since most types will read a varint here.
upb_value val;
uint8_t wire_type = tag & 0x7;
switch (wire_type) {
case UPB_WIRE_TYPE_START_GROUP:
break; // Nothing to do now, below we will push appropriately.
case UPB_WIRE_TYPE_END_GROUP:
// Strictly speaking we should also check the field number here.
if(d->dispatcher.top->f->type != UPB_TYPE(GROUP)) {
upb_seterr(status, UPB_ERROR, "Unexpected END_GROUP tag.");
goto err;
}
CHECK_FLOW(upb_pop(d));
continue; // We have no value to dispatch.
case UPB_WIRE_TYPE_VARINT:
switch (tag & 0x7) {
case UPB_WIRE_TYPE_VARINT: upb_decode_varint(d); break;
case UPB_WIRE_TYPE_32BIT: upb_decoder_advance(d, 4); break;
case UPB_WIRE_TYPE_64BIT: upb_decoder_advance(d, 8); break;
case UPB_WIRE_TYPE_DELIMITED:
// For the delimited case we are parsing the length.
CHECK(upb_decode_varint(d, &val));
break;
case UPB_WIRE_TYPE_32BIT:
CHECK(upb_decode_fixed(d, 4, &val));
break;
case UPB_WIRE_TYPE_64BIT:
CHECK(upb_decode_fixed(d, 8, &val));
upb_decoder_advance(d, upb_decode_varint32(d, true));
break;
}
// TODO: deliver to unknown field callback.
while (d->ptr >= d->submsg_end) upb_delimend(d);
}
// Look up field by tag number.
upb_dispatcher_field *f = upb_dispatcher_lookup(&d->dispatcher, tag);
if (!f) {
if (wire_type == UPB_WIRE_TYPE_DELIMITED)
CHECK(upb_decode_string(d, &val, &d->tmp));
// TODO.
CHECK_FLOW(upb_dispatch_unknownval(&d->dispatcher, 0, UPB_NO_VALUE));
continue;
}
// Have to handle both packed and non-packed sequences of primitives.
//if (d->dispatcher.top->end_offset == UPB_REPEATEDEND && d->f != f) {
// upb_dispatch_endseq(&d->dispatcher);
//} else if (f->is_repeated_primitive) {
// if ((tag & 0x7) == UPB_WIRE_TYPE_DELIMITED) {
// upb_pushseq(d, f, upb_decode_varint32(d, true) + (d->ptr - d->buf));
// } else if (d->f != f) {
// upb_dispatch_startseq(d, f, UPB_REPEATEDEND);
// }
//}
}
// Perform any further massaging of the data now that we have the field's
// type. Now we can distinguish strings from submessages, and we know
// about zig-zag-encoded types.
// TODO: handle packed encoding.
// TODO: if we were being paranoid, we could check for 32-bit-varint types
// that the top 32 bits all match the highest bit of the low 32 bits.
// If this is not true we are losing data. But the main protobuf library
// doesn't check this, and it would slow us down, so pass for now.
switch (f->type) {
case UPB_TYPE(GROUP):
CHECK_FLOW(upb_push(d, f, UINT32_MAX));
continue; // We have no value to dispatch.
case UPB_TYPE(MESSAGE):
CHECK_FLOW(upb_push(d, f, upb_value_getuint32(val) + (d->ptr - d->buf)));
continue; // We have no value to dispatch.
case UPB_TYPE(STRING):
case UPB_TYPE(BYTES):
CHECK(upb_decode_string(d, &val, &d->tmp));
break;
case UPB_TYPE(SINT32):
upb_value_setint32(&val, upb_zzdec_32(upb_value_getint32(val)));
break;
case UPB_TYPE(SINT64):
upb_value_setint64(&val, upb_zzdec_64(upb_value_getint64(val)));
break;
default:
#ifndef NDEBUG
val.type = upb_types[f->type].inmemory_type;
#endif
break; // Other types need no further processing at this point.
}
CHECK_FLOW(upb_dispatch_value(&d->dispatcher, f, val));
// Finalizes the decoder status after decoding has stopped. Only an EOF status
// is touched: with an empty dispatcher stack it is a normal end of input (the
// status is cleared and endmsg is dispatched); otherwise EOF arrived inside a
// submessage and is turned into an error. Any other status is left untouched.
void upb_decoder_onexit(upb_decoder *d) {
  if (d->status->code != UPB_EOF) return;

  if (upb_dispatcher_stackempty(&d->dispatcher)) {
    // Normal end-of-file.
    upb_clearerr(d->status);
    upb_dispatch_endmsg(&d->dispatcher, d->status);
  } else {
    upb_seterr(d->status, UPB_ERROR, "Input ended mid-submessage.");
  }
}
callback_err:
if (upb_ok(status)) {
upb_seterr(status, UPB_ERROR, "Callback returned UPB_BREAK");
void upb_decoder_decode(upb_decoder *d, upb_status *status) {
if (sigsetjmp(d->exitjmp, 0)) {
upb_decoder_onexit(d);
return;
}
d->status = status;
upb_pullbuf(d, true);
upb_dispatch_startmsg(&d->dispatcher);
while(1) { // Main loop: executed once per tag/field pair.
while (d->ptr >= d->submsg_end) upb_delimend(d);
upb_decoder_enterjit(d);
// if (!d->dispatcher.top->is_packed)
upb_fieldent *f = upb_decode_tag(d);
f->decode(d, f);
}
err:
assert(!upb_ok(status));
}
void upb_decoder_init(upb_decoder *d, upb_handlers *handlers) {
@ -363,9 +335,38 @@ void upb_decoder_init(upb_decoder *d, upb_handlers *handlers) {
if (d->dispatcher.handlers->should_jit) upb_decoder_makejit(d);
#endif
d->bufstr = NULL;
d->buf = NULL;
d->tmp = NULL;
upb_string_recycle(&d->tmp);
// Set function pointers for each field's decode function.
for (int i = 0; i < handlers->msgs_len; i++) {
upb_msgent *m = &handlers->msgs[i];
for(upb_inttable_iter i = upb_inttable_begin(&m->fieldtab); !upb_inttable_done(i);
i = upb_inttable_next(&m->fieldtab, i)) {
upb_fieldent *f = upb_inttable_iter_value(i);
switch (f->type) {
case UPB_TYPE(INT32): f->decode = &upb_decode_INT32; break;
case UPB_TYPE(INT64): f->decode = &upb_decode_INT64; break;
case UPB_TYPE(UINT32): f->decode = &upb_decode_UINT32; break;
case UPB_TYPE(UINT64): f->decode = &upb_decode_UINT64; break;
case UPB_TYPE(FIXED32): f->decode = &upb_decode_FIXED32; break;
case UPB_TYPE(FIXED64): f->decode = &upb_decode_FIXED64; break;
case UPB_TYPE(SFIXED32): f->decode = &upb_decode_SFIXED32; break;
case UPB_TYPE(SFIXED64): f->decode = &upb_decode_SFIXED64; break;
case UPB_TYPE(BOOL): f->decode = &upb_decode_BOOL; break;
case UPB_TYPE(ENUM): f->decode = &upb_decode_ENUM; break;
case UPB_TYPE(DOUBLE): f->decode = &upb_decode_DOUBLE; break;
case UPB_TYPE(FLOAT): f->decode = &upb_decode_FLOAT; break;
case UPB_TYPE(SINT32): f->decode = &upb_decode_SINT32; break;
case UPB_TYPE(SINT64): f->decode = &upb_decode_SINT64; break;
case UPB_TYPE(STRING): f->decode = &upb_decode_STRING; break;
case UPB_TYPE(BYTES): f->decode = &upb_decode_STRING; break;
case UPB_TYPE(GROUP): f->decode = &upb_decode_GROUP; break;
case UPB_TYPE(MESSAGE): f->decode = &upb_decode_MESSAGE; break;
case UPB_TYPE_ENDGROUP: f->decode = &upb_endgroup; break;
}
}
}
}
void upb_decoder_reset(upb_decoder *d, upb_bytesrc *bytesrc, void *closure) {

@ -17,6 +17,7 @@
#ifndef UPB_DECODER_H_
#define UPB_DECODER_H_
#include <setjmp.h>
#include <stdbool.h>
#include <stdint.h>
#include "upb_stream.h"
@ -60,6 +61,8 @@ struct _upb_decoder {
// MIN(end, submsg_end)
const char *effective_end;
upb_fieldent *f;
// Where we will store any errors that occur.
upb_status *status;
@ -72,6 +75,7 @@ struct _upb_decoder {
char *debug_info;
struct dasm_State *dynasm;
sigjmp_buf exitjmp;
};
// A upb_decoder decodes the binary protocol buffer format, writing the data it

@ -135,7 +135,7 @@ void upb_reg_jit_gdb(upb_decoder *d) {
|// Checks PTR for end-of-buffer.
|.macro check_eob, m
| cmp PTR, DECODER->effective_end
|| if (m->endgroup_f) {
|| if (m->is_group) {
| jae ->exit_jit
|| } else {
| jae =>m->jit_endofbuf_pclabel
@ -194,7 +194,7 @@ void upb_reg_jit_gdb(upb_decoder *d) {
|
|.macro setmsgend, m
| mov rsi, DECODER->jit_end
|| if (m->endgroup_f) {
|| if (m->is_group) {
| mov64 rax, 0xffffffffffffffff
| mov qword DECODER->submsg_end, rax
| mov DECODER->effective_end, rsi
@ -253,8 +253,8 @@ void upb_reg_jit_gdb(upb_decoder *d) {
// PTR should point to the beginning of the tag.
static void upb_decoder_jit_field(upb_decoder *d, uint32_t tag, uint32_t next_tag,
upb_handlers_msgent *m,
upb_handlers_fieldent *f, upb_handlers_fieldent *next_f) {
upb_msgent *m,
upb_fieldent *f, upb_fieldent *next_f) {
int tag_size = upb_value_size(tag);
// PC-label for the dispatch table.
@ -388,7 +388,7 @@ static void upb_decoder_jit_field(upb_decoder *d, uint32_t tag, uint32_t next_ta
| mov DECODER->dispatcher.top, rax
| mov FRAME, rax
upb_handlers_msgent *sub_m = upb_handlers_getmsgent(d->dispatcher.handlers, f);
upb_msgent *sub_m = upb_handlers_getmsgent(d->dispatcher.handlers, f);
if (sub_m->jit_parent_field_done_pclabel != UPB_MULTIPLE) {
| jmp =>sub_m->jit_startmsg_pclabel;
} else {
@ -433,10 +433,11 @@ static void upb_decoder_jit_field(upb_decoder *d, uint32_t tag, uint32_t next_ta
}
// qsort() comparator that orders uint32_t keys ascending.
// Returns <0, 0 or >0 when *a is less than, equal to or greater than *b.
static int upb_compare_uint32(const void *a, const void *b) {
  // TODO: always put ENDGROUP at the end.
  const uint32_t lhs = *(const uint32_t*)a;
  const uint32_t rhs = *(const uint32_t*)b;
  // Compare instead of subtracting: an unsigned difference converted to int
  // has the wrong sign whenever the keys differ by more than INT_MAX.
  return (lhs > rhs) - (lhs < rhs);
}
static void upb_decoder_jit_msg(upb_decoder *d, upb_handlers_msgent *m) {
static void upb_decoder_jit_msg(upb_decoder *d, upb_msgent *m) {
|=>m->jit_startmsg_pclabel:
// Call startmsg handler (if any):
if (m->startmsg != upb_startmsg_nop) {
@ -466,32 +467,24 @@ static void upb_decoder_jit_msg(upb_decoder *d, upb_handlers_msgent *m) {
}
qsort(keys, num_keys, sizeof(uint32_t), &upb_compare_uint32);
upb_handlers_fieldent *last_f = NULL;
upb_fieldent *last_f = NULL;
uint32_t last_tag = 0;
for(int i = 0; i < num_keys; i++) {
uint32_t key = keys[i];
upb_handlers_fieldent *f = upb_inttable_lookup(&m->fieldtab, key);
uint32_t tag = upb_vencode(key);
upb_fieldent *f = upb_inttable_lookup(&m->fieldtab, key);
uint32_t tag = upb_vencode32(key);
if (last_f) upb_decoder_jit_field(d, last_tag, tag, m, last_f, f);
last_tag = tag;
last_f = f;
}
upb_decoder_jit_field(d, last_tag, 0, m, last_f, NULL);
free(keys);
if (m->endgroup_f) {
uint32_t tag = m->endgroup_f->number << 3 | UPB_WIRE_TYPE_END_GROUP;
upb_decoder_jit_field(d, last_tag, tag, m, last_f, m->endgroup_f);
upb_decoder_jit_field(d, tag, 0, m, m->endgroup_f, NULL);
} else {
upb_decoder_jit_field(d, last_tag, 0, m, last_f, NULL);
}
// --------- New code section (does not fall through) ------------------------
// End-of-buf / end-of-message.
if (!m->endgroup_f) {
if (!m->is_group) {
// This case doesn't exist for groups, because there eob really means
// eob, so that case just exits the jit directly.
|=>m->jit_endofbuf_pclabel:
@ -560,16 +553,14 @@ static void upb_decoder_jit(upb_decoder *d) {
| callp abort
}
void upb_decoder_jit_assignfieldlabs(upb_handlers_fieldent *f,
void upb_decoder_jit_assignfieldlabs(upb_fieldent *f,
uint32_t *pclabel_count) {
f->jit_pclabel = (*pclabel_count)++;
f->jit_pclabel_notypecheck = (*pclabel_count)++;
f->jit_submsg_done_pclabel = (*pclabel_count)++;
}
void upb_decoder_jit_assignmsglabs(upb_handlers *h,
upb_handlers_msgent *m,
uint32_t *pclabel_count) {
void upb_decoder_jit_assignmsglabs(upb_msgent *m, uint32_t *pclabel_count) {
m->jit_startmsg_pclabel = (*pclabel_count)++;
m->jit_endofbuf_pclabel = (*pclabel_count)++;
m->jit_endofmsg_pclabel = (*pclabel_count)++;
@ -581,30 +572,22 @@ void upb_decoder_jit_assignmsglabs(upb_handlers *h,
i = upb_inttable_next(&m->fieldtab, i)) {
uint32_t key = upb_inttable_iter_key(i);
m->max_field_number = UPB_MAX(m->max_field_number, key);
upb_handlers_fieldent *f = upb_inttable_iter_value(i);
upb_fieldent *f = upb_inttable_iter_value(i);
upb_decoder_jit_assignfieldlabs(f, pclabel_count);
if (f->type == UPB_TYPE(GROUP)) {
upb_handlers_msgent *sub_m = upb_handlers_getmsgent(h, f);
sub_m->endgroup_f = malloc(sizeof(*sub_m->endgroup_f));
memcpy(sub_m->endgroup_f, f, sizeof(*f));
sub_m->endgroup_f->type = UPB_TYPE_ENDGROUP;
upb_decoder_jit_assignfieldlabs(sub_m->endgroup_f, pclabel_count);
}
}
// XXX: Won't work for large field numbers; will need to use a upb_table.
// +2 to cover group case, in case group number is larger than all tags.
m->tablearray = malloc((m->max_field_number + 2) * sizeof(void*));
m->tablearray = malloc((m->max_field_number + 1) * sizeof(void*));
}
// Second pass: for messages that have only one parent, link them to the field
// from which they are called.
void upb_decoder_jit_assignmsglabs2(upb_handlers *h, upb_handlers_msgent *m) {
void upb_decoder_jit_assignmsglabs2(upb_handlers *h, upb_msgent *m) {
upb_inttable_iter i;
for(i = upb_inttable_begin(&m->fieldtab); !upb_inttable_done(i);
i = upb_inttable_next(&m->fieldtab, i)) {
upb_handlers_fieldent *f = upb_inttable_iter_value(i);
upb_fieldent *f = upb_inttable_iter_value(i);
if (upb_issubmsgtype(f->type)) {
upb_handlers_msgent *sub_m = upb_handlers_getmsgent(h, f);
upb_msgent *sub_m = upb_handlers_getmsgent(h, f);
if (sub_m->jit_parent_field_done_pclabel == UPB_NONE) {
sub_m->jit_parent_field_done_pclabel = f->jit_submsg_done_pclabel;
} else {
@ -621,7 +604,7 @@ void upb_decoder_makejit(upb_decoder *d) {
uint32_t pclabel_count = 1;
upb_handlers *h = d->dispatcher.handlers;
for (int i = 0; i < h->msgs_len; i++)
upb_decoder_jit_assignmsglabs(h, &h->msgs[i], &pclabel_count);
upb_decoder_jit_assignmsglabs(&h->msgs[i], &pclabel_count);
for (int i = 0; i < h->msgs_len; i++)
upb_decoder_jit_assignmsglabs2(h, &h->msgs[i]);
@ -648,9 +631,9 @@ void upb_decoder_makejit(upb_decoder *d) {
// Create dispatch tables.
for (int i = 0; i < h->msgs_len; i++) {
upb_handlers_msgent *m = &h->msgs[i];
upb_msgent *m = &h->msgs[i];
for (uint32_t j = 0; j <= m->max_field_number; j++) {
upb_handlers_fieldent *f = NULL;
upb_fieldent *f = NULL;
for (int k = 0; k < 8; k++) {
f = upb_inttable_lookup(&m->fieldtab, (j << 3) | k);
if (f) break;
@ -662,9 +645,6 @@ void upb_decoder_makejit(upb_decoder *d) {
m->tablearray[j] = d->jit_code + dasm_getpclabel(d, 0);
}
}
if (m->endgroup_f) {
m->tablearray[m->endgroup_f->number] = d->jit_code + dasm_getpclabel(d, m->endgroup_f->jit_pclabel);
}
}
dasm_free(d);

@ -117,12 +117,14 @@ struct _upb_fielddef {
// Reports whether the given field type is one of the two submessage kinds,
// i.e. UPB_TYPE(GROUP) or UPB_TYPE(MESSAGE).
INLINE bool upb_issubmsgtype(upb_fieldtype_t type) {
  const bool is_group = (type == UPB_TYPE(GROUP));
  const bool is_message = (type == UPB_TYPE(MESSAGE));
  return is_group || is_message;
}
INLINE bool upb_issubmsg(upb_fielddef *f) {
return upb_issubmsgtype(f->type);
// Reports whether the given field type is one of the two string-like kinds,
// i.e. UPB_TYPE(STRING) or UPB_TYPE(BYTES).
INLINE bool upb_isstringtype(upb_fieldtype_t type) {
  const bool is_text = (type == UPB_TYPE(STRING));
  const bool is_bytes = (type == UPB_TYPE(BYTES));
  return is_text || is_bytes;
}
INLINE bool upb_isstring(upb_fielddef *f) {
return f->type == UPB_TYPE(STRING) || f->type == UPB_TYPE(BYTES);
// A type is "primitive" when it is neither a submessage type (per
// upb_issubmsgtype) nor a string type (per upb_isstringtype).
INLINE bool upb_isprimitivetype(upb_fieldtype_t type) {
  return !(upb_issubmsgtype(type) || upb_isstringtype(type));
}
// Field-level convenience wrapper: applies upb_issubmsgtype to f->type.
INLINE bool upb_issubmsg(upb_fielddef *f) {
  return upb_issubmsgtype(f->type);
}
// Field-level convenience wrapper: applies upb_isstringtype to f->type.
INLINE bool upb_isstring(upb_fielddef *f) {
  return upb_isstringtype(f->type);
}
// Reports whether the field carries the REPEATED label.
INLINE bool upb_isarray(upb_fielddef *f) {
  const bool repeated = (f->label == UPB_LABEL(REPEATED));
  return repeated;
}

@ -207,7 +207,7 @@ static upb_flow_t upb_msg_dispatch(upb_msg *msg, upb_msgdef *md,
upb_dispatcher *d);
static upb_flow_t upb_msg_pushval(upb_value val, upb_fielddef *f,
upb_dispatcher *d, upb_handlers_fieldent *hf) {
upb_dispatcher *d, upb_fieldent *hf) {
#define CHECK_FLOW(x) do { \
upb_flow_t flow = x; if (flow != UPB_CONTINUE) return flow; \
} while(0)
@ -237,7 +237,7 @@ static upb_flow_t upb_msg_dispatch(upb_msg *msg, upb_msgdef *md,
for(i = upb_msg_begin(md); !upb_msg_done(i); i = upb_msg_next(md, i)) {
upb_fielddef *f = upb_msg_iter_field(i);
if (!upb_msg_has(msg, f)) continue;
upb_handlers_fieldent *hf = upb_dispatcher_lookup(d, f->number);
upb_fieldent *hf = upb_dispatcher_lookup(d, f->number);
if (!hf) continue;
upb_value val = upb_msg_get(msg, f);
if (upb_isarray(f)) {
@ -464,11 +464,12 @@ upb_sflow_t upb_msgsink_startsubmsg_r(void *_m, upb_value _fval) {
void upb_msg_regdhandlers(upb_handlers *h) {
upb_register_all(h, NULL, NULL, NULL, NULL, NULL, NULL);
for (int i = 0; i < h->msgs_len; i++) {
upb_handlers_msgent *m = &h->msgs[i];
upb_msgent *m = &h->msgs[i];
upb_inttable_iter iter = upb_inttable_begin(&m->fieldtab);
for(; !upb_inttable_done(iter);
iter = upb_inttable_next(&m->fieldtab, iter)) {
upb_handlers_fieldent *fe = upb_inttable_iter_value(iter);
upb_fieldent *fe = upb_inttable_iter_value(iter);
if (fe->type == UPB_TYPE_ENDGROUP) continue;
upb_fielddef *f = upb_value_getfielddef(fe->fval);
uint16_t msg_size = 0;
uint8_t set_flags_bytes = 0;

@ -47,13 +47,13 @@ upb_flow_t upb_unknownval_nop(void *closure, upb_field_number_t fieldnum,
return UPB_CONTINUE;
}
static void upb_msgent_init(upb_handlers_msgent *e) {
upb_inttable_init(&e->fieldtab, 8, sizeof(upb_handlers_fieldent));
static void upb_msgent_init(upb_msgent *e) {
upb_inttable_init(&e->fieldtab, 8, sizeof(upb_fieldent));
e->startmsg = &upb_startmsg_nop;
e->endmsg = &upb_endmsg_nop;
e->unknownval = &upb_unknownval_nop;
e->endgroup_f = NULL;
e->tablearray = NULL;
e->is_group = false;
}
void upb_handlers_init(upb_handlers *h, upb_msgdef *md) {
@ -76,20 +76,19 @@ void upb_handlers_uninit(upb_handlers *h) {
for (int i = 0; i < h->msgs_len; i++) {
upb_inttable_free(&h->msgs[i].fieldtab);
free(h->msgs[i].tablearray);
free(h->msgs[i].endgroup_f);
}
free(h->msgs);
upb_msgdef_unref(h->toplevel_msgdef);
}
static upb_handlers_fieldent *upb_handlers_getorcreate_without_fval(
static upb_fieldent *upb_handlers_getorcreate_without_fval(
upb_handlers *h, upb_field_number_t fieldnum, upb_fieldtype_t type, bool repeated) {
uint32_t tag = fieldnum << 3 | upb_types[type].native_wire_type;
upb_handlers_fieldent *f =
upb_inttable_lookup(&h->msgent->fieldtab, tag);
upb_fieldent *f = upb_inttable_lookup(&h->msgent->fieldtab, tag);
if (!f) {
upb_handlers_fieldent new_f = {false, type, repeated, fieldnum, -1, UPB_NO_VALUE,
{&upb_value_nop}, &upb_endsubmsg_nop, 0, 0, 0};
upb_fieldent new_f = {false, type, repeated,
repeated && upb_isprimitivetype(type), fieldnum, -1, UPB_NO_VALUE,
{&upb_value_nop}, &upb_endsubmsg_nop, 0, 0, 0, NULL};
if (upb_issubmsgtype(type)) new_f.cb.startsubmsg = &upb_startsubmsg_nop;
upb_inttable_insert(&h->msgent->fieldtab, tag, &new_f);
@ -100,10 +99,10 @@ static upb_handlers_fieldent *upb_handlers_getorcreate_without_fval(
return f;
}
static upb_handlers_fieldent *upb_handlers_getorcreate(
static upb_fieldent *upb_handlers_getorcreate(
upb_handlers *h, upb_field_number_t fieldnum,
upb_fieldtype_t type, bool repeated, upb_value fval) {
upb_handlers_fieldent *f =
upb_fieldent *f =
upb_handlers_getorcreate_without_fval(h, fieldnum, type, repeated);
f->fval = fval;
return f;
@ -164,7 +163,7 @@ void upb_register_typed_submsg(upb_handlers *h, upb_field_number_t fieldnum,
upb_startsubmsg_handler_t start,
upb_endsubmsg_handler_t end,
upb_value fval) {
upb_handlers_fieldent *f = upb_handlers_getorcreate(h, fieldnum, type, repeated, fval);
upb_fieldent *f = upb_handlers_getorcreate(h, fieldnum, type, repeated, fval);
f->cb.startsubmsg = start ? start : &upb_startsubmsg_nop;
f->endsubmsg = end ? end : &upb_endsubmsg_nop;
}
@ -172,14 +171,14 @@ void upb_register_typed_submsg(upb_handlers *h, upb_field_number_t fieldnum,
void upb_handlers_typed_link(upb_handlers *h, upb_field_number_t fieldnum,
upb_fieldtype_t type, bool repeated, int frames) {
assert(frames <= (h->top - h->stack));
upb_handlers_fieldent *f =
upb_fieldent *f =
upb_handlers_getorcreate_without_fval(h, fieldnum, type, repeated);
f->msgent_index = (h->top - frames)->msgent_index;
}
void upb_handlers_typed_push(upb_handlers *h, upb_field_number_t fieldnum,
upb_fieldtype_t type, bool repeated) {
upb_handlers_fieldent *f =
upb_fieldent *f =
upb_handlers_getorcreate_without_fval(h, fieldnum, type, repeated);
if (h->top == h->limit) abort(); // TODO: make growable.
++h->top;
@ -201,6 +200,15 @@ void upb_handlers_typed_push(upb_handlers *h, upb_field_number_t fieldnum,
assert(f);
h->top->msgdef = upb_downcast_msgdef(f->def);
}
if (type == UPB_TYPE(GROUP)) {
// Insert a fieldent for ENDGROUP so we can easily dispatch endgroup when
// we see it in the submessage.
// TODO: assert that no other fields in the group are registered with the
// same name or number.
upb_register_typed_submsg(h, fieldnum, UPB_TYPE_ENDGROUP, false, NULL, NULL,
UPB_NO_VALUE);
h->msgent->is_group = true;
}
}
void upb_handlers_push(upb_handlers *h, upb_fielddef *f,
@ -226,15 +234,15 @@ void upb_handlers_pop(upb_handlers *h, upb_fielddef *f) {
/* upb_dispatcher *************************************************************/
static upb_handlers_fieldent toplevel_f = {
false, UPB_TYPE(GROUP), false, 0,
static upb_fieldent toplevel_f = {
false, UPB_TYPE(GROUP), false, false, 0,
0, // msgent_index
#ifdef NDEBUG
{{0}},
#else
{{0}, UPB_VALUETYPE_RAW},
#endif
{NULL}, NULL, 0, 0, 0};
{NULL}, NULL, 0, 0, 0, NULL};
void upb_dispatcher_init(upb_dispatcher *d, upb_handlers *h) {
d->handlers = h;
@ -255,6 +263,7 @@ void upb_dispatcher_reset(upb_dispatcher *d, void *top_closure, uint32_t top_end
d->top = d->stack;
d->top->closure = top_closure;
d->top->end_offset = top_end_offset;
d->top->is_packed = false;
}
void upb_dispatcher_uninit(upb_dispatcher *d) {
@ -285,8 +294,7 @@ void upb_dispatch_endmsg(upb_dispatcher *d, upb_status *status) {
upb_copyerr(status, &d->status);
}
upb_flow_t upb_dispatch_startsubmsg(upb_dispatcher *d,
upb_dispatcher_field *f,
upb_flow_t upb_dispatch_startsubmsg(upb_dispatcher *d, upb_fieldent *f,
size_t userval) {
++d->current_depth;
if (upb_dispatcher_skipping(d)) return UPB_SKIPSUBMSG;
@ -308,6 +316,7 @@ upb_flow_t upb_dispatch_startsubmsg(upb_dispatcher *d,
d->top->f = f;
d->top->end_offset = userval;
d->top->closure = sflow.closure;
d->top->is_packed = false;
d->msgent = upb_handlers_getmsgent(d->handlers, f);
d->dispatch_table = &d->msgent->fieldtab;
return upb_dispatch_startmsg(d);
@ -319,7 +328,7 @@ upb_flow_t upb_dispatch_endsubmsg(upb_dispatcher *d) {
flow = UPB_SKIPSUBMSG;
} else {
assert(d->top > d->stack);
upb_dispatcher_field *old_f = d->top->f;
upb_fieldent *old_f = d->top->f;
d->msgent->endmsg(d->top->closure, &d->status);
--d->top;
d->msgent = upb_handlers_getmsgent(d->handlers, d->top->f);

@ -88,11 +88,12 @@ upb_sflow_t upb_startsubmsg_nop(void *closure, upb_value fval);
upb_flow_t upb_endsubmsg_nop(void *closure, upb_value fval);
upb_flow_t upb_unknownval_nop(void *closure, upb_field_number_t fieldnum,
upb_value val);
typedef struct {
struct _upb_decoder;
typedef struct _upb_fieldent {
bool junk;
upb_fieldtype_t type;
bool repeated;
bool is_repeated_primitive;
uint32_t number;
// For upb_issubmsg(f) only, the index into the msgdef array of the submsg.
// -1 if unset (indicates that submsg should be skipped).
@ -106,23 +107,26 @@ typedef struct {
uint32_t jit_pclabel;
uint32_t jit_pclabel_notypecheck;
uint32_t jit_submsg_done_pclabel;
} upb_handlers_fieldent;
void (*decode)(struct _upb_decoder *d, struct _upb_fieldent *f);
} upb_fieldent;
typedef struct _upb_handlers_msgent {
typedef struct _upb_msgent {
upb_startmsg_handler_t startmsg;
upb_endmsg_handler_t endmsg;
upb_unknownval_handler_t unknownval;
// Maps field number -> upb_handlers_fieldent.
// Maps field number -> upb_fieldent.
upb_inttable fieldtab;
uint32_t jit_startmsg_pclabel;
uint32_t jit_endofbuf_pclabel;
uint32_t jit_endofmsg_pclabel;
uint32_t jit_unknownfield_pclabel;
upb_handlers_fieldent *endgroup_f; // NULL if not a group.
bool is_group;
int32_t jit_parent_field_done_pclabel;
uint32_t max_field_number;
// Currently keyed on field number. Could also try keying it
// on encoded or decoded tag, or on encoded field number.
void **tablearray;
} upb_handlers_msgent;
} upb_msgent;
typedef struct {
upb_msgdef *msgdef;
@ -131,10 +135,10 @@ typedef struct {
struct _upb_handlers {
// Array of msgdefs, [0]=toplevel.
upb_handlers_msgent *msgs;
upb_msgent *msgs;
int msgs_len, msgs_size;
upb_msgdef *toplevel_msgdef; // We own a ref.
upb_handlers_msgent *msgent;
upb_msgent *msgent;
upb_handlers_frame stack[UPB_MAX_TYPE_DEPTH], *top, *limit;
bool should_jit;
};
@ -272,12 +276,11 @@ void upb_handlers_typed_push(upb_handlers *h, upb_field_number_t fieldnum,
upb_fieldtype_t type, bool repeated);
void upb_handlers_typed_pop(upb_handlers *h);
INLINE upb_handlers_msgent *upb_handlers_getmsgent(upb_handlers *h,
upb_handlers_fieldent *f) {
INLINE upb_msgent *upb_handlers_getmsgent(upb_handlers *h, upb_fieldent *f) {
assert(f->msgent_index != -1);
return &h->msgs[f->msgent_index];
}
upb_handlers_fieldent *upb_handlers_lookup(upb_inttable *dispatch_table, upb_field_number_t fieldnum);
upb_fieldent *upb_handlers_lookup(upb_inttable *dispatch_table, upb_field_number_t fieldnum);
/* upb_dispatcher *************************************************************/
@ -298,11 +301,12 @@ upb_handlers_fieldent *upb_handlers_lookup(upb_inttable *dispatch_table, upb_fie
// consumed, like if this is a submessage of a larger stream.
typedef struct {
upb_handlers_fieldent *f;
upb_fieldent *f;
void *closure;
// Relative to the beginning of this buffer.
// For groups and the top-level: UINT32_MAX.
uint32_t end_offset;
bool is_packed; // == !upb_issubmsg(f) && end_offset != UPB_REPATEDEND
} upb_dispatcher_frame;
typedef struct {
@ -311,7 +315,7 @@ typedef struct {
upb_handlers *handlers;
// Msg and dispatch table for the current level.
upb_handlers_msgent *msgent;
upb_msgent *msgent;
upb_inttable *dispatch_table;
// The number of startsubmsg calls without a corresponding endsubmsg call.
@ -342,8 +346,6 @@ INLINE bool upb_dispatcher_noframe(upb_dispatcher *d) {
}
typedef upb_handlers_fieldent upb_dispatcher_field;
void upb_dispatcher_init(upb_dispatcher *d, upb_handlers *h);
void upb_dispatcher_reset(upb_dispatcher *d, void *top_closure, uint32_t top_end_offset);
void upb_dispatcher_uninit(upb_dispatcher *d);
@ -352,20 +354,20 @@ upb_flow_t upb_dispatch_startmsg(upb_dispatcher *d);
void upb_dispatch_endmsg(upb_dispatcher *d, upb_status *status);
// Looks up a field by number for the current message.
INLINE upb_dispatcher_field *upb_dispatcher_lookup(upb_dispatcher *d,
upb_field_number_t n) {
return (upb_dispatcher_field*)upb_inttable_fastlookup(
d->dispatch_table, n, sizeof(upb_dispatcher_field));
INLINE upb_fieldent *upb_dispatcher_lookup(upb_dispatcher *d,
upb_field_number_t n) {
return (upb_fieldent*)upb_inttable_fastlookup(
d->dispatch_table, n, sizeof(upb_fieldent));
}
// Dispatches values or submessages -- the client is responsible for having
// previously looked up the field.
upb_flow_t upb_dispatch_startsubmsg(upb_dispatcher *d,
upb_dispatcher_field *f,
upb_fieldent *f,
size_t userval);
upb_flow_t upb_dispatch_endsubmsg(upb_dispatcher *d);
INLINE upb_flow_t upb_dispatch_value(upb_dispatcher *d, upb_dispatcher_field *f,
INLINE upb_flow_t upb_dispatch_value(upb_dispatcher *d, upb_fieldent *f,
upb_value val) {
if (upb_dispatcher_skipping(d)) return UPB_SKIPSUBMSG;
upb_flow_t flow = f->cb.value(d->top->closure, f->fval, val);

@ -182,6 +182,11 @@ INLINE void upb_string_recycle(upb_string **_str) {
str->len = 0;
_upb_string_release(str);
} else {
//if (!str) {
// printf("!str\n");
//}
//else if (upb_atomic_read(&str->refcount) != 1) { printf("refcount: %d\n", upb_atomic_read(&str->refcount)); }
//else { printf("Some other reason.\n"); }
upb_string_unref(str);
*_str = upb_string_new();
}

@ -7,9 +7,10 @@
#include "upb_textprinter.h"
#include <ctype.h>
#include <float.h>
#include <inttypes.h>
#include <stdlib.h>
#include <ctype.h>
struct _upb_textprinter {
upb_bytesink *bytesink;
@ -99,10 +100,12 @@ static upb_flow_t upb_textprinter_value(void *_p, upb_value fval,
#define CASE(fmtstr, member) \
CHECK(upb_bytesink_printf(p->bytesink, &p->status, fmtstr, upb_value_get ## member(val))); break;
switch(f->type) {
// TODO: figure out what we should really be doing for these
// floating-point formats.
case UPB_TYPE(DOUBLE):
CASE("%0.f", double);
CHECK(upb_bytesink_printf(p->bytesink, &p->status, "%.*g", DBL_DIG, upb_value_getdouble(val))); break;
case UPB_TYPE(FLOAT):
CASE("%0.f", float)
CHECK(upb_bytesink_printf(p->bytesink, &p->status, "%.*g", FLT_DIG+2, upb_value_getfloat(val))); break;
case UPB_TYPE(INT64):
case UPB_TYPE(SFIXED64):
case UPB_TYPE(SINT64):

@ -0,0 +1,54 @@
/*
* upb - a minimalist implementation of protocol buffers.
*
* Copyright (c) 2011 Google Inc. See LICENSE for details.
* Author: Josh Haberman <jhaberman@gmail.com>
*/
#include "upb_varint.h"
// Finds where the encoded varint held in v ends.  The result has exactly one
// bit set: the lowest bit position 7, 15, 23, ... (the top bit of a byte) that
// is clear in v.  Subtracting one from the result therefore gives a mask of
// all bits below that position.  If the top bit of every byte in v is set
// (the varint does not terminate within these 8 bytes), the result is 0.
INLINE uint64_t upb_get_vstopbit(uint64_t v) {
  // Force the low 7 bits of every byte to 1 so that the only bits that can
  // still be zero are the per-byte top bits.
  const uint64_t filled = v | 0x7f7f7f7f7f7f7f7fULL;
  // Adding one carries through the trailing run of ones and sets the lowest
  // zero bit; masking with ~filled isolates exactly that bit.  If there is no
  // zero bit the addition wraps to 0 and so does the result.
  const uint64_t carried = filled + 1;
  return carried & ~filled;
}
// Returns the mask of all bits strictly below the stop bit of v (see
// upb_get_vstopbit).  When the stop bit is 0 the subtraction wraps, giving a
// mask with every bit set.
INLINE uint64_t upb_get_vmask(uint64_t v) {
  const uint64_t stop_bit = upb_get_vstopbit(v);
  return stop_bit - 1;
}
// Decodes up to 8 varint bytes starting at r.p using additions instead of
// shifts to pack the 7-bit groups together.
//
// Always reads 8 bytes from r.p.  On success returns a result whose pointer is
// advanced past the terminating byte and whose value is r.val OR'd with the
// newly decoded bits shifted so that they start at bit 14.
// NOTE(review): this presumably assumes the caller has already consumed two
// bytes and stored their 14 payload bits in r.val -- confirm against callers.
// If none of the 8 bytes terminates the varint, returns {NULL pointer, 0}.
upb_decoderet upb_vdecode_max8_massimino(upb_decoderet r) {
  uint64_t raw;
  memcpy(&raw, r.p, sizeof(raw));

  const uint64_t stop_bit = upb_get_vstopbit(raw);
  if (stop_bit == 0) {
    // Error: unterminated varint.
    upb_decoderet failure = {(void*)0, 0};
    return failure;
  }

  // Keep only the 7 payload bits of each byte that belongs to this varint.
  uint64_t bits = raw & 0x7f7f7f7f7f7f7f7fULL & (stop_bit - 1);
  // Each step adds a multiple of the low half of every lane to itself, which
  // moves that half left by 1, 2 and then 4 bits and closes the gaps left by
  // the stripped continuation bits.  Afterwards the payload is contiguous
  // starting at bit 7.
  bits += bits & 0x007f007f007f007fULL;
  bits += 3 * (bits & 0x0000ffff0000ffffULL);
  bits += 15 * (bits & 0x00000000ffffffffULL);

  // The stop bit sits at the top of the terminating byte, so (index + 1) / 8
  // is the number of bytes consumed.
  const int consumed = (__builtin_ctzll(stop_bit) + 1) / 8;
  upb_decoderet result = {r.p + consumed, r.val | (bits << 7)};
  return result;
}
// Decodes up to 8 varint bytes starting at r.p without branching (except for
// the error case), compacting the 7-bit groups with masked right shifts.
//
// Always reads 8 bytes from r.p.  On success returns a result whose pointer is
// advanced past the terminating byte and whose value is r.val OR'd with the
// newly decoded bits shifted left by 14.
// NOTE(review): this presumably assumes the caller has already consumed two
// bytes and stored their 14 payload bits in r.val -- confirm against callers.
// If none of the 8 bytes terminates the varint, returns {NULL pointer, 0}.
upb_decoderet upb_vdecode_max8_wright(upb_decoderet r) {
  uint64_t raw;
  memcpy(&raw, r.p, sizeof(raw));

  const uint64_t stop_bit = upb_get_vstopbit(raw);
  if (stop_bit == 0) {
    // Error: unterminated varint.
    upb_decoderet failure = {(void*)0, 0};
    return failure;
  }

  // Discard everything at or above the stop bit.
  uint64_t bits = raw & (stop_bit - 1);
  // Shift the upper half of each lane down by 1, 2 and then 4 bits.  The masks
  // in the first step also drop the continuation bits, so after the third
  // step the payload is contiguous starting at bit 0.
  bits = ((bits & 0x7f007f007f007f00ULL) >> 1) | (bits & 0x007f007f007f007fULL);
  bits = ((bits & 0xffff0000ffff0000ULL) >> 2) | (bits & 0x0000ffff0000ffffULL);
  bits = ((bits & 0xffffffff00000000ULL) >> 4) | (bits & 0x00000000ffffffffULL);

  // The stop bit sits at the top of the terminating byte, so (index + 1) / 8
  // is the number of bytes consumed.
  const int consumed = (__builtin_ctzll(stop_bit) + 1) / 8;
  upb_decoderet result = {r.p + consumed, r.val | (bits << 14)};
  return result;
}

@ -75,53 +75,11 @@ done:
return r;
}
// Given an encoded varint v, returns an integer with a single bit set that
// indicates the end of the varint. Subtracting one from this value will
// yield a mask that leaves only bits that are part of the varint. Returns
// 0 if the varint is unterminated.
INLINE uint64_t upb_get_vstopbit(uint64_t v) {
uint64_t cbits = v | 0x7f7f7f7f7f7f7f7fULL;
return ~cbits & (cbits+1);
}
INLINE uint64_t upb_get_vmask(uint64_t v) { return upb_get_vstopbit(v) - 1; }
// Decodes a varint of at most 8 bytes without branching (except for error).
INLINE upb_decoderet upb_vdecode_max8_wright(upb_decoderet r) {
uint64_t b;
memcpy(&b, r.p, sizeof(b));
uint64_t stop_bit = upb_get_vstopbit(b);
b &= (stop_bit - 1);
b = ((b & 0x7f007f007f007f00) >> 1) | (b & 0x007f007f007f007f);
b = ((b & 0xffff0000ffff0000) >> 2) | (b & 0x0000ffff0000ffff);
b = ((b & 0xffffffff00000000) >> 4) | (b & 0x00000000ffffffff);
if (stop_bit == 0) {
// Error: unterminated varint.
upb_decoderet err_r = {(void*)0, 0};
return err_r;
}
upb_decoderet my_r = {r.p + ((__builtin_ctzll(stop_bit) + 1) / 8),
r.val | (b << 14)};
return my_r;
}
upb_decoderet upb_vdecode_max8_wright(upb_decoderet r);
// Another implementation of the previous.
INLINE upb_decoderet upb_vdecode_max8_massimino(upb_decoderet r) {
uint64_t b;
memcpy(&b, r.p, sizeof(b));
uint64_t stop_bit = upb_get_vstopbit(b);
b = (b & 0x7f7f7f7f7f7f7f7fULL) & (stop_bit - 1);
b += b & 0x007f007f007f007fULL;
b += 3 * (b & 0x0000ffff0000ffffULL);
b += 15 * (b & 0x00000000ffffffffULL);
if (stop_bit == 0) {
// Error: unterminated varint.
upb_decoderet err_r = {(void*)0, 0};
return err_r;
}
upb_decoderet my_r = {r.p + ((__builtin_ctzll(stop_bit) + 1) / 8),
r.val | (b << 7)};
return my_r;
}
upb_decoderet upb_vdecode_max8_massimino(upb_decoderet r);
// Template for a function that checks the first two bytes with branching
// and dispatches 2-10 bytes with a separate function.
@ -169,8 +127,8 @@ INLINE size_t upb_value_size(uint64_t val) {
return val == 0 ? 1 : high_bit / 8 + 1;
}
// Currently only works with 32-bit varints.
INLINE uint64_t upb_vencode(uint32_t val) {
// Encodes a 32-bit varint, *not* sign-extended.
INLINE uint64_t upb_vencode32(uint32_t val) {
uint64_t ret = 0;
for (int bitpos = 0; val; bitpos+=8, val >>=7) {
if (bitpos > 0) ret |= (1 << (bitpos-1));

@ -2,38 +2,73 @@
#include "upb_decoder.h"
#include "upb_textprinter.h"
#include "upb_stdio.h"
#include "upb_glue.h"
int main(int argc, char *argv[]) {
if (argc < 3) {
fprintf(stderr, "Usage: test_decoder <descfile> <msgname>\n");
return 1;
}
int main() {
upb_symtab *symtab = upb_symtab_new();
upb_symtab_add_descriptorproto(symtab);
upb_def *fds = upb_symtab_lookup(
symtab, UPB_STRLIT("google.protobuf.FileDescriptorSet"));
upb_string *desc = upb_strreadfile(argv[1]);
if (!desc) {
fprintf(stderr, "Couldn't open descriptor file: %s\n", argv[1]);
return 1;
}
upb_status status = UPB_STATUS_INIT;
upb_parsedesc(symtab, desc, &status);
if (!upb_ok(&status)) {
fprintf(stderr, "Error parsing descriptor: ");
upb_printerr(&status);
return 1;
}
upb_string_unref(desc);
upb_string *name = upb_strdupc(argv[2]);
upb_def *md = upb_symtab_lookup(symtab, name);
upb_string_unref(name);
if (!md) {
fprintf(stderr, "Descriptor did not contain message: %s\n", argv[2]);
return 1;
}
upb_msgdef *m = upb_dyncast_msgdef(md);
if (!m) {
fprintf(stderr, "Def was not a msgdef.\n");
return 1;
}
upb_stdio *in = upb_stdio_new();
upb_stdio_reset(in, stdin);
upb_stdio *out = upb_stdio_new();
upb_stdio_reset(out, stdout);
upb_decoder d;
upb_decoder_init(&d, upb_downcast_msgdef(fds));
upb_decoder_reset(&d, upb_stdio_bytesrc(in));
upb_textprinter *p = upb_textprinter_new();
upb_handlers handlers;
upb_handlers_init(&handlers);
upb_textprinter_reset(p, &handlers, upb_stdio_bytesink(out), false);
upb_src *src = upb_decoder_src(&d);
upb_src_sethandlers(src, &handlers);
upb_handlers_init(&handlers, m);
upb_textprinter *p = upb_textprinter_new();
upb_textprinter_reset(p, upb_stdio_bytesink(out), false);
upb_textprinter_reghandlers(&handlers);
upb_status status = UPB_STATUS_INIT;
upb_src_run(src, &status);
upb_decoder d;
upb_decoder_init(&d, &handlers);
upb_decoder_reset(&d, upb_stdio_bytesrc(in), p);
upb_clearerr(&status);
upb_decoder_decode(&d, &status);
assert(upb_ok(&status));
if (!upb_ok(&status)) {
fprintf(stderr, "Error parsing input: ");
upb_printerr(&status);
}
upb_status_uninit(&status);
upb_stdio_free(in);
upb_stdio_free(out);
upb_decoder_uninit(&d);
upb_textprinter_free(p);
upb_def_unref(fds);
upb_def_unref(UPB_UPCAST(m));
upb_symtab_unref(symtab);
// Prevent C library from holding buffers open, so Valgrind doesn't see

Loading…
Cancel
Save