Protocol Buffers - Google's data interchange format (grpc依赖) https://developers.google.com/protocol-buffers/
You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

439 lines
14 KiB

16 years ago
/*
* upb - a minimalist implementation of protocol buffers.
16 years ago
*
* Copyright (c) 2008-2011 Google Inc. See LICENSE for details.
* Author: Josh Haberman <jhaberman@gmail.com>
16 years ago
*/
#include <inttypes.h>
#include <stddef.h>
#include <stdlib.h>
#include "upb/bytestream.h"
#include "upb/msg.h"
#include "upb/pb/decoder.h"
#include "upb/pb/varint.h"
#ifdef UPB_USE_JIT_X64
#define Dst_DECL upb_decoder *d
#define Dst_REF (d->dynasm)
#define Dst (d)
#include "dynasm/dasm_proto.h"
#include "upb/pb/decoder_x64.h"
#endif
// It's unfortunate that we have to micro-manage the compiler this way,
// especially since this tuning is necessarily specific to one hardware
// configuration. But emperically on a Core i7, performance increases 30-50%
// with these annotations. Every instance where these appear, gcc 4.2.1 made
// the wrong decision and degraded performance in benchmarks.
#define FORCEINLINE static __attribute__((always_inline))
#define NOINLINE static __attribute__((noinline))
static void upb_decoder_exit(upb_decoder *d) {
13 years ago
// Resumable decoder would back out to completed_ptr (and possibly get a
// previous buffer).
siglongjmp(d->exitjmp, 1);
}
static void upb_decoder_exit2(void *_d) {
upb_decoder *d = _d;
upb_decoder_exit(d);
}
static void upb_decoder_abort(upb_decoder *d, const char *msg) {
upb_status_seterrliteral(d->status, msg);
upb_decoder_exit(d);
}
/* Buffering ******************************************************************/
// We operate on one buffer at a time, which may be a subset of the currently
// loaded byteregion data. When data for the buffer is completely gone we pull
// the next one. When we've committed our progress we discard any previous
// buffers' regions.
static uint32_t upb_decoder_bufleft(upb_decoder *d) { return d->end - d->ptr; }
static void upb_decoder_advance(upb_decoder *d, uint32_t len) {
assert(upb_decoder_bufleft(d) >= len);
d->ptr += len;
}
upb_stream: all callbacks registered ahead-of-time. This is a significant change to the upb_stream protocol, and should hopefully be the last significant change. All callbacks are now registered ahead-of-time instead of having delegated callbacks registered at runtime, which makes it much easier to aggressively optimize ahead-of-time (like with a JIT). Other impacts of this change: - You no longer need to have loaded descriptor.proto as a upb_def to load other descriptors! This means the special-case code we used for bootstrapping is no longer necessary, and we no longer need to link the descriptor for descriptor.proto into upb. - A client can now register any upb_value as what will be delivered to their value callback, not just a upb_fielddef*. This should allow for other clients to get more bang out of the streaming decoder. This change unfortunately causes a bit of a performance regression -- I think largely due to highly suboptimal code that GCC generates when structs are returned by value. See: http://blog.reverberate.org/2011/03/19/when-a-compilers-slow-code-actually-bites-you/ On the other hand, once we have a JIT this should no longer matter. Performance numbers: plain.parsestream_googlemessage1.upb_table: 374 -> 396 (5.88) plain.parsestream_googlemessage2.upb_table: 616 -> 449 (-27.11) plain.parsetostruct_googlemessage1.upb_table_byref: 268 -> 269 (0.37) plain.parsetostruct_googlemessage1.upb_table_byval: 215 -> 204 (-5.12) plain.parsetostruct_googlemessage2.upb_table_byref: 307 -> 281 (-8.47) plain.parsetostruct_googlemessage2.upb_table_byval: 297 -> 272 (-8.42) omitfp.parsestream_googlemessage1.upb_table: 423 -> 410 (-3.07) omitfp.parsestream_googlemessage2.upb_table: 679 -> 483 (-28.87) omitfp.parsetostruct_googlemessage1.upb_table_byref: 287 -> 282 (-1.74) omitfp.parsetostruct_googlemessage1.upb_table_byval: 226 -> 219 (-3.10) omitfp.parsetostruct_googlemessage2.upb_table_byref: 315 -> 298 (-5.40) omitfp.parsetostruct_googlemessage2.upb_table_byval: 297 -> 287 (-3.37)
14 years ago
uint64_t upb_decoder_offset(upb_decoder *d) {
return d->bufstart_ofs + (d->ptr - d->buf);
}
uint64_t upb_decoder_bufendofs(upb_decoder *d) {
return d->bufstart_ofs + (d->end - d->buf);
}
static void upb_decoder_setmsgend(upb_decoder *d) {
upb_dispatcher_frame *f = d->dispatcher.top;
uint32_t delimlen = f->end_ofs - d->bufstart_ofs;
uint32_t buflen = d->end - d->buf;
13 years ago
d->delim_end = (f->end_ofs != UPB_NONDELIMITED && delimlen <= buflen) ?
d->buf + delimlen : NULL; // NULL if not in this buf.
d->top_is_packed = f->is_packed;
}
static bool upb_trypullbuf(upb_decoder *d) {
assert(upb_decoder_bufleft(d) == 0);
d->bufstart_ofs = upb_decoder_offset(d);
d->buf = NULL;
d->ptr = NULL;
d->end = NULL;
if (upb_byteregion_available(d->input, upb_decoder_offset(d)) == 0 &&
!upb_byteregion_fetch(d->input, d->status)) {
if (upb_eof(d->status)) return false;
upb_decoder_exit(d); // Non-EOF error.
}
uint32_t len;
d->buf = upb_byteregion_getptr(d->input, d->bufstart_ofs, &len);
assert(len > 0);
d->ptr = d->buf;
d->end = d->buf + len;
#ifdef UPB_USE_JIT_X64
// If we start parsing a value, we can parse up to 20 bytes without
// having to bounds-check anything (2 10-byte varints). Since the
// JIT bounds-checks only *between* values (and for strings), the
// JIT bails if there are not 20 bytes available.
d->jit_end = d->end - 20;
#endif
upb_decoder_setmsgend(d);
return true;
}
static void upb_pullbuf(upb_decoder *d) {
if (!upb_trypullbuf(d)) upb_decoder_abort(d, "Unexpected EOF");
}
void upb_decoder_skipto(upb_decoder *d, uint64_t ofs) {
if (ofs < upb_decoder_bufendofs(d)) {
upb_decoder_advance(d, ofs - upb_decoder_offset(d));
} else {
d->buf = NULL;
d->ptr = NULL;
d->end = NULL;
d->bufstart_ofs = ofs;
}
}
void upb_decoder_checkpoint(upb_decoder *d) {
upb_byteregion_discard(d->input, upb_decoder_offset(d));
}
/* Decoding of wire types *****************************************************/
NOINLINE uint64_t upb_decode_varint_slow(upb_decoder *d) {
uint8_t byte = 0x80;
uint64_t u64 = 0;
int bitpos;
const char *ptr = d->ptr;
for(bitpos = 0; bitpos < 70 && (byte & 0x80); bitpos += 7) {
if (upb_decoder_bufleft(d) == 0) {
upb_pullbuf(d);
ptr = d->ptr;
}
u64 |= ((uint64_t)(byte = *ptr++) & 0x7F) << bitpos;
}
if(bitpos == 70 && (byte & 0x80)) upb_decoder_abort(d, "Unterminated varint");
return u64;
}
// For tags and delimited lengths, which must be <=32bit and are usually small.
FORCEINLINE uint32_t upb_decode_varint32(upb_decoder *d) {
Remove upb_dstate and specialize upb_decode_fixed for perf improvement. The compiler wasn't keeping upb_dstate in memory anyway (which was the original goal). This simplifies the decoder. upb_decode_fixed was intended to minimize the number of branches, but since it was calling out to memcpy as a function, this turned out to be a pessimization. Performance is encouraging: plain32.parsestream_googlemessage1.upb_table: 254 -> 242 (-4.72) plain32.parsestream_googlemessage2.upb_table: 357 -> 400 (12.04) plain32.parsetostruct_googlemessage1.upb_table_byref: 143 -> 144 (0.70) plain32.parsetostruct_googlemessage1.upb_table_byval: 122 -> 118 (-3.28) plain32.parsetostruct_googlemessage2.upb_table_byref: 189 -> 200 (5.82) plain32.parsetostruct_googlemessage2.upb_table_byval: 198 -> 200 (1.01) omitfp32.parsestream_googlemessage1.upb_table: 267 -> 265 (-0.75) omitfp32.parsestream_googlemessage2.upb_table: 377 -> 465 (23.34) omitfp32.parsetostruct_googlemessage1.upb_table_byref: 140 -> 151 (7.86) omitfp32.parsetostruct_googlemessage1.upb_table_byval: 131 -> 131 (0.00) omitfp32.parsetostruct_googlemessage2.upb_table_byref: 204 -> 214 (4.90) omitfp32.parsetostruct_googlemessage2.upb_table_byval: 200 -> 206 (3.00) plain.parsestream_googlemessage1.upb_table: 313 -> 317 (1.28) plain.parsestream_googlemessage2.upb_table: 476 -> 541 (13.66) plain.parsetostruct_googlemessage1.upb_table_byref: 189 -> 189 (0.00) plain.parsetostruct_googlemessage1.upb_table_byval: 165 -> 165 (0.00) plain.parsetostruct_googlemessage2.upb_table_byref: 263 -> 270 (2.66) plain.parsetostruct_googlemessage2.upb_table_byval: 248 -> 255 (2.82) omitfp.parsestream_googlemessage1.upb_table: 306 -> 305 (-0.33) omitfp.parsestream_googlemessage2.upb_table: 471 -> 531 (12.74) omitfp.parsetostruct_googlemessage1.upb_table_byref: 189 -> 190 (0.53) omitfp.parsetostruct_googlemessage1.upb_table_byval: 166 -> 172 (3.61) omitfp.parsetostruct_googlemessage2.upb_table_byref: 258 -> 270 (4.65) omitfp.parsetostruct_googlemessage2.upb_table_byval: 248 -> 265 (6.85)
14 years ago
const char *p = d->ptr;
uint32_t ret;
uint64_t u64;
// Nearly all will be either 1 byte (1-16) or 2 bytes (17-2048).
if (upb_decoder_bufleft(d) < 2) goto slow; // unlikely.
ret = *p & 0x7f;
if ((*(p++) & 0x80) == 0) goto done; // predictable if fields are in order
ret |= (*p & 0x7f) << 7;
if ((*(p++) & 0x80) == 0) goto done; // likely
slow:
u64 = upb_decode_varint_slow(d);
if (u64 > 0xffffffff) upb_decoder_abort(d, "Unterminated 32-bit varint");
ret = (uint32_t)u64;
p = d->ptr; // Turn the next line into a nop.
done:
Remove upb_dstate and specialize upb_decode_fixed for perf improvement. The compiler wasn't keeping upb_dstate in memory anyway (which was the original goal). This simplifies the decoder. upb_decode_fixed was intended to minimize the number of branches, but since it was calling out to memcpy as a function, this turned out to be a pessimization. Performance is encouraging: plain32.parsestream_googlemessage1.upb_table: 254 -> 242 (-4.72) plain32.parsestream_googlemessage2.upb_table: 357 -> 400 (12.04) plain32.parsetostruct_googlemessage1.upb_table_byref: 143 -> 144 (0.70) plain32.parsetostruct_googlemessage1.upb_table_byval: 122 -> 118 (-3.28) plain32.parsetostruct_googlemessage2.upb_table_byref: 189 -> 200 (5.82) plain32.parsetostruct_googlemessage2.upb_table_byval: 198 -> 200 (1.01) omitfp32.parsestream_googlemessage1.upb_table: 267 -> 265 (-0.75) omitfp32.parsestream_googlemessage2.upb_table: 377 -> 465 (23.34) omitfp32.parsetostruct_googlemessage1.upb_table_byref: 140 -> 151 (7.86) omitfp32.parsetostruct_googlemessage1.upb_table_byval: 131 -> 131 (0.00) omitfp32.parsetostruct_googlemessage2.upb_table_byref: 204 -> 214 (4.90) omitfp32.parsetostruct_googlemessage2.upb_table_byval: 200 -> 206 (3.00) plain.parsestream_googlemessage1.upb_table: 313 -> 317 (1.28) plain.parsestream_googlemessage2.upb_table: 476 -> 541 (13.66) plain.parsetostruct_googlemessage1.upb_table_byref: 189 -> 189 (0.00) plain.parsetostruct_googlemessage1.upb_table_byval: 165 -> 165 (0.00) plain.parsetostruct_googlemessage2.upb_table_byref: 263 -> 270 (2.66) plain.parsetostruct_googlemessage2.upb_table_byval: 248 -> 255 (2.82) omitfp.parsestream_googlemessage1.upb_table: 306 -> 305 (-0.33) omitfp.parsestream_googlemessage2.upb_table: 471 -> 531 (12.74) omitfp.parsetostruct_googlemessage1.upb_table_byref: 189 -> 190 (0.53) omitfp.parsetostruct_googlemessage1.upb_table_byval: 166 -> 172 (3.61) omitfp.parsetostruct_googlemessage2.upb_table_byref: 258 -> 270 (4.65) omitfp.parsetostruct_googlemessage2.upb_table_byval: 248 -> 265 (6.85)
14 years ago
upb_decoder_advance(d, p - d->ptr);
return ret;
}
// Returns true on success or false if we've hit a valid EOF.
FORCEINLINE bool upb_trydecode_varint32(upb_decoder *d, uint32_t *val) {
if (upb_decoder_bufleft(d) == 0 &&
upb_dispatcher_islegalend(&d->dispatcher) &&
!upb_trypullbuf(d)) {
return false;
}
*val = upb_decode_varint32(d);
return true;
}
FORCEINLINE uint64_t upb_decode_varint(upb_decoder *d) {
if (upb_decoder_bufleft(d) >= 10) {
// Fast case.
upb_decoderet r = upb_vdecode_fast(d->ptr);
if (r.p == NULL) upb_decoder_abort(d, "Unterminated varint");
upb_decoder_advance(d, r.p - d->ptr);
return r.val;
} else if (upb_decoder_bufleft(d) > 0) {
// Intermediate case -- worth it?
char tmpbuf[10];
memset(tmpbuf, 0x80, 10);
memcpy(tmpbuf, d->ptr, upb_decoder_bufleft(d));
upb_decoderet r = upb_vdecode_fast(tmpbuf);
if (r.p != NULL) {
upb_decoder_advance(d, r.p - tmpbuf);
return r.val;
}
}
// Slow case -- varint spans buffer seam.
return upb_decode_varint_slow(d);
}
FORCEINLINE void upb_decode_fixed(upb_decoder *d, char *buf, size_t bytes) {
if (upb_decoder_bufleft(d) >= bytes) {
// Fast case.
memcpy(buf, d->ptr, bytes);
Remove upb_dstate and specialize upb_decode_fixed for perf improvement. The compiler wasn't keeping upb_dstate in memory anyway (which was the original goal). This simplifies the decoder. upb_decode_fixed was intended to minimize the number of branches, but since it was calling out to memcpy as a function, this turned out to be a pessimization. Performance is encouraging: plain32.parsestream_googlemessage1.upb_table: 254 -> 242 (-4.72) plain32.parsestream_googlemessage2.upb_table: 357 -> 400 (12.04) plain32.parsetostruct_googlemessage1.upb_table_byref: 143 -> 144 (0.70) plain32.parsetostruct_googlemessage1.upb_table_byval: 122 -> 118 (-3.28) plain32.parsetostruct_googlemessage2.upb_table_byref: 189 -> 200 (5.82) plain32.parsetostruct_googlemessage2.upb_table_byval: 198 -> 200 (1.01) omitfp32.parsestream_googlemessage1.upb_table: 267 -> 265 (-0.75) omitfp32.parsestream_googlemessage2.upb_table: 377 -> 465 (23.34) omitfp32.parsetostruct_googlemessage1.upb_table_byref: 140 -> 151 (7.86) omitfp32.parsetostruct_googlemessage1.upb_table_byval: 131 -> 131 (0.00) omitfp32.parsetostruct_googlemessage2.upb_table_byref: 204 -> 214 (4.90) omitfp32.parsetostruct_googlemessage2.upb_table_byval: 200 -> 206 (3.00) plain.parsestream_googlemessage1.upb_table: 313 -> 317 (1.28) plain.parsestream_googlemessage2.upb_table: 476 -> 541 (13.66) plain.parsetostruct_googlemessage1.upb_table_byref: 189 -> 189 (0.00) plain.parsetostruct_googlemessage1.upb_table_byval: 165 -> 165 (0.00) plain.parsetostruct_googlemessage2.upb_table_byref: 263 -> 270 (2.66) plain.parsetostruct_googlemessage2.upb_table_byval: 248 -> 255 (2.82) omitfp.parsestream_googlemessage1.upb_table: 306 -> 305 (-0.33) omitfp.parsestream_googlemessage2.upb_table: 471 -> 531 (12.74) omitfp.parsetostruct_googlemessage1.upb_table_byref: 189 -> 190 (0.53) omitfp.parsetostruct_googlemessage1.upb_table_byval: 166 -> 172 (3.61) omitfp.parsetostruct_googlemessage2.upb_table_byref: 258 -> 270 (4.65) omitfp.parsetostruct_googlemessage2.upb_table_byval: 248 -> 265 (6.85)
14 years ago
upb_decoder_advance(d, bytes);
} else {
// Slow case.
size_t read = 0;
while (read < bytes) {
size_t avail = upb_decoder_bufleft(d);
memcpy(buf + read, d->ptr, avail);
upb_decoder_advance(d, avail);
read += avail;
upb_pullbuf(d);
}
}
}
FORCEINLINE uint32_t upb_decode_fixed32(upb_decoder *d) {
uint32_t u32;
upb_decode_fixed(d, (char*)&u32, sizeof(uint32_t));
return u32; // TODO: proper byte swapping
}
FORCEINLINE uint64_t upb_decode_fixed64(upb_decoder *d) {
uint64_t u64;
upb_decode_fixed(d, (char*)&u64, sizeof(uint64_t));
return u64; // TODO: proper byte swapping
}
INLINE upb_byteregion *upb_decode_string(upb_decoder *d) {
uint32_t strlen = upb_decode_varint32(d);
uint64_t offset = upb_decoder_offset(d);
upb_byteregion_reset(&d->str_byteregion, d->input, offset, strlen);
// Could make it an option on the callback whether we fetchall() first or not.
upb_byteregion_fetchall(&d->str_byteregion, d->status);
if (!upb_ok(d->status)) upb_decoder_exit(d);
upb_decoder_skipto(d, offset + strlen);
return &d->str_byteregion;
}
INLINE void upb_push(upb_decoder *d, upb_fhandlers *f, uint64_t end) {
upb_dispatch_startsubmsg(&d->dispatcher, f)->end_ofs = end;
upb_decoder_setmsgend(d);
}
/* Decoding of .proto types ***************************************************/
// Technically, we are losing data if we see a 32-bit varint that is not
// properly sign-extended. We could detect this and error about the data loss,
// but proto2 does not do this, so we pass.
#define T(type, wt, valtype, convfunc) \
14 years ago
INLINE void upb_decode_ ## type(upb_decoder *d, upb_fhandlers *f) { \
upb_value val; \
upb_value_set ## valtype(&val, (convfunc)(upb_decode_ ## wt(d))); \
upb_dispatch_value(&d->dispatcher, f, val); \
} \
static double upb_asdouble(uint64_t n) { double d; memcpy(&d, &n, 8); return d; }
static float upb_asfloat(uint32_t n) { float f; memcpy(&f, &n, 4); return f; }
static int32_t upb_zzdec_32(uint32_t n) { return (n >> 1) ^ -(int32_t)(n & 1); }
static int64_t upb_zzdec_64(uint64_t n) { return (n >> 1) ^ -(int64_t)(n & 1); }
T(INT32, varint, int32, int32_t)
T(INT64, varint, int64, int64_t)
T(UINT32, varint, uint32, uint32_t)
T(UINT64, varint, uint64, uint64_t)
T(FIXED32, fixed32, uint32, uint32_t)
T(FIXED64, fixed64, uint64, uint64_t)
T(SFIXED32, fixed32, int32, int32_t)
T(SFIXED64, fixed64, int64, int64_t)
T(BOOL, varint, bool, bool)
T(ENUM, varint, int32, int32_t)
T(DOUBLE, fixed64, double, upb_asdouble)
T(FLOAT, fixed32, float, upb_asfloat)
T(SINT32, varint, int32, upb_zzdec_32)
T(SINT64, varint, int64, upb_zzdec_64)
T(STRING, string, byteregion, upb_byteregion*)
14 years ago
static void upb_decode_GROUP(upb_decoder *d, upb_fhandlers *f) {
upb_push(d, f, UPB_NONDELIMITED);
}
14 years ago
static void upb_endgroup(upb_decoder *d, upb_fhandlers *f) {
(void)f;
upb_dispatch_endsubmsg(&d->dispatcher);
upb_decoder_setmsgend(d);
upb_stream: all callbacks registered ahead-of-time. This is a significant change to the upb_stream protocol, and should hopefully be the last significant change. All callbacks are now registered ahead-of-time instead of having delegated callbacks registered at runtime, which makes it much easier to aggressively optimize ahead-of-time (like with a JIT). Other impacts of this change: - You no longer need to have loaded descriptor.proto as a upb_def to load other descriptors! This means the special-case code we used for bootstrapping is no longer necessary, and we no longer need to link the descriptor for descriptor.proto into upb. - A client can now register any upb_value as what will be delivered to their value callback, not just a upb_fielddef*. This should allow for other clients to get more bang out of the streaming decoder. This change unfortunately causes a bit of a performance regression -- I think largely due to highly suboptimal code that GCC generates when structs are returned by value. See: http://blog.reverberate.org/2011/03/19/when-a-compilers-slow-code-actually-bites-you/ On the other hand, once we have a JIT this should no longer matter. Performance numbers: plain.parsestream_googlemessage1.upb_table: 374 -> 396 (5.88) plain.parsestream_googlemessage2.upb_table: 616 -> 449 (-27.11) plain.parsetostruct_googlemessage1.upb_table_byref: 268 -> 269 (0.37) plain.parsetostruct_googlemessage1.upb_table_byval: 215 -> 204 (-5.12) plain.parsetostruct_googlemessage2.upb_table_byref: 307 -> 281 (-8.47) plain.parsetostruct_googlemessage2.upb_table_byval: 297 -> 272 (-8.42) omitfp.parsestream_googlemessage1.upb_table: 423 -> 410 (-3.07) omitfp.parsestream_googlemessage2.upb_table: 679 -> 483 (-28.87) omitfp.parsetostruct_googlemessage1.upb_table_byref: 287 -> 282 (-1.74) omitfp.parsetostruct_googlemessage1.upb_table_byval: 226 -> 219 (-3.10) omitfp.parsetostruct_googlemessage2.upb_table_byref: 315 -> 298 (-5.40) omitfp.parsetostruct_googlemessage2.upb_table_byval: 297 -> 287 (-3.37)
14 years ago
}
14 years ago
static void upb_decode_MESSAGE(upb_decoder *d, upb_fhandlers *f) {
upb_push(d, f, upb_decode_varint32(d) + upb_decoder_offset(d));
}
/* The main decoding loop *****************************************************/
static void upb_decoder_checkdelim(upb_decoder *d) {
while (d->delim_end != NULL && d->ptr >= d->delim_end) {
13 years ago
if (d->ptr > d->delim_end) upb_decoder_abort(d, "Bad submessage end");
if (d->dispatcher.top->is_sequence) {
upb_dispatch_endseq(&d->dispatcher);
} else {
upb_dispatch_endsubmsg(&d->dispatcher);
}
upb_decoder_setmsgend(d);
}
}
static void upb_decoder_enterjit(upb_decoder *d) {
(void)d;
#ifdef UPB_USE_JIT_X64
if (d->jit_code && d->dispatcher.top == d->dispatcher.stack && d->ptr < d->jit_end) {
// Decodes as many fields as possible, updating d->ptr appropriately,
// before falling through to the slow(er) path.
void (*upb_jit_decode)(upb_decoder *d) = (void*)d->jit_code;
upb_jit_decode(d);
}
#endif
}
14 years ago
INLINE upb_fhandlers *upb_decode_tag(upb_decoder *d) {
while (1) {
uint32_t tag;
if (!upb_trydecode_varint32(d, &tag)) return NULL;
uint8_t wire_type = tag & 0x7;
14 years ago
upb_fhandlers *f = upb_dispatcher_lookup(&d->dispatcher, tag);
// There are no explicit "startseq" or "endseq" markers in protobuf
// streams, so we have to infer them by noticing when a repeated field
// starts or ends.
if (d->dispatcher.top->is_sequence && d->dispatcher.top->f != f) {
upb_dispatch_endseq(&d->dispatcher);
upb_decoder_setmsgend(d);
}
if (f && f->repeated && d->dispatcher.top->f != f) {
uint64_t old_end = d->dispatcher.top->end_ofs;
upb_dispatcher_frame *fr = upb_dispatch_startseq(&d->dispatcher, f);
if (wire_type != UPB_WIRE_TYPE_DELIMITED ||
upb_issubmsgtype(f->type) || upb_isstringtype(f->type)) {
// Non-packed field -- this tag pertains to only a single message.
fr->end_ofs = old_end;
} else {
// Packed primitive field.
fr->end_ofs = upb_decoder_offset(d) + upb_decode_varint(d);
fr->is_packed = true;
}
upb_decoder_setmsgend(d);
}
if (f) return f;
// Unknown field.
switch (wire_type) {
case UPB_WIRE_TYPE_VARINT: upb_decode_varint(d); break;
case UPB_WIRE_TYPE_32BIT: upb_decoder_advance(d, 4); break;
case UPB_WIRE_TYPE_64BIT: upb_decoder_advance(d, 8); break;
case UPB_WIRE_TYPE_DELIMITED:
upb_decoder_advance(d, upb_decode_varint32(d)); break;
default:
upb_decoder_abort(d, "Invalid wire type");
}
// TODO: deliver to unknown field callback.
upb_decoder_checkpoint(d);
upb_decoder_checkdelim(d);
}
}
void upb_decoder_decode(upb_decoder *d, upb_status *status) {
if (sigsetjmp(d->exitjmp, 0)) { assert(!upb_ok(status)); return; }
d->status = status;
upb_dispatch_startmsg(&d->dispatcher);
// Prime the buf so we can hit the JIT immediately.
upb_trypullbuf(d);
upb_fhandlers *f = d->dispatcher.top->f;
while(1) { // Main loop: executed once per tag/field pair.
upb_decoder_checkdelim(d);
upb_decoder_enterjit(d);
if (!d->top_is_packed) f = upb_decode_tag(d);
if (!f) {
// Sucessful EOF. We may need to dispatch a top-level implicit frame.
if (d->dispatcher.top == d->dispatcher.stack + 1) {
assert(d->dispatcher.top->is_sequence);
upb_dispatch_endseq(&d->dispatcher);
}
return;
}
f->decode(d, f);
upb_decoder_checkpoint(d);
}
}
static void upb_decoder_skip(void *_d, upb_dispatcher_frame *f) {
upb_decoder *d = _d;
if (f->end_ofs != UPB_NONDELIMITED) {
upb_decoder_skipto(d, d->dispatcher.top->end_ofs);
} else {
// TODO: how to support skipping groups? Dispatcher could drop callbacks,
// or it could be special-cased inside the decoder.
}
}
void upb_decoder_init(upb_decoder *d, upb_handlers *handlers) {
upb_dispatcher_init(
&d->dispatcher, handlers, upb_decoder_skip, upb_decoder_exit2, d);
#ifdef UPB_USE_JIT_X64
d->jit_code = NULL;
if (d->dispatcher.handlers->should_jit) upb_decoder_makejit(d);
#endif
// Set function pointers for each field's decode function.
for (int i = 0; i < handlers->msgs_len; i++) {
upb_mhandlers *m = handlers->msgs[i];
for(upb_inttable_iter i = upb_inttable_begin(&m->fieldtab); !upb_inttable_done(i);
i = upb_inttable_next(&m->fieldtab, i)) {
14 years ago
upb_fhandlers *f = upb_inttable_iter_value(i);
13 years ago
#define F(type) &upb_decode_ ## type
static void *fptrs[] = {&upb_endgroup, F(DOUBLE), F(FLOAT), F(INT64),
F(UINT64), F(INT32), F(FIXED64), F(FIXED32), F(BOOL), F(STRING),
F(GROUP), F(MESSAGE), F(STRING), F(UINT32), F(ENUM), F(SFIXED32),
F(SFIXED64), F(SINT32), F(SINT64)};
f->decode = fptrs[f->type];
}
}
}
void upb_decoder_reset(upb_decoder *d, upb_byteregion *input, void *closure) {
upb_dispatcher_frame *f = upb_dispatcher_reset(&d->dispatcher, closure);
f->end_ofs = UPB_NONDELIMITED;
d->input = input;
d->bufstart_ofs = upb_byteregion_startofs(input);
d->buf = NULL;
upb_stream: all callbacks registered ahead-of-time. This is a significant change to the upb_stream protocol, and should hopefully be the last significant change. All callbacks are now registered ahead-of-time instead of having delegated callbacks registered at runtime, which makes it much easier to aggressively optimize ahead-of-time (like with a JIT). Other impacts of this change: - You no longer need to have loaded descriptor.proto as a upb_def to load other descriptors! This means the special-case code we used for bootstrapping is no longer necessary, and we no longer need to link the descriptor for descriptor.proto into upb. - A client can now register any upb_value as what will be delivered to their value callback, not just a upb_fielddef*. This should allow for other clients to get more bang out of the streaming decoder. This change unfortunately causes a bit of a performance regression -- I think largely due to highly suboptimal code that GCC generates when structs are returned by value. See: http://blog.reverberate.org/2011/03/19/when-a-compilers-slow-code-actually-bites-you/ On the other hand, once we have a JIT this should no longer matter. Performance numbers: plain.parsestream_googlemessage1.upb_table: 374 -> 396 (5.88) plain.parsestream_googlemessage2.upb_table: 616 -> 449 (-27.11) plain.parsetostruct_googlemessage1.upb_table_byref: 268 -> 269 (0.37) plain.parsetostruct_googlemessage1.upb_table_byval: 215 -> 204 (-5.12) plain.parsetostruct_googlemessage2.upb_table_byref: 307 -> 281 (-8.47) plain.parsetostruct_googlemessage2.upb_table_byval: 297 -> 272 (-8.42) omitfp.parsestream_googlemessage1.upb_table: 423 -> 410 (-3.07) omitfp.parsestream_googlemessage2.upb_table: 679 -> 483 (-28.87) omitfp.parsetostruct_googlemessage1.upb_table_byref: 287 -> 282 (-1.74) omitfp.parsetostruct_googlemessage1.upb_table_byval: 226 -> 219 (-3.10) omitfp.parsetostruct_googlemessage2.upb_table_byref: 315 -> 298 (-5.40) omitfp.parsetostruct_googlemessage2.upb_table_byval: 297 -> 287 (-3.37)
14 years ago
d->ptr = NULL;
d->end = NULL; // Force a buffer pull.
d->delim_end = NULL; // But don't let end-of-message get triggered.
d->str_byteregion.bytesrc = input->bytesrc;
#ifdef UPB_USE_JIT_X64
d->jit_end = NULL;
#endif
}
void upb_decoder_uninit(upb_decoder *d) {
#ifdef UPB_USE_JIT_X64
if (d->dispatcher.handlers->should_jit) upb_decoder_freejit(d);
#endif
upb_dispatcher_uninit(&d->dispatcher);
}