Flesh out implementation of upb_sizebuilder.

pull/13171/head
Joshua Haberman 15 years ago
parent 611afe9c69
commit 036fe6bb06
  1. 8
      src/upb.h
  2. 20
      src/upb_decoder.c
  3. 161
      src/upb_encoder.c
  4. 37
      src/upb_encoder.h
  5. 22
      src/upb_sink.h

@ -272,14 +272,14 @@ enum upb_status_code {
// The input byte stream ended in the middle of a record.
UPB_STATUS_NEED_MORE_DATA = 1,
// The user value callback opted to stop parsing.
UPB_STATUS_USER_CANCELLED = 2,
// An unrecoverable error occurred.
UPB_STATUS_ERROR = -1,
// A varint went for 10 bytes without terminating.
UPB_ERROR_UNTERMINATED_VARINT = -2
UPB_ERROR_UNTERMINATED_VARINT = -2,
// The max nesting level (UPB_MAX_NESTING) was exceeded.
UPB_ERROR_MAX_NESTING_EXCEEDED = -3
};
#define UPB_ERRORMSG_MAXLEN 256

@ -207,10 +207,8 @@ INLINE const uint8_t *decode_tag(const uint8_t *buf, const uint8_t *end,
}
/**
* Parses a 64-bit varint that is known to be >= 2 bytes (the inline version
* handles 1 and 2 byte varints).
*/
// Parses a 64-bit varint that is known to be >= 2 bytes (the inline version
// handles 1 and 2 byte varints).
const uint8_t *upb_get_v_uint64_t_full(const uint8_t *buf, const uint8_t *end,
uint64_t *val, upb_status *status)
{
@ -367,10 +365,8 @@ INLINE bool upb_check_type(upb_wire_type_t wt, upb_field_type_t ft) {
}
/**
* Pushes a new stack frame for a submessage with the given len (which will
* be zero if the submessage is a group).
*/
// Pushes a new stack frame for a submessage with the given len (which will
// be zero if the submessage is a group).
static const uint8_t *push(upb_decoder *d, const uint8_t *start,
uint32_t submsg_len, upb_fielddef *f,
upb_status *status)
@ -378,7 +374,7 @@ static const uint8_t *push(upb_decoder *d, const uint8_t *start,
d->top->field = f;
d->top++;
if(d->top >= d->limit) {
upb_seterr(status, UPB_STATUS_ERROR,
upb_seterr(status, UPB_ERROR_MAX_NESTING_EXCEEDED,
"Nesting exceeded maximum (%d levels)\n",
UPB_MAX_NESTING);
return NULL;
@ -391,10 +387,8 @@ static const uint8_t *push(upb_decoder *d, const uint8_t *start,
return get_msgend(d, start);
}
/**
* Pops a stack frame, returning a pointer for where the next submsg should
* end (or a pointer that is out of range for a group).
*/
// Pops a stack frame, returning a pointer for where the next submsg should
// end (or a pointer that is out of range for a group).
static const void *pop(upb_decoder *d, const uint8_t *start)
{
d->top--;

@ -5,9 +5,11 @@
*/
#include "upb_encoder.h"
#include <stdlib.h>
#include "descriptor.h"
/* Functions for calculating sizes. *******************************************/
/* Functions for calculating sizes of wire values. ****************************/
static size_t upb_v_uint64_t_size(uint64_t val) {
#ifdef __GNUC__
@ -103,9 +105,9 @@ static uint8_t *upb_put_f_uint64_t(uint8_t *buf, uint64_t val)
return uint64_end;
}
/* Functions to write .proto values. ******************************************/
/* Functions to write and calculate sizes for .proto values. ******************/
/* Performs zig-zag encoding, which is used by sint32 and sint64. */
// Performs zig-zag encoding, which is used by sint32 and sint64.
// Zig-zag encodes a signed 32-bit value: 0 -> 0, -1 -> 1, 1 -> 2, -2 -> 3, ...
// The shift is performed on the unsigned representation because left-shifting
// a negative signed value is undefined behavior, and the sign mask is built
// without relying on an arithmetic right shift (implementation-defined).
static uint32_t upb_zzenc_32(int32_t n) {
  const uint32_t sign_mask = (uint32_t)0 - (uint32_t)(n < 0);  // all ones if n < 0
  return ((uint32_t)n << 1) ^ sign_mask;
}
// Zig-zag encodes a signed 64-bit value: 0 -> 0, -1 -> 1, 1 -> 2, -2 -> 3, ...
// The shift is performed on the unsigned representation because left-shifting
// a negative signed value is undefined behavior, and the sign mask is built
// without relying on an arithmetic right shift (implementation-defined).
static uint64_t upb_zzenc_64(int64_t n) {
  const uint64_t sign_mask = (uint64_t)0 - (uint64_t)(n < 0);  // all ones if n < 0
  return ((uint64_t)n << 1) ^ sign_mask;
}
@ -167,7 +169,7 @@ T(FLOAT, f, uint32_t, float, _float) {
#undef PUT
#undef T
uint8_t *upb_encode_value(uint8_t *buf, upb_field_type_t ft, upb_value v)
static uint8_t *upb_encode_value(uint8_t *buf, upb_field_type_t ft, upb_value v)
{
#define CASE(t, member_name) \
case UPB_TYPE(t): return upb_put_ ## t(buf, v.member_name);
@ -191,11 +193,127 @@ uint8_t *upb_encode_value(uint8_t *buf, upb_field_type_t ft, upb_value v)
#undef CASE
}
uint8_t *_upb_put_tag(uint8_t *buf, upb_field_number_t fn, upb_wire_type_t wt)
// Returns the result of the upb_get_<TYPE>_size() helper matching the .proto
// type "ft", applied to the corresponding member of "v".  Types without a
// case below trip the assert and yield 0.
static uint32_t _upb_get_value_size(upb_field_type_t ft, upb_value v)
{
  uint32_t encoded_size = 0;
#define SIZE_OF(t, member_name) \
  case UPB_TYPE(t): \
    encoded_size = upb_get_ ## t ## _size(v.member_name); \
    break;
  switch(ft) {
    SIZE_OF(DOUBLE, _double)
    SIZE_OF(FLOAT, _float)
    SIZE_OF(INT32, int32)
    SIZE_OF(INT64, int64)
    SIZE_OF(UINT32, uint32)
    SIZE_OF(UINT64, uint64)
    SIZE_OF(SINT32, int32)
    SIZE_OF(SINT64, int64)
    SIZE_OF(FIXED32, uint32)
    SIZE_OF(FIXED64, uint64)
    SIZE_OF(SFIXED32, int32)
    SIZE_OF(SFIXED64, int64)
    SIZE_OF(BOOL, _bool)
    SIZE_OF(ENUM, int32)
    default:
      assert(false);
      break;
  }
#undef SIZE_OF
  return encoded_size;
}
// Writes the tag for field number "num" with wire type "wt" at "buf" and
// returns whatever upb_put_UINT32() returns.  The tag packs the wire type
// below the field number, which is shifted left by three bits.
static uint8_t *_upb_put_tag(uint8_t *buf, upb_field_number_t num,
                             upb_wire_type_t wt)
{
  uint8_t *after_tag = upb_put_UINT32(buf, (num << 3) | wt);
  return after_tag;
}
// Returns the size reported by upb_get_UINT32_size() for the field number
// shifted into tag position.  The wire type is not an input here; only
// (num << 3) is measured.
static uint32_t _upb_get_tag_size(upb_field_number_t num)
{
  uint32_t tag_size = upb_get_UINT32_size(num << 3);
  return tag_size;
}
/* upb_sizebuilder ************************************************************/
struct upb_sizebuilder {
// Accumulating size for the current level.
uint32_t size;
// Stack of sizes for our current nesting.
uint32_t stack[UPB_MAX_NESTING], *top, *limit;
// Vector of sizes.
uint32_t *sizes;
int sizes_len;
int sizes_size;
upb_status status;
};
// upb_sink callbacks.
// Value callback: adds the tag size and the value size of field "f" to the
// size accumulated for the current nesting level.  Always continues.
static upb_sink_status _upb_sizebuilder_valuecb(upb_sink *sink, upb_fielddef *f,
                                                upb_value val)
{
  upb_sizebuilder *builder = (upb_sizebuilder*)sink;
  const uint32_t tag_bytes = _upb_get_tag_size(f->number);
  const uint32_t value_bytes = _upb_get_value_size(f->type, val);
  builder->size += tag_bytes + value_bytes;
  return UPB_SINK_CONTINUE;
}
// String callback: accounts for a length-delimited field in the size of the
// current nesting level.  A length-delimited field occupies its tag, the
// varint length prefix, and the payload bytes themselves; all three are
// counted here (the payload was previously omitted, which made every
// enclosing submessage size too small).
//
// Calls with a negative "start" add nothing.
// NOTE(review): presumably those are continuations of a string whose full
// length was already counted on the first call -- confirm against upb_sink.h.
static upb_sink_status _upb_sizebuilder_strcb(upb_sink *sink, upb_fielddef *f,
                                              upb_strptr str,
                                              int32_t start, uint32_t end)
{
  (void)str;  // String data itself is not used.
  upb_sizebuilder *sb = (upb_sizebuilder*)sink;
  if(start >= 0) {
    // Same value the original passed to upb_get_UINT32_size(): the length
    // that is written as the field's delimiter.
    uint32_t len = end - (uint32_t)start;
    uint32_t size = 0;
    size += _upb_get_tag_size(f->number);
    size += upb_get_UINT32_size(len);
    size += len;  // The string bytes themselves.
    sb->size += size;
  }
  return UPB_SINK_CONTINUE;
}
// Submessage-start callback: saves the size accumulated so far on the stack
// and starts the new level at zero.  Stops with
// UPB_ERROR_MAX_NESTING_EXCEEDED once the stack pointer reaches its limit.
static upb_sink_status _upb_sizebuilder_startcb(upb_sink *sink, upb_fielddef *f)
{
  upb_sizebuilder *builder = (upb_sizebuilder*)sink;
  (void)f;  // Unused (we calculate tag size and delimiter in endcb).

  // Push the parent's running size, then begin counting the child from zero.
  *builder->top++ = builder->size;
  builder->size = 0;

  if(builder->top != builder->limit) return UPB_SINK_CONTINUE;

  upb_seterr(&builder->status, UPB_ERROR_MAX_NESTING_EXCEEDED,
             "Nesting exceeded maximum (%d levels)\n",
             UPB_MAX_NESTING);
  return UPB_SINK_STOP;
}
// Submessage-end callback: records the finished submessage's size in the
// "sizes" vector, then folds that size (plus the submessage's own tag and
// length delimiter) back into the parent level's running size.
//
// Returns UPB_SINK_STOP with UPB_STATUS_ERROR set if the sizes vector cannot
// be grown; in that case the sizebuilder's state is left untouched.
static upb_sink_status _upb_sizebuilder_endcb(upb_sink *sink, upb_fielddef *f)
{
  upb_sizebuilder *sb = (upb_sizebuilder*)sink;
  if(sb->sizes_len == sb->sizes_size) {
    // Double the capacity, but start from a small non-zero capacity so that an
    // empty vector can grow at all.
    int new_size = sb->sizes_size > 0 ? sb->sizes_size * 2 : 16;
    // Keep the old pointer until realloc succeeds so nothing leaks on failure.
    uint32_t *new_sizes = realloc(sb->sizes, new_size * sizeof(*sb->sizes));
    if(!new_sizes) {
      upb_seterr(&sb->status, UPB_STATUS_ERROR,
                 "Out of memory growing submessage size vector\n");
      return UPB_SINK_STOP;
    }
    sb->sizes = new_sizes;
    sb->sizes_size = new_size;
  }
  sb->sizes[sb->sizes_len++] = sb->size;
  sb->top--;
  // The size according to the parent includes the tag size and delimiter of
  // the submessage.
  sb->size += upb_get_UINT32_size(sb->size);
  sb->size += _upb_get_tag_size(f->number);
  // Include size accumulated in parent before child began.
  sb->size += *sb->top;
  return UPB_SINK_CONTINUE;
}
// Callback table for the sizebuilder's sink.  The initializer is positional:
// it lists the value, string, submessage-start and submessage-end callbacks,
// in that order.
upb_sink_callbacks _upb_sizebuilder_sink_vtbl = {
_upb_sizebuilder_valuecb,
_upb_sizebuilder_strcb,
_upb_sizebuilder_startcb,
_upb_sizebuilder_endcb
};
/* upb_sink callbacks *********************************************************/
@ -283,34 +401,3 @@ upb_sink_callbacks _upb_encoder_sink_vtbl = {
_upb_encoder_endcb
};
/* Public Interface ***********************************************************/
// Returns the result of the upb_get_<TYPE>_size() helper matching the type of
// field "f", applied to the corresponding member of "v".  Field types without
// a case below trip the assert and yield 0.
size_t upb_get_encoded_size(upb_value v, upb_fielddef *f)
{
  size_t encoded_size = 0;
#define SIZE_OF(t, member_name) \
  case UPB_TYPE(t): \
    encoded_size = upb_get_ ## t ## _size(v.member_name); \
    break;
  switch(f->type) {
    SIZE_OF(DOUBLE, _double)
    SIZE_OF(FLOAT, _float)
    SIZE_OF(INT32, int32)
    SIZE_OF(INT64, int64)
    SIZE_OF(UINT32, uint32)
    SIZE_OF(UINT64, uint64)
    SIZE_OF(SINT32, int32)
    SIZE_OF(SINT64, int64)
    SIZE_OF(FIXED32, uint32)
    SIZE_OF(FIXED64, uint64)
    SIZE_OF(SFIXED32, int32)
    SIZE_OF(SFIXED64, int64)
    SIZE_OF(BOOL, _bool)
    SIZE_OF(ENUM, int32)
    default:
      assert(false);
      break;
  }
#undef SIZE_OF
  return encoded_size;
}
// Returns the varint size of "fieldnum" shifted into tag position.  The shift
// is done in 64 bits so the top bits of the field number are not lost.
size_t upb_get_encoded_tag_size(uint32_t fieldnum) {
  const uint64_t shifted_num = (uint64_t)fieldnum << 3;
  return upb_v_uint64_t_size(shifted_num);
}

@ -20,13 +20,48 @@
extern "C" {
#endif
/* upb_sizebuilder ************************************************************/
// A upb_sizebuilder performs a pre-pass on data to be serialized that gathers
// the sizes of submessages. This size data is required for serialization,
// because we have to know at the beginning of a submessage how many encoded
// bytes the submessage will represent.
// Opaque type; the definition is private to the implementation.
struct upb_sizebuilder;
typedef struct upb_sizebuilder upb_sizebuilder;
// Creation and destruction of a sizebuilder.  "(void)" makes this a true
// prototype in C, so calls that pass arguments are rejected at compile time.
upb_sizebuilder *upb_sizebuilder_new(void);
void upb_sizebuilder_free(upb_sizebuilder *sb);
// Returns a sink that must be used to perform the pre-pass. Note that the
// pre-pass *must* occur in the opposite order from the actual encode that
// follows, and the data *must* be identical both times (except for the
// reversed order).
upb_sink *upb_sizebuilder_sink(upb_sizebuilder *sb);
/* upb_encoder ****************************************************************/
// A upb_encoder is a upb_sink that emits data to a upb_bytesink in the protocol
// buffer binary wire format.
// Opaque type; the definition is private to the implementation.
struct upb_encoder;
typedef struct upb_encoder upb_encoder;
// Creation and destruction of an encoder.  "(void)" makes this a true
// prototype in C, so calls that pass arguments are rejected at compile time.
upb_encoder *upb_encoder_new(void);
void upb_encoder_free(upb_encoder *s);
void upb_encoder_reset(upb_encoder *s, uint32_t *sizes);
// Resets the given upb_encoder such that it is ready to begin encoding.  The
// upb_sizebuilder "sb" is used to determine submessage sizes; it must have
// previously been initialized by feeding it the same data in reverse order.
// "sb" may be null if and only if the data contains no submessages; groups
// are ok and do not require sizes to be precalculated. The upb_bytesink
// "out" is where the encoded output data will be sent.
//
// Both "sb" and "out" must live until the encoder is either reset or freed.
void upb_encoder_reset(upb_encoder *s, upb_sizebuilder *sb, upb_bytesink *out);
// The upb_sink to which data can be sent to be encoded. Note that this data
// must be identical to the data that was previously given to the sizebuilder
// (if any).
upb_sink *upb_encoder_sink(upb_encoder *s);
#ifdef __cplusplus

@ -98,9 +98,15 @@ typedef struct upb_sink_callbacks {
upb_end_cb end_cb;
} upb_sink_callbacks;
// We could potentially define these later to also be capable of calling a C++
// virtual method instead of doing the virtual dispatch manually. This would
// make it possible to write C++ sinks in a more natural style without loss of
// These macros implement a mini virtual function dispatch for upb_sink instances.
// This allows functions that call upb_sinks to just write:
//
// upb_sink_onvalue(sink, field, val);
//
// The macro will handle the virtual function lookup and dispatch. We could
// potentially define these later to also be capable of calling a C++ virtual
// method instead of doing the virtual dispatch manually. This would make it
// possible to write C++ sinks in a more natural style without loss of
// efficiency. We could have a flag in upb_sink defining whether it is a C
// sink or a C++ one.
// Calls the sink's value_cb through its vtbl.  Every argument is parenthesized
// so that any pointer expression (for example "sinks + i") may be passed as
// "s".  Note that "s" is evaluated twice, so it must not have side effects.
#define upb_sink_onvalue(s, f, val) ((s)->vtbl->value_cb((s), (f), (val)))
@ -123,10 +129,18 @@ INLINE void upb_sink_init(upb_sink *s, upb_sink_callbacks *vtbl) {
//
// The two simplest kinds of sinks are "write to string" and "write to FILE*".
// A forward declaration solely for the benefit of declaring upb_byte_cb below.
// Always prefer upb_bytesink (without the "struct" keyword) instead.
struct _upb_bytesink;
// The single bytesink callback; it takes the bytes to be written and returns
// how many were successfully written. If zero is returned, it indicates that
// no more bytes can be accepted right now.
//typedef size_t (*upb_byte_cb)(upb_bytesink *s, upb_strptr str);
typedef size_t (*upb_byte_cb)(struct _upb_bytesink *s, upb_strptr str);
// A byte sink: holds the callback used to hand bytes to the sink.
// NOTE(review): upb_byte_cb is already a function-pointer type, so "cb" is a
// pointer to a function pointer -- confirm the extra level of indirection is
// intended.
typedef struct _upb_bytesink {
upb_byte_cb *cb;
} upb_bytesink;
#ifdef __cplusplus
} /* extern "C" */

Loading…
Cancel
Save