More decoder work, first attempts at compiling it.

pull/13171/head
Joshua Haberman 15 years ago
parent ed991c3b30
commit 992a03be55
  1. 28
      src/upb.c
  2. 26
      src/upb.h
  3. 90
      src/upb_decoder.c
  4. 18
      src/upb_def.h
  5. 16
      src/upb_srcsink.h
  6. 4
      src/upb_string.h
  7. 2
      src/upb_table.c
  8. 5
      src/upb_table.h

@ -10,34 +10,6 @@
#include "upb.h"
// Yields the alignment requirement of type t, computed as the offset of a
// member of type t that follows a single char inside an anonymous struct.
// Relies on offsetof(), which is provided by <stddef.h>.
#define alignof(t) offsetof(struct { char c; t x; }, x)
// Expands to one designated-initializer entry of the upb_types table.
//   proto_type: suffix pasted onto GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_
//               to form the array index of the entry.
//   wire_type:  expression stored as the entry's third member (the mask of
//               allowed wire types).
//   ctype:      C type whose alignof()/sizeof() fill the first two members
//               and whose stringified spelling fills the fourth.
// The expansion ends with a comma so invocations can be listed back to back.
#define TYPE_INFO(proto_type, wire_type, ctype) \
[GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_ ## proto_type] = \
{alignof(ctype), sizeof(ctype), wire_type, #ctype},
// Table of per-type info, indexed by descriptor type number (the index is
// supplied by TYPE_INFO).  The second TYPE_INFO argument is a mask with one
// bit per acceptable wire type, each written as (1 << wire type).
//
// With packed fields, any type expecting 32-bit, 64-bit or varint can instead
// receive delimited, so those entries also set the DELIMITED bit.
//
// Every (1 << ...) term is individually parenthesized; an unbalanced
// parenthesis here would keep the TYPE_INFO invocation from ever closing.
upb_type_info upb_types[] = {
  TYPE_INFO(DOUBLE,   (1<<UPB_WIRE_TYPE_64BIT)|(1<<UPB_WIRE_TYPE_DELIMITED),  double)
  TYPE_INFO(FLOAT,    (1<<UPB_WIRE_TYPE_32BIT)|(1<<UPB_WIRE_TYPE_DELIMITED),  float)
  TYPE_INFO(INT64,    (1<<UPB_WIRE_TYPE_VARINT)|(1<<UPB_WIRE_TYPE_DELIMITED), int64_t)
  TYPE_INFO(UINT64,   (1<<UPB_WIRE_TYPE_VARINT)|(1<<UPB_WIRE_TYPE_DELIMITED), uint64_t)
  TYPE_INFO(INT32,    (1<<UPB_WIRE_TYPE_VARINT)|(1<<UPB_WIRE_TYPE_DELIMITED), int32_t)
  TYPE_INFO(FIXED64,  (1<<UPB_WIRE_TYPE_64BIT)|(1<<UPB_WIRE_TYPE_DELIMITED),  uint64_t)
  TYPE_INFO(FIXED32,  (1<<UPB_WIRE_TYPE_32BIT)|(1<<UPB_WIRE_TYPE_DELIMITED),  uint32_t)
  TYPE_INFO(BOOL,     (1<<UPB_WIRE_TYPE_VARINT)|(1<<UPB_WIRE_TYPE_DELIMITED), bool)
  TYPE_INFO(MESSAGE,  (1<<UPB_WIRE_TYPE_DELIMITED),                           void*)
  TYPE_INFO(GROUP,    (1<<UPB_WIRE_TYPE_START_GROUP),                         void*)
  TYPE_INFO(UINT32,   (1<<UPB_WIRE_TYPE_VARINT)|(1<<UPB_WIRE_TYPE_DELIMITED), uint32_t)
  TYPE_INFO(ENUM,     (1<<UPB_WIRE_TYPE_VARINT)|(1<<UPB_WIRE_TYPE_DELIMITED), uint32_t)
  TYPE_INFO(SFIXED32, (1<<UPB_WIRE_TYPE_32BIT)|(1<<UPB_WIRE_TYPE_DELIMITED),  int32_t)
  TYPE_INFO(SFIXED64, (1<<UPB_WIRE_TYPE_64BIT)|(1<<UPB_WIRE_TYPE_DELIMITED),  int64_t)
  TYPE_INFO(SINT32,   (1<<UPB_WIRE_TYPE_VARINT)|(1<<UPB_WIRE_TYPE_DELIMITED), int32_t)
  TYPE_INFO(SINT64,   (1<<UPB_WIRE_TYPE_VARINT)|(1<<UPB_WIRE_TYPE_DELIMITED), int64_t)
  TYPE_INFO(STRING,   (1<<UPB_WIRE_TYPE_DELIMITED),                           upb_strptr)
  TYPE_INFO(BYTES,    (1<<UPB_WIRE_TYPE_DELIMITED),                           upb_strptr)
};
void upb_seterr(upb_status *status, enum upb_status_code code,
const char *msg, ...)
{

@ -94,24 +94,6 @@ INLINE bool upb_isstringtype(upb_field_type_t type) {
return type == UPB_TYPE(STRING) || type == UPB_TYPE(BYTES);
}
// Info for a given field type.  Members are filled positionally by the
// TYPE_INFO initializer, so their order must not change.
typedef struct {
// Alignment of the C type used for this field type (alignof(ctype)).
uint8_t align;
// Size in bytes of the C type used for this field type (sizeof(ctype)).
uint8_t size;
// A bit-field indicating whether each wire type is allowed: bit (1 << wt) is
// set when wire type wt is acceptable for this field type.
uint8_t allowed_wire_types;
// Spelling of the C type as a string (the stringified TYPE_INFO argument).
char *ctype;
} upb_type_info;
// A static array of info about all of the field types, indexed by type number.
extern upb_type_info upb_types[];
// Returns true if wt is an acceptable on-the-wire type for ft, i.e. if bit
// (1 << wt) is set in the allowed_wire_types mask of upb_types[ft].
//
// The mask lives in a struct member; the table entry itself is a struct and
// cannot be used as an operand of '&'.
//
// NOTE(review): the previous comment here said packed arrays are not
// currently supported, while the upb_types table sets the DELIMITED bit for
// packable types -- confirm which behavior is intended.
INLINE bool upb_check_type(upb_wire_type_t wt, upb_field_type_t ft) {
  return (upb_types[ft].allowed_wire_types & (1 << wt)) != 0;
}
// The number of a field, eg. "optional string foo = 3".
typedef int32_t upb_field_number_t;
@ -127,14 +109,6 @@ typedef union {
uint32_t _32bit;
} upb_wire_value;
// A key occurs before each value on-the-wire.
typedef uint32_t upb_key;
// Builds the key that precedes a value on the wire: the field number is
// shifted above the low three bits, which carry the wire type.
INLINE upb_key upb_make_key(upb_fieldnum_t fieldnum, upb_wiretype_t wiretype) {
  upb_key key = fieldnum << 3;
  key |= wiretype;
  return key;
}
// Extracts the field number from a key by dropping its low three bits.
INLINE upb_fieldnum_t upb_key_fieldnum(upb_key key) {
  const upb_key fieldnum_bits = key >> 3;
  return fieldnum_bits;
}
// Extracts the wire type from a key: the value of its low three bits.
INLINE upb_wiretype_t upb_key_wiretype(upb_key key) {
  const upb_key low_bits = key & 0x07;
  return low_bits;
}
/* Polymorphic values of .proto types *****************************************/
// INTERNAL-ONLY: never refer to these types with a tag ("union", "struct").

@ -9,11 +9,10 @@
#include <inttypes.h>
#include <stddef.h>
#include <stdlib.h>
#include "upb_def.h"
/* Functions to read wire values. *********************************************/
const int8_t upb_get_v_uint64_full(const uint8_t *buf, uint64_t *val);
int8_t upb_get_v_uint64_full(const uint8_t *buf, uint64_t *val);
// Gets a varint (wire type: UPB_WIRE_TYPE_VARINT). Caller promises that >=10
// bytes are available at buf. Returns the number of bytes consumed, or 11 if
@ -22,13 +21,9 @@ INLINE uint8_t upb_get_v_uint64(const uint8_t *buf, uint64_t *val)
{
// We inline this common case (1-byte varints), if that fails we dispatch to
// the full (non-inlined) version.
int8_t ret = 1;
*val = *buf & 0x7f;
if(*buf & 0x80) {
// Varint is >1 byte.
ret += upb_get_v_uint64_full(buf + 1, val);
}
return ret;
if((*buf & 0x80) == 0) return 1;
return upb_get_v_uint64_full(buf + 1, val);
}
// Gets a varint -- called when we only need 32 bits of it. Note that a 32-bit
@ -36,7 +31,7 @@ INLINE uint8_t upb_get_v_uint64(const uint8_t *buf, uint64_t *val)
INLINE uint8_t upb_get_v_uint32(const uint8_t *buf, uint32_t *val)
{
uint64_t val64;
int8_t ret = upb_get_v_uint64(buf, end, &val64, status);
int8_t ret = upb_get_v_uint64(buf, &val64);
*val = (uint32_t)val64; // Discard the high bits.
return ret;
}
@ -56,7 +51,7 @@ INLINE void upb_get_f_uint32(const uint8_t *buf, uint32_t *val)
// Gets a fixed-length 64-bit integer (wire type: UPB_WIRE_TYPE_64BIT). Caller
// promises that 8 bytes are available at buf.
INLINE void upb_get_f_uint64(const uint8_t *buf uint64_t *val)
INLINE void upb_get_f_uint64(const uint8_t *buf, uint64_t *val)
{
#if UPB_UNALIGNED_READS_OK
*val = *(uint64_t*)buf;
@ -71,32 +66,27 @@ INLINE void upb_get_f_uint64(const uint8_t *buf uint64_t *val)
// Skips a varint (wire type: UPB_WIRE_TYPE_VARINT). Caller promises that 10
// bytes are available at "buf". Returns the number of bytes that were
// skipped.
INLINE const uint8_t *upb_skip_v_uint64(const uint8_t *buf)
INLINE const uint8_t upb_skip_v_uint64(const uint8_t *buf)
{
const uint8_t *const maxend = buf + 10;
uint8_t last = 0x80;
for(; buf < (uint8_t*)end && (last & 0x80); buf++)
for(; buf < maxend && (last & 0x80); buf++)
last = *buf;
if(buf > maxend) return -1;
return buf;
return
}
// Parses a 64-bit varint that is known to be >= 2 bytes (the inline version
// handles 1 and 2 byte varints).
const uint8_t upb_get_v_uint64_full(const uint8_t *buf uint64_t *val)
// Parses remaining bytes of a 64-bit varint that has already had its first byte
// parsed.
const uint8_t upb_get_v_uint64_full(const uint8_t *buf, uint64_t *val)
{
const uint8_t *const maxend = buf + 9;
uint8_t last = 0x80;
int bitpos;
uint8_t bytes = 0;
for(bitpos = 0; buf < (uint8_t*)maxend && (last & 0x80); buf++, bitpos += 7)
*val |= ((uint64_t)((last = *buf) & 0x7F)) << bitpos;
// bitpos starts at 7 because our caller already read one byte.
for(int bitpos = 7; bytes < 10 && (*buf & 0x80); buf++, bitpos += 7)
*val |= (uint64_t)(*buf & 0x7F) << bitpos;
if(buf >= maxend) {
return -11;
}
return buf;
return bytes;
}
// Performs zig-zag decoding, which is used by sint32 and sint64.
@ -136,6 +126,12 @@ struct upb_decoder {
// The overall stream offset of the end of "buf". If "buf" is NULL, it is as
// if "buf" was the empty string.
uint32_t buf_endoffset;
// Fielddef for the key we just read.
upb_fielddef *field;
// Wire type of the key we just read.
upb_wire_type_t wire_type;
};
@ -187,7 +183,7 @@ static void upb_decoder_advancebuf(upb_decoder *d)
}
}
static void upb_decoder_pullnextbuf(upb_decoder *d)
static bool upb_decoder_pullnextbuf(upb_decoder *d)
{
if(!d->nextbuf) {
d->nextbuf = upb_bytesrc_get(d->bytesrc);
@ -200,21 +196,28 @@ static void upb_decoder_pullnextbuf(upb_decoder *d)
return true;
}
static void upb_decoder_skipbytes(upb_decoder *d, int32_t bytes)
static bool upb_decoder_skipbytes(upb_decoder *d, int32_t bytes)
{
d->buf_bytesleft -= bytes;
while(d->buf_bytesleft <= 0 && !upb_bytesrc_eof(d->bytesrc)) {
upb_decoder_pullnextbuf(d);
if(!upb_decoder_pullnextbuf(d)) return false;
upb_decoder_advancebuf(d);
}
return true;
}
static void upb_decoder_skipgroup(upb_decoder *d)
static bool upb_decoder_skipgroup(upb_decoder *d)
{
// This will be mutually recursive if the group has sub-groups. If we
// wanted to handle EAGAIN in the future, this approach would not work;
// we would need to track the group depth explicitly.
while(upb_decoder_getdef(d)) upb_decoder_skipval(d);
// This will be mutually recursive with upb_decoder_skipval() if the group
// has sub-groups. If we wanted to handle EAGAIN in the future, this
// approach would not work; we would need to track the group depth
// explicitly.
while(upb_decoder_getdef(d)) {
if(!upb_decoder_skipval(d)) return false;
}
// If we are at the end of the group like we want to be, then
// upb_decoder_getdef() returned NULL because of eof, not error.
return upb_ok(&d->status);
}
static const uint8_t *upb_decoder_getbuf_full(upb_decoder *d, int32_t *bytes)
@ -266,14 +269,16 @@ upb_fielddef *upb_decoder_getdef(upb_decoder *d)
again:
uint32_t key;
upb_wire_type_t wire_type;
if(!upb_decoder_get_v_uint32(d, &key)) {
return NULL;
wire_type = key & 0x7;
if(d->key.wire_type == UPB_WIRE_TYPE_DELIMITED) {
if(wire_type == UPB_WIRE_TYPE_DELIMITED) {
// For delimited wire values we parse the length now, since we need it in
// all cases.
if(!upb_decoder_get_v_uint32(d, &d->delim_len)) return NULL;
} else if(upb_wiretype_from_key(key) == UPB_WIRE_TYPE_END_GROUP) {
} else if(wire_type == UPB_WIRE_TYPE_END_GROUP) {
if(isgroup(d->top->submsg_end)) {
d->eof = true;
} else {
@ -285,14 +290,14 @@ again:
}
// Look up field by tag number.
upb_fielddef *f = upb_msg_itof(d->top->msgdef, upb_fieldnum_from_key(key));
upb_fielddef *f = upb_msg_itof(d->top->msgdef, key >> 3);
if (!f) {
// Unknown field. If/when the upb_src interface supports reporting
// unknown fields we will implement that here.
upb_decoder_skipval(d);
goto again;
} else if (!upb_check_type(upb_wiretype_from_key(key), f->type)) {
} else if (!upb_check_type(wire_type, f->type)) {
// This is a recoverable error condition. We skip the value but also
// return NULL and report the error.
upb_decoder_skipval(d);
@ -301,6 +306,7 @@ again:
return NULL;
}
d->field = f;
d->wire_type = wire_type;
return f;
}
@ -379,14 +385,14 @@ bool upb_decoder_skipval(upb_decoder *d) {
case UPB_WIRE_TYPE_VARINT:
return upb_skip_v_uint64(buf, end, status);
case UPB_WIRE_TYPE_64BIT:
return upb_skip_bytes(8);
return upb_decoder_skipbytes(8);
case UPB_WIRE_TYPE_32BIT:
return upb_skip_bytes(4);
return upb_decoder_skipbytes(4);
case UPB_WIRE_TYPE_START_GROUP:
return upb_skip_groups(1);
return upb_decoder_skipgroup();
case UPB_WIRE_TYPE_DELIMITED:
// Works for both string/bytes *and* submessages.
return upb_skip_bytes(d->delimited_len);
return upb_decoder_skipbytes(d->delimited_len);
default:
// Including UPB_WIRE_TYPE_END_GROUP.
assert(false);

@ -53,7 +53,7 @@ enum upb_def_type {
typedef int8_t upb_def_type_t;
typedef struct {
upb_strptr fqname; // Fully qualified.
upb_string *fqname; // Fully qualified.
upb_atomic_refcount_t refcount;
upb_def_type_t type;
@ -90,7 +90,7 @@ typedef struct _upb_fielddef {
upb_field_type_t type;
upb_label_t label;
upb_field_number_t number;
upb_strptr name;
upb_string *name;
upb_value default_value;
// These are set only when this fielddef is part of a msgdef.
@ -163,7 +163,7 @@ INLINE upb_fielddef *upb_msg_itof(upb_msgdef *m, uint32_t num) {
return e ? e->f : NULL;
}
INLINE upb_fielddef *upb_msg_ntof(upb_msgdef *m, upb_strptr name) {
INLINE upb_fielddef *upb_msg_ntof(upb_msgdef *m, upb_string *name) {
upb_ntof_ent *e = (upb_ntof_ent*)upb_strtable_lookup(&m->ntof, name);
return e ? e->f : NULL;
}
@ -179,8 +179,8 @@ typedef struct _upb_enumdef {
typedef int32_t upb_enumval_t;
// Lookups from name to integer and vice-versa.
bool upb_enumdef_ntoi(upb_enumdef *e, upb_strptr name, upb_enumval_t *num);
upb_strptr upb_enumdef_iton(upb_enumdef *e, upb_enumval_t num);
bool upb_enumdef_ntoi(upb_enumdef *e, upb_string *name, upb_enumval_t *num);
upb_string *upb_enumdef_iton(upb_enumdef *e, upb_enumval_t num);
// Iteration over name/value pairs. The order is undefined.
// upb_enum_iter i;
@ -190,7 +190,7 @@ upb_strptr upb_enumdef_iton(upb_enumdef *e, upb_enumval_t num);
typedef struct {
upb_enumdef *e;
void *state; // Internal iteration state.
upb_strptr name;
upb_string *name;
upb_enumval_t val;
} upb_enum_iter;
void upb_enum_begin(upb_enum_iter *iter, upb_enumdef *e);
@ -232,11 +232,11 @@ INLINE void upb_symtab_unref(upb_symtab *s) {
//
// If a def is found, the caller owns one ref on the returned def. Otherwise
// returns NULL.
upb_def *upb_symtab_resolve(upb_symtab *s, upb_strptr base, upb_strptr symbol);
upb_def *upb_symtab_resolve(upb_symtab *s, upb_string *base, upb_string *sym);
// Find an entry in the symbol table with this exact name. If a def is found,
// the caller owns one ref on the returned def. Otherwise returns NULL.
upb_def *upb_symtab_lookup(upb_symtab *s, upb_strptr sym);
upb_def *upb_symtab_lookup(upb_symtab *s, upb_string *sym);
// Gets an array of pointers to all currently active defs in this symtab. The
// caller owns the returned array (which is of length *count) as well as a ref
@ -249,7 +249,7 @@ upb_def **upb_symtab_getdefs(upb_symtab *s, int *count, upb_def_type_t type);
// defined in desc). desc may not attempt to define any names that are already
// defined in this symtab. Caller retains ownership of desc. status indicates
// whether the operation was successful or not, and the error message (if any).
void upb_symtab_add_desc(upb_symtab *s, upb_strptr desc, upb_status *status);
void upb_symtab_add_desc(upb_symtab *s, upb_string *desc, upb_status *status);
/* upb_def casts **************************************************************/

@ -28,6 +28,9 @@ extern "C" {
// TODO: decide how to handle unknown fields.
struct upb_src;
typedef struct upb_src upb_src;
// Retrieves the fielddef for the next field in the stream. Returns NULL on
// error or end-of-stream.
upb_fielddef *upb_src_getdef(upb_src *src);
@ -53,6 +56,9 @@ upb_status *upb_src_status(upb_src *src);
/* upb_sink *******************************************************************/
struct upb_sink;
typedef struct upb_sink upb_sink;
// Puts the given fielddef into the stream.
bool upb_sink_putdef(upb_sink *sink, upb_fielddef *def);
@ -70,6 +76,9 @@ upb_status *upb_sink_status(upb_sink *sink);
/* upb_bytesrc ****************************************************************/
struct upb_bytesrc;
typedef struct upb_bytesrc upb_bytesrc;
// Returns the next string in the stream. NULL is returned on error or eof.
// The string must be at least "minlen" bytes long unless the stream is eof.
//
@ -89,6 +98,9 @@ upb_status *upb_bytesrc_status(upb_src *src);
/* upb_bytesink ***************************************************************/
struct upb_bytesink;
typedef struct upb_bytesink upb_bytesink;
// Puts the given string. Returns the number of bytes that were actually
// consumed, which may be fewer than were in the string, or <0 on error.
int32_t upb_bytesink_put(upb_bytesink *sink, upb_string *str);
@ -137,14 +149,14 @@ typedef struct {
// "Base Class" definitions; components that implement these interfaces should
// contain one of these structures.
typedef struct {
struct upb_src {
upb_src_vtable *vtbl;
upb_status status;
bool eof;
#ifndef NDEBUG
int state; // For debug-mode checking of API usage.
#endif
} upb_src;
};
INLINE void upb_sink_init(upb_src *s, upb_src_vtable *vtbl) {
s->vtbl = vtbl;

@ -44,7 +44,7 @@ typedef struct _upb_string {
// Used if this is a slice of another string.
struct _upb_string *src;
// Used if this string is referencing external unowned memory.
upb_stomic_refcount_t reader_count;
upb_atomic_refcount_t reader_count;
} extra;
} upb_string;
@ -126,7 +126,7 @@ INLINE void upb_strcpylen(upb_string *dest, const void *src, upb_strlen_t len) {
// Replaces the contents of "dest" with the contents of "src".
INLINE void upb_strcpy(upb_string *dest, upb_string *src) {
upb_strcpylen(dest, upb_string_getrobuf(src), upb_strlen(src));
upb_strcpylen(dest, upb_string_getrobuf(src), upb_string_len(src));
upb_string_endread(src);
}

@ -5,7 +5,7 @@
*/
#include "upb_table.h"
#include "upb_data.h"
#include "upb_string.h"
#include <assert.h>
#include <stdlib.h>

@ -17,6 +17,7 @@
#include <assert.h>
#include "upb.h"
#include "upb_string.h"
#ifdef __cplusplus
extern "C" {
@ -38,7 +39,7 @@ typedef struct {
// performance by letting us compare hashes before comparing lengths or the
// strings themselves.
typedef struct {
upb_strptr key; // We own a frozen ref.
upb_string *key; // We own a ref.
uint32_t next; // Internal chaining.
} upb_strtable_entry;
@ -114,7 +115,7 @@ INLINE void *upb_inttable_lookup(upb_inttable *t, uint32_t key) {
return upb_inttable_fastlookup(t, key, t->t.entry_size);
}
void *upb_strtable_lookup(upb_strtable *t, upb_strptr key);
void *upb_strtable_lookup(upb_strtable *t, upb_string *key);
/* Provides iteration over the table. The order in which the entries are
* returned is undefined. Insertions invalidate iterators. The _next

Loading…
Cancel
Save