Some cleanup and reformatting, fixed the benchmarks.

pull/13171/head
Joshua Haberman 16 years ago
parent b276aa78b6
commit f17ed90f77
  1. 8
      Makefile
  2. 51
      benchmarks/parsetostruct.upb_table.c
  3. 37
      src/upb.c
  4. 131
      src/upb.h
  5. 39
      src/upb_array.h
  6. 1
      src/upb_atomic.h
  7. 4
      src/upb_mm.c
  8. 45
      src/upb_msg.c
  9. 180
      src/upb_parse.c
  10. 5
      src/upb_parse.h
  11. 6
      src/upb_struct.h
  12. 2
      src/upb_text.c

@ -34,7 +34,7 @@ LDLIBS=-lpthread
LIBUPB=src/libupb.a
LIBUPB_PIC=src/libupb_pic.a
LIBUPB_SHARED=src/libupb.so
ALL=deps $(OBJ) $(LIBUPB) $(LIBUPB_PIC) $(LIBUPB_SHARED) tests/test_table tests/tests tools/upbc
ALL=deps $(OBJ) $(LIBUPB) $(LIBUPB_PIC) tests/test_table tests/tests tools/upbc
all: $(ALL)
clean:
rm -rf $(call rwildcard,,*.o) $(call rwildcard,,*.lo) $(ALL) benchmark/google_messages.proto.pb benchmark/google_messages.pb.* benchmarks/b.* benchmarks/*.pb*
@ -42,8 +42,9 @@ clean:
cd lang_ext/python && python setup.py clean --all
# The core library (src/libupb.a)
SRC=src/upb_parse.c src/upb_table.c src/upb_msg.c src/upb_mm.c src/upb_enum.c src/upb_context.c \
src/upb_string.c src/upb_text.c src/upb_serialize.c descriptor/descriptor.c
SRC=src/upb.c src/upb_parse.c src/upb_table.c src/upb_msg.c src/upb_mm.c \
src/upb_enum.c src/upb_context.c src/upb_string.c src/upb_text.c \
src/upb_serialize.c descriptor/descriptor.c
STATICOBJ=$(patsubst %.c,%.o,$(SRC))
SHAREDOBJ=$(patsubst %.c,%.lo,$(SRC))
# building shared objects is like building static ones, except -fPIC is added.
@ -91,6 +92,7 @@ upb_benchmarks: $(UPB_BENCHMARKS)
benchmarks: $(BENCHMARKS)
benchmark:
@rm -f benchmarks/results
@rm -rf benchmarks/*.dSYM
@for test in benchmarks/b.* ; do ./$$test ; done
benchmarks/google_messages.proto.pb: benchmarks/google_messages.proto

@ -3,45 +3,47 @@
#include "upb_context.h"
#include "upb_msg.h"
#include "upb_mm.h"
static struct upb_context c;
static struct upb_string str;
static struct upb_msg_parse_state s;
static struct upb_msg *m;
static void *data[NUM_MESSAGES];
static struct upb_context *c;
static struct upb_string *str;
static struct upb_msgdef *def;
static struct upb_msg *msgs[NUM_MESSAGES];
static bool initialize()
{
/* Initialize upb state, parse descriptor. */
upb_context_init(&c);
struct upb_string fds;
if(!upb_strreadfile(MESSAGE_DESCRIPTOR_FILE, &fds)) {
c = upb_context_new();
struct upb_string *fds = upb_strreadfile(MESSAGE_DESCRIPTOR_FILE);
if(!fds) {
fprintf(stderr, "Couldn't read " MESSAGE_DESCRIPTOR_FILE ".\n");
return false;
}
if(!upb_context_parsefds(&c, &fds)) {
if(!upb_context_parsefds(c, fds)) {
fprintf(stderr, "Error importing " MESSAGE_DESCRIPTOR_FILE ".\n");
return false;
}
upb_strfree(fds);
upb_string_unref(fds);
char class_name[] = MESSAGE_NAME;
struct upb_string proto_name;
proto_name.ptr = class_name;
proto_name.byte_len = sizeof(class_name)-1;
struct upb_symtab_entry *e = upb_context_lookup(&c, &proto_name);
if(!e || e->type != UPB_SYM_MESSAGE) {
struct upb_symtab_entry e;
upb_status_t success = upb_context_lookup(c, &proto_name, &e);
if(!success || e.type != UPB_SYM_MESSAGE) {
fprintf(stderr, "Error finding symbol '" UPB_STRFMT "'.\n",
UPB_STRARG(proto_name));
UPB_STRARG(&proto_name));
return false;
}
m = e->ref.msg;
def = e.ref.msg;
for(int i = 0; i < 32; i++)
data[i] = upb_msgdata_new(m);
msgs[i] = upb_msg_new(def);
/* Read the message data itself. */
if(!upb_strreadfile(MESSAGE_FILE, &str)) {
str = upb_strreadfile(MESSAGE_FILE);
if(!str) {
fprintf(stderr, "Error reading " MESSAGE_FILE "\n");
return false;
}
@ -51,19 +53,18 @@ static bool initialize()
static void cleanup()
{
for(int i = 0; i < 32; i++)
upb_msgdata_free(data[i], m, true);
upb_strfree(str);
upb_context_free(&c);
upb_msg_unref(msgs[i]);
upb_string_unref(str);
upb_context_unref(c);
}
static size_t run(int i)
{
size_t read;
upb_msg_parse_reset(&s, data[i%NUM_MESSAGES], m, false, BYREF);
upb_status_t status = upb_msg_parse(&s, str.ptr, str.byte_len, &read);
if(status != UPB_STATUS_OK && read != str.byte_len) {
fprintf(stderr, "Error. :( error=%d, read=%zu\n", status, read);
upb_status_t status;
status = upb_msg_parsestr(msgs[i%NUM_MESSAGES], str->ptr, str->byte_len);
if(status != UPB_STATUS_OK) {
fprintf(stderr, "Error. :( error=%d\n", status);
return 0;
}
return read;
return str->byte_len;
}

@ -0,0 +1,37 @@
/*
* upb - a minimalist implementation of protocol buffers.
*
* Copyright (c) 2009 Joshua Haberman. See LICENSE for details.
*
*/
#include <stddef.h>
#include "upb.h"
// Alignment query built from offsetof: the offset of a member of type t that
// directly follows a single char is the alignment the compiler gives t.
#define alignof(t) offsetof(struct { char c; t x; }, x)
// Emits one designated-initializer entry of upb_type_info[], indexed by the
// descriptor.proto type constant for proto_type.  The entry holds, in
// initializer order: the alignment of ctype, the size of ctype, the wire type,
// and the spelling of ctype as a string (via #ctype, so the exact tokens
// passed here are what ends up in the string).
#define TYPE_INFO(proto_type, wire_type, ctype) \
[GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_ ## proto_type] = \
{alignof(ctype), sizeof(ctype), wire_type, #ctype},
// Per-type information for every .proto field type, indexed by type number.
// Because designated initializers are used, the order of the rows below does
// not matter, and any index without a row (including 0) is zero-initialized.
// MESSAGE and GROUP are described as void* and STRING and BYTES as
// struct upb_string*, i.e. those fields are stored as pointers.
struct upb_type_info upb_type_info[] = {
TYPE_INFO(DOUBLE, UPB_WIRE_TYPE_64BIT, double)
TYPE_INFO(FLOAT, UPB_WIRE_TYPE_32BIT, float)
TYPE_INFO(INT64, UPB_WIRE_TYPE_VARINT, int64_t)
TYPE_INFO(UINT64, UPB_WIRE_TYPE_VARINT, uint64_t)
TYPE_INFO(INT32, UPB_WIRE_TYPE_VARINT, int32_t)
TYPE_INFO(FIXED64, UPB_WIRE_TYPE_64BIT, uint64_t)
TYPE_INFO(FIXED32, UPB_WIRE_TYPE_32BIT, uint32_t)
TYPE_INFO(BOOL, UPB_WIRE_TYPE_VARINT, bool)
TYPE_INFO(MESSAGE, UPB_WIRE_TYPE_DELIMITED, void*)
TYPE_INFO(GROUP, UPB_WIRE_TYPE_START_GROUP, void*)
TYPE_INFO(UINT32, UPB_WIRE_TYPE_VARINT, uint32_t)
// NOTE(review): ENUM is described as uint32_t here, while upb_parse_value()
// reads ENUM through the int32 member -- same size and alignment, but the
// recorded C type name differs in signedness; confirm which is intended.
TYPE_INFO(ENUM, UPB_WIRE_TYPE_VARINT, uint32_t)
TYPE_INFO(SFIXED32, UPB_WIRE_TYPE_32BIT, int32_t)
TYPE_INFO(SFIXED64, UPB_WIRE_TYPE_64BIT, int64_t)
TYPE_INFO(SINT32, UPB_WIRE_TYPE_VARINT, int32_t)
TYPE_INFO(SINT64, UPB_WIRE_TYPE_VARINT, int64_t)
TYPE_INFO(STRING, UPB_WIRE_TYPE_DELIMITED, struct upb_string*)
TYPE_INFO(BYTES, UPB_WIRE_TYPE_DELIMITED, struct upb_string*)
};

@ -1,6 +1,6 @@
/*
* upb - a minimalist implementation of protocol buffers.
*
* Copyright (c) 2009 Joshua Haberman. See LICENSE for details.
*
* This file contains shared definitions that are widely used across upb.
@ -11,14 +11,14 @@
#include <stdbool.h>
#include <stdint.h>
#include <stdio.h> /* for size_t. */
#include <stdio.h> // only for size_t.
#include "descriptor_const.h"
#ifdef __cplusplus
extern "C" {
#endif
/* inline if possible, emit standalone code if required. */
// inline if possible, emit standalone code if required.
#ifndef INLINE
#define INLINE static inline
#endif
@ -26,21 +26,22 @@ extern "C" {
#define UPB_MAX(x, y) ((x) > (y) ? (x) : (y))
#define UPB_MIN(x, y) ((x) < (y) ? (x) : (y))
/* The maximum that any submessages can be nested. Matches proto2's limit. */
// The maximum that any submessages can be nested. Matches proto2's limit.
#define UPB_MAX_NESTING 64
/* The maximum number of fields that any one .proto type can have. */
// The maximum number of fields that any one .proto type can have.
#define UPB_MAX_FIELDS (1<<16)
/* Nested type names are separated by periods. */
// Nested type names are separated by periods.
#define UPB_SYMBOL_SEPARATOR '.'
#define UPB_SYMBOL_MAX_LENGTH 256
#define UPB_INDEX(base, i, m) (void*)((char*)(base) + ((i)*(m)))
/* Fundamental types and type constants. **************************************/
/* A list of types as they are encoded on-the-wire. */
// A list of types as they are encoded on-the-wire.
enum upb_wire_type {
UPB_WIRE_TYPE_VARINT = 0,
UPB_WIRE_TYPE_64BIT = 1,
@ -49,26 +50,27 @@ enum upb_wire_type {
UPB_WIRE_TYPE_END_GROUP = 4,
UPB_WIRE_TYPE_32BIT = 5
};
typedef uint8_t upb_wire_type_t;
/* Value type as defined in a .proto file. eg. string, int32, etc.
*
* The values of this are defined by google_protobuf_FieldDescriptorProto_Type
* (from descriptor.proto). Note that descriptor.proto reserves "0" for
* errors, and we use it to represent exceptional circumstances. */
// Value type as defined in a .proto file. eg. string, int32, etc. The
// integers that represent this are defined by descriptor.proto. Note that
// descriptor.proto reserves "0" for errors, and we use it to represent
// exceptional circumstances.
typedef uint8_t upb_field_type_t;
// For referencing the type constants tersely.
#define UPB_TYPENUM(type) GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_ ## type
INLINE bool upb_issubmsgtype(upb_field_type_t type) {
return type == GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_GROUP ||
type == GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_MESSAGE;
return type == UPB_TYPENUM(GROUP) || type == UPB_TYPENUM(MESSAGE);
}
INLINE bool upb_isstringtype(upb_field_type_t type) {
return type == GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_STRING ||
type == GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_BYTES;
return type == UPB_TYPENUM(STRING) || type == UPB_TYPENUM(BYTES);
}
/* Information about a given value type (upb_field_type_t). */
// Info for a given field type.
struct upb_type_info {
uint8_t align;
uint8_t size;
@ -76,87 +78,88 @@ struct upb_type_info {
char *ctype;
};
/* Contains information for all .proto types. Indexed by upb_field_type_t. */
// A static array of info about all of the field types, indexed by type number.
extern struct upb_type_info upb_type_info[];
/* The number of a field, eg. "optional string foo = 3". */
// The number of a field, eg. "optional string foo = 3".
typedef int32_t upb_field_number_t;
/* Label (optional, repeated, required) as defined in a .proto file. The values
* of this are defined by google.protobuf.FieldDescriptorProto.Label (from
* descriptor.proto). */
// Label (optional, repeated, required) as defined in a .proto file. The
// values of this are defined by google.protobuf.FieldDescriptorProto.Label
// (from descriptor.proto).
typedef uint8_t upb_label_t;
/* A value as it is encoded on-the-wire, except delimited, which is handled
* separately. */
// A scalar (non-string) wire value. Used only for parsing unknown fields.
union upb_wire_value {
uint64_t varint;  // Filled by upb_parse_wire_value() for UPB_WIRE_TYPE_VARINT.
uint64_t _64bit;  // Filled by upb_parse_wire_value() for UPB_WIRE_TYPE_64BIT.
uint32_t _32bit;  // Filled by upb_parse_wire_value() for UPB_WIRE_TYPE_32BIT.
};
/* A tag occurs before each value on-the-wire. */
// A tag occurs before each value on-the-wire.
struct upb_tag {
upb_field_number_t field_number;  // Field number, e.g. the 3 in "optional string foo = 3".
upb_wire_type_t wire_type;  // Compared against the UPB_WIRE_TYPE_* constants by the parser.
};
/* Polymorphic values of .proto types *****************************************/
struct upb_string;
struct upb_array;
struct upb_msg;
/* A single .proto value. The owner must have an out-of-band way of knowing
* the type, so that it knows which union member to use. */
// A single .proto value. The owner must have an out-of-band way of knowing
// the type, so that it knows which union member to use.
union upb_value {
double _double;
float _float;
int32_t int32;
int64_t int64;
double _double;
float _float;
int32_t int32;
int64_t int64;
uint32_t uint32;
uint64_t uint64;
bool _bool;
bool _bool;
struct upb_string *str;
struct upb_array *arr;
struct upb_msg *msg;
};
/* A pointer to a .proto value. The owner must have an out-of-band way of
* knowing the type, so it knows which union member to use. */
// A pointer to a .proto value. The owner must have an out-of-band way of
// knowing the type, so it knows which union member to use.
union upb_value_ptr {
double *_double;
float *_float;
int32_t *int32;
int64_t *int64;
double *_double;
float *_float;
int32_t *int32;
int64_t *int64;
uint32_t *uint32;
uint64_t *uint64;
bool *_bool;
bool *_bool;
struct upb_string **str;
struct upb_array **arr;
struct upb_msg **msg;
void *_void;
void *_void;
};
/* Unfortunately there is no way to define this so that it can be used as a
* generic expression, a la:
* foo(UPB_VALUE_ADDROF(bar));
* ...you have to use it as the initializer of a upb_value_ptr:
* union upb_value_ptr p = UPB_VALUE_ADDROF(bar);
* foo(p);
*/
// Unfortunately there is no way to define this so that it can be used as a
// generic expression, a la:
// foo(UPB_VALUE_ADDROF(bar));
// ...you have to use it as the initializer of a upb_value_ptr:
// union upb_value_ptr p = UPB_VALUE_ADDROF(bar);
// foo(p);
#define UPB_VALUE_ADDROF(val) {(void*)&val._double}
/* Converts upb_value_ptr -> upb_value by "dereferencing" the pointer. We need
* to know the field type to perform this operation, because we need to know
* how much memory to copy. */
/**
* Converts upb_value_ptr -> upb_value by reading from the pointer. We need to
* know the field type to perform this operation, because we need to know how
* much memory to copy.
*/
INLINE union upb_value upb_value_read(union upb_value_ptr ptr,
upb_field_type_t ft) {
union upb_value val;
#define CASE(t, member_name) \
case GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_ ## t: \
val.member_name = *ptr.member_name; \
break;
case UPB_TYPENUM(t): val.member_name = *ptr.member_name; break;
switch(ft) {
CASE(DOUBLE, _double)
CASE(FLOAT, _float)
@ -178,19 +181,21 @@ INLINE union upb_value upb_value_read(union upb_value_ptr ptr,
CASE(GROUP, msg)
default: break;
}
#undef CASE
return val;
#undef CASE
}
/* Converts upb_value_ptr -> upb_value by "dereferencing" the pointer. We need
* to know the field type to perform this operation, because we need to know
* how much memory to copy. */
/**
* Writes a upb_value to a upb_value_ptr location. We need to know the field
* type to perform this operation, because we need to know how much memory to
* copy.
*/
INLINE void upb_value_write(union upb_value_ptr ptr, union upb_value val,
upb_field_type_t ft) {
#define CASE(t, member_name) \
case GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_ ## t: \
*ptr.member_name = val.member_name; \
break;
case UPB_TYPENUM(t): *ptr.member_name = val.member_name; break;
switch(ft) {
CASE(DOUBLE, _double)
CASE(FLOAT, _float)
@ -212,17 +217,19 @@ INLINE void upb_value_write(union upb_value_ptr ptr, union upb_value val,
CASE(GROUP, msg)
default: break;
}
#undef CASE
}
// All the different definitions that can occur in .proto files.
// Which member is meaningful must be tracked by the owner; the benchmark, for
// instance, reads .msg only after checking that the symbol table entry's type
// is UPB_SYM_MESSAGE.
union upb_symbol_ref {
struct upb_msgdef *msg;  // Message definition.
struct upb_enum *_enum;  // Presumably an enum definition -- TODO confirm.
struct upb_svc *svc;  // Presumably a service definition -- TODO confirm.
};
/* Status codes used as a return value. Codes >0 are not fatal and can be
* resumed. */
// Status codes used as a return value. Codes >0 are not fatal and can be
// resumed.
typedef enum upb_status {
UPB_STATUS_OK = 0,

@ -32,11 +32,12 @@ extern "C" {
struct upb_string;
/* Returns a pointer to an array element. Does not perform a bounds check! */
INLINE union upb_value_ptr upb_array_getelementptr(
struct upb_array *arr, upb_arraylen_t n, upb_field_type_t type)
INLINE union upb_value_ptr upb_array_getelementptr(struct upb_array *arr,
upb_arraylen_t n)
{
union upb_value_ptr ptr;
ptr._void = (void*)((char*)arr->elements._void + n*upb_type_info[type].size);
ptr._void = UPB_INDEX(arr->elements._void, n,
upb_type_info[arr->fielddef->type].size);
return ptr;
}
@ -66,26 +67,22 @@ INLINE uint32_t upb_round_up_to_pow2(uint32_t v)
return v;
}
/* Resizes array to be "len" elements long (reallocating if necessary). */
INLINE bool upb_array_resize(struct upb_array *arr, upb_arraylen_t newlen)
INLINE union upb_value_ptr upb_array_append(struct upb_array *arr)
{
size_t type_size = upb_type_info[arr->fielddef->type].size;
bool dropped = false;
bool ref = arr->size == 0; /* Ref'ing external memory. */
void *data = arr->elements._void;
if(arr->size < newlen) {
/* Need to resize. */
arr->size = UPB_MAX(4, upb_round_up_to_pow2(newlen));
arr->elements._void = realloc(ref ? NULL : data, arr->size * type_size);
size_t size = upb_type_info[arr->fielddef->type].size;
upb_arraylen_t oldlen = arr->len;
if(oldlen == arr->size) {
arr->size = UPB_MAX(4, upb_round_up_to_pow2(oldlen+1));
arr->elements._void = realloc(arr->elements._void, arr->size * size);
memset((char*)arr->elements._void + (arr->len*size), 0, (arr->size - arr->len) * size);
}
if(ref) {
/* Need to take referenced data and copy it to memory we own. */
memcpy(arr->elements._void, data, UPB_MIN(arr->len, newlen) * type_size);
dropped = true;
}
/* TODO: fill with defaults. */
arr->len = newlen;
return dropped;
arr->len++;
return upb_array_getelementptr(arr, oldlen);
}
// Empties the array by resetting its element count to zero.  Only "len" is
// touched: "size" and the "elements" buffer are left exactly as they were, so
// already-allocated storage remains in place for later appends.  Elements
// that were in the array are neither unref'd nor cleared here.
INLINE void upb_array_truncate(struct upb_array *arr)
{
arr->len = 0;
}
#ifdef __cplusplus

@ -29,6 +29,7 @@ extern "C" {
#define INLINE static inline
#endif
#define UPB_THREAD_UNSAFE
#ifdef UPB_THREAD_UNSAFE
/* Non-thread-safe implementations. ******************************************/

@ -27,7 +27,7 @@ void upb_array_destroy(struct upb_array *arr)
upb_arraylen_t i;
/* Unref elements. */
for(i = 0; i < arr->len; i++) {
union upb_value_ptr p = upb_array_getelementptr(arr, i, arr->fielddef->type);
union upb_value_ptr p = upb_array_getelementptr(arr, i);
upb_mm_ptrtype type = upb_elem_ptrtype(arr->fielddef);
union upb_mmptr mmptr = upb_mmptr_read(p, type);
upb_mm_unref(mmptr, type);
@ -120,7 +120,7 @@ struct upb_mm_ref *upb_mm_getelemref(struct upb_mm_ref *arrref, upb_arraylen_t i
struct upb_msg_fielddef *f = arr->fielddef;
assert(upb_elem_ismm(f));
assert(i < arr->len);
union upb_value_ptr p = upb_array_getelementptr(arr, i, f->type);
union upb_value_ptr p = upb_array_getelementptr(arr, i);
upb_mm_ptrtype type = upb_elem_ptrtype(f);
union upb_mmptr val = upb_mmptr_read(p, type);
return find_or_create_ref(arrref, arrref->mm, val, type, refcreated);

@ -156,11 +156,16 @@ static union upb_value_ptr get_value_ptr(struct upb_msg *msg,
{
union upb_value_ptr p = upb_msg_getptr(msg, f);
if(upb_isarray(f)) {
bool isset = upb_msg_isset(msg, f);
size_t len = isset ? (*p.arr)->len : 0;
if(!isset) *p.arr = upb_array_new(f);
upb_array_resize(*p.arr, len+1);
p = upb_array_getelementptr(*p.arr, len, f->type);
if(!upb_msg_isset(msg, f)) {
if(!*p.arr || !upb_mmhead_only(&((*p.arr)->mmhead))) {
if(*p.arr)
upb_array_unref(*p.arr);
*p.arr = upb_array_new(f);
}
upb_array_truncate(*p.arr);
upb_msg_set(msg, f);
}
p = upb_array_append(*p.arr);
}
return p;
}
@ -202,7 +207,11 @@ static void str_cb(void *udata, uint8_t *str,
upb_msg_set(msg, f);
if(avail_len != total_len) abort(); /* TODO: support streaming. */
//bool byref = avail_len == total_len && mp->byref;
*p.str = upb_string_new();
if(!*p.str || !upb_mmhead_only(&((*p.str)->mmhead))) {
if(*p.str)
upb_string_unref(*p.str);
*p.str = upb_string_new();
}
//if(byref) {
// upb_strdrop(*p.str);
// (*p.str)->ptr = (char*)str;
@ -220,16 +229,19 @@ static void submsg_start_cb(void *udata, void *user_field_desc)
struct upb_msg_fielddef *f = user_field_desc;
struct upb_msg *oldmsg = mp->top->msg;
union upb_value_ptr p = get_value_ptr(oldmsg, f);
struct upb_msg **submsg = p.msg;
//if(*submsg && upb_mmhead_only(&((*submsg)->mmhead))) {
// /* We can reuse the existing submsg. */
//} else {
*submsg = upb_msg_new(f->ref.msg);
//}
upb_msg_clear(*submsg);
upb_msg_set(oldmsg, f);
if(upb_isarray(f) || !upb_msg_isset(oldmsg, f)) {
if(!*p.msg || !upb_mmhead_only(&((*p.msg)->mmhead))) {
if(*p.msg)
upb_msg_unref(*p.msg);
*p.msg = upb_msg_new(f->ref.msg);
}
upb_msg_clear(*p.msg);
upb_msg_set(oldmsg, f);
}
mp->top++;
mp->top->msg = *submsg;
mp->top->msg = *p.msg;
}
static void submsg_end_cb(void *udata)
@ -248,6 +260,7 @@ upb_status_t upb_msg_parsestr(struct upb_msg *msg, void *buf, size_t len)
struct upb_msg_parser mp;
upb_msg_parser_reset(&mp, msg, false);
size_t read;
upb_msg_clear(msg);
upb_status_t ret = upb_msg_parser_parse(&mp, buf, len, &read);
return ret;
}
@ -337,7 +350,7 @@ static size_t get_msgsize(struct upb_msgsizes *sizes, struct upb_msg *m)
union upb_value_ptr p = upb_msg_getptr(m, f);
if(upb_isarray(f)) {
for(int32_t j = (*p.arr)->len - 1; j >= 0; j--) {
union upb_value_ptr elem = upb_array_getelementptr((*p.arr), j, f->type);
union upb_value_ptr elem = upb_array_getelementptr(*p.arr, j);
/* TODO: for packed arrays tag size goes outside the loop. */
size += upb_get_tag_size(fd->number);
size += get_valuesize(sizes, elem, f, fd);

@ -9,32 +9,10 @@
#include <stddef.h>
#include <stdlib.h>
/* May want to move this to upb.c if enough other things warrant it. */
#define alignof(t) offsetof(struct { char c; t x; }, x)
#define TYPE_INFO(proto_type, wire_type, ctype) [proto_type] = {alignof(ctype), sizeof(ctype), wire_type, #ctype},
struct upb_type_info upb_type_info[] = {
TYPE_INFO(GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_DOUBLE, UPB_WIRE_TYPE_64BIT, double)
TYPE_INFO(GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_FLOAT, UPB_WIRE_TYPE_32BIT, float)
TYPE_INFO(GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_INT64, UPB_WIRE_TYPE_VARINT, int64_t)
TYPE_INFO(GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_UINT64, UPB_WIRE_TYPE_VARINT, uint64_t)
TYPE_INFO(GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_INT32, UPB_WIRE_TYPE_VARINT, int32_t)
TYPE_INFO(GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_FIXED64, UPB_WIRE_TYPE_64BIT, uint64_t)
TYPE_INFO(GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_FIXED32, UPB_WIRE_TYPE_32BIT, uint32_t)
TYPE_INFO(GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_BOOL, UPB_WIRE_TYPE_VARINT, bool)
TYPE_INFO(GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_MESSAGE, UPB_WIRE_TYPE_DELIMITED, void*)
TYPE_INFO(GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_GROUP, UPB_WIRE_TYPE_START_GROUP, void*)
TYPE_INFO(GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_UINT32, UPB_WIRE_TYPE_VARINT, uint32_t)
TYPE_INFO(GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_ENUM, UPB_WIRE_TYPE_VARINT, uint32_t)
TYPE_INFO(GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_SFIXED32, UPB_WIRE_TYPE_32BIT, int32_t)
TYPE_INFO(GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_SFIXED64, UPB_WIRE_TYPE_64BIT, int64_t)
TYPE_INFO(GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_SINT32, UPB_WIRE_TYPE_VARINT, int32_t)
TYPE_INFO(GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_SINT64, UPB_WIRE_TYPE_VARINT, int64_t)
TYPE_INFO(GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_STRING, UPB_WIRE_TYPE_DELIMITED, struct upb_string*)
TYPE_INFO(GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_BYTES, UPB_WIRE_TYPE_DELIMITED, struct upb_string*)
};
/* This is called by the inline version of the function if the varint turns out
* to be >= 2 bytes. */
/**
* Parses a 64-bit varint that is known to be >= 2 bytes (the inline version
* handles 1-byte varints).
*/
upb_status_t upb_get_v_uint64_t_full(uint8_t *buf, uint8_t *end, uint64_t *val,
uint8_t **outbuf)
{
@ -42,10 +20,15 @@ upb_status_t upb_get_v_uint64_t_full(uint8_t *buf, uint8_t *end, uint64_t *val,
uint8_t last = 0x80;
*val = 0;
int bitpos;
for(bitpos = 0; buf < (uint8_t*)end && (last & 0x80); buf++, bitpos += 7)
*val |= ((uint64_t)((last = *buf) & 0x7F)) << bitpos;
if(buf >= end && buf <= maxend && (last & 0x80)) return UPB_STATUS_NEED_MORE_DATA;
if(buf > maxend) return UPB_ERROR_UNTERMINATED_VARINT;
if(buf >= end && buf <= maxend && (last & 0x80))
return UPB_STATUS_NEED_MORE_DATA;
if(buf > maxend)
return UPB_ERROR_UNTERMINATED_VARINT;
*outbuf = buf;
return UPB_STATUS_OK;
}
@ -54,23 +37,37 @@ upb_status_t upb_parse_wire_value(uint8_t *buf, uint8_t *end, upb_wire_type_t wt
union upb_wire_value *wv, uint8_t **outbuf)
{
switch(wt) {
case UPB_WIRE_TYPE_VARINT: return upb_get_v_uint64_t(buf, end, &wv->varint, outbuf);
case UPB_WIRE_TYPE_64BIT: return upb_get_f_uint64_t(buf, end, &wv->_64bit, outbuf);
case UPB_WIRE_TYPE_32BIT: return upb_get_f_uint32_t(buf, end, &wv->_32bit, outbuf);
default: return UPB_ERROR_ILLEGAL; /* Doesn't handle delimited, groups. */
case UPB_WIRE_TYPE_VARINT:
return upb_get_v_uint64_t(buf, end, &wv->varint, outbuf);
case UPB_WIRE_TYPE_64BIT:
return upb_get_f_uint64_t(buf, end, &wv->_64bit, outbuf);
case UPB_WIRE_TYPE_32BIT:
return upb_get_f_uint32_t(buf, end, &wv->_32bit, outbuf);
default:
return UPB_ERROR_ILLEGAL; // Doesn't handle delimited, groups.
}
}
/**
* Advances buf past the current wire value (of type wt), saving the result in
* outbuf.
*/
static upb_status_t skip_wire_value(uint8_t *buf, uint8_t *end, upb_wire_type_t wt,
uint8_t **outbuf)
{
switch(wt) {
case UPB_WIRE_TYPE_VARINT: return upb_skip_v_uint64_t(buf, end, outbuf);
case UPB_WIRE_TYPE_64BIT: return upb_skip_f_uint64_t(buf, end, outbuf);
case UPB_WIRE_TYPE_32BIT: return upb_skip_f_uint32_t(buf, end, outbuf);
case UPB_WIRE_TYPE_START_GROUP: /* TODO: skip to matching end group. */
case UPB_WIRE_TYPE_END_GROUP: return UPB_STATUS_OK;
default: return UPB_ERROR_ILLEGAL;
case UPB_WIRE_TYPE_VARINT:
return upb_skip_v_uint64_t(buf, end, outbuf);
case UPB_WIRE_TYPE_64BIT:
return upb_skip_f_uint64_t(buf, end, outbuf);
case UPB_WIRE_TYPE_32BIT:
return upb_skip_f_uint32_t(buf, end, outbuf);
case UPB_WIRE_TYPE_START_GROUP:
// TODO: skip to matching end group.
case UPB_WIRE_TYPE_END_GROUP:
return UPB_STATUS_OK;
default:
return UPB_ERROR_ILLEGAL;
}
}
@ -78,8 +75,8 @@ upb_status_t upb_parse_value(uint8_t *buf, uint8_t *end, upb_field_type_t ft,
union upb_value_ptr v, uint8_t **outbuf)
{
#define CASE(t, member_name) \
case GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_ ## t: \
return upb_get_ ## t(buf, end, v.member_name, outbuf);
case UPB_TYPENUM(t): return upb_get_ ## t(buf, end, v.member_name, outbuf);
switch(ft) {
CASE(DOUBLE, _double)
CASE(FLOAT, _float)
@ -97,6 +94,7 @@ upb_status_t upb_parse_value(uint8_t *buf, uint8_t *end, upb_field_type_t ft,
CASE(ENUM, int32)
default: return UPB_ERROR_ILLEGAL;
}
#undef CASE
}
@ -104,55 +102,76 @@ void upb_stream_parser_reset(struct upb_stream_parser *state, void *udata)
{
state->top = state->stack;
state->limit = &state->stack[UPB_MAX_NESTING];
/* The top-level message is not delimited (we can keep receiving data for
* it indefinitely), so we treat it like a group. */
*state->top = 0;
state->completed_offset = 0;
state->udata = udata;
}
static void *pop_stack_frame(struct upb_stream_parser *s, uint8_t *buf)
{
if(s->submsg_end_cb) s->submsg_end_cb(s->udata);
s->top--;
return (char*)buf + (*s->top > 0 ? (*s->top - s->completed_offset) : 0);
// The top-level message is not delimited (we can keep receiving data for it
// indefinitely), so we treat it like a group.
*state->top = 0;
}
/* Returns the next end offset. */
static upb_status_t push_stack_frame(struct upb_stream_parser *s,
uint8_t *buf, uint32_t len,
void *user_field_desc, uint8_t **submsg_end)
/**
* Pushes a new stack frame for a submessage with the given len (which will
* be zero if the submessage is a group).
*/
static upb_status_t push(struct upb_stream_parser *s, uint8_t *start,
uint32_t submsg_len, void *user_field_desc,
uint8_t **submsg_end)
{
s->top++;
if(s->top > s->limit) return UPB_ERROR_STACK_OVERFLOW;
*s->top = s->completed_offset + len;
if(s->submsg_start_cb) s->submsg_start_cb(s->udata, user_field_desc);
*submsg_end = buf + (*s->top > 0 ? (*s->top - s->completed_offset) : 0);
if(s->top >= s->limit)
return UPB_ERROR_STACK_OVERFLOW;
*s->top = s->completed_offset + submsg_len;
if(s->submsg_start_cb)
s->submsg_start_cb(s->udata, user_field_desc);
*submsg_end = start + (*s->top > 0 ? (*s->top - s->completed_offset) : 0);
return UPB_STATUS_OK;
}
/**
 * Unwinds one level of submessage nesting.  The submsg-end callback (if one
 * is registered) is invoked first, then the stack pointer is moved down one
 * frame.  The return value is where the frame that is now on top ends,
 * expressed as a pointer relative to "start": for a delimited submessage this
 * is start plus the stored end offset minus the bytes already completed; for
 * a frame whose stored offset is zero (a group) it is simply "start".
 */
static void *pop(struct upb_stream_parser *s, uint8_t *start)
{
  // Notify the client before the frame goes away.
  if(s->submsg_end_cb != NULL) s->submsg_end_cb(s->udata);

  // Discard the finished frame; everything below refers to the new top.
  --s->top;

  char *base = (char*)start;
  // Zero marks a group, which has no known end offset.
  return (*s->top > 0) ? base + (*s->top - s->completed_offset) : base;
}
upb_status_t upb_stream_parser_parse(struct upb_stream_parser *s,
void *_buf, size_t len, size_t *read)
{
uint8_t *buf = _buf;
uint8_t *completed = buf;
uint8_t *const start = buf;
uint8_t *const start = buf; // ptr equivalent of s->completed_offset
uint8_t *end = buf + len;
uint8_t *submsg_end = buf + (*s->top > 0 ? *s->top : 0);
upb_status_t status = UPB_STATUS_OK;
/* Make local copies so optimizer knows they won't change. */
// Make local copies so optimizer knows they won't change.
upb_tag_cb tag_cb = s->tag_cb;
upb_str_cb str_cb = s->str_cb;
upb_value_cb value_cb = s->value_cb;
void *udata = s->udata;
/* Main loop: parse a tag, then handle the value. */
#define CHECK(exp) do { if((status = exp) != UPB_STATUS_OK) goto err; } while(0)
// Main loop: parse a tag, then handle the value.
while(buf < end) {
struct upb_tag tag;
UPB_CHECK(parse_tag(buf, end, &tag, &buf));
CHECK(parse_tag(buf, end, &tag, &buf));
if(tag.wire_type == UPB_WIRE_TYPE_END_GROUP) {
submsg_end = pop_stack_frame(s, start);
submsg_end = pop(s, start);
completed = buf;
continue;
}
@ -161,38 +180,39 @@ upb_status_t upb_stream_parser_parse(struct upb_stream_parser *s,
upb_field_type_t ft = tag_cb(udata, &tag, &udesc);
if(tag.wire_type == UPB_WIRE_TYPE_DELIMITED) {
int32_t delim_len;
UPB_CHECK(upb_get_INT32(buf, end, &delim_len, &buf));
CHECK(upb_get_INT32(buf, end, &delim_len, &buf));
uint8_t *delim_end = buf + delim_len;
if(ft == GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_MESSAGE) {
UPB_CHECK(push_stack_frame(
s, start, delim_end - start, udesc, &submsg_end));
if(ft == UPB_TYPENUM(MESSAGE)) {
CHECK(push(s, start, delim_end - start, udesc, &submsg_end));
} else {
if(upb_isstringtype(ft))
str_cb(udata, buf, UPB_MIN(delim_end, end) - buf, delim_end - buf, udesc);
//else
// /* Set a marker for packed arrays. */
buf = delim_end; /* Note that this could be greater than end. */
if(upb_isstringtype(ft)) {
size_t avail_len = UPB_MIN(delim_end, end) - buf;
str_cb(udata, buf, avail_len, delim_end - buf, udesc);
} // else { TODO: packed arrays }
buf = delim_end; // Could be >end.
}
} else { /* Scalar (non-delimited) value. */
} else {
// Scalar (non-delimited) value.
switch(ft) {
case 0: /* Client elected to skip. */
UPB_CHECK(skip_wire_value(buf, end, tag.wire_type, &buf));
case 0: // Client elected to skip.
CHECK(skip_wire_value(buf, end, tag.wire_type, &buf));
break;
case GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_GROUP:
UPB_CHECK(push_stack_frame(s, start, 0, udesc, &submsg_end));
case UPB_TYPENUM(GROUP):
CHECK(push(s, start, 0, udesc, &submsg_end));
break;
default:
UPB_CHECK(value_cb(udata, buf, end, udesc, &buf));
CHECK(value_cb(udata, buf, end, udesc, &buf));
break;
}
}
while(buf == submsg_end) submsg_end = pop_stack_frame(s, start);
//while(buf < s->packed_end) /* packed arrays. */
// UPB_CHECK(value_cb(udata, buf, end, udesc, &buf));
while(buf == submsg_end)
submsg_end = pop(s, start);
// while(buf < s->packed_end) { TODO: packed arrays }
completed = buf;
}
err:
*read = (char*)completed - (char*)start;
s->completed_offset += *read;
return status;

@ -74,9 +74,8 @@ typedef void (*upb_submsg_start_cb)(void *udata,
typedef void (*upb_submsg_end_cb)(void *udata);
struct upb_stream_parser {
/* For delimited submsgs, counts from the submsg len down to zero.
* For group submsgs, counts from zero down to the negative len. */
uint32_t stack[UPB_MAX_NESTING], *top, *limit;
// Stack entries store the offset where the submsg ends (for groups, 0).
size_t stack[UPB_MAX_NESTING], *top, *limit;
size_t completed_offset;
void *udata;
upb_tag_cb tag_cb;

@ -29,6 +29,10 @@ INLINE bool upb_mmhead_norefs(struct upb_mmhead *head) {
return head->refcount == 0 && head->refs == NULL;
}
// True when the head's "refs" pointer is NULL and its refcount is exactly 1.
// The message parser uses this to decide whether an existing object may be
// reused in place instead of being unref'd and replaced with a new one.
INLINE bool upb_mmhead_only(struct upb_mmhead *head) {
  if(head->refs != NULL) return false;
  return head->refcount == 1;
}
INLINE bool upb_mmhead_unref(struct upb_mmhead *head) {
head->refcount--;
return upb_mmhead_norefs(head);
@ -57,7 +61,7 @@ struct upb_array {
struct upb_msg_fielddef *fielddef; /* Defines the type of the array. */
union upb_value_ptr elements;
upb_arraylen_t len; /* Number of elements in "elements". */
upb_arraylen_t size; /* Memory we own (0 if by reference). */
upb_arraylen_t size; /* Memory we own. */
};
struct upb_string {

@ -99,7 +99,7 @@ static void printmsg(struct upb_text_printer *printer, struct upb_msg *msg,
if(upb_isarray(f)) {
struct upb_array *arr = *p.arr;
for(uint32_t j = 0; j < arr->len; j++) {
union upb_value_ptr elem_p = upb_array_getelementptr(arr, j, f->type);
union upb_value_ptr elem_p = upb_array_getelementptr(arr, j);
printval(printer, elem_p, f, fd, stream);
}
} else {

Loading…
Cancel
Save