A bunch more work, a fast table for field lookup.

pull/13171/head
Joshua Haberman 16 years ago
parent 0c80c38475
commit d1f78c88fa
  1. 8
      Makefile
  2. 56
      upb.h
  3. 50
      upb_fieldmap.c
  4. 53
      upb_fieldmap.h
  5. 189
      upb_parse.c
  6. 110
      upb_parse.h
  7. 1
      upb_struct.c
  8. 4
      upb_struct.h
  9. 89
      upb_table.c
  10. 75
      upb_table.h

@ -1,16 +1,16 @@
.PHONY: all clean
CFLAGS=-std=c99 -O3 -Wall -Wextra -pedantic
OBJ=upb_parse.o upb_fieldmap.o upb_struct.o
all: $(OBJ) tests
OBJ=upb_parse.o upb_table.o upb_struct.o
all: $(OBJ)
clean:
rm -f $(OBJ) tests
upb_parse.o: upb_parse.c upb_parse.h
gcc $(CFLAGS) -o upb_parse.o -c upb_parse.c
upb_fieldmap.o: upb_fieldmap.c upb_fieldmap.h
gcc $(CFLAGS) -o upb_fieldmap.o -c upb_fieldmap.c
upb_table.o: upb_table.c upb_table.h
gcc $(CFLAGS) -o upb_table.o -c upb_table.c
upb_struct.o: upb_struct.c upb_struct.h
gcc $(CFLAGS) -o upb_struct.o -c upb_struct.c

56
upb.h

@ -19,29 +19,30 @@ extern "C" {
#define UPB_MAX_NESTING 64
/* A list of types as they are encoded on-the-wire. */
typedef enum upb_wire_type {
enum upb_wire_type {
UPB_WIRE_TYPE_VARINT = 0,
UPB_WIRE_TYPE_64BIT = 1,
UPB_WIRE_TYPE_DELIMITED = 2,
UPB_WIRE_TYPE_START_GROUP = 3,
UPB_WIRE_TYPE_END_GROUP = 4,
UPB_WIRE_TYPE_32BIT = 5,
} upb_wire_type_t;
struct upb_delimited {
size_t offset; /* relative to the beginning of the stream. */
uint32_t len;
};
typedef int8_t upb_wire_type_t;
/* A value as it is encoded on-the-wire. */
/* A value as it is encoded on-the-wire, except delimited, which is handled
* separately. */
union upb_wire_value {
uint64_t varint;
uint64_t _64bit;
uint32_t _32bit;
struct upb_delimited delimited;
};
/* A value as described in a .proto file. */
/* Value type as defined in a .proto file. The values of this are defined by
* google_protobuf_FieldDescriptorProto_Type (from descriptor.proto). */
typedef int32_t upb_field_type_t;
/* A value as described in a .proto file, except delimited, which is handled
* separately. */
union upb_value {
double _double;
float _float;
@ -50,9 +51,10 @@ union upb_value {
uint32_t uint32;
uint64_t uint64;
bool _bool;
struct upb_delimited delimited;
uint32_t delim_len;
};
/* The number of a field, eg. "optional string foo = 3". */
typedef int32_t upb_field_number_t;
/* A tag occurs before each value on-the-wire. */
@ -61,6 +63,40 @@ struct upb_tag {
upb_wire_type_t wire_type;
};
/* Status codes used as a return value. */
typedef enum upb_status {
UPB_STATUS_OK = 0,
UPB_STATUS_SUBMESSAGE_END = 1,
/** FATAL ERRORS: these indicate corruption, and cannot be recovered. */
// A varint did not terminate before hitting 64 bits.
UPB_ERROR_UNTERMINATED_VARINT = -1,
// A submessage ended in the middle of data.
UPB_ERROR_BAD_SUBMESSAGE_END = -2,
// Encountered a "group" on the wire (deprecated and unsupported).
UPB_ERROR_GROUP = -3,
// Input was nested more than UPB_MAX_NESTING deep.
UPB_ERROR_STACK_OVERFLOW = -4,
// The input data caused the pb's offset (a size_t) to overflow.
UPB_ERROR_OVERFLOW = -5,
// Generic error.
UPB_ERROR = -6,
/** NONFATAL ERRORS: the input was invalid, but we can continue if desired. */
// A value was encountered that was not defined in the .proto file.
UPB_ERROR_UNKNOWN_VALUE = 2,
// A field was encoded with the wrong wire type.
UPB_ERROR_MISMATCHED_TYPE = 3,
} upb_status_t;
#ifdef __cplusplus
} /* extern "C" */
#endif

@ -1,50 +0,0 @@
/*
* upb - a minimalist implementation of protocol buffers.
*
* Copyright (c) 2009 Joshua Haberman. See LICENSE for details.
*/
#include "upb_fieldmap.h"
#include <stdlib.h>
void pbstream_init_fieldmap(struct pbstream_fieldmap *fieldmap,
struct pbstream_field *fields,
int num_fields)
{
qsort(fields, num_fields, sizeof(*fields), compare_fields);
/* Find the largest n for which at least half the fieldnums <n are used.
* Start at 8 to avoid noise of small numbers. */
pbstream_field_number_t n = 0, maybe_n;
for(int i = 0; i < num_fields; i++) {
maybe_n = fields[i].field_number;
if(maybe_n > 8 && maybe_n/(i+1) >= 2) break;
n = maybe_n;
}
fieldmap->num_fields = num_fields;
fieldmap->fields = malloc(sizeof(*fieldmap->fields)*num_fields);
memcpy(fieldmap->fields, fields, sizeof(*fields)*num_fields);
fieldmap->array_size = n;
fieldmap->array = malloc(sizeof(*fieldmap->array)*n);
memset(fieldmap->array, 0, sizeof(*fieldmap->array)*n);
for (int i = 0; i < num_fields && fields[i].field_number <= n; i++)
fieldmap->array[fields[i].field_number-1] = &fieldmap->fields[i];
/* Until we support the hashtable part... */
assert(n == fields[num_fields-1].field_number);
}
void pbstream_free_fieldmap(struct pbstream_fieldmap *fieldmap)
{
free(fieldmap->fields);
free(fieldmap->array);
}
/* Emit definition for inline function. */
extern void *upb_fieldmap_find(struct upb_fieldmap *fm,
pbstream_field_number_t num,
size_t info_size);

@ -1,53 +0,0 @@
/*
* upb - a minimalist implementation of protocol buffers.
*
* Copyright (c) 2009 Joshua Haberman. See LICENSE for details.
*
* A fieldmap is a data structure that supports fast lookup of fields by
* number. It is logically a map of {field_number -> <field info>}, where
* <field info> is any struct that begins with the field number. Fast lookup
* is important, because it is in the critical path of parsing. */
#ifndef UPB_FIELDMAP_H_
#define UPB_FIELDMAP_H_
#include "upb.h"
#ifdef __cplusplus
extern "C" {
#endif
struct upb_fieldmap {
int array_size;
void *array;
/* TODO: the hashtable part. */
};
/* Takes an array of num_fields fields and builds an optimized table for fast
* lookup of fields by number. The input fields need not be sorted. This
* fieldmap must be freed with upb_free_fieldmap(). */
void upb_init_fieldmap(struct upb_fieldmap *fieldmap,
void *fields,
int num_fields,
int field_size);
void upb_free_fieldmap(struct upb_fieldmap *fieldmap);
/* Looks the given field number up in the fieldmap, and returns the
* corresponding field definition (or NULL if this field number does not exist
* in this fieldmap). */
inline void *upb_fieldmap_find(struct upb_fieldmap *fm,
upb_field_number_t num,
size_t info_size)
{
if (num < array_size) {
return (char*)fs->array + (num*info_size);
} else {
/* TODO: the hashtable part. */
}
}
#ifdef __cplusplus
} /* extern "C" */
#endif
#endif /* UPB_PARSE_H_ */

@ -8,6 +8,7 @@
#include <assert.h>
#include <string.h>
#include "descriptor.h"
/* Branch prediction hints for GCC. */
#ifdef __GNUC__
@ -138,14 +139,10 @@ static int64_t zz_decode_64(uint64_t n) { return (n >> 1) ^ -(int64_t)(n & 1); }
static void wvtov_ ## type(wire_t s, val_t *d)
#define GET(type, v_or_f, wire_t, val_t, member_name) \
static upb_status_t get_ ## type(struct upb_parse_state *s, \
uint8_t *buf, \
struct upb_tagged_value *d) { \
static upb_status_t get_ ## type(uint8_t **buf, union upb_value *d) { \
wire_t tmp; \
uint8_t *b = buf; \
CHECK(get_ ## v_or_f ## _ ## wire_t(&b, &tmp)); \
wvtov_ ## type(tmp, &d->v.member_name); \
s->offset += (b-buf); \
CHECK(get_ ## v_or_f ## _ ## wire_t(buf, &tmp)); \
wvtov_ ## type(tmp, &d->member_name); \
return UPB_STATUS_OK; \
}
@ -172,65 +169,25 @@ T(ENUM, v, uint32_t, int32_t, int32) { *d = (int32_t)s; }
#undef GET
#undef T
static void wvtov_delimited(uint32_t s, struct upb_delimited *d, size_t o)
{
d->offset = o;
d->len = s;
}
/* Use BYTES version for both STRING and BYTES, leave UTF-8 checks to client. */
static upb_status_t get_BYTES(struct upb_parse_state *s, uint8_t *buf,
struct upb_tagged_value *d) {
uint32_t tmp;
uint8_t *b = buf;
CHECK(get_v_uint32_t(&b, &tmp));
s->offset += (b-buf); /* advance past length varint. */
wvtov_delimited(tmp, &d->v.delimited, s->offset);
size_t new_offset = s->offset + d->v.delimited.len; /* skip bytes */
if (unlikely(new_offset < s->offset)) return UPB_ERROR_OVERFLOW;
s->offset = new_offset;
return UPB_STATUS_OK;
}
static upb_status_t get_MESSAGE(struct upb_parse_state *s, uint8_t *buf,
struct upb_tagged_value *d) {
/* We're entering a sub-message. */
uint32_t tmp;
uint8_t *b = buf;
CHECK(get_v_uint32_t(&b, &tmp));
s->offset += (b-buf); /* advance past length varint. */
wvtov_delimited(tmp, &d->v.delimited, s->offset);
/* Unlike STRING and BYTES, we *don't* advance past delimited here. */
if (unlikely(++s->top == s->limit)) return UPB_ERROR_STACK_OVERFLOW;
s->top->fieldset = d->field->fieldset;
s->top->end_offset = d->v.delimited.offset + d->v.delimited.len;
if (unlikely(s->top->end_offset < s->offset)) return UPB_ERROR_OVERFLOW;
return UPB_STATUS_OK;
}
struct upb_type_info {
upb_wire_type_t expected_wire_type;
upb_status_t (*get)(struct upb_parse_state *s, uint8_t *buf,
struct upb_tagged_value *d);
};
static struct upb_type_info type_info[] = {
{UPB_WIRE_TYPE_64BIT, get_DOUBLE},
{UPB_WIRE_TYPE_32BIT, get_FLOAT},
{UPB_WIRE_TYPE_VARINT, get_INT32},
{UPB_WIRE_TYPE_VARINT, get_INT64},
{UPB_WIRE_TYPE_VARINT, get_UINT32},
{UPB_WIRE_TYPE_VARINT, get_UINT64},
{UPB_WIRE_TYPE_VARINT, get_SINT32},
{UPB_WIRE_TYPE_VARINT, get_SINT64},
{UPB_WIRE_TYPE_32BIT, get_FIXED32},
{UPB_WIRE_TYPE_64BIT, get_FIXED64},
{UPB_WIRE_TYPE_32BIT, get_SFIXED32},
{UPB_WIRE_TYPE_64BIT, get_SFIXED64},
{UPB_WIRE_TYPE_VARINT, get_BOOL},
{UPB_WIRE_TYPE_DELIMITED, get_BYTES},
{UPB_WIRE_TYPE_DELIMITED, get_BYTES},
{UPB_WIRE_TYPE_VARINT, get_ENUM},
{UPB_WIRE_TYPE_DELIMITED, get_MESSAGE}
upb_wire_type_t upb_expected_wire_types[] = {
[GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_DOUBLE] = UPB_WIRE_TYPE_64BIT,
[GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_FLOAT] = UPB_WIRE_TYPE_32BIT,
[GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_INT64] = UPB_WIRE_TYPE_VARINT,
[GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_UINT64] = UPB_WIRE_TYPE_VARINT,
[GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_INT32] = UPB_WIRE_TYPE_VARINT,
[GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_FIXED64] = UPB_WIRE_TYPE_64BIT,
[GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_FIXED32] = UPB_WIRE_TYPE_32BIT,
[GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_BOOL] = UPB_WIRE_TYPE_VARINT,
[GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_STRING] = UPB_WIRE_TYPE_DELIMITED,
[GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_BYTES] = UPB_WIRE_TYPE_DELIMITED,
[GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_GROUP] = -1, /* TODO */
[GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_MESSAGE] = UPB_WIRE_TYPE_DELIMITED,
[GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_UINT32] = UPB_WIRE_TYPE_VARINT,
[GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_ENUM] = UPB_WIRE_TYPE_VARINT,
[GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_SFIXED32] = UPB_WIRE_TYPE_32BIT,
[GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_SFIXED64] = UPB_WIRE_TYPE_64BIT,
[GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_SINT32] = UPB_WIRE_TYPE_VARINT,
[GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_SINT64] = UPB_WIRE_TYPE_VARINT,
};
upb_status_t parse_tag(uint8_t **buf, struct upb_tag *tag)
@ -249,22 +206,17 @@ upb_status_t parse_wire_value(uint8_t *buf, size_t *offset,
#define READ(expr) CHECK(expr); *offset += (b-buf)
uint8_t *b = buf;
switch(wt) {
case UPB_WIRE_TYPE_VARINT:
READ(get_v_uint64_t(&b, &wv->varint)); break;
case UPB_WIRE_TYPE_64BIT:
READ(get_f_uint64_t(&b, &wv->_64bit)); break;
case UPB_WIRE_TYPE_32BIT:
READ(get_f_uint32_t(&b, &wv->_32bit)); break;
case UPB_WIRE_TYPE_VARINT: READ(get_v_uint64_t(&b, &wv->varint)); break;
case UPB_WIRE_TYPE_64BIT: READ(get_f_uint64_t(&b, &wv->_64bit)); break;
case UPB_WIRE_TYPE_32BIT: READ(get_f_uint32_t(&b, &wv->_32bit)); break;
case UPB_WIRE_TYPE_DELIMITED:
wv->delimited.offset = *offset;
READ(get_v_uint32_t(&b, &wv->delimited.len));
size_t new_offset = *offset + wv->delimited.len;
READ(get_v_uint32_t(&b, &wv->_32bit));
size_t new_offset = *offset + wv->_32bit;
if (new_offset < *offset) return UPB_ERROR_OVERFLOW;
*offset += new_offset;
break;
case UPB_WIRE_TYPE_START_GROUP:
case UPB_WIRE_TYPE_END_GROUP:
return UPB_ERROR_GROUP; /* deprecated, no plans to support. */
case UPB_WIRE_TYPE_END_GROUP: return UPB_ERROR_GROUP; /* TODO */
}
return UPB_STATUS_OK;
}
@ -274,12 +226,9 @@ upb_status_t skip_wire_value(uint8_t *buf, size_t *offset,
{
uint8_t *b = buf;
switch(wt) {
case UPB_WIRE_TYPE_VARINT:
READ(skip_v_uint64_t(&b)); break;
case UPB_WIRE_TYPE_64BIT:
READ(skip_f_uint64_t(&b)); break;
case UPB_WIRE_TYPE_32BIT:
READ(skip_f_uint32_t(&b)); break;
case UPB_WIRE_TYPE_VARINT: READ(skip_v_uint64_t(&b)); break;
case UPB_WIRE_TYPE_64BIT: READ(skip_f_uint64_t(&b)); break;
case UPB_WIRE_TYPE_32BIT: READ(skip_f_uint32_t(&b)); break;
case UPB_WIRE_TYPE_DELIMITED: {
/* Have to get (not skip) the length to skip the bytes. */
uint32_t len;
@ -290,71 +239,27 @@ upb_status_t skip_wire_value(uint8_t *buf, size_t *offset,
break;
}
case UPB_WIRE_TYPE_START_GROUP:
case UPB_WIRE_TYPE_END_GROUP:
return UPB_ERROR_GROUP; /* deprecated, no plans to support. */
case UPB_WIRE_TYPE_END_GROUP: return UPB_ERROR_GROUP; /* TODO */
}
return UPB_STATUS_OK;
#undef READ
}
/* Parses and processes the next value from buf. */
upb_status_t upb_parse_field(struct upb_parse_state *s,
uint8_t *buf,
upb_field_number_t *fieldnum,
struct upb_tagged_value *val,
struct upb_tagged_wire_value *wv)
upb_status_t upb_parse_value(uint8_t **b, upb_field_type_t ft,
union upb_value *v)
{
/* Check for end-of-message at the current stack depth. */
if(unlikely(s->offset >= s->top->end_offset)) {
/* If the end offset isn't an exact field boundary, the pb is corrupt. */
if(unlikely(s->offset != s->top->end_offset))
return UPB_ERROR_BAD_SUBMESSAGE_END;
s->top--;
return UPB_STATUS_SUBMESSAGE_END;
#define CASE(t) \
case GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_ ## t: return get_ ## t(b, v);
switch(ft) {
CASE(DOUBLE) CASE(FLOAT) CASE(INT64) CASE(UINT64) CASE(INT32) CASE(FIXED64)
CASE(FIXED32) CASE(BOOL) CASE(UINT32) CASE(ENUM) CASE(SFIXED32)
CASE(SFIXED64) CASE(SINT32) CASE(SINT64)
case GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_BYTES:
case GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_STRING:
case GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_MESSAGE:
return get_UINT32(b, v);
default: return UPB_ERROR; /* Including GROUP. */
}
struct upb_tag tag;
uint8_t *b = buf;
CHECK(parse_tag(&b, &tag));
s->offset += (b-buf);
struct upb_field *fd = upb_find_field(s->top->fieldset,
tag.field_number);
upb_status_t unknown_value_status;
if(unlikely(!fd)) {
unknown_value_status = UPB_ERROR_UNKNOWN_VALUE;
goto unknown_value;
}
struct upb_type_info *info = &type_info[fd->type];
if(unlikely(tag.wire_type != info->expected_wire_type)) {
unknown_value_status = UPB_ERROR_MISMATCHED_TYPE;
goto unknown_value;
}
*fieldnum = tag.field_number;
val->field = fd;
CHECK(info->get(s, b, val));
return UPB_STATUS_OK;
unknown_value:
wv->type = tag.wire_type;
CHECK(parse_wire_value(buf, &s->offset, tag.wire_type, &wv->v));
return unknown_value_status;
}
void upb_init_parser(
struct upb_parse_state *state,
struct upb_fieldset *toplevel_fieldset)
{
state->offset = 0;
state->top = state->stack;
state->limit = state->top + UPB_MAX_STACK;
state->top->fieldset = toplevel_fieldset;
state->top->end_offset = SIZE_MAX;
}
static int compare_fields(const void *f1, const void *f2)
{
return ((struct upb_field*)f1)->field_number -
((struct upb_field*)f2)->field_number;
#undef CASE
}

@ -18,99 +18,6 @@
extern "C" {
#endif
/* A deserialized value as described in a .proto file. */
struct upb_tagged_value {
struct upb_field *field;
union upb_value v;
};
/* A value as it is encoded on-the-wire, before it has been interpreted as
* any particular .proto type. */
struct upb_tagged_wire_value {
upb_wire_type_t type;
union upb_wire_value v;
};
/* Definition of a single field in a message, for the purposes of the parser's
* fieldmap. Note that this does not include nearly all of the information
* that can be specified about a field in a .proto file. For example, we don't
* even know the field's name. We keep only the information necessary to parse
* the field. */
struct upb_field {
upb_field_number_t field_number;
int32_t type; /* google_protobuf_FieldDescriptorProto_Type */
struct upb_fieldset *fieldset; /* if type == MESSAGE */
};
struct upb_parse_stack_frame {
struct upb_fieldset *fieldset;
size_t end_offset; /* unknown for the top frame, so we set to SIZE_MAX */
};
/* The stream parser's state. */
struct upb_parse_state {
size_t offset;
struct upb_parse_stack_frame stack[UPB_MAX_STACK];
struct upb_parse_stack_frame *top, *limit;
};
/* Call this once before parsing to initialize the data structures.
* message_type can be NULL, in which case all fields will be reported as
* unknown. */
void upb_init_parser(struct upb_parse_state *state,
struct upb_fieldset *toplevel_fieldset);
/* Status as returned by upb_parse(). Status codes <0 are fatal errors
* that cannot be recovered. Status codes >0 are unusual but nonfatal events,
* which nonetheless must be handled differently since they do not return data
* in val. */
typedef enum upb_status {
UPB_STATUS_OK = 0,
UPB_STATUS_SUBMESSAGE_END = 1, // No data is stored in val or wv.
/** FATAL ERRORS: these indicate corruption, and cannot be recovered. */
// A varint did not terminate before hitting 64 bits.
UPB_ERROR_UNTERMINATED_VARINT = -1,
// A submessage ended in the middle of data.
UPB_ERROR_BAD_SUBMESSAGE_END = -2,
// Encountered a "group" on the wire (deprecated and unsupported).
UPB_ERROR_GROUP = -3,
// Input was nested more than UPB_MAX_NESTING deep.
UPB_ERROR_STACK_OVERFLOW = -4,
// The input data caused the pb's offset (a size_t) to overflow.
UPB_ERROR_OVERFLOW = -5,
/** NONFATAL ERRORS: the input was invalid, but we can continue if desired. */
// A value was encountered that was not defined in the .proto file. The
// unknown value is stored in wv.
UPB_ERROR_UNKNOWN_VALUE = 2,
// A field was encoded with the wrong wire type. The wire value is stored in
// wv.
UPB_ERROR_MISMATCHED_TYPE = 3,
} upb_status_t;
struct upb_parse_state;
/* The main parsing function. Parses the next value from buf, storing the
* parsed value in val. If val is of type UPB_TYPE_MESSAGE, then a
* submessage was entered.
*
* IMPORTANT NOTE: for efficiency, the parsing routines do not do bounds checks,
* and may read as much as far as buf+10. So the caller must ensure that buf is
* not within 10 bytes of unmapped memory, or the program will segfault. Clients
* are encouraged to overallocate their buffers by ten bytes to compensate. */
upb_status_t upb_parse_field(struct upb_parse_state *s,
uint8_t *buf,
upb_field_number_t *fieldnum,
struct upb_tagged_value *val,
struct upb_tagged_wire_value *wv);
/* Low-level parsing functions. ***********************************************/
/* Parses a single tag from the character data starting at buf, and updates
@ -118,6 +25,19 @@ upb_status_t upb_parse_field(struct upb_parse_state *s,
* by at most ten bytes. */
upb_status_t parse_tag(uint8_t **buf, struct upb_tag *tag);
extern upb_wire_type_t upb_expected_wire_types[];
/* Returns true if wt is the correct on-the-wire type for ft. */
inline bool upb_check_type(upb_wire_type_t wt, upb_field_type_t ft) {
return upb_expected_wire_types[ft] == wt;
}
/* Parses and converts a value from the character data starting at buf. The
* caller must have previously checked that the wire type is appropriate for
* this field type. For delimited data, buf is advanced to the beginning of
* the delimited data, not the end. */
upb_status_t upb_parse_value(uint8_t **buf, upb_field_type_t ft,
union upb_value *value);
/* Parses a wire value with the given type (which must have been obtained from
* a tag that was just parsed) and adds the number of bytes that were consumed
* to *offset. For delimited types, offset is advanced past the delimited
@ -127,8 +47,8 @@ upb_status_t upb_parse_wire_value(uint8_t *buf, size_t *offset,
union upb_wire_value *wv);
/* Like the above, but discards the wire value instead of saving it. */
upb_status_t skip_wire_value(uint8_t *buf, size_t *offset,
upb_wire_type_t wt);
upb_status_t upb_skip_wire_value(uint8_t *buf, size_t *offset,
upb_wire_type_t wt);
#ifdef __cplusplus
} /* extern "C" */

@ -42,3 +42,4 @@ extern bool upb_struct_is_set(uint8_t *s, struct upb_struct_field *f);
extern bool upb_struct_all_required_fields_set(
uint8_t *s, struct upb_struct_definition *d);
extern void upb_struct_clear(uint8_t *s, struct upb_struct_definition *d);

@ -57,8 +57,8 @@ struct upb_struct_definition {
};
/* While these are written to be as fast as possible, it will still be faster
* cache the results of this lookup if possible. These return NULL if no such
* field is found. */
* to cache the results of this lookup if possible. These return NULL if no
* such field is found. */
struct upb_struct_field *upb_struct_find_field_by_name(
struct upb_struct_definition *d, char *name);
struct upb_struct_field *upb_struct_find_field_by_number(

@ -0,0 +1,89 @@
/*
* upb - a minimalist implementation of protocol buffers.
*
* Copyright (c) 2009 Joshua Haberman. See LICENSE for details.
*/
#include "upb_table.h"
#include <stdlib.h>
static int compare_entries(const void *f1, const void *f2)
{
return ((struct upb_inttable_entry*)f1)->key -
((struct upb_inttable_entry*)f2)->key;
}
static uint32_t max(uint32_t a, uint32_t b) { return a > b ? a : b; }
static uint32_t round_up_to_pow2(uint32_t v)
{
/* cf. Bit Twiddling Hacks:
* http://www-graphics.stanford.edu/~seander/bithacks.html#RoundUpPowerOf2 */
v--;
v |= v >> 1; v |= v >> 2; v |= v >> 4; v |= v >> 8; v |= v >> 16;
v++;
return v;
}
void upb_inttable_init(struct upb_inttable *table, void *entries,
int num_entries, int entry_size)
{
qsort(entries, num_entries, entry_size, compare_entries);
/* Find the largest n for which at least half the keys <n are used. We
* make sure our table size is at least n. This allows all keys <n to be
* in their main position (as if it were an array) and only numbers >n might
* possibly have collisions. Start at 8 to avoid noise of small numbers. */
upb_inttable_key_t n = 0, maybe_n;
bool all_in_array = true;
for(int i = 0; i < num_entries; i++) {
struct upb_inttable_entry *e =
upb_inttable_entry_get(entries, i, entry_size);
maybe_n = e->key;
if(maybe_n > 8 && maybe_n/(i+1) >= 2) {
all_in_array = false;
break;
}
n = maybe_n;
}
/* TODO: measure, tweak, optimize this choice of table size. Possibly test
* (at runtime) maximum chain length for each proposed size. */
uint32_t min_size_by_load = all_in_array ? n : (double)num_entries / 0.85;
uint32_t min_size = max(n, min_size_by_load);
table->size = round_up_to_pow2(min_size);
table->entries = malloc(table->size * entry_size);
/* Initialize to empty. */
for(size_t i = 0; i < table->size; i++) {
struct upb_inttable_entry *e = upb_inttable_get(table, i, entry_size);
e->key = UPB_END_OF_CHAIN;
e->next = UPB_END_OF_CHAIN;
}
/* Insert the elements. */
for(int i = 0; i < num_entries; i++) {
struct upb_inttable_entry *e =
upb_inttable_entry_get(entries, i, entry_size);
int32_t hash = upb_inttable_hash(table, e->key);
struct upb_inttable_entry *table_e =
upb_inttable_get(table, hash, entry_size);
if(table_e->key != UPB_END_OF_CHAIN) { /* Collision. */
if(hash == upb_inttable_hash(table, table_e->key)) {
/* Existing element is in its main posisiton. Find an empty slot to
* place our new element. */
} else {
/* Existing element is not in its main position. Move it to an empty
* slot and put our element in its main position. */
}
}
}
}
void upb_inttable_free(struct upb_inttable *table)
{
free(table->entries);
}
/* Emit definition for inline functions. */

@ -0,0 +1,75 @@
/*
* upb - a minimalist implementation of protocol buffers.
*
* Copyright (c) 2009 Joshua Haberman. See LICENSE for details.
*/
#ifndef UPB_TABLE_H_
#define UPB_TABLE_H_
#include "upb.h"
#ifdef __cplusplus
extern "C" {
#endif
typedef int32_t upb_inttable_key_t;
#define UPB_END_OF_CHAIN (upb_inttable_key_t)-1
struct upb_inttable_entry {
upb_inttable_key_t key;
int32_t next;
};
struct upb_inttable {
uint32_t size; /* Is a power of two. */
void *entries;
};
/* Builds an int32_t -> <entry> table, optimized for very fast lookup by
* number. table is a pointer to a previously allocated upb_inttable.
* entries points to an array of the desired entries themselves, each of size
* entry_size. The table is allocated in dynamic memory, and does not reference
* the data in entries. Entries may be modified by the function.
*
* The table must be freed with upb_inttable_free. */
void upb_inttable_init(struct upb_inttable *table, void *entries,
int num_entries, int entry_size);
inline int32_t upb_inttable_hash(struct upb_inttable * table,
upb_inttable_key_t key) {
return key & (table->size-1);
}
/* Frees any data that was allocated by upb_inttable_init. */
void upb_inttable_free(struct upb_inttable *table);
inline struct upb_inttable_entry *upb_inttable_entry_get(
void *entries, int32_t pos, int entry_size) {
return (struct upb_inttable_entry*)((char*)entries) + pos*entry_size;
}
inline struct upb_inttable_entry *upb_inttable_get(
struct upb_inttable *table, int32_t pos, int entry_size) {
return upb_inttable_entry_get(table->entries, pos, entry_size);
}
/* Lookups up key in this table. Inlined because this is in the critical path
* of parsing. */
inline void *upb_inttable_lookup(struct upb_inttable *table, int32_t key,
int entry_size) {
int32_t pos = upb_inttable_hash(table, key);
do {
struct upb_inttable_entry *e = upb_inttable_get(table, pos, entry_size);
if (key == e->key) return e;
pos = e->next;
} while (pos != UPB_END_OF_CHAIN);
return NULL; /* Not found. */
}
#ifdef __cplusplus
} /* extern "C" */
#endif
#endif /* UPB_TABLE_H_ */
Loading…
Cancel
Save