More refactoring of structures.

pbstream_lowlevel.h is now where the even lower-level parsing
functions go.
pull/13171/head
Joshua Haberman 16 years ago
parent 73ab650ad1
commit c8d67b2686
  1. 7
      Makefile
  2. 107
      pbstream.c
  3. 44
      pbstream.h
  4. 1
      pbstruct.c
  5. 4
      tests.c

@ -1,11 +1,14 @@
.PHONY: all clean .PHONY: all clean
all: pbstream.o tests all: pbstream.o pbstruct.o tests
clean: clean:
rm -f pbstream.o tests rm -f pbstream.o pbstruct.o tests
pbstream.o: pbstream.c pbstream.h pbstream.o: pbstream.c pbstream.h
gcc -std=c99 -O3 -Wall -o pbstream.o -c pbstream.c gcc -std=c99 -O3 -Wall -o pbstream.o -c pbstream.c
pbstruct.o: pbstruct.c pbstruct.h
gcc -std=c99 -O3 -Wall -o pbstruct.o -c pbstruct.c
tests: tests.c pbstream.c pbstream.h tests: tests.c pbstream.c pbstream.h
gcc -std=c99 -O3 -Wall -o tests tests.c gcc -std=c99 -O3 -Wall -o tests tests.c

@ -8,6 +8,7 @@
#include <stdlib.h> #include <stdlib.h>
#include <string.h> #include <string.h>
#include "pbstream.h" #include "pbstream.h"
#include "pbstream_lowlevel.h"
/* Branch prediction hints for GCC. */ /* Branch prediction hints for GCC. */
#ifdef __GNUC__ #ifdef __GNUC__
@ -107,26 +108,35 @@ done:
static pbstream_status_t get_f_uint32_t(char **buf, uint32_t *val) static pbstream_status_t get_f_uint32_t(char **buf, uint32_t *val)
{ {
uint8_t *b = (uint8_t*)*buf; char *b = *buf;
#if __BYTE_ORDER == __LITTLE_ENDIAN #define SHL(val, bits) ((uint32_t)val << bits)
*val = *(uint32_t*)b; /* likely unaligned, TODO: verify performance. */ *val = SHL(b[0], 0) | SHL(b[1], 8) | SHL(b[2], 16) | SHL(b[3], 24);
#else #undef SHL
*val = b[0] | (b[1] << 8) | (b[2] << 16) | (b[3] << 24); *buf += sizeof(uint32_t);
#endif return PBSTREAM_STATUS_OK;
*buf = (char*)b + sizeof(uint32_t); }
static pbstream_status_t skip_f_uint32_t(char **buf)
{
*buf += sizeof(uint32_t);
return PBSTREAM_STATUS_OK; return PBSTREAM_STATUS_OK;
} }
static pbstream_status_t get_f_uint64_t(char **buf, uint64_t *val) static pbstream_status_t get_f_uint64_t(char **buf, uint64_t *val)
{ {
uint8_t *b = (uint8_t*)*buf; char *b = *buf;
#if __BYTE_ORDER == __LITTLE_ENDIAN /* TODO: is this worth 32/64 specializing? */
*val = *(uint64_t*)buf; /* likely unaligned, TODO: verify performance. */ #define SHL(val, bits) ((uint64_t)val << bits)
#else *val = SHL(b[0], 0) | SHL(b[1], 8) | SHL(b[2], 16) | SHL(b[3], 24) |
*val = (b[0]) | (b[1] << 8 ) | (b[2] << 16) | (b[3] << 24) | SHL(b[4], 32) | SHL(b[5], 40) | SHL(b[6], 48) | SHL(b[7], 56);
(b[4] << 32) | (b[5] << 40) | (b[6] << 48) | (b[7] << 56); #undef SHL
#endif *buf += sizeof(uint64_t);
*buf = (char*)b + sizeof(uint64_t); return PBSTREAM_STATUS_OK;
}
static pbstream_status_t skip_f_uint64_t(char **buf)
{
*buf += sizeof(uint64_t);
return PBSTREAM_STATUS_OK; return PBSTREAM_STATUS_OK;
} }
@ -148,7 +158,7 @@ static int64_t zz_decode_64(uint64_t n) { return (n >> 1) ^ -(int64_t)(n & 1); }
#define GET(type, v_or_f, wire_t, val_t, member_name) \ #define GET(type, v_or_f, wire_t, val_t, member_name) \
static pbstream_status_t get_ ## type(struct pbstream_parse_state *s, \ static pbstream_status_t get_ ## type(struct pbstream_parse_state *s, \
char *buf, \ char *buf, \
struct pbstream_value *d) { \ struct pbstream_tagged_value *d) { \
wire_t tmp; \ wire_t tmp; \
char *b = buf; \ char *b = buf; \
CHECK(get_ ## v_or_f ## _ ## wire_t(&b, &tmp)); \ CHECK(get_ ## v_or_f ## _ ## wire_t(&b, &tmp)); \
@ -175,7 +185,7 @@ T(FIXED64, f, uint64_t, uint64_t, uint64) { *d = s; }
T(SFIXED32, f, uint32_t, int32_t, int32) { *d = (int32_t)s; } T(SFIXED32, f, uint32_t, int32_t, int32) { *d = (int32_t)s; }
T(SFIXED64, f, uint64_t, int64_t, int64) { *d = (int64_t)s; } T(SFIXED64, f, uint64_t, int64_t, int64) { *d = (int64_t)s; }
T(BOOL, v, uint32_t, bool, _bool) { *d = (bool)s; } T(BOOL, v, uint32_t, bool, _bool) { *d = (bool)s; }
T(ENUM, v, uint32_t, int32_t, _enum) { *d = (int32_t)s; } T(ENUM, v, uint32_t, int32_t, int32) { *d = (int32_t)s; }
#undef WVTOV #undef WVTOV
#undef GET #undef GET
#undef T #undef T
@ -188,7 +198,7 @@ static void wvtov_delimited(uint32_t s, struct pbstream_delimited *d, size_t o)
/* Use BYTES version for both STRING and BYTES, leave UTF-8 checks to client. */ /* Use BYTES version for both STRING and BYTES, leave UTF-8 checks to client. */
static pbstream_status_t get_BYTES(struct pbstream_parse_state *s, char *buf, static pbstream_status_t get_BYTES(struct pbstream_parse_state *s, char *buf,
struct pbstream_value *d) { struct pbstream_tagged_value *d) {
uint32_t tmp; uint32_t tmp;
char *b = buf; char *b = buf;
CHECK(get_v_uint32_t(&b, &tmp)); CHECK(get_v_uint32_t(&b, &tmp));
@ -199,7 +209,7 @@ static pbstream_status_t get_BYTES(struct pbstream_parse_state *s, char *buf,
} }
static pbstream_status_t get_MESSAGE(struct pbstream_parse_state *s, char *buf, static pbstream_status_t get_MESSAGE(struct pbstream_parse_state *s, char *buf,
struct pbstream_value *d) { struct pbstream_tagged_value *d) {
/* We're entering a sub-message. */ /* We're entering a sub-message. */
uint32_t tmp; uint32_t tmp;
char *b = buf; char *b = buf;
@ -216,7 +226,7 @@ static pbstream_status_t get_MESSAGE(struct pbstream_parse_state *s, char *buf,
struct pbstream_type_info { struct pbstream_type_info {
pbstream_wire_type_t expected_wire_type; pbstream_wire_type_t expected_wire_type;
pbstream_status_t (*get)(struct pbstream_parse_state *s, char *buf, pbstream_status_t (*get)(struct pbstream_parse_state *s, char *buf,
struct pbstream_value *d); struct pbstream_tagged_value *d);
}; };
static struct pbstream_type_info type_info[] = { static struct pbstream_type_info type_info[] = {
{PBSTREAM_WIRE_TYPE_64BIT, get_DOUBLE}, {PBSTREAM_WIRE_TYPE_64BIT, get_DOUBLE},
@ -238,7 +248,7 @@ static struct pbstream_type_info type_info[] = {
{PBSTREAM_WIRE_TYPE_DELIMITED, get_MESSAGE} {PBSTREAM_WIRE_TYPE_DELIMITED, get_MESSAGE}
}; };
static pbstream_status_t parse_tag(char **buf, struct pbstream_tag *tag) pbstream_status_t parse_tag(char **buf, struct pbstream_tag *tag)
{ {
uint32_t tag_int; uint32_t tag_int;
CHECK(get_v_uint32_t(buf, &tag_int)); CHECK(get_v_uint32_t(buf, &tag_int));
@ -247,19 +257,21 @@ static pbstream_status_t parse_tag(char **buf, struct pbstream_tag *tag)
return PBSTREAM_STATUS_OK; return PBSTREAM_STATUS_OK;
} }
static pbstream_status_t parse_unknown_value( pbstream_status_t parse_wire_value(char **buf, size_t offset,
char **buf, int buf_offset, struct pbstream_wire_value *wv) pbstream_wire_type_t wt,
union pbstream_wire_value *wv)
{ {
switch(wv->type) { switch(wt) {
case PBSTREAM_WIRE_TYPE_VARINT: case PBSTREAM_WIRE_TYPE_VARINT:
CHECK(get_v_uint64_t(buf, &wv->v.varint)); break; CHECK(get_v_uint64_t(buf, &wv->varint)); break;
case PBSTREAM_WIRE_TYPE_64BIT: case PBSTREAM_WIRE_TYPE_64BIT:
CHECK(get_f_uint64_t(buf, &wv->v._64bit)); break; CHECK(get_f_uint64_t(buf, &wv->_64bit)); break;
case PBSTREAM_WIRE_TYPE_32BIT: case PBSTREAM_WIRE_TYPE_32BIT:
CHECK(get_f_uint32_t(buf, &wv->v._32bit)); break; CHECK(get_f_uint32_t(buf, &wv->_32bit)); break;
case PBSTREAM_WIRE_TYPE_DELIMITED: case PBSTREAM_WIRE_TYPE_DELIMITED:
wv->v.delimited.offset = buf_offset; wv->delimited.offset = offset;
CHECK(get_v_uint32_t(buf, &wv->v.delimited.len)); CHECK(get_v_uint32_t(buf, &wv->delimited.len));
*buf += wv->delimited.len;
break; break;
case PBSTREAM_WIRE_TYPE_START_GROUP: case PBSTREAM_WIRE_TYPE_START_GROUP:
case PBSTREAM_WIRE_TYPE_END_GROUP: case PBSTREAM_WIRE_TYPE_END_GROUP:
@ -268,7 +280,30 @@ static pbstream_status_t parse_unknown_value(
return PBSTREAM_STATUS_OK; return PBSTREAM_STATUS_OK;
} }
static struct pbstream_field *find_field(struct pbstream_fieldset* fs, pbstream_status_t skip_wire_value(char **buf, pbstream_wire_type_t wt)
{
switch(wt) {
case PBSTREAM_WIRE_TYPE_VARINT:
CHECK(skip_v_uint64_t(buf)); break;
case PBSTREAM_WIRE_TYPE_64BIT:
CHECK(skip_f_uint64_t(buf)); break;
case PBSTREAM_WIRE_TYPE_32BIT:
CHECK(skip_f_uint32_t(buf)); break;
case PBSTREAM_WIRE_TYPE_DELIMITED: {
/* Have to get (not skip) the length to skip the bytes. */
uint32_t len;
CHECK(get_v_uint32_t(buf, &len));
*buf += len;
break;
}
case PBSTREAM_WIRE_TYPE_START_GROUP:
case PBSTREAM_WIRE_TYPE_END_GROUP:
return PBSTREAM_ERROR_GROUP; /* deprecated, no plans to support. */
}
return PBSTREAM_STATUS_OK;
}
struct pbstream_field *pbstream_find_field(struct pbstream_fieldset* fs,
pbstream_field_number_t num) pbstream_field_number_t num)
{ {
/* TODO: the hashtable part. */ /* TODO: the hashtable part. */
@ -279,10 +314,9 @@ static struct pbstream_field *find_field(struct pbstream_fieldset* fs,
pbstream_status_t pbstream_parse_field(struct pbstream_parse_state *s, pbstream_status_t pbstream_parse_field(struct pbstream_parse_state *s,
char *buf, char *buf,
pbstream_field_number_t *fieldnum, pbstream_field_number_t *fieldnum,
struct pbstream_value *val, struct pbstream_tagged_value *val,
struct pbstream_wire_value *wv) struct pbstream_tagged_wire_value *wv)
{ {
char *b = buf;
/* Check for end-of-message at the current stack depth. */ /* Check for end-of-message at the current stack depth. */
if(unlikely(s->offset >= s->top->end_offset)) { if(unlikely(s->offset >= s->top->end_offset)) {
/* If the end offset isn't an exact field boundary, the pb is corrupt. */ /* If the end offset isn't an exact field boundary, the pb is corrupt. */
@ -293,9 +327,11 @@ pbstream_status_t pbstream_parse_field(struct pbstream_parse_state *s,
} }
struct pbstream_tag tag; struct pbstream_tag tag;
char *b = buf;
CHECK(parse_tag(&b, &tag)); CHECK(parse_tag(&b, &tag));
s->offset += (b-buf); s->offset += (b-buf);
struct pbstream_field *fd = find_field(s->top->fieldset, tag.field_number); struct pbstream_field *fd = pbstream_find_field(s->top->fieldset,
tag.field_number);
pbstream_status_t unknown_value_status; pbstream_status_t unknown_value_status;
if(unlikely(!fd)) { if(unlikely(!fd)) {
unknown_value_status = PBSTREAM_ERROR_UNKNOWN_VALUE; unknown_value_status = PBSTREAM_ERROR_UNKNOWN_VALUE;
@ -314,7 +350,8 @@ pbstream_status_t pbstream_parse_field(struct pbstream_parse_state *s,
unknown_value: unknown_value:
wv->type = tag.wire_type; wv->type = tag.wire_type;
CHECK(parse_unknown_value(&b, s->offset, wv)); b = buf;
CHECK(parse_wire_value(&b, s->offset, tag.wire_type, &wv->v));
s->offset += (b-buf); s->offset += (b-buf);
return unknown_value_status; return unknown_value_status;
} }

@ -4,9 +4,16 @@
* Copyright (c) 2008 Joshua Haberman. See LICENSE for details. * Copyright (c) 2008 Joshua Haberman. See LICENSE for details.
*/ */
#ifndef PBSTREAM_H_
#define PBSTREAM_H_
#include <stdint.h> #include <stdint.h>
#include <stdbool.h> #include <stdbool.h>
#ifdef __cplusplus
extern "C" {
#endif
/* The maximum that any submessages can be nested. Matches proto2's limit. */ /* The maximum that any submessages can be nested. Matches proto2's limit. */
#define PBSTREAM_MAX_STACK 64 #define PBSTREAM_MAX_STACK 64
@ -44,9 +51,9 @@ typedef enum pbstream_wire_type {
typedef int32_t pbstream_field_number_t; typedef int32_t pbstream_field_number_t;
/* A deserialized value as described in a .proto file. */ /* A deserialized value as described in a .proto file. */
struct pbstream_value { struct pbstream_tagged_value {
struct pbstream_field *field; struct pbstream_field *field;
union { union pbstream_value {
double _double; double _double;
float _float; float _float;
int32_t int32; int32_t int32;
@ -58,20 +65,14 @@ struct pbstream_value {
size_t offset; /* relative to the beginning of the stream. */ size_t offset; /* relative to the beginning of the stream. */
uint32_t len; uint32_t len;
} delimited; } delimited;
int32_t _enum;
} v; } v;
}; };
/* A tag occurs before each value on-the-wire. */ /* A value as it is encoded on-the-wire, before it has been interpreted as
struct pbstream_tag { * any particular .proto type. */
pbstream_field_number_t field_number; struct pbstream_tagged_wire_value {
pbstream_wire_type_t wire_type;
};
/* A value as it is encoded on-the-wire */
struct pbstream_wire_value {
pbstream_wire_type_t type; pbstream_wire_type_t type;
union { union pbstream_wire_value {
uint64_t varint; uint64_t varint;
uint64_t _64bit; uint64_t _64bit;
struct { struct {
@ -82,14 +83,19 @@ struct pbstream_wire_value {
} v; } v;
}; };
/* Definition of a single field in a message. */ /* Definition of a single field in a message. Note that this does not include
* nearly all of the information that can be specified about a field in a
* .proto file. For example, we don't even know the field's name. We keep
* only the information necessary to parse the field. */
struct pbstream_field { struct pbstream_field {
pbstream_field_number_t field_number; pbstream_field_number_t field_number;
pbstream_type_t type; pbstream_type_t type;
struct pbstream_fieldset *fieldset; /* if type == MESSAGE */ struct pbstream_fieldset *fieldset; /* if type == MESSAGE */
}; };
/* The set of fields corresponding to a message definition. */ /* A fieldset is a data structure that supports fast lookup of fields by number.
* It is logically a map of {field_number -> struct pbstream_field*}. Fast
* lookup is important, because it is in the critical path of parsing. */
struct pbstream_fieldset { struct pbstream_fieldset {
int num_fields; int num_fields;
struct pbstream_field *fields; struct pbstream_field *fields;
@ -170,5 +176,11 @@ struct pbstream_parse_state;
pbstream_status_t pbstream_parse_field(struct pbstream_parse_state *s, pbstream_status_t pbstream_parse_field(struct pbstream_parse_state *s,
char *buf, char *buf,
pbstream_field_number_t *fieldnum, pbstream_field_number_t *fieldnum,
struct pbstream_value *val, struct pbstream_tagged_value *val,
struct pbstream_wire_value *wv); struct pbstream_tagged_wire_value *wv);
#ifdef __cplusplus
} /* extern "C" */
#endif
#endif /* PBSTREAM_H_ */

@ -8,4 +8,5 @@
#include "pbstruct.h" #include "pbstruct.h"
#define alignof(t) offsetof(struct { char c; t x; }, x) #define alignof(t) offsetof(struct { char c; t x; }, x)
/* Rounds p up to the nearest multiple of alignof(t); a value that is already
 * a multiple is returned unchanged.  Assumes alignof(t) is a power of two.
 * The argument p is parenthesized so that expression arguments such as
 * "1 << 4" are evaluated as a whole before the subtraction. */
#define ALIGN_UP(p, t) (alignof(t) + (((p) - 1) & ~(alignof(t) - 1)))

@ -60,8 +60,8 @@ void test_simple_proto()
pbstream_init_parser(&s, &fieldset1); pbstream_init_parser(&s, &fieldset1);
assert(s.offset == 0); assert(s.offset == 0);
pbstream_field_number_t fieldnum; pbstream_field_number_t fieldnum;
struct pbstream_value val; struct pbstream_tagged_value val;
struct pbstream_wire_value wv; struct pbstream_tagged_wire_value wv;
assert(pbstream_parse_field(&s, message1, &fieldnum, &val, &wv) == assert(pbstream_parse_field(&s, message1, &fieldnum, &val, &wv) ==
PBSTREAM_STATUS_OK); PBSTREAM_STATUS_OK);
assert(val.field->field_number == 1); assert(val.field->field_number == 1);

Loading…
Cancel
Save