commit
d29a54fc97
5 changed files with 726 additions and 0 deletions
@ -0,0 +1,8 @@ |
||||
|
||||
.PHONY: all clean |
||||
all: pbstream.o |
||||
clean: |
||||
rm -f pbstream.o
|
||||
|
||||
pbstream.o: pbstream.c |
||||
gcc -std=c99 -O3 -Wall -o pbstream.o -c pbstream.c
|
@ -0,0 +1,32 @@ |
||||
|
||||
#define MAX(a, b) ((a) > (b) ? (a) : (b)) |
||||
#define MIN(a, b) ((a) < (b) ? (a) : (b)) |
||||
|
||||
#define DEFINE_DYNARRAY(name, type) \ |
||||
type *name; \
|
||||
int name ## _len; \
|
||||
int name ## _size; |
||||
|
||||
#define RESIZE_DYNARRAY(name, desired_len) { \ |
||||
int orig_size = name ## _size; \
|
||||
while(name ## _size < (desired_len)) \
|
||||
name ## _size *= 2; \
|
||||
/* don't bother shrinking for now. when/if we do, we'll want to bake in \
|
||||
* some kind of hysteresis so that we don't shrink until we've been under \
|
||||
* for a while. */ \
|
||||
if(name ## _size != orig_size) \
|
||||
name = realloc(name, name ## _size * sizeof(*name)); \
|
||||
name ## _len = desired_len; \
|
||||
} |
||||
|
||||
#define INIT_DYNARRAY(name, initial_len, initial_size) \ |
||||
name ## _len = initial_len; \
|
||||
name ## _size = initial_size; \
|
||||
name = realloc(NULL, name ## _size * sizeof(*name)) |
||||
|
||||
#define FREE_DYNARRAY(name) \ |
||||
free(name); |
||||
|
||||
#define DYNARRAY_GET_TOP(name) \ |
||||
(&name[name ## _len - 1]) |
||||
|
@ -0,0 +1,453 @@ |
||||
/*
|
||||
* pbstream - a stream-oriented implementation of protocol buffers. |
||||
* |
||||
* Copyright (c) 2008 Joshua Haberman. See LICENSE for details. |
||||
*/ |
||||
|
||||
#include <limits.h> |
||||
#include <string.h> |
||||
#include "pbstream.h" |
||||
|
||||
/* Branch prediction hints for GCC. */ |
||||
#ifdef __GNUC__ |
||||
#define likely(x) __builtin_expect((x),1) |
||||
#define unlikely(x) __builtin_expect((x),0) |
||||
#else |
||||
#define likely(x) |
||||
#define unlikely(x) |
||||
#endif |
||||
|
||||
/* An array, indexed by pbstream_type, that indicates what wire type is
|
||||
* expected for the given pbstream type. */ |
||||
static enum pbstream_wire_type expected_wire_type[] = { |
||||
PBSTREAM_WIRE_TYPE_64BIT, // PBSTREAM_TYPE_DOUBLE,
|
||||
PBSTREAM_WIRE_TYPE_32BIT, // PBSTREAM_TYPE_FLOAT,
|
||||
PBSTREAM_WIRE_TYPE_VARINT, // PBSTREAM_TYPE_INT32,
|
||||
PBSTREAM_WIRE_TYPE_VARINT, // PBSTREAM_TYPE_INT64,
|
||||
PBSTREAM_WIRE_TYPE_VARINT, // PBSTREAM_TYPE_UINT32,
|
||||
PBSTREAM_WIRE_TYPE_VARINT, // PBSTREAM_TYPE_UINT64,
|
||||
PBSTREAM_WIRE_TYPE_VARINT, // PBSTREAM_TYPE_SINT32,
|
||||
PBSTREAM_WIRE_TYPE_VARINT, // PBSTREAM_TYPE_SINT64,
|
||||
PBSTREAM_WIRE_TYPE_32BIT, // PBSTREAM_TYPE_FIXED32,
|
||||
PBSTREAM_WIRE_TYPE_64BIT, // PBSTREAM_TYPE_FIXED64,
|
||||
PBSTREAM_WIRE_TYPE_32BIT, // PBSTREAM_TYPE_SFIXED32,
|
||||
PBSTREAM_WIRE_TYPE_64BIT, // PBSTREAM_TYPE_SFIXED64,
|
||||
PBSTREAM_WIRE_TYPE_VARINT, // PBSTREAM_TYPE_BOOL,
|
||||
PBSTREAM_WIRE_TYPE_STRING, // PBSTREAM_TYPE_STRING,
|
||||
PBSTREAM_WIRE_TYPE_STRING, // PBSTREAM_TYPE_BYTES,
|
||||
PBSTREAM_WIRE_TYPE_VARINT, // PBSTREAM_TYPE_ENUM,
|
||||
PBSTREAM_WIRE_TYPE_STRING, // PBSTREAM_TYPE_MESSAGE
|
||||
}; |
||||
|
||||
/* Reads a varint starting at buf (but not past end), storing the result
|
||||
* in out_value. Returns whether the operation was successful. */ |
||||
enum pbstream_status get_varint(char **buf, char *end, uint64_t *out_value) |
||||
{ |
||||
*out_value = 0; |
||||
int bitpos = 0; |
||||
char *b = *buf; |
||||
|
||||
/* Because we don't check for buffer overrun inside the loop, we require
|
||||
* that callers use a buffer that is overallocated by at least 9 bytes |
||||
* (the maximum we can overrun before the bitpos check catches the problem). */ |
||||
for(; *b & 0x80 && bitpos < 64; bitpos += 7, b++) |
||||
*out_value |= (*b & 0x7F) << bitpos; |
||||
|
||||
if(unlikely(bitpos >= 64)) { |
||||
return PBSTREAM_ERROR_UNTERMINATED_VARINT; |
||||
} |
||||
if(unlikely(b > end)) { |
||||
return PBSTREAM_STATUS_INCOMPLETE; |
||||
} |
||||
|
||||
*out_value |= (*b & 0x7F) << bitpos; |
||||
*buf = b; |
||||
return PBSTREAM_STATUS_OK; |
||||
} |
||||
|
||||
/* TODO: the little-endian versions of these functions don't respect alignment.
|
||||
* While it's hard to believe that this could be less efficient than the |
||||
* alternative (the big endian implementation), this deserves some tests and |
||||
* measurements to be sure. */ |
||||
enum pbstream_status get_32_le(char **buf, char *end, uint32_t *out_value) |
||||
{ |
||||
char *b = *buf; |
||||
char *int32_end = b+4; |
||||
if(unlikely(int32_end > end)) |
||||
return PBSTREAM_STATUS_INCOMPLETE; |
||||
|
||||
#if __BYTE_ORDER == __LITTLE_ENDIAN |
||||
*out_value = *(uint32_t*)b; |
||||
#else |
||||
*out_value = b[0] | (b[1] << 8) | (b[2] << 16) | (b[3] << 24); |
||||
#endif |
||||
*buf = int32_end; |
||||
return PBSTREAM_STATUS_OK; |
||||
} |
||||
|
||||
bool get_64_le(char **buf, char *end, uint64_t *out_value) |
||||
{ |
||||
char *b = *buf; |
||||
char *int64_end = b+8; |
||||
if(unlikely(int64_end > end)) |
||||
return PBSTREAM_STATUS_INCOMPLETE; |
||||
|
||||
#if __BYTE_ORDER == __LITTLE_ENDIAN |
||||
*out_value = *(uint64_t*)buf; |
||||
#else |
||||
*out_value = (b[0]) | (b[1] << 8 ) | (b[2] << 16) | (b[3] << 24) | |
||||
(b[4] << 32) | (b[5] << 40) | (b[6] << 48) | (b[7] << 56); |
||||
#endif |
||||
*buf = int64_end; |
||||
return PBSTREAM_STATUS_OK; |
||||
} |
||||
|
||||
int32_t zigzag_decode_32(uint64_t src) |
||||
{ |
||||
return 0; /* TODO */ |
||||
} |
||||
|
||||
int64_t zigzag_decode_64(uint64_t src) |
||||
{ |
||||
return 0; /* TODO */ |
||||
} |
||||
|
||||
/* Parses the next field-number/wire-value pair from the stream of bytes
|
||||
* starting at *buf, without reading past end. Stores the parsed and wire |
||||
* value in *field_number and *wire_value, respectively. |
||||
* |
||||
* Returns a status indicating whether the operation was successful. If the |
||||
* return status is STATUS_INCOMPLETE, returns the number of additional bytes |
||||
* requred in *need_more_bytes. Updates *buf to point past the end of the |
||||
* parsed data if the operation was successful. |
||||
*/ |
||||
enum pbstream_status pbstream_parse_wire_value(char **buf, char *end, |
||||
pbstream_field_number_t *field_number, |
||||
struct pbstream_wire_value *wire_value, |
||||
int *need_more_bytes) |
||||
{ |
||||
char *b = *buf; /* Our local buf pointer -- only update buf if we succeed. */ |
||||
|
||||
#define DECODE(dest, func) \ |
||||
do { \
|
||||
enum pbstream_status status = func(&b, end, &dest); \
|
||||
if(unlikely(status != PBSTREAM_STATUS_OK)) { \
|
||||
*need_more_bytes = 0; /* This only arises below in this function. */ \
|
||||
return status; \
|
||||
} \
|
||||
} while (0) |
||||
|
||||
uint64_t key; |
||||
DECODE(key, get_varint); |
||||
|
||||
*field_number = key >> 3; |
||||
wire_value->type = key & 0x07; |
||||
|
||||
switch(wire_value->type) |
||||
{ |
||||
case PBSTREAM_WIRE_TYPE_VARINT: |
||||
DECODE(wire_value->v.varint, get_varint); |
||||
break; |
||||
|
||||
case PBSTREAM_WIRE_TYPE_64BIT: |
||||
DECODE(wire_value->v._64bit, get_64_le); |
||||
break; |
||||
|
||||
case PBSTREAM_WIRE_TYPE_STRING: |
||||
{ |
||||
uint64_t string_len; |
||||
DECODE(string_len, get_varint); |
||||
if (string_len > INT_MAX) { |
||||
/* TODO: notice this and fail. */ |
||||
} |
||||
wire_value->v.string.len = (int)string_len; |
||||
if(b + wire_value->v.string.len > end) { |
||||
*need_more_bytes = b + wire_value->v.string.len - end; |
||||
return PBSTREAM_STATUS_INCOMPLETE; |
||||
} |
||||
wire_value->v.string.data = b; |
||||
b += wire_value->v.string.len; |
||||
break; |
||||
} |
||||
|
||||
case PBSTREAM_WIRE_TYPE_START_GROUP: |
||||
case PBSTREAM_WIRE_TYPE_END_GROUP: |
||||
/* TODO (though these are deprecated, so not high priority). */ |
||||
break; |
||||
|
||||
case PBSTREAM_WIRE_TYPE_32BIT: |
||||
DECODE(wire_value->v._32bit, get_32_le); |
||||
break; |
||||
} |
||||
|
||||
*buf = b; |
||||
return true; |
||||
} |
||||
|
||||
/* Translates from a wire value to a .proto value. The caller should have
|
||||
* already checked that the wire_value is of the correct type. The pbstream |
||||
* type must not be PBSTREAM_TYPE_MESSAGE. This operation always succeeds. */ |
||||
void pbstream_translate_field(struct pbstream_wire_value *wire_value, |
||||
enum pbstream_type type, |
||||
struct pbstream_value *out_value) |
||||
{ |
||||
out_value->type = type; |
||||
switch(type) { |
||||
case PBSTREAM_TYPE_DOUBLE: |
||||
memcpy(&out_value->v._double, &wire_value->v._64bit, sizeof(double)); |
||||
break; |
||||
|
||||
case PBSTREAM_TYPE_FLOAT: |
||||
memcpy(&out_value->v._float, &wire_value->v._32bit, sizeof(float)); |
||||
break; |
||||
|
||||
case PBSTREAM_TYPE_INT32: |
||||
out_value->v.int32 = (int32_t)wire_value->v.varint; |
||||
break; |
||||
|
||||
case PBSTREAM_TYPE_INT64: |
||||
out_value->v.int64 = (int64_t)zigzag_decode_64(wire_value->v.varint); |
||||
break; |
||||
|
||||
case PBSTREAM_TYPE_UINT32: |
||||
out_value->v.uint32 = (uint32_t)wire_value->v.varint; |
||||
break; |
||||
|
||||
case PBSTREAM_TYPE_UINT64: |
||||
out_value->v.uint64 = (uint64_t)wire_value->v.varint; |
||||
break; |
||||
|
||||
case PBSTREAM_TYPE_SINT32: |
||||
out_value->v.int32 = zigzag_decode_32(wire_value->v.varint); |
||||
break; |
||||
|
||||
case PBSTREAM_TYPE_SINT64: |
||||
out_value->v.int64 = zigzag_decode_64(wire_value->v.varint); |
||||
break; |
||||
|
||||
case PBSTREAM_TYPE_FIXED32: |
||||
out_value->v.int32 = wire_value->v._32bit; |
||||
break; |
||||
|
||||
case PBSTREAM_TYPE_FIXED64: |
||||
out_value->v.int64 = wire_value->v._64bit; |
||||
break; |
||||
|
||||
case PBSTREAM_TYPE_SFIXED32: |
||||
out_value->v.int32 = (int32_t)wire_value->v._32bit; |
||||
break; |
||||
|
||||
case PBSTREAM_TYPE_SFIXED64: |
||||
out_value->v.int64 = (int64_t)wire_value->v._64bit; |
||||
break; |
||||
|
||||
case PBSTREAM_TYPE_BOOL: |
||||
out_value->v._bool = (bool)wire_value->v.varint; |
||||
break; |
||||
|
||||
case PBSTREAM_TYPE_STRING: |
||||
out_value->v.string.data = wire_value->v.string.data; |
||||
out_value->v.string.len = wire_value->v.string.len; |
||||
/* TODO: validate UTF-8? */ |
||||
break; |
||||
|
||||
case PBSTREAM_TYPE_BYTES: |
||||
out_value->v.bytes.data = wire_value->v.string.data; |
||||
out_value->v.bytes.len = wire_value->v.string.len; |
||||
break; |
||||
|
||||
case PBSTREAM_TYPE_ENUM: |
||||
out_value->v._enum = (bool)wire_value->v.varint; |
||||
break; |
||||
|
||||
case PBSTREAM_TYPE_MESSAGE: |
||||
/* Should never happen. */ |
||||
break; |
||||
} |
||||
} |
||||
|
||||
/* Given a wire value that was just parsed and a matching field descriptor,
|
||||
* processes the given value and performs the appropriate actions. These |
||||
* actions include: |
||||
* - checking that the wire type is as expected |
||||
* - converting the wire type to a .proto type |
||||
* - entering a sub-message, if that is in fact what this field implies. |
||||
* |
||||
* This function also calls user callbacks pertaining to any of the above at |
||||
* the appropriate times. */ |
||||
void process_value(struct pbstream_parse_state *s, |
||||
struct pbstream_wire_value *wire_value, |
||||
struct pbstream_field_descriptor *field_descriptor) |
||||
{ |
||||
/* Check that the wire type is appropriate for this .proto type. */ |
||||
if(unlikely(wire_value->type != expected_wire_type[field_descriptor->type])) { |
||||
/* Report the type mismatch error. */ |
||||
if(s->callbacks.error_callback) { |
||||
/* TODO: a nice formatted message. */ |
||||
s->callbacks.error_callback(PBSTREAM_ERROR_MISMATCHED_TYPE, NULL, |
||||
s->offset, false); |
||||
} |
||||
|
||||
/* Report the wire value we parsed as an unknown value. */ |
||||
if(s->callbacks.unknown_value_callback) { |
||||
s->callbacks.unknown_value_callback(field_descriptor->field_number, wire_value, |
||||
s->user_data); |
||||
} |
||||
return; |
||||
} |
||||
|
||||
if(field_descriptor->type == PBSTREAM_TYPE_MESSAGE) { |
||||
/* We're entering a sub-message. */ |
||||
if(s->callbacks.begin_message_callback) { |
||||
s->callbacks.begin_message_callback(field_descriptor->d.message, s->user_data); |
||||
} |
||||
|
||||
/* Push and initialize a new stack frame. */ |
||||
RESIZE_DYNARRAY(s->stack, s->stack_len+1); |
||||
struct pbstream_parse_stack_frame *frame = DYNARRAY_GET_TOP(s->stack); |
||||
frame->message_descriptor = field_descriptor->d.message; |
||||
frame->end_offset = 0; /* TODO: set this correctly. */ |
||||
int num_seen_fields = frame->message_descriptor->num_seen_fields; |
||||
INIT_DYNARRAY(frame->seen_fields, num_seen_fields, num_seen_fields); |
||||
} |
||||
else { |
||||
/* This is a scalar value. */ |
||||
struct pbstream_value value; |
||||
pbstream_translate_field(wire_value, field_descriptor->type, &value); |
||||
if(s->callbacks.value_callback) { |
||||
s->callbacks.value_callback(field_descriptor, value, s->user_data); |
||||
} |
||||
} |
||||
} |
||||
|
||||
struct pbstream_field_descriptor *find_field_descriptor_by_number( |
||||
struct pbstream_message_descriptor* message_descriptor, |
||||
pbstream_field_number_t field_number) |
||||
{ |
||||
/* Currently a linear search -- could be optimized to do a binary search,
|
||||
* hash table lookup, or any other number of clever things you might imagine. */ |
||||
for (int i = 0; i < message_descriptor->fields_len; i++) |
||||
if (message_descriptor->fields[i].field_number == field_number) |
||||
return &message_descriptor->fields[i]; |
||||
return NULL; |
||||
} |
||||
|
||||
/* Parses and processes the next value from *buf (but not past end), returning
|
||||
* a status indicating whether the operation succeeded, and calling appropriate |
||||
* callbacks. If more data is needed to parse the last partial field, returns |
||||
* how many more bytes are needed in need_more_bytes. Updates *buf to point |
||||
* past the parsed value if the operation succeeds. */ |
||||
enum pbstream_status pbstream_parse_field(struct pbstream_parse_state *s, |
||||
char **buf, char *end, |
||||
int *need_more_bytes) |
||||
{ |
||||
struct pbstream_parse_stack_frame *frame = DYNARRAY_GET_TOP(s->stack); |
||||
struct pbstream_message_descriptor *message_descriptor = frame->message_descriptor; |
||||
|
||||
pbstream_field_number_t field_number; |
||||
struct pbstream_wire_value wire_value; |
||||
enum pbstream_status status; |
||||
|
||||
/* Decode the raw wire data. */ |
||||
status = pbstream_parse_wire_value(buf, end, &field_number, &wire_value, |
||||
need_more_bytes); |
||||
|
||||
if(unlikely(status != PBSTREAM_STATUS_OK)) { |
||||
if(status == PBSTREAM_ERROR_UNTERMINATED_VARINT && s->callbacks.error_callback) { |
||||
/* TODO: a nice formatted message. */ |
||||
s->callbacks.error_callback(PBSTREAM_ERROR_UNTERMINATED_VARINT, NULL, |
||||
s->offset, true); |
||||
} |
||||
s->fatal_error = true; |
||||
return status; |
||||
} |
||||
|
||||
/* Find the corresponding field definition from the .proto file. */ |
||||
struct pbstream_field_descriptor *field_descriptor; |
||||
field_descriptor = find_field_descriptor_by_number(message_descriptor, field_number); |
||||
|
||||
|
||||
if(likely(field_descriptor != NULL)) { |
||||
if(field_descriptor->seen_field_num > 0) { |
||||
/* Check that this field has not been seen before (unless it's a repeated field) */ |
||||
if(frame->seen_fields[field_descriptor->seen_field_num] && |
||||
field_descriptor->cardinality != PBSTREAM_CARDINALITY_REPEATED) { |
||||
if(s->callbacks.error_callback) { |
||||
s->callbacks.error_callback(PBSTREAM_ERROR_DUPLICATE_FIELD, NULL, |
||||
s->offset, false); |
||||
} |
||||
return PBSTREAM_STATUS_ERROR; |
||||
} |
||||
|
||||
/* Mark the field as seen. */ |
||||
frame->seen_fields[field_descriptor->seen_field_num] = true; |
||||
} |
||||
process_value(s, &wire_value, field_descriptor); |
||||
} else { |
||||
/* This field was not defined in the .proto file. */ |
||||
if(s->callbacks.unknown_value_callback) { |
||||
s->callbacks.unknown_value_callback(field_number, &wire_value, s->user_data); |
||||
} |
||||
} |
||||
return PBSTREAM_STATUS_OK; |
||||
} |
||||
|
||||
/* Process actions associated with the end of a submessage. This includes:
|
||||
* - emittin default values for all optional elements (either explicit |
||||
* defaults or implicit defaults). |
||||
* - emitting errors for any required fields that were not seen. |
||||
* - calling the user's callback. |
||||
* - popping the stack frame. */ |
||||
void process_submessage_end(struct pbstream_parse_state *s) |
||||
{ |
||||
/* TODO: emit default values for optional elements. either explicit defaults
|
||||
* (specified in the .proto file) or implicit defaults (which are specified |
||||
* in the pbstream definition, by type. */ |
||||
|
||||
/* TODO: emit errors for required fields that were not seen. */ |
||||
|
||||
/* Process the end of message by calling the user's callback and popping
|
||||
* our stack frame. */ |
||||
if(s->callbacks.end_message_callback) { |
||||
s->callbacks.end_message_callback(s->user_data); |
||||
} |
||||
|
||||
/* Pop the stack frame associated with this submessage. */ |
||||
RESIZE_DYNARRAY(s->stack, s->stack_len-1); |
||||
} |
||||
|
||||
/* The user-exposed parsing function -- see the header file for documentation. */ |
||||
enum pbstream_status pbstream_parse(struct pbstream_parse_state *s, |
||||
char *buf_start, int buf_len, |
||||
int *consumed_bytes, int *need_more_bytes) |
||||
{ |
||||
char *buf = buf_start; |
||||
char *end = buf_start + buf_len; |
||||
int buf_start_offset = s->offset; |
||||
enum pbstream_status status = PBSTREAM_STATUS_OK; |
||||
|
||||
while(buf < end) { |
||||
/* Check for a submessage ending. */ |
||||
while(s->offset >= DYNARRAY_GET_TOP(s->stack)->end_offset) { |
||||
/* A submessage should end exactly at a field boundary. If we find that
|
||||
* the submessage length indicated an end in the middle of a field, that |
||||
* is an error that indicates data corruption, and we refuse to proceed. */ |
||||
if(unlikely(s->offset != DYNARRAY_GET_TOP(s->stack)->end_offset)) { |
||||
if(s->callbacks.error_callback) { |
||||
s->callbacks.error_callback(PBSTREAM_ERROR_BAD_SUBMESSAGE_END, NULL, |
||||
s->offset, true); |
||||
} |
||||
s->fatal_error = true; |
||||
break; |
||||
} |
||||
process_submessage_end(s); |
||||
} |
||||
|
||||
status = pbstream_parse_field(s, &buf, end, need_more_bytes); |
||||
if(status != PBSTREAM_STATUS_OK) |
||||
break; |
||||
|
||||
s->offset = buf_start_offset + (buf - buf_start); |
||||
} |
||||
return status; |
||||
} |
@ -0,0 +1,233 @@ |
||||
/*
|
||||
* pbstream - a small and simple implementation of Protocol Buffers. |
||||
* |
||||
* Copyright (c) 2008 Joshua Haberman. See LICENSE for details. |
||||
*/ |
||||
|
||||
#include <stdint.h> |
||||
#include <stdbool.h> |
||||
#include <stdlib.h> |
||||
#include "dynarray.h" |
||||
|
||||
/* A list of types as they can appear in a .proto file. */ |
||||
enum pbstream_type { |
||||
PBSTREAM_TYPE_DOUBLE, |
||||
PBSTREAM_TYPE_FLOAT, |
||||
PBSTREAM_TYPE_INT32, |
||||
PBSTREAM_TYPE_INT64, |
||||
PBSTREAM_TYPE_UINT32, |
||||
PBSTREAM_TYPE_UINT64, |
||||
PBSTREAM_TYPE_SINT32, |
||||
PBSTREAM_TYPE_SINT64, |
||||
PBSTREAM_TYPE_FIXED32, |
||||
PBSTREAM_TYPE_FIXED64, |
||||
PBSTREAM_TYPE_SFIXED32, |
||||
PBSTREAM_TYPE_SFIXED64, |
||||
PBSTREAM_TYPE_BOOL, |
||||
PBSTREAM_TYPE_STRING, |
||||
PBSTREAM_TYPE_BYTES, |
||||
|
||||
PBSTREAM_TYPE_ENUM, |
||||
|
||||
PBSTREAM_TYPE_MESSAGE |
||||
}; |
||||
|
||||
/* A list of types as they are encoded on-the-wire. */ |
||||
enum pbstream_wire_type { |
||||
PBSTREAM_WIRE_TYPE_VARINT = 0, |
||||
PBSTREAM_WIRE_TYPE_64BIT = 1, |
||||
PBSTREAM_WIRE_TYPE_STRING = 2, |
||||
PBSTREAM_WIRE_TYPE_START_GROUP = 3, |
||||
PBSTREAM_WIRE_TYPE_END_GROUP = 4, |
||||
PBSTREAM_WIRE_TYPE_32BIT = 5, |
||||
}; |
||||
|
||||
/* Each field must have a cardinality that is one of the following. */ |
||||
enum pbstream_cardinality { |
||||
PBSTREAM_CARDINALITY_OPTIONAL, /* must appear 0 or 1 times */ |
||||
PBSTREAM_CARDINALITY_REQUIRED, /* must appear exactly 1 time */ |
||||
PBSTREAM_CARDINALITY_REPEATED, /* may appear 0 or more times */ |
||||
}; |
||||
|
||||
typedef int32_t pbstream_field_number_t; |
||||
|
||||
/* A deserialized value as described in a .proto file. */ |
||||
struct pbstream_value { |
||||
enum pbstream_type type; |
||||
union { |
||||
double _double; |
||||
float _float; |
||||
int32_t int32; |
||||
int64_t int64; |
||||
uint32_t uint32; |
||||
uint64_t uint64; |
||||
bool _bool; |
||||
struct { |
||||
char *data; /* This will be a pointer to the buffer of data the client provided. */ |
||||
int len; |
||||
} string; |
||||
struct { |
||||
char *data; /* This will be a pointer to the buffer of data the client provided. */ |
||||
int len; |
||||
} bytes; |
||||
int32_t _enum; |
||||
} v; |
||||
}; |
||||
|
||||
/* A value as it is encoded on-the-wire */ |
||||
struct pbstream_wire_value { |
||||
enum pbstream_wire_type type; |
||||
union { |
||||
uint64_t varint; |
||||
uint64_t _64bit; |
||||
struct { |
||||
char *data; /* This will be a pointer to the buffer of data the client provided. */ |
||||
int len; |
||||
} string; |
||||
uint32_t _32bit; |
||||
} v; |
||||
}; |
||||
|
||||
/* The definition of an enum as defined in a pbstream. For example:
|
||||
* Corpus { |
||||
* UNIVERSAL = 0; |
||||
* WEB = 1; |
||||
* IMAGES = 2; |
||||
* LOCAL = 3; |
||||
* NEWS = 4; |
||||
* } |
||||
*/ |
||||
struct pbstream_enum_descriptor { |
||||
char *name; |
||||
struct enum_value { |
||||
char *name; |
||||
int value; |
||||
} value; |
||||
DEFINE_DYNARRAY(values, struct enum_value); |
||||
}; |
||||
|
||||
/* The definition of a field as defined in a pbstream (within a message).
|
||||
* For example: |
||||
* required int32 a = 1; |
||||
*/ |
||||
struct pbstream_field_descriptor { |
||||
pbstream_field_number_t field_number; |
||||
char *name; |
||||
enum pbstream_type type; |
||||
enum pbstream_cardinality cardinality; |
||||
struct pbstream_value *default_value; /* NULL if none */ |
||||
|
||||
/* Index into the "seen" list for the message. -1 for repeated fields (for
|
||||
* which we have no need to track whether it's been seen). */ |
||||
int seen_field_num; |
||||
|
||||
union extra_data { |
||||
struct pbstream_enum_descriptor *_enum; |
||||
struct pbstream_message_descriptor *message; |
||||
} d; |
||||
}; |
||||
|
||||
/* A message as defined by the "message" construct in a .proto file. */ |
||||
struct pbstream_message_descriptor { |
||||
char *name; /* does not include package name or parent message names. */ |
||||
char *full_name; |
||||
int num_seen_fields; /* How many fields we have to track "seen" information for. */ |
||||
DEFINE_DYNARRAY(fields, struct pbstream_field_descriptor); |
||||
DEFINE_DYNARRAY(messages, struct pbstream_message_descriptor); |
||||
DEFINE_DYNARRAY(enums, struct pbstream_enum_descriptor); |
||||
}; |
||||
|
||||
/* Callback for when a value is parsed that matches a field in the .proto file.
|
||||
* */ |
||||
typedef void (*pbstream_value_callback_t)( |
||||
struct pbstream_field_descriptor *field_descriptor, |
||||
struct pbstream_value value, |
||||
void *user_data); |
||||
|
||||
/* Callback for when a value is parsed for which no field was defined in the
|
||||
* .proto file. */ |
||||
typedef void (*pbstream_unknown_value_callback_t)( |
||||
pbstream_field_number_t field_number, |
||||
struct pbstream_wire_value *wire_value, |
||||
void *user_data); |
||||
|
||||
/* Callback for when a nested message is beginning. */ |
||||
typedef void (*pbstream_begin_message_callback_t)( |
||||
struct pbstream_message_descriptor *message_descriptor, |
||||
void *user_data); |
||||
|
||||
/* Callback for when a nested message is ending. */ |
||||
typedef void (*pbstream_end_message_callback_t)(void *user_data); |
||||
|
||||
/* Callback for when an error occurred. */ |
||||
enum pbstream_error { |
||||
PBSTREAM_ERROR_UNTERMINATED_VARINT, /* A varint did not terminate before hitting 64 bits. Fatal. */ |
||||
PBSTREAM_ERROR_MISSING_REQUIRED_FIELD, /* A field marked "required" was not present. */ |
||||
PBSTREAM_ERROR_DUPLICATE_FIELD, /* An optional or required field appeared more than once. */ |
||||
PBSTREAM_ERROR_MISMATCHED_TYPE, /* A field was encoded with the wrong wire type. */ |
||||
PBSTREAM_ERROR_BAD_SUBMESSAGE_END, /* A submessage ended in the middle of data. Indicates corruption. */ |
||||
}; |
||||
/* The description is a static buffer which the client must not free. The
|
||||
* offset is the location in the input where the error was detected (this |
||||
* offset is relative to the beginning of the stream). If is_fatal is true, |
||||
* parsing cannot continue. */ |
||||
typedef void (*pbstream_error_callback_t)(enum pbstream_error error, char *description, |
||||
int offset, bool is_fatal); |
||||
|
||||
struct pbstream_callbacks { |
||||
pbstream_value_callback_t value_callback; |
||||
pbstream_unknown_value_callback_t unknown_value_callback; |
||||
pbstream_begin_message_callback_t begin_message_callback; |
||||
pbstream_end_message_callback_t end_message_callback; |
||||
pbstream_error_callback_t error_callback; |
||||
}; |
||||
|
||||
struct pbstream_parse_stack_frame { |
||||
struct pbstream_message_descriptor *message_descriptor; |
||||
int end_offset; /* We don't know this for the outermost frame, and set it to INT_MAX. */ |
||||
|
||||
/* For every field except repeated ones we track whether we have seen it or
|
||||
* not. This lets us detect three important conditions: |
||||
* 1. the field has a default, but we did not see it anywhere (action: emit the default) |
||||
* 2. the field is required, but we did not see it anywhere (action: error) |
||||
* 3. the field is required or optional, but we saw it more than once (action: error) */ |
||||
DEFINE_DYNARRAY(seen_fields, bool); |
||||
}; |
||||
|
||||
/* The stream parser keeps this as its state. */ |
||||
struct pbstream_parse_state { |
||||
struct pbstream_callbacks callbacks; |
||||
int offset; |
||||
bool fatal_error; |
||||
void *user_data; |
||||
DEFINE_DYNARRAY(stack, struct pbstream_parse_stack_frame); |
||||
}; |
||||
|
||||
/* Call this once before parsing to initialize the data structures.
|
||||
* message_type can be NULL, in which case all fields will be reported as |
||||
* unknown. */ |
||||
void pbstream_init_parser(struct pbstream_parse_state *state, |
||||
struct pbstream_message_descriptor *message_descriptor, |
||||
struct pbstream_callbacks *callbacks, |
||||
void *user_data); |
||||
|
||||
/* Call this to parse as much of buf as possible, calling callbacks as
|
||||
* appropriate. buf need not be a complete pbstream. Returns the number of |
||||
* bytes consumed. In subsequent calls, buf should point to the first byte not |
||||
* consumed by previous calls. |
||||
* |
||||
* If need_more_bytes is non-zero when parse() returns, this indicates that the |
||||
* beginning of a string or sub-message was recognized, but not all bytes of |
||||
* the string were in memory. The string will not be successfully parsed (and |
||||
* thus parsing of the pbstream cannot proceed) unless need_more_bytes more |
||||
* data is available upon the next call to parse. The caller may need to |
||||
* increase its buffer size. */ |
||||
enum pbstream_status { |
||||
PBSTREAM_STATUS_OK = 0, |
||||
PBSTREAM_STATUS_INCOMPLETE = 1, /* buffer ended in the middle of a field. */ |
||||
PBSTREAM_STATUS_ERROR = 2, /* fatal error in the file, cannot recover. */ |
||||
}; |
||||
|
||||
enum pbstream_status pbstream_parse(struct pbstream_parse_state *state, |
||||
char *buf, int buf_len, |
||||
int *consumed_bytes, int *need_more_bytes); |
Binary file not shown.
Loading…
Reference in new issue