From d29a54fc976f2dd91f05e40baf0a4b37869a88bf Mon Sep 17 00:00:00 2001
From: Joshua Haberman
Date: Sun, 1 Feb 2009 12:57:09 -0800
Subject: [PATCH] Initial commit.

---
 Makefile   |   8 +
 dynarray.h |  32 ++++
 pbstream.c | 453 +++++++++++++++++++++++++++++++++++++++++++++++++++++
 pbstream.h | 233 +++++++++++++++++++++++++++
 pbstream.o | Bin 0 -> 5920 bytes
 5 files changed, 726 insertions(+)
 create mode 100644 Makefile
 create mode 100644 dynarray.h
 create mode 100644 pbstream.c
 create mode 100644 pbstream.h
 create mode 100644 pbstream.o

diff --git a/Makefile b/Makefile
new file mode 100644
index 0000000000..5dbbecede3
--- /dev/null
+++ b/Makefile
@@ -0,0 +1,8 @@
+
+.PHONY: all clean
+all: pbstream.o
+clean:
+	rm -f pbstream.o
+
+# Rebuild when either header changes, not only the .c file.
+pbstream.o: pbstream.c pbstream.h dynarray.h
+	gcc -std=c99 -O3 -Wall -o pbstream.o -c pbstream.c
diff --git a/dynarray.h b/dynarray.h
new file mode 100644
index 0000000000..9ff1289860
--- /dev/null
+++ b/dynarray.h
@@ -0,0 +1,32 @@
+/* Minimal growable-array helpers, written as macros so that they work with
+ * any element type.  An array called `name` is backed by three variables
+ * (usually struct members): `name` (the storage), `name_len` (elements in
+ * use) and `name_size` (elements allocated). */
+#ifndef DYNARRAY_H_
+#define DYNARRAY_H_
+
+#include <stdlib.h>  /* realloc(), free() */
+
+/* NOTE: both of these evaluate their arguments more than once. */
+#define MAX(a, b) ((a) > (b) ? (a) : (b))
+#define MIN(a, b) ((a) < (b) ? (a) : (b))
+
+/* Declares the three variables that back a dynamic array.  The user supplies
+ * the terminating semicolon. */
+#define DEFINE_DYNARRAY(name, type) \
+  type *name; \
+  int name ## _len; \
+  int name ## _size
+
+/* Sets the array's length to desired_len, doubling the allocation as often
+ * as needed to make it fit.  desired_len is evaluated exactly once, so it may
+ * safely refer to the array's current length.  A zero-sized array grows to
+ * one element first (doubling zero would never terminate).
+ * NOTE: the result of realloc() is not checked. */
+#define RESIZE_DYNARRAY(name, desired_len) do { \
+  int dynarray_new_len_ = (desired_len); \
+  int dynarray_orig_size_ = name ## _size; \
+  while(name ## _size < dynarray_new_len_) \
+    name ## _size = (name ## _size > 0) ? name ## _size * 2 : 1; \
+  /* don't bother shrinking for now. when/if we do, we'll want to bake in \
+   * some kind of hysteresis so that we don't shrink until we've been under \
+   * for a while. */ \
+  if(name ## _size != dynarray_orig_size_) \
+    name = realloc(name, name ## _size * sizeof(*name)); \
+  name ## _len = dynarray_new_len_; \
+} while(0)
+
+/* Gives the array its initial length and allocation.  The elements are NOT
+ * initialized. */
+#define INIT_DYNARRAY(name, initial_len, initial_size) do { \
+  name ## _len = (initial_len); \
+  name ## _size = (initial_size); \
+  name = realloc(NULL, name ## _size * sizeof(*name)); \
+} while(0)
+
+/* Releases the array's storage.  The user supplies the semicolon. */
+#define FREE_DYNARRAY(name) \
+  free(name)
+
+/* Pointer to the last element in use.  The array must not be empty. */
+#define DYNARRAY_GET_TOP(name) \
+  (&name[name ## _len - 1])
+
+#endif  /* DYNARRAY_H_ */
diff --git a/pbstream.c b/pbstream.c
new file mode 100644
index 0000000000..c300153e86
--- /dev/null
+++ b/pbstream.c
@@ -0,0 +1,453 @@
+/*
+ * pbstream - a stream-oriented implementation of protocol buffers.
+ *
+ * Copyright (c) 2008 Joshua Haberman. See LICENSE for details.
+ */
+
+#include <limits.h>  /* INT_MAX */
+#include <string.h>  /* memcpy() */
+#include "pbstream.h"
+
+/* Branch prediction hints for GCC.  On other compilers they must still expand
+ * to the tested expression, otherwise `if(unlikely(x))` becomes `if()`. */
+#ifdef __GNUC__
+#define likely(x) __builtin_expect(!!(x),1)
+#define unlikely(x) __builtin_expect(!!(x),0)
+#else
+#define likely(x) (x)
+#define unlikely(x) (x)
+#endif
+
+/* An array, indexed by pbstream_type, that indicates what wire type is
+ * expected for the given pbstream type. 
*/
+static const enum pbstream_wire_type expected_wire_type[] = {
+  [PBSTREAM_TYPE_DOUBLE]   = PBSTREAM_WIRE_TYPE_64BIT,
+  [PBSTREAM_TYPE_FLOAT]    = PBSTREAM_WIRE_TYPE_32BIT,
+  [PBSTREAM_TYPE_INT32]    = PBSTREAM_WIRE_TYPE_VARINT,
+  [PBSTREAM_TYPE_INT64]    = PBSTREAM_WIRE_TYPE_VARINT,
+  [PBSTREAM_TYPE_UINT32]   = PBSTREAM_WIRE_TYPE_VARINT,
+  [PBSTREAM_TYPE_UINT64]   = PBSTREAM_WIRE_TYPE_VARINT,
+  [PBSTREAM_TYPE_SINT32]   = PBSTREAM_WIRE_TYPE_VARINT,
+  [PBSTREAM_TYPE_SINT64]   = PBSTREAM_WIRE_TYPE_VARINT,
+  [PBSTREAM_TYPE_FIXED32]  = PBSTREAM_WIRE_TYPE_32BIT,
+  [PBSTREAM_TYPE_FIXED64]  = PBSTREAM_WIRE_TYPE_64BIT,
+  [PBSTREAM_TYPE_SFIXED32] = PBSTREAM_WIRE_TYPE_32BIT,
+  [PBSTREAM_TYPE_SFIXED64] = PBSTREAM_WIRE_TYPE_64BIT,
+  [PBSTREAM_TYPE_BOOL]     = PBSTREAM_WIRE_TYPE_VARINT,
+  [PBSTREAM_TYPE_STRING]   = PBSTREAM_WIRE_TYPE_STRING,
+  [PBSTREAM_TYPE_BYTES]    = PBSTREAM_WIRE_TYPE_STRING,
+  [PBSTREAM_TYPE_ENUM]     = PBSTREAM_WIRE_TYPE_VARINT,
+  [PBSTREAM_TYPE_MESSAGE]  = PBSTREAM_WIRE_TYPE_STRING,
+};
+
+/* Reads a varint starting at *buf (but never at or past end), storing the
+ * result in *out_value.
+ *
+ * Returns:
+ *   PBSTREAM_STATUS_OK          the varint was decoded; *buf now points just
+ *                               past its last byte.
+ *   PBSTREAM_STATUS_INCOMPLETE  the buffer ended before the varint did; *buf
+ *                               is unchanged.
+ *   PBSTREAM_STATUS_ERROR       the varint did not terminate within 64 bits;
+ *                               *buf is unchanged.
+ *
+ * Every byte is bounds-checked before it is read, so callers do not need to
+ * over-allocate their buffer. */
+enum pbstream_status get_varint(char **buf, char *end, uint64_t *out_value)
+{
+  uint64_t value = 0;
+  int bitpos = 0;
+  char *b = *buf;
+
+  *out_value = 0;
+  for(;;) {
+    if(unlikely(bitpos >= 64))
+      return PBSTREAM_STATUS_ERROR;
+    if(unlikely(b >= end))
+      return PBSTREAM_STATUS_INCOMPLETE;
+
+    /* Widen to 64 bits *before* shifting: shifting an int by 32 or more is
+     * undefined and would lose the upper half of the value. */
+    uint64_t byte = (unsigned char)*b++;
+    value |= (byte & 0x7F) << bitpos;
+    if(!(byte & 0x80))
+      break;  /* No continuation bit: this was the last byte. */
+    bitpos += 7;
+  }
+
+  *out_value = value;
+  *buf = b;
+  return PBSTREAM_STATUS_OK;
+}
+
+/* TODO: the little-endian versions of these functions don't respect alignment. 
+ * While it's hard to believe that this could be less efficient than the
+ * alternative (the big endian implementation), this deserves some tests and
+ * measurements to be sure.
+ *
+ * (The readers below assemble the value one byte at a time, which is valid
+ * for any alignment and any host byte order.) */
+
+/* Reads a 32-bit little-endian value from *buf (but not past end) into
+ * *out_value.  On success advances *buf by 4 and returns PBSTREAM_STATUS_OK;
+ * returns PBSTREAM_STATUS_INCOMPLETE (leaving *buf alone) if fewer than 4
+ * bytes are available. */
+enum pbstream_status get_32_le(char **buf, char *end, uint32_t *out_value)
+{
+  const unsigned char *b = (const unsigned char*)*buf;
+  if(unlikely(end - *buf < 4))
+    return PBSTREAM_STATUS_INCOMPLETE;
+
+  *out_value = (uint32_t)b[0] | ((uint32_t)b[1] << 8) |
+               ((uint32_t)b[2] << 16) | ((uint32_t)b[3] << 24);
+  *buf += 4;
+  return PBSTREAM_STATUS_OK;
+}
+
+/* Reads a 64-bit little-endian value from *buf (but not past end) into
+ * *out_value.  On success advances *buf by 8 and returns PBSTREAM_STATUS_OK;
+ * returns PBSTREAM_STATUS_INCOMPLETE (leaving *buf alone) if fewer than 8
+ * bytes are available. */
+enum pbstream_status get_64_le(char **buf, char *end, uint64_t *out_value)
+{
+  const unsigned char *b = (const unsigned char*)*buf;
+  if(unlikely(end - *buf < 8))
+    return PBSTREAM_STATUS_INCOMPLETE;
+
+  uint64_t value = 0;
+  for(int i = 0; i < 8; i++)
+    value |= (uint64_t)b[i] << (8 * i);
+  *out_value = value;
+  *buf += 8;
+  return PBSTREAM_STATUS_OK;
+}
+
+/* Undoes zig-zag encoding of a 32-bit value: 0, 1, 2, 3, ... map back to
+ * 0, -1, 1, -2, ...  Only the low 32 bits of src are used. */
+int32_t zigzag_decode_32(uint64_t src)
+{
+  uint32_t n = (uint32_t)src;
+  uint32_t sign_mask = (n & 1) ? UINT32_MAX : 0;
+  return (int32_t)((n >> 1) ^ sign_mask);
+}
+
+/* Undoes zig-zag encoding of a 64-bit value (see zigzag_decode_32). */
+int64_t zigzag_decode_64(uint64_t src)
+{
+  uint64_t sign_mask = (src & 1) ? UINT64_MAX : 0;
+  return (int64_t)((src >> 1) ^ sign_mask);
+}
+
+/* Parses the next field-number/wire-value pair from the stream of bytes
+ * starting at *buf, without reading past end. Stores the parsed field number
+ * and wire value in *field_number and *wire_value, respectively.
+ *
+ * Returns a status indicating whether the operation was successful:
+ *   PBSTREAM_STATUS_OK          *buf is updated to point past the parsed data
+ *                               and *need_more_bytes is 0.
+ *   PBSTREAM_STATUS_INCOMPLETE  the buffer ended inside the field.  If the
+ *                               shortfall is known (a string whose length was
+ *                               read but whose bytes are not all present) it
+ *                               is stored in *need_more_bytes, otherwise
+ *                               *need_more_bytes is 0.  *buf is unchanged.
+ *   PBSTREAM_STATUS_ERROR       the data is malformed: a helper reported an
+ *                               error, a string length exceeds INT_MAX, or
+ *                               the wire type is not a known one.  *buf is
+ *                               unchanged.
+ */
+enum pbstream_status pbstream_parse_wire_value(char **buf, char *end,
+                                               pbstream_field_number_t *field_number,
+                                               struct pbstream_wire_value *wire_value,
+                                               int *need_more_bytes)
+{
+  char *b = *buf; /* Our local buf pointer -- only update buf if we succeed. */
+
+/* Runs one of the get_*() readers on b and propagates any failure. */
+#define DECODE(dest, func) \
+  do { \
+    enum pbstream_status status = func(&b, end, &dest); \
+    if(unlikely(status != PBSTREAM_STATUS_OK)) \
+      return status; \
+  } while (0)
+
+  /* Only the string case below ever reports a non-zero shortfall. */
+  *need_more_bytes = 0;
+
+  uint64_t key;
+  DECODE(key, get_varint);
+
+  *field_number = (pbstream_field_number_t)(key >> 3);
+  wire_value->type = (enum pbstream_wire_type)(key & 0x07);
+
+  switch(wire_value->type)
+  {
+    case PBSTREAM_WIRE_TYPE_VARINT:
+      DECODE(wire_value->v.varint, get_varint);
+      break;
+
+    case PBSTREAM_WIRE_TYPE_64BIT:
+      DECODE(wire_value->v._64bit, get_64_le);
+      break;
+
+    case PBSTREAM_WIRE_TYPE_STRING:
+    {
+      uint64_t string_len;
+      DECODE(string_len, get_varint);
+      /* The length is stored in an int, so anything larger cannot be
+       * represented (and cannot be a sane length anyway). */
+      if(unlikely(string_len > INT_MAX))
+        return PBSTREAM_STATUS_ERROR;
+      /* Compare lengths rather than pointers: b + len could overflow. */
+      if((long long)string_len > (long long)(end - b)) {
+        *need_more_bytes = (int)((long long)string_len - (long long)(end - b));
+        return PBSTREAM_STATUS_INCOMPLETE;
+      }
+      wire_value->v.string.len = (int)string_len;
+      wire_value->v.string.data = b;
+      b += wire_value->v.string.len;
+      break;
+    }
+
+    case PBSTREAM_WIRE_TYPE_START_GROUP:
+    case PBSTREAM_WIRE_TYPE_END_GROUP:
+      /* TODO (though these are deprecated, so not high priority). */
+      break;
+
+    case PBSTREAM_WIRE_TYPE_32BIT:
+      DECODE(wire_value->v._32bit, get_32_le);
+      break;
+
+    default:
+      /* Wire types 6 and 7 are not defined. */
+      return PBSTREAM_STATUS_ERROR;
+  }
+
+  *buf = b;
+  return PBSTREAM_STATUS_OK;
+}
+#undef DECODE
+
+/* Translates from a wire value to a .proto value. The caller should have
+ * already checked that the wire_value is of the correct type. The pbstream
+ * type must not be PBSTREAM_TYPE_MESSAGE. This operation always succeeds. 
*/
+void pbstream_translate_field(struct pbstream_wire_value *wire_value,
+                              enum pbstream_type type,
+                              struct pbstream_value *out_value)
+{
+  out_value->type = type;
+  switch(type) {
+    case PBSTREAM_TYPE_DOUBLE:
+      /* memcpy reinterprets the bits without violating aliasing rules. */
+      memcpy(&out_value->v._double, &wire_value->v._64bit, sizeof(double));
+      break;
+
+    case PBSTREAM_TYPE_FLOAT:
+      memcpy(&out_value->v._float, &wire_value->v._32bit, sizeof(float));
+      break;
+
+    case PBSTREAM_TYPE_INT32:
+      out_value->v.int32 = (int32_t)wire_value->v.varint;
+      break;
+
+    case PBSTREAM_TYPE_INT64:
+      /* Plain varint: only the sint* types are zig-zag encoded. */
+      out_value->v.int64 = (int64_t)wire_value->v.varint;
+      break;
+
+    case PBSTREAM_TYPE_UINT32:
+      out_value->v.uint32 = (uint32_t)wire_value->v.varint;
+      break;
+
+    case PBSTREAM_TYPE_UINT64:
+      out_value->v.uint64 = (uint64_t)wire_value->v.varint;
+      break;
+
+    case PBSTREAM_TYPE_SINT32:
+      out_value->v.int32 = zigzag_decode_32(wire_value->v.varint);
+      break;
+
+    case PBSTREAM_TYPE_SINT64:
+      out_value->v.int64 = zigzag_decode_64(wire_value->v.varint);
+      break;
+
+    case PBSTREAM_TYPE_FIXED32:
+      out_value->v.uint32 = wire_value->v._32bit;
+      break;
+
+    case PBSTREAM_TYPE_FIXED64:
+      out_value->v.uint64 = wire_value->v._64bit;
+      break;
+
+    case PBSTREAM_TYPE_SFIXED32:
+      out_value->v.int32 = (int32_t)wire_value->v._32bit;
+      break;
+
+    case PBSTREAM_TYPE_SFIXED64:
+      out_value->v.int64 = (int64_t)wire_value->v._64bit;
+      break;
+
+    case PBSTREAM_TYPE_BOOL:
+      out_value->v._bool = (wire_value->v.varint != 0);
+      break;
+
+    case PBSTREAM_TYPE_STRING:
+      out_value->v.string.data = wire_value->v.string.data;
+      out_value->v.string.len = wire_value->v.string.len;
+      /* TODO: validate UTF-8? */
+      break;
+
+    case PBSTREAM_TYPE_BYTES:
+      out_value->v.bytes.data = wire_value->v.string.data;
+      out_value->v.bytes.len = wire_value->v.string.len;
+      break;
+
+    case PBSTREAM_TYPE_ENUM:
+      /* Keep the enum's numeric value; do not collapse it to 0/1. */
+      out_value->v._enum = (int32_t)wire_value->v.varint;
+      break;
+
+    case PBSTREAM_TYPE_MESSAGE:
+      /* Should never happen. */
+      break;
+  }
+}
+
+/* Given a wire value that was just parsed and a matching field descriptor,
+ * processes the given value and performs the appropriate actions. These
+ * actions include:
+ * - checking that the wire type is as expected
+ * - converting the wire type to a .proto type
+ * - entering a sub-message, if that is in fact what this field implies.
+ *
+ * This function also calls user callbacks pertaining to any of the above at
+ * the appropriate times.
+ *
+ * NOTE: entering a sub-message may reallocate s->stack, so pointers into the
+ * stack obtained before this call must not be used afterwards. */
+void process_value(struct pbstream_parse_state *s,
+                   struct pbstream_wire_value *wire_value,
+                   struct pbstream_field_descriptor *field_descriptor)
+{
+  /* Check that the wire type is appropriate for this .proto type. */
+  if(unlikely(wire_value->type != expected_wire_type[field_descriptor->type])) {
+    /* Report the type mismatch error. */
+    if(s->callbacks.error_callback) {
+      /* TODO: a nice formatted message. */
+      s->callbacks.error_callback(PBSTREAM_ERROR_MISMATCHED_TYPE, NULL,
+                                  s->offset, false);
+    }
+
+    /* Report the wire value we parsed as an unknown value. */
+    if(s->callbacks.unknown_value_callback) {
+      s->callbacks.unknown_value_callback(field_descriptor->field_number, wire_value,
+                                          s->user_data);
+    }
+    return;
+  }
+
+  if(field_descriptor->type != PBSTREAM_TYPE_MESSAGE) {
+    /* This is a scalar value. */
+    struct pbstream_value value;
+    pbstream_translate_field(wire_value, field_descriptor->type, &value);
+    if(s->callbacks.value_callback) {
+      s->callbacks.value_callback(field_descriptor, value, s->user_data);
+    }
+    return;
+  }
+
+  /* We're entering a sub-message. */
+  if(s->callbacks.begin_message_callback) {
+    s->callbacks.begin_message_callback(field_descriptor->d.message, s->user_data);
+  }
+
+  /* Push and initialize a new stack frame. */
+  RESIZE_DYNARRAY(s->stack, s->stack_len+1);
+  struct pbstream_parse_stack_frame *frame = DYNARRAY_GET_TOP(s->stack);
+  frame->message_descriptor = field_descriptor->d.message;
+  frame->end_offset = 0;  /* TODO: set this correctly. */
+  int num_seen_fields = frame->message_descriptor->num_seen_fields;
+  INIT_DYNARRAY(frame->seen_fields, num_seen_fields, num_seen_fields);
+  /* The allocation is uninitialized; no field has been seen yet. */
+  if(frame->seen_fields && num_seen_fields > 0) {
+    memset(frame->seen_fields, 0, num_seen_fields * sizeof(*frame->seen_fields));
+  }
+}
+
+/* Returns the descriptor of the field numbered field_number within
+ * message_descriptor, or NULL if there is none (including when
+ * message_descriptor itself is NULL, i.e. the message type is unknown). */
+struct pbstream_field_descriptor *find_field_descriptor_by_number(
+    struct pbstream_message_descriptor* message_descriptor,
+    pbstream_field_number_t field_number)
+{
+  if(message_descriptor == NULL)
+    return NULL;
+
+  /* Currently a linear search -- could be optimized to do a binary search,
+   * hash table lookup, or any other number of clever things you might imagine. */
+  for (int i = 0; i < message_descriptor->fields_len; i++) {
+    struct pbstream_field_descriptor *field = &message_descriptor->fields[i];
+    if (field->field_number == field_number)
+      return field;
+  }
+  return NULL;
+}
+
+/* Parses and processes the next value from *buf (but not past end), returning
+ * a status indicating whether the operation succeeded, and calling appropriate
+ * callbacks. If more data is needed to parse the last partial field, returns
+ * how many more bytes are needed in need_more_bytes. Updates *buf to point
+ * past the parsed value if the operation succeeds.
+ *
+ * A PBSTREAM_STATUS_INCOMPLETE result is recoverable (call again with more
+ * data) and does not set s->fatal_error; any other failure of the wire
+ * decoder does. */
+enum pbstream_status pbstream_parse_field(struct pbstream_parse_state *s,
+                                          char **buf, char *end,
+                                          int *need_more_bytes)
+{
+  struct pbstream_parse_stack_frame *frame = DYNARRAY_GET_TOP(s->stack);
+  struct pbstream_message_descriptor *message_descriptor = frame->message_descriptor;
+
+  pbstream_field_number_t field_number;
+  struct pbstream_wire_value wire_value;
+
+  /* Decode the raw wire data. */
+  enum pbstream_status status =
+      pbstream_parse_wire_value(buf, end, &field_number, &wire_value,
+                                need_more_bytes);
+
+  if(unlikely(status != PBSTREAM_STATUS_OK)) {
+    if(status != PBSTREAM_STATUS_INCOMPLETE) {
+      /* Malformed wire data.  Reported as an unterminated varint, the one
+       * wire-level error the callback interface can express. */
+      if(s->callbacks.error_callback) {
+        /* TODO: a nice formatted message. */
+        s->callbacks.error_callback(PBSTREAM_ERROR_UNTERMINATED_VARINT, NULL,
+                                    s->offset, true);
+      }
+      s->fatal_error = true;
+    }
+    return status;
+  }
+
+  /* Find the corresponding field definition from the .proto file. */
+  struct pbstream_field_descriptor *field_descriptor =
+      find_field_descriptor_by_number(message_descriptor, field_number);
+
+  if(unlikely(field_descriptor == NULL)) {
+    /* This field was not defined in the .proto file. */
+    if(s->callbacks.unknown_value_callback) {
+      s->callbacks.unknown_value_callback(field_number, &wire_value, s->user_data);
+    }
+    return PBSTREAM_STATUS_OK;
+  }
+
+  /* Index 0 is a valid slot; only negative values mean "not tracked". */
+  const int seen_index = field_descriptor->seen_field_num;
+  if(seen_index >= 0) {
+    /* Check that this field has not been seen before (unless it's a repeated field) */
+    if(frame->seen_fields[seen_index] &&
+       field_descriptor->cardinality != PBSTREAM_CARDINALITY_REPEATED) {
+      if(s->callbacks.error_callback) {
+        s->callbacks.error_callback(PBSTREAM_ERROR_DUPLICATE_FIELD, NULL,
+                                    s->offset, false);
+      }
+      return PBSTREAM_STATUS_ERROR;
+    }
+
+    /* Mark the field as seen. */
+    frame->seen_fields[seen_index] = true;
+  }
+
+  /* `frame` may dangle after this call (the stack can be reallocated). */
+  process_value(s, &wire_value, field_descriptor);
+  return PBSTREAM_STATUS_OK;
+}
+
+/* Process actions associated with the end of a submessage. This includes:
+ * - emitting default values for all optional elements (either explicit
+ *   defaults or implicit defaults).
+ * - emitting errors for any required fields that were not seen.
+ * - calling the user's callback.
+ * - releasing the frame's "seen" bookkeeping and popping the stack frame. */
+void process_submessage_end(struct pbstream_parse_state *s)
+{
+  /* TODO: emit default values for optional elements. either explicit defaults
+   * (specified in the .proto file) or implicit defaults (which are specified
+   * in the pbstream definition, by type. */
+
+  /* TODO: emit errors for required fields that were not seen. */
+
+  /* Process the end of message by calling the user's callback and popping
+   * our stack frame. */
+  if(s->callbacks.end_message_callback) {
+    s->callbacks.end_message_callback(s->user_data);
+  }
+
+  /* Release the per-frame array allocated when the frame was pushed. */
+  struct pbstream_parse_stack_frame *frame = DYNARRAY_GET_TOP(s->stack);
+  FREE_DYNARRAY(frame->seen_fields);
+  frame->seen_fields = NULL;
+
+  /* Pop the stack frame associated with this submessage. */
+  RESIZE_DYNARRAY(s->stack, s->stack_len-1);
+}
+
+/* The user-exposed parsing function -- see the header file for documentation.
+ *
+ * On return *consumed_bytes (if non-NULL) holds how far into the buffer the
+ * parser advanced.  Once a fatal error has been recorded in s, every call
+ * returns PBSTREAM_STATUS_ERROR without consuming anything. */
+enum pbstream_status pbstream_parse(struct pbstream_parse_state *s,
+                                    char *buf_start, int buf_len,
+                                    int *consumed_bytes, int *need_more_bytes)
+{
+  char *buf = buf_start;
+  char *end = buf_start + buf_len;
+  int buf_start_offset = s->offset;
+  enum pbstream_status status = PBSTREAM_STATUS_OK;
+
+  if(need_more_bytes)
+    *need_more_bytes = 0;
+  if(unlikely(s->fatal_error))
+    status = PBSTREAM_STATUS_ERROR;
+
+  while(status == PBSTREAM_STATUS_OK && buf < end) {
+    /* Check for a submessage ending. */
+    while(s->offset >= DYNARRAY_GET_TOP(s->stack)->end_offset) {
+      /* A submessage should end exactly at a field boundary. If we find that
+       * the submessage length indicated an end in the middle of a field, that
+       * is an error that indicates data corruption, and we refuse to proceed. */
+      if(unlikely(s->offset != DYNARRAY_GET_TOP(s->stack)->end_offset)) {
+        if(s->callbacks.error_callback) {
+          s->callbacks.error_callback(PBSTREAM_ERROR_BAD_SUBMESSAGE_END, NULL,
+                                      s->offset, true);
+        }
+        s->fatal_error = true;
+        status = PBSTREAM_STATUS_ERROR;
+        break;
+      }
+      process_submessage_end(s);
+    }
+    if(status != PBSTREAM_STATUS_OK)
+      break;  /* Do not parse any further after a fatal error. */
+
+    status = pbstream_parse_field(s, &buf, end, need_more_bytes);
+    if(status != PBSTREAM_STATUS_OK)
+      break;
+
+    s->offset = buf_start_offset + (buf - buf_start);
+  }
+
+  if(consumed_bytes)
+    *consumed_bytes = (int)(buf - buf_start);
+  return status;
+}
diff --git a/pbstream.h b/pbstream.h
new file mode 100644
index 0000000000..e4046e0ca2
--- /dev/null
+++ b/pbstream.h
@@ -0,0 +1,233 @@
+/*
+ * pbstream - a small and simple implementation of Protocol Buffers.
+ *
+ * Copyright (c) 2008 Joshua Haberman. See LICENSE for details.
+ */
+
+#include <stdbool.h>
+#include <stdint.h>
+#include <stdlib.h>
+#include "dynarray.h"
+
+/* A list of types as they can appear in a .proto file. 
*/
+/* NOTE: pbstream.c indexes its expected_wire_type table with these values, so
+ * any change here must be mirrored there. */
+enum pbstream_type {
+  PBSTREAM_TYPE_DOUBLE,
+  PBSTREAM_TYPE_FLOAT,
+  PBSTREAM_TYPE_INT32,
+  PBSTREAM_TYPE_INT64,
+  PBSTREAM_TYPE_UINT32,
+  PBSTREAM_TYPE_UINT64,
+  PBSTREAM_TYPE_SINT32,
+  PBSTREAM_TYPE_SINT64,
+  PBSTREAM_TYPE_FIXED32,
+  PBSTREAM_TYPE_FIXED64,
+  PBSTREAM_TYPE_SFIXED32,
+  PBSTREAM_TYPE_SFIXED64,
+  PBSTREAM_TYPE_BOOL,
+  PBSTREAM_TYPE_STRING,
+  PBSTREAM_TYPE_BYTES,
+
+  PBSTREAM_TYPE_ENUM,
+
+  PBSTREAM_TYPE_MESSAGE
+};
+
+/* A list of types as they are encoded on-the-wire.  The numeric values are
+ * the ones stored in the low three bits of each field's key. */
+enum pbstream_wire_type {
+  PBSTREAM_WIRE_TYPE_VARINT = 0,
+  PBSTREAM_WIRE_TYPE_64BIT = 1,
+  PBSTREAM_WIRE_TYPE_STRING = 2,
+  PBSTREAM_WIRE_TYPE_START_GROUP = 3,
+  PBSTREAM_WIRE_TYPE_END_GROUP = 4,
+  PBSTREAM_WIRE_TYPE_32BIT = 5,
+};
+
+/* Each field must have a cardinality that is one of the following. */
+enum pbstream_cardinality {
+  PBSTREAM_CARDINALITY_OPTIONAL, /* must appear 0 or 1 times */
+  PBSTREAM_CARDINALITY_REQUIRED, /* must appear exactly 1 time */
+  PBSTREAM_CARDINALITY_REPEATED, /* may appear 0 or more times */
+};
+
+/* The number a field is tagged with in the .proto file. */
+typedef int32_t pbstream_field_number_t;
+
+/* A deserialized value as described in a .proto file.  `type` says which
+ * member of `v` is meaningful. */
+struct pbstream_value {
+  enum pbstream_type type;
+  union {
+    double _double;
+    float _float;
+    int32_t int32;
+    int64_t int64;
+    uint32_t uint32;
+    uint64_t uint64;
+    bool _bool;
+    struct {
+      char *data; /* This will be a pointer to the buffer of data the client provided. */
+      int len;
+    } string;
+    struct {
+      char *data; /* This will be a pointer to the buffer of data the client provided. */
+      int len;
+    } bytes;
+    int32_t _enum;
+  } v;
+};
+
+/* A value as it is encoded on-the-wire.  `type` says which member of `v` is
+ * meaningful. */
+struct pbstream_wire_value {
+  enum pbstream_wire_type type;
+  union {
+    uint64_t varint;
+    uint64_t _64bit;
+    struct {
+      char *data; /* This will be a pointer to the buffer of data the client provided. */
+      int len;
+    } string;
+    uint32_t _32bit;
+  } v;
+};
+
+/* The definition of an enum as defined in a pbstream. 
For example:
+ *   Corpus {
+ *     UNIVERSAL = 0;
+ *     WEB = 1;
+ *     IMAGES = 2;
+ *     LOCAL = 3;
+ *     NEWS = 4;
+ *   }
+ */
+struct pbstream_enum_descriptor {
+  char *name;
+  /* One name/number pair of the enum. */
+  struct enum_value {
+    char *name;
+    int value;
+  } value; /* NOTE(review): looks unused next to `values` below -- confirm. */
+  DEFINE_DYNARRAY(values, struct enum_value);
+};
+
+/* The definition of a field as defined in a pbstream (within a message).
+ * For example:
+ *   required int32 a = 1;
+ */
+struct pbstream_field_descriptor {
+  pbstream_field_number_t field_number;
+  char *name;
+  enum pbstream_type type;
+  enum pbstream_cardinality cardinality;
+  struct pbstream_value *default_value; /* NULL if none */
+
+  /* Index into the "seen" list for the message. -1 for repeated fields (for
+   * which we have no need to track whether it's been seen). */
+  int seen_field_num;
+
+  /* Extra information about the field's type; pbstream.c reads d.message
+   * when type is PBSTREAM_TYPE_MESSAGE. */
+  union extra_data {
+    struct pbstream_enum_descriptor *_enum;
+    struct pbstream_message_descriptor *message;
+  } d;
+};
+
+/* A message as defined by the "message" construct in a .proto file. */
+struct pbstream_message_descriptor {
+  char *name; /* does not include package name or parent message names. */
+  char *full_name;
+  int num_seen_fields; /* How many fields we have to track "seen" information for. */
+  DEFINE_DYNARRAY(fields, struct pbstream_field_descriptor);
+  DEFINE_DYNARRAY(messages, struct pbstream_message_descriptor);
+  DEFINE_DYNARRAY(enums, struct pbstream_enum_descriptor);
+};
+
+/* Callback for when a value is parsed that matches a field in the .proto file.
+ * The value is passed by value; string and bytes members still point into the
+ * buffer the client provided. */
+typedef void (*pbstream_value_callback_t)(
+    struct pbstream_field_descriptor *field_descriptor,
+    struct pbstream_value value,
+    void *user_data);
+
+/* Callback for when a value is parsed for which no field was defined in the
+ * .proto file. */
+typedef void (*pbstream_unknown_value_callback_t)(
+    pbstream_field_number_t field_number,
+    struct pbstream_wire_value *wire_value,
+    void *user_data);
+
+/* Callback for when a nested message is beginning. 
*/
+typedef void (*pbstream_begin_message_callback_t)(
+    struct pbstream_message_descriptor *message_descriptor,
+    void *user_data);
+
+/* Callback for when a nested message is ending. */
+typedef void (*pbstream_end_message_callback_t)(void *user_data);
+
+/* The kinds of error that can be reported to the error callback.
+ * NOTE: these are a different set of values from enum pbstream_status.
+ * PBSTREAM_ERROR_UNTERMINATED_VARINT is 0, the same number as
+ * PBSTREAM_STATUS_OK, so the two enums must never be used interchangeably. */
+enum pbstream_error {
+  PBSTREAM_ERROR_UNTERMINATED_VARINT, /* A varint did not terminate before hitting 64 bits. Fatal. */
+  PBSTREAM_ERROR_MISSING_REQUIRED_FIELD, /* A field marked "required" was not present. */
+  PBSTREAM_ERROR_DUPLICATE_FIELD, /* An optional or required field appeared more than once. */
+  PBSTREAM_ERROR_MISMATCHED_TYPE, /* A field was encoded with the wrong wire type. */
+  PBSTREAM_ERROR_BAD_SUBMESSAGE_END, /* A submessage ended in the middle of data. Indicates corruption. */
+};
+/* Callback for when an error occurred.  The description is a static buffer
+ * which the client must not free (pbstream.c currently always passes NULL).
+ * The offset is the location in the input where the error was detected (this
+ * offset is relative to the beginning of the stream). If is_fatal is true,
+ * parsing cannot continue. */
+typedef void (*pbstream_error_callback_t)(enum pbstream_error error, char *description,
+                                          int offset, bool is_fatal);
+
+/* The set of user callbacks.  pbstream.c checks each one for NULL before
+ * calling it, so any of them may be left unset. */
+struct pbstream_callbacks {
+  pbstream_value_callback_t value_callback;
+  pbstream_unknown_value_callback_t unknown_value_callback;
+  pbstream_begin_message_callback_t begin_message_callback;
+  pbstream_end_message_callback_t end_message_callback;
+  pbstream_error_callback_t error_callback;
+};
+
+/* One entry of the parser's stack: the message currently being parsed. */
+struct pbstream_parse_stack_frame {
+  struct pbstream_message_descriptor *message_descriptor;
+  int end_offset; /* We don't know this for the outermost frame, and set it to INT_MAX. */
+
+  /* For every field except repeated ones we track whether we have seen it or
+   * not. This lets us detect three important conditions:
+   * 1. the field has a default, but we did not see it anywhere (action: emit the default)
+   * 2. the field is required, but we did not see it anywhere (action: error)
+   * 3. the field is required or optional, but we saw it more than once (action: error) */
+  DEFINE_DYNARRAY(seen_fields, bool);
+};
+
+/* The stream parser keeps this as its state. */
+struct pbstream_parse_state {
+  struct pbstream_callbacks callbacks;
+  int offset; /* Position in the stream, relative to its beginning. */
+  bool fatal_error; /* Set once an unrecoverable error has been detected. */
+  void *user_data; /* Passed through unchanged to the callbacks that take it. */
+  DEFINE_DYNARRAY(stack, struct pbstream_parse_stack_frame);
+};
+
+/* Call this once before parsing to initialize the data structures.
+ * message_descriptor can be NULL, in which case all fields will be reported
+ * as unknown. */
+void pbstream_init_parser(struct pbstream_parse_state *state,
+                          struct pbstream_message_descriptor *message_descriptor,
+                          struct pbstream_callbacks *callbacks,
+                          void *user_data);
+
+/* Call this to parse as much of buf as possible, calling callbacks as
+ * appropriate. buf need not be a complete pbstream. Returns a status (below);
+ * the number of bytes consumed is meant to be reported through
+ * consumed_bytes. In subsequent calls, buf should point to the first byte not
+ * consumed by previous calls.
+ *
+ * If need_more_bytes is non-zero when parse() returns, this indicates that the
+ * beginning of a string or sub-message was recognized, but not all bytes of
+ * the string were in memory. The string will not be successfully parsed (and
+ * thus parsing of the pbstream cannot proceed) unless need_more_bytes more
+ * data is available upon the next call to parse. The caller may need to
+ * increase its buffer size. */
+enum pbstream_status {
+  PBSTREAM_STATUS_OK = 0,
+  PBSTREAM_STATUS_INCOMPLETE = 1, /* buffer ended in the middle of a field. */
+  PBSTREAM_STATUS_ERROR = 2, /* fatal error in the file, cannot recover. 
*/ +}; + +enum pbstream_status pbstream_parse(struct pbstream_parse_state *state, + char *buf, int buf_len, + int *consumed_bytes, int *need_more_bytes); diff --git a/pbstream.o b/pbstream.o new file mode 100644 index 0000000000000000000000000000000000000000..7227f4a5db7f5557ca6bb2cc8e015da3c4f5db7d GIT binary patch literal 5920 zcmeHKeQZzLbQfAsHN>G_`XyXWhkd+xc(a8qoT)8SCWa426?mNTHZUZIPjXX8!w2!+}`CE!%o( zovjd~8*Wa6adL*NX7*9;_#0qwZoyj}QIrzFi-z%HJPzep!n=dpv_UFdT`(C=`@FR4-;J=JZ zp)4_nJ&N~xS-R8XtybR8fkO`gCI>T4WOAnmCD{T4xPaB{qdR@M;<9ZAG)m7eW_hFZ zH%+0b(6tzi`4=G{4oKN!u>Ghx4$0!Vny?p!sxeR1v4(e2M?r+C+?>W_PRyIHS-!(W-Cz^68f{bcZmb? zu$w;Pna$rX6qfFv)*7Y1Ya{e-3l^dIw?=EP&ss%q>~Lo=Mc&a4xJ~9gpRwD#=LMhY zp)U|?9T}Z6p2Quh#7YNa^sj`v{m2IhjrcuC1(EUMY~Xh zJ}CLFF#|5>;b7D{<_{DrKZ}*WnO?_wC{{m!|D*arEL1SF4jOJG*8Y09ZsXHs#GF%! zBNxTE_z9EbfR9iYO;^$MMsat1#TbdT3+@Jp&u)O!0mJADpmBvU_aS{`5=3wtv-8La zC2N-tM1r$_0I(?W4oaBE@d~2dw}8bv$$%`gc#hddP4t5%-bra zBbM3ZB$-zK#;Db+u)U9FHG0WAdLDdPgDRZC#?cRsKS8XvsvX;n&yvj3LFiKzgkHy% z3(ymg;fDFaJ3K5S1|Hy?-z((jG|``-GN!GaDztD7uC=SlH&U+{Tg{{EoyKO+Q?*}V zceVH0V(%3J*sFO-jnHYV743QL>$xk6156Y8u6<1MYIu`l?(MEI?KzT!-lOf?O|M`Y zqV%6_B(rG?FXt-Ka3^B*2fXH+;3Sqg=7-}q{JsdiZD$A?DWqmcYyHfzZ(zN7M}LIj zS>Y-SaSe1@=*ZY_OS^JlxvsqK!HztMO&OJ10?^8_E?G^Yrx#y~(FHa7-p33~gnyUwFO=XI%Q?I_-3S0;TrA59aC^x3j*)zwN8)wCmeW+oWon44f*-Ws)+kDC`be*l=z!Ts+4Dg` zrSzUet)UMYN^NJtNGP=(>9kVI&{V&TFh)NZ?@A?lb)`1h-`lJAfqY-Tq1S@@&1oZ% zEIm1h?PClTbTBQeB;7{NcjB3p3||S2vaE;?C%s9lGl>i#;v?B;Zyjj@M5FbGhhRS{wWe{e> z&vIULz7dZ5_MM!d_(LF18kb(d-xHs zHwv)30_P2ty*Yp>10B%(^+a!NQqhM7^rWG8#t(O=^tf?kKv#}-A2^yg5bxBJ{hfMz z>lO*vR;P4z_jSg*y7it;kV+?0-2+B{D&BD<-Z$9Wp{JAsx)I-27w^#-wRMZ24kc3E z@NNvG`jdJ(9ZwH-^unAtpvU#TP6ZFG}vql+7EL z>P3s3j0_cxn9W_)%;TP{I4q@jFZl~Uci@31gDB&VbBH_aR9tjunUzKnn z-!9?5R>DP`S0!B7&r7(F|Gk6@`DqEi%j21m@S7Z;Ujbi~aQ2roU@FJ)?Zx^R zqn;B2_i=uYgsYrCDB