From 75ee6df2ff6509e0d8d92e26c613cfb24d3828e2 Mon Sep 17 00:00:00 2001 From: Joshua Haberman Date: Sat, 21 Feb 2009 00:07:33 -0800 Subject: [PATCH] Newfound respect for 80 characters and for terseness. --- pbstream.c | 101 +++++++++++++++++++++++------------------------------ pbstream.h | 63 ++++++++++++++++++--------------- 2 files changed, 77 insertions(+), 87 deletions(-) diff --git a/pbstream.c b/pbstream.c index 991be7fa51..070e42987e 100644 --- a/pbstream.c +++ b/pbstream.c @@ -1,7 +1,7 @@ /* * pbstream - a stream-oriented implementation of protocol buffers. * - * Copyright (c) 2008 Joshua Haberman. See LICENSE for details. + * Copyright (c) 2008-2009 Joshua Haberman. See LICENSE for details. */ #include @@ -48,8 +48,8 @@ enum pbstream_status get_varint(char **buf, char *end, uint64_t *out_value) char *b = *buf; /* Because we don't check for buffer overrun inside the loop, we require - * that callers use a buffer that is overallocated by at least 9 bytes - * (the maximum we can overrun before the bitpos check catches the problem). */ + * that callers use a buffer that is overallocated by at least 9 bytes (the + * maximum we can overrun before the bitpos check catches the problem). */ for(; *b & 0x80 && bitpos < 64; bitpos += 7, b++) *out_value |= (uint64_t)(*b & 0x7F) << bitpos; @@ -59,13 +59,8 @@ enum pbstream_status get_varint(char **buf, char *end, uint64_t *out_value) *out_value |= (uint64_t)(*b & 0x7F) << bitpos; b++; - if(unlikely(bitpos >= 64)) { - return PBSTREAM_ERROR_UNTERMINATED_VARINT; - } - if(unlikely(b > end)) { - return PBSTREAM_STATUS_INCOMPLETE; - } - + if(unlikely(bitpos >= 64)) return PBSTREAM_ERROR_UNTERMINATED_VARINT; + if(unlikely(b > end)) return PBSTREAM_STATUS_INCOMPLETE; *buf = b; return PBSTREAM_STATUS_OK; } @@ -78,9 +73,7 @@ enum pbstream_status get_32_le(char **buf, char *end, uint32_t *out_value) { char *b = *buf; char *int32_end = b+4; - if(unlikely(int32_end > end)) - return PBSTREAM_STATUS_INCOMPLETE; - + if(unlikely(int32_end > end)) return PBSTREAM_STATUS_INCOMPLETE; #if __BYTE_ORDER == __LITTLE_ENDIAN *out_value = *(uint32_t*)b; #else @@ -94,9 +87,7 @@ bool get_64_le(char **buf, char *end, uint64_t *out_value) { char *b = *buf; char *int64_end = b+8; - if(unlikely(int64_end > end)) - return PBSTREAM_STATUS_INCOMPLETE; - + if(unlikely(int64_end > end)) return PBSTREAM_STATUS_INCOMPLETE; #if __BYTE_ORDER == __LITTLE_ENDIAN *out_value = *(uint64_t*)buf; #else @@ -126,10 +117,11 @@ int64_t zigzag_decode_64(uint64_t n) * requred in *need_more_bytes. Updates *buf to point past the end of the * parsed data if the operation was successful. */ -enum pbstream_status pbstream_parse_wire_value(char **buf, char *end, - pbstream_field_number_t *field_number, - struct pbstream_wire_value *wire_value, - int *need_more_bytes) +enum pbstream_status pbstream_parse_wire_value( + char **buf, char *end, + pbstream_field_number_t *field_number, + struct pbstream_wire_value *wire_value, + int *need_more_bytes) { char *b = *buf; /* Our local buf pointer -- only update buf if we succeed. */ @@ -148,8 +140,7 @@ enum pbstream_status pbstream_parse_wire_value(char **buf, char *end, *field_number = key >> 3; wire_value->type = key & 0x07; - switch(wire_value->type) - { + switch(wire_value->type) { case PBSTREAM_WIRE_TYPE_VARINT: DECODE(wire_value->v.varint, get_varint); break; @@ -158,11 +149,10 @@ enum pbstream_status pbstream_parse_wire_value(char **buf, char *end, DECODE(wire_value->v._64bit, get_64_le); break; - case PBSTREAM_WIRE_TYPE_STRING: - { + case PBSTREAM_WIRE_TYPE_STRING: { uint64_t string_len; DECODE(string_len, get_varint); - if (string_len > INT_MAX) { + if (unlikely(string_len > INT_MAX)) { /* TODO: notice this and fail. */ } wire_value->v.string.len = (int)string_len; @@ -193,8 +183,8 @@ enum pbstream_status pbstream_parse_wire_value(char **buf, char *end, * already checked that the wire_value is of the correct type. The pbstream * type must not be PBSTREAM_TYPE_MESSAGE. This operation always succeeds. */ void pbstream_translate_field(struct pbstream_wire_value *wire_value, - enum pbstream_type type, - struct pbstream_value *out_value) + enum pbstream_type type, + struct pbstream_value *out_value) { out_value->type = type; switch(type) { @@ -286,7 +276,7 @@ void process_value(struct pbstream_parse_state *s, { /* Check that the wire type is appropriate for this .proto type. */ if(unlikely(wire_value->type != expected_wire_type[field_descriptor->type])) { - /* Report the type mismatch error. */ + /* Type mismatch. */ if(s->callbacks.error_callback) { /* TODO: a nice formatted message. */ s->callbacks.error_callback(PBSTREAM_ERROR_MISMATCHED_TYPE, NULL, @@ -295,7 +285,8 @@ void process_value(struct pbstream_parse_state *s, /* Report the wire value we parsed as an unknown value. */ if(s->callbacks.unknown_value_callback) { - s->callbacks.unknown_value_callback(field_descriptor->field_number, wire_value, + s->callbacks.unknown_value_callback(field_descriptor->field_number, + wire_value, s->user_data); } return; @@ -304,7 +295,8 @@ void process_value(struct pbstream_parse_state *s, if(field_descriptor->type == PBSTREAM_TYPE_MESSAGE) { /* We're entering a sub-message. */ if(s->callbacks.begin_message_callback) { - s->callbacks.begin_message_callback(field_descriptor->d.message, s->user_data); + s->callbacks.begin_message_callback(field_descriptor->d.message, + s->user_data); } /* Push and initialize a new stack frame. */ @@ -314,13 +306,12 @@ void process_value(struct pbstream_parse_state *s, frame->end_offset = 0; /* TODO: set this correctly. */ int num_seen_fields = frame->message_descriptor->num_seen_fields; INIT_DYNARRAY(frame->seen_fields, num_seen_fields, num_seen_fields); - } - else { + } else { /* This is a scalar value. */ struct pbstream_value value; pbstream_translate_field(wire_value, field_descriptor->type, &value); if(s->callbacks.value_callback) { - s->callbacks.value_callback(field_descriptor, value, s->user_data); + s->callbacks.value_callback(field_descriptor, &value, s->user_data); } } } @@ -329,8 +320,8 @@ struct pbstream_field_descriptor *find_field_descriptor_by_number( struct pbstream_message_descriptor* message_descriptor, pbstream_field_number_t field_number) { - /* Currently a linear search -- could be optimized to do a binary search, - * hash table lookup, or any other number of clever things you might imagine. */ + /* Currently a linear search -- could be optimized to do a binary search, hash + * table lookup, or any other number of clever things you might imagine. */ for (int i = 0; i < message_descriptor->fields_len; i++) if (message_descriptor->fields[i].field_number == field_number) return &message_descriptor->fields[i]; @@ -347,8 +338,8 @@ enum pbstream_status pbstream_parse_field(struct pbstream_parse_state *s, int *need_more_bytes) { struct pbstream_parse_stack_frame *frame = DYNARRAY_GET_TOP(s->stack); - struct pbstream_message_descriptor *message_descriptor = frame->message_descriptor; - + struct pbstream_message_descriptor *message_descriptor = + frame->message_descriptor; pbstream_field_number_t field_number; struct pbstream_wire_value wire_value; enum pbstream_status status; @@ -358,7 +349,8 @@ enum pbstream_status pbstream_parse_field(struct pbstream_parse_state *s, need_more_bytes); if(unlikely(status != PBSTREAM_STATUS_OK)) { - if(status == PBSTREAM_ERROR_UNTERMINATED_VARINT && s->callbacks.error_callback) { + if(status == PBSTREAM_ERROR_UNTERMINATED_VARINT && + s->callbacks.error_callback) { /* TODO: a nice formatted message. */ s->callbacks.error_callback(PBSTREAM_ERROR_UNTERMINATED_VARINT, NULL, s->offset, true); @@ -369,36 +361,33 @@ enum pbstream_status pbstream_parse_field(struct pbstream_parse_state *s, /* Find the corresponding field definition from the .proto file. */ struct pbstream_field_descriptor *field_descriptor; - field_descriptor = find_field_descriptor_by_number(message_descriptor, field_number); - + field_descriptor = find_field_descriptor_by_number(message_descriptor, + field_number); if(likely(field_descriptor != NULL)) { - if(field_descriptor->seen_field_num > 0) { - /* Check that this field has not been seen before (unless it's a repeated field) */ - if(frame->seen_fields[field_descriptor->seen_field_num] && - field_descriptor->cardinality != PBSTREAM_CARDINALITY_REPEATED) { - if(s->callbacks.error_callback) { + if(field_descriptor->seen_field_num > 0) { /* for non-repeated fields */ + /* Check that this field has not been seen before. */ + if(frame->seen_fields[field_descriptor->seen_field_num]) { + if(s->callbacks.error_callback) s->callbacks.error_callback(PBSTREAM_ERROR_DUPLICATE_FIELD, NULL, s->offset, false); - } return PBSTREAM_STATUS_ERROR; } - /* Mark the field as seen. */ frame->seen_fields[field_descriptor->seen_field_num] = true; } process_value(s, &wire_value, field_descriptor); } else { /* This field was not defined in the .proto file. */ - if(s->callbacks.unknown_value_callback) { - s->callbacks.unknown_value_callback(field_number, &wire_value, s->user_data); - } + if(s->callbacks.unknown_value_callback) + s->callbacks.unknown_value_callback(field_number, &wire_value, + s->user_data); } return PBSTREAM_STATUS_OK; } /* Process actions associated with the end of a submessage. This includes: - * - emittin default values for all optional elements (either explicit + * - emitting default values for all optional elements (either explicit * defaults or implicit defaults). * - emitting errors for any required fields that were not seen. * - calling the user's callback. @@ -413,15 +402,13 @@ void process_submessage_end(struct pbstream_parse_state *s) /* Process the end of message by calling the user's callback and popping * our stack frame. */ - if(s->callbacks.end_message_callback) { + if(s->callbacks.end_message_callback) s->callbacks.end_message_callback(s->user_data); - } /* Pop the stack frame associated with this submessage. */ RESIZE_DYNARRAY(s->stack, s->stack_len-1); } -/* The user-exposed parsing function -- see the header file for documentation. */ enum pbstream_status pbstream_parse(struct pbstream_parse_state *s, char *buf_start, int buf_len, int *consumed_bytes, int *need_more_bytes) @@ -434,9 +421,8 @@ enum pbstream_status pbstream_parse(struct pbstream_parse_state *s, while(buf < end) { /* Check for a submessage ending. */ while(s->offset >= DYNARRAY_GET_TOP(s->stack)->end_offset) { - /* A submessage should end exactly at a field boundary. If we find that - * the submessage length indicated an end in the middle of a field, that - * is an error that indicates data corruption, and we refuse to proceed. */ + /* A submessage that doesn't end exactly on a field boundary indicates + * corruption. */ if(unlikely(s->offset != DYNARRAY_GET_TOP(s->stack)->end_offset)) { if(s->callbacks.error_callback) { s->callbacks.error_callback(PBSTREAM_ERROR_BAD_SUBMESSAGE_END, NULL, @@ -451,7 +437,6 @@ enum pbstream_status pbstream_parse(struct pbstream_parse_state *s, status = pbstream_parse_field(s, &buf, end, need_more_bytes); if(status != PBSTREAM_STATUS_OK) break; - s->offset = buf_start_offset + (buf - buf_start); } return status; diff --git a/pbstream.h b/pbstream.h index 2d6694a7a5..32da913506 100644 --- a/pbstream.h +++ b/pbstream.h @@ -63,11 +63,11 @@ struct pbstream_value { uint64_t uint64; bool _bool; struct { - char *data; /* This will be a pointer to the buffer of data the client provided. */ + char *data; /* points into the client's input buffer */ int len; } string; struct { - char *data; /* This will be a pointer to the buffer of data the client provided. */ + char *data; /* points into the client's input buffer */ int len; } bytes; int32_t _enum; @@ -81,7 +81,7 @@ struct pbstream_wire_value { uint64_t varint; uint64_t _64bit; struct { - char *data; /* This will be a pointer to the buffer of data the client provided. */ + char *data; /* points into the client's input buffer */ int len; } string; uint32_t _32bit; @@ -129,23 +129,21 @@ struct pbstream_field_descriptor { /* A message as defined by the "message" construct in a .proto file. */ struct pbstream_message_descriptor { - char *name; /* does not include package name or parent message names. */ + char *name; /* does not include package name or parent message names */ char *full_name; - int num_seen_fields; /* How many fields we have to track "seen" information for. */ + int num_seen_fields; /* fields we have to track "seen" information for */ DEFINE_DYNARRAY(fields, struct pbstream_field_descriptor); DEFINE_DYNARRAY(messages, struct pbstream_message_descriptor); DEFINE_DYNARRAY(enums, struct pbstream_enum_descriptor); }; -/* Callback for when a value is parsed that matches a field in the .proto file. - * */ +/* Callback for when a regular value is parsed. */ typedef void (*pbstream_value_callback_t)( struct pbstream_field_descriptor *field_descriptor, - struct pbstream_value value, + struct pbstream_value *value, void *user_data); -/* Callback for when a value is parsed for which no field was defined in the - * .proto file. */ +/* Callback for when a value is parsed but wasn't in the .proto file. */ typedef void (*pbstream_unknown_value_callback_t)( pbstream_field_number_t field_number, struct pbstream_wire_value *wire_value, @@ -161,17 +159,27 @@ typedef void (*pbstream_end_message_callback_t)(void *user_data); /* Callback for when an error occurred. */ enum pbstream_error { - PBSTREAM_ERROR_UNTERMINATED_VARINT, /* A varint did not terminate before hitting 64 bits. Fatal. */ - PBSTREAM_ERROR_MISSING_REQUIRED_FIELD, /* A field marked "required" was not present. */ - PBSTREAM_ERROR_DUPLICATE_FIELD, /* An optional or required field appeared more than once. */ - PBSTREAM_ERROR_MISMATCHED_TYPE, /* A field was encoded with the wrong wire type. */ - PBSTREAM_ERROR_BAD_SUBMESSAGE_END, /* A submessage ended in the middle of data. Indicates corruption. */ + /* A varint did not terminate before hitting 64 bits. Fatal. */ + PBSTREAM_ERROR_UNTERMINATED_VARINT, + + /* A field marked "required" was not present. */ + PBSTREAM_ERROR_MISSING_REQUIRED_FIELD, + + /* An optional or required field appeared more than once. */ + PBSTREAM_ERROR_DUPLICATE_FIELD, + + /* A field was encoded with the wrong wire type. */ + PBSTREAM_ERROR_MISMATCHED_TYPE, + + /* A submessage ended in the middle of data. Indicates corruption. */ + PBSTREAM_ERROR_BAD_SUBMESSAGE_END, }; /* The description is a static buffer which the client must not free. The * offset is the location in the input where the error was detected (this * offset is relative to the beginning of the stream). If is_fatal is true, * parsing cannot continue. */ -typedef void (*pbstream_error_callback_t)(enum pbstream_error error, char *description, +typedef void (*pbstream_error_callback_t)(enum pbstream_error error, + char *description, int offset, bool is_fatal); struct pbstream_callbacks { @@ -184,17 +192,13 @@ struct pbstream_callbacks { struct pbstream_parse_stack_frame { struct pbstream_message_descriptor *message_descriptor; - int end_offset; /* We don't know this for the outermost frame, and set it to INT_MAX. */ + int end_offset; /* unknown for the top frame, so we set to INT_MAX */ - /* For every field except repeated ones we track whether we have seen it or - * not. This lets us detect three important conditions: - * 1. the field has a default, but we did not see it anywhere (action: emit the default) - * 2. the field is required, but we did not see it anywhere (action: error) - * 3. the field is required or optional, but we saw it more than once (action: error) */ + /* Tracks whether we've seen non-repeated fields. */ DEFINE_DYNARRAY(seen_fields, bool); }; -/* The stream parser keeps this as its state. */ +/* The stream parser's state. */ struct pbstream_parse_state { struct pbstream_callbacks callbacks; int offset; @@ -206,10 +210,11 @@ struct pbstream_parse_state { /* Call this once before parsing to initialize the data structures. * message_type can be NULL, in which case all fields will be reported as * unknown. */ -void pbstream_init_parser(struct pbstream_parse_state *state, - struct pbstream_message_descriptor *message_descriptor, - struct pbstream_callbacks *callbacks, - void *user_data); +void pbstream_init_parser( + struct pbstream_parse_state *state, + struct pbstream_message_descriptor *message_descriptor, + struct pbstream_callbacks *callbacks, + void *user_data); /* Call this to parse as much of buf as possible, calling callbacks as * appropriate. buf need not be a complete pbstream. Returns the number of @@ -224,8 +229,8 @@ void pbstream_init_parser(struct pbstream_parse_state *state, * increase its buffer size. */ enum pbstream_status { PBSTREAM_STATUS_OK = 0, - PBSTREAM_STATUS_INCOMPLETE = 1, /* buffer ended in the middle of a field. */ - PBSTREAM_STATUS_ERROR = 2, /* fatal error in the file, cannot recover. */ + PBSTREAM_STATUS_INCOMPLETE = 1, /* buffer ended in the middle of a field */ + PBSTREAM_STATUS_ERROR = 2, /* fatal error in the file, cannot recover */ }; enum pbstream_status pbstream_parse(struct pbstream_parse_state *state,