Protocol Buffers - Google's data interchange format (grpc依赖) https://developers.google.com/protocol-buffers/
You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

380 lines
14 KiB

16 years ago
/*
 * upb - a minimalist implementation of protocol buffers.
 *
 * Copyright (c) 2008-2009 Joshua Haberman.  See LICENSE for details.
 */
#include "upb_parse.h"
#include <assert.h>
#include <stddef.h>
#include <stdlib.h>
16 years ago
#include <string.h>
#include "descriptor.h"
16 years ago
/* Branch prediction hints for GCC.  The condition is normalized with !! so
 * that any scalar (a pointer, a 64-bit integer, ...) becomes exactly 0 or 1
 * before it is handed to __builtin_expect, which takes a long and compares
 * it against the expected value.  Without GCC the macros only normalize. */
#ifdef __GNUC__
#define likely(x)   __builtin_expect(!!(x), 1)
#define unlikely(x) __builtin_expect(!!(x), 0)
#else
#define likely(x)   (!!(x))
#define unlikely(x) (!!(x))
#endif
/* Evaluates "func" (an expression yielding a upb_status_t) exactly once and
 * returns that status from the enclosing function unless it is
 * UPB_STATUS_OK.  Usable wherever a single statement is expected. */
#define CHECK(func) do { \
    upb_status_t check_status_ = (func); \
    if(check_status_ != UPB_STATUS_OK) return check_status_; \
  } while (0)
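/* Lowest-level functions -- these read integers from the input buffer.
 * Each one is told how many bytes are available ("len") and reports
 * UPB_STATUS_NEED_MORE_DATA instead of reading beyond them.
 * NOTE(review): this comment used to say that none of these do bounds
 * checking and that clients must overallocate their buffers by >=9 bytes;
 * confirm whether any caller still depends on that overallocation. */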
/* Returns the smaller of the two sizes. */
static size_t min(size_t a, size_t b)
{
  if (b <= a) return b;
  return a;
}
static upb_status_t get_v_uint64_t(void *restrict *buf, size_t len,
uint64_t *restrict val)
16 years ago
{
uint32_t bitpos, bytes = min(len, 10);
uint8_t *b = *buf;
uint8_t *end = b + bytes;
uint8_t last = 0x80;
*val = 0;
for(bitpos = 0; b < end && (last & 0x80); b++, bitpos += 7)
*val |= ((uint64_t)((last = *b) & 0x7F)) << bitpos;
if(unlikely(last & 0x80)) {
return bytes < 10 ? UPB_STATUS_NEED_MORE_DATA : UPB_ERROR_UNTERMINATED_VARINT;
} else {
*buf = b;
return UPB_STATUS_OK;
}
16 years ago
}
/* Advances *buf past one base-128 varint of at most 10 bytes without
 * decoding it.
 *
 * buf: in/out cursor; advanced only on success.
 * len: number of readable bytes at *buf.
 *
 * Returns UPB_STATUS_OK on success, UPB_STATUS_NEED_MORE_DATA if fewer than
 * 10 bytes were available and all of them had the continuation bit set, or
 * UPB_ERROR_UNTERMINATED_VARINT if 10 bytes were scanned without finding a
 * terminating byte. */
static upb_status_t skip_v_uint64_t(void **buf, size_t len)
{
  uint32_t bytes = min(len, 10);
  uint8_t *b = *buf;
  uint8_t *end = b + bytes;
  uint8_t last = 0x80;  /* Pretend a continuation bit so len == 0 fails. */

  for(; b < end && (last & 0x80); b++)
    last = *b;

  /* Completion is decided by the continuation bit of the last byte seen, as
   * in get_v_uint64_t().  Testing "b == end" instead would wrongly reject a
   * varint whose final byte is the last available byte, as well as any
   * valid 10-byte varint. */
  if(unlikely(last & 0x80)) {
    return bytes < 10 ? UPB_STATUS_NEED_MORE_DATA : UPB_ERROR_UNTERMINATED_VARINT;
  }
  *buf = b;
  return UPB_STATUS_OK;
}
static upb_status_t get_v_uint32_t(void *restrict *buf, size_t len,
uint32_t *restrict val)
16 years ago
{
uint32_t bitpos, bytes = min(len, 5);
uint8_t *b = *buf;
uint8_t *end = b + bytes;
uint8_t last = 0x80;
*val = 0;
for(bitpos = 0; b < end && (last & 0x80); b++, bitpos += 7)
*val |= ((uint32_t)((last = *b) & 0x7F)) << bitpos;
if(unlikely(b == end)) {
return bytes < 5 ? UPB_STATUS_NEED_MORE_DATA : UPB_ERROR_UNTERMINATED_VARINT;
} else {
*buf = b;
return UPB_STATUS_OK;
}
16 years ago
}
/* Widens a byte to 32 bits before shifting so no bits are lost to integer
 * promotion.  Arguments are parenthesized so any expression may be passed. */
#define SHL(val, bits) ((uint32_t)(val) << (bits))

/* Reads one fixed-width 32-bit value from *buf into *val.
 *
 * buf: in/out cursor; advanced by 4 bytes on success.
 * len: number of readable bytes at *buf.
 * val: receives the value.
 *
 * Returns UPB_STATUS_NEED_MORE_DATA (touching neither *buf nor *val) if
 * fewer than 4 bytes are available, otherwise UPB_STATUS_OK. */
static upb_status_t get_f_uint32_t(void *restrict *buf, size_t len,
                                   uint32_t *restrict val)
{
  if(unlikely(len < sizeof(uint32_t))) return UPB_STATUS_NEED_MORE_DATA;
  uint8_t *b = *buf;
#if UPB_UNALIGNED_READS_OK
  /* Copies the bytes in host order.  memcpy is used instead of a cast to
   * uint32_t* so the read is well defined regardless of alignment and
   * aliasing rules; compilers turn it into a single load.
   * NOTE(review): presumably this path is only enabled on little-endian
   * targets -- confirm. */
  memcpy(val, b, sizeof(uint32_t));
#else
  /* Portable path: assemble the value from little-endian bytes. */
  *val = SHL(b[0], 0) | SHL(b[1], 8) | SHL(b[2], 16) | SHL(b[3], 24);
#endif
  *buf = b + sizeof(uint32_t);
  return UPB_STATUS_OK;
}
#undef SHL
/* Advances *buf past one fixed-width 32-bit value.
 *
 * Returns UPB_STATUS_NEED_MORE_DATA (leaving *buf untouched) if fewer than
 * 4 of the "len" readable bytes are available, otherwise UPB_STATUS_OK. */
static upb_status_t skip_f_uint32_t(void **buf, size_t len)
{
  if(unlikely(len < sizeof(uint32_t))) return UPB_STATUS_NEED_MORE_DATA;
  *buf = (char*)*buf + sizeof(uint32_t);
  return UPB_STATUS_OK;
}
static upb_status_t get_f_uint64_t(void *restrict *buf, size_t len,
uint64_t *restrict val)
16 years ago
{
if(unlikely(len < sizeof(uint64_t))) return UPB_STATUS_NEED_MORE_DATA;
#if UPB_UNALIGNED_READS_OK
*val = *(uint64_t*)*buf;
*buf = (char*)*buf + sizeof(uint64_t);
#else
uint32_t lo32, hi32;
get_f_uint32_t(buf, &lo32);
get_f_uint32_t(buf, &hi32);
*val = lo32 | ((uint64_t)hi32 << 32);
#endif
return UPB_STATUS_OK;
}
/* Advances *buf past one fixed-width 64-bit value.
 *
 * Returns UPB_STATUS_NEED_MORE_DATA (leaving *buf untouched) if fewer than
 * 8 of the "len" readable bytes are available, otherwise UPB_STATUS_OK. */
static upb_status_t skip_f_uint64_t(void **buf, size_t len)
{
  if(unlikely(len < sizeof(uint64_t))) return UPB_STATUS_NEED_MORE_DATA;
  *buf = (char*)*buf + sizeof(uint64_t);
  return UPB_STATUS_OK;
}
/* Undoes 32-bit ZigZag encoding: even inputs map to n/2, odd inputs map to
 * -(n+1)/2 (0 -> 0, 1 -> -1, 2 -> 1, 3 -> -2, ...). */
static int32_t zz_decode_32(uint32_t n)
{
  const uint32_t magnitude = n >> 1;
  /* All ones when the low (sign) bit is set, otherwise zero. */
  const uint32_t sign_mask = (uint32_t)0 - (n & 1);
  return magnitude ^ sign_mask;
}
/* Undoes 64-bit ZigZag encoding: even inputs map to n/2, odd inputs map to
 * -(n+1)/2 (0 -> 0, 1 -> -1, 2 -> 1, 3 -> -2, ...). */
static int64_t zz_decode_64(uint64_t n)
{
  const uint64_t magnitude = n >> 1;
  /* All ones when the low (sign) bit is set, otherwise zero. */
  const uint64_t sign_mask = (uint64_t)0 - (n & 1);
  return magnitude ^ sign_mask;
}
16 years ago
/* Functions for reading wire values and converting them to values. These
* are generated with macros because they follow a highly consistent pattern. */
/* WVTOV(type, wire_t, val_t) expands to the signature only (no body and no
 * trailing semicolon) of wvtov_<type>(), which converts a decoded wire value
 * "s" of type wire_t into a val_t stored through "d".  Follow it with ';'
 * to get a prototype or with a '{ ... }' body to get the definition. */
#define WVTOV(type, wire_t, val_t) \
static void wvtov_ ## type(wire_t s, val_t *d)
/* GET(type, v_or_f, wire_t, val_t, member_name) defines get_<type>(), which
 * reads one wire value with get_<v_or_f>_<wire_t>() -- v_or_f is "v" for the
 * varint readers and "f" for the fixed-width readers above -- and converts
 * it with wvtov_<type>().  A non-OK status from the reader is returned as is
 * (via CHECK) and *d is then left unwritten.  member_name is accepted but
 * not used by this macro. */
#define GET(type, v_or_f, wire_t, val_t, member_name) \
static upb_status_t get_ ## type(void **buf, size_t len, val_t *d) { \
wire_t tmp; \
CHECK(get_ ## v_or_f ## _ ## wire_t(buf, len, &tmp)); \
wvtov_ ## type(tmp, d); \
return UPB_STATUS_OK; \
}
16 years ago
/* T(...) emits, in order: a prototype for wvtov_<type>(), the definition of
 * get_<type>() (which calls it), and finally the wvtov_<type>() signature
 * again, left open so that the '{ ... }' written after each T(...) use
 * below becomes that converter's body.  Inside the body "s" is the wire
 * value and "d" points at the destination. */
#define T(type, v_or_f, wire_t, val_t, member_name) \
WVTOV(type, wire_t, val_t); /* prototype for GET below */ \
GET(type, v_or_f, wire_t, val_t, member_name) \
WVTOV(type, wire_t, val_t)
/* One line per scalar field type: (type, varint-or-fixed reader, wire type,
 * in-memory type, union member name) followed by the conversion body.
 * DOUBLE and FLOAT copy the bit pattern with memcpy rather than converting
 * the integer numerically.  SINT32/SINT64 are ZigZag-decoded.
 * NOTE(review): INT32, BOOL and ENUM are read with get_v_uint32_t(), which
 * accepts at most 5 bytes.  The protobuf encoding writes negative int32 and
 * enum values as 10-byte varints, so such values would presumably be
 * rejected here -- confirm against the encoding spec and callers. */
T(DOUBLE, f, uint64_t, double, _double) { memcpy(d, &s, sizeof(double)); }
T(FLOAT, f, uint32_t, float, _float) { memcpy(d, &s, sizeof(float)); }
T(INT32, v, uint32_t, int32_t, int32) { *d = (int32_t)s; }
T(INT64, v, uint64_t, int64_t, int64) { *d = (int64_t)s; }
T(UINT32, v, uint32_t, uint32_t, uint32) { *d = s; }
T(UINT64, v, uint64_t, uint64_t, uint64) { *d = s; }
T(SINT32, v, uint32_t, int32_t, int32) { *d = zz_decode_32(s); }
T(SINT64, v, uint64_t, int64_t, int64) { *d = zz_decode_64(s); }
T(FIXED32, f, uint32_t, uint32_t, uint32) { *d = s; }
T(FIXED64, f, uint64_t, uint64_t, uint64) { *d = s; }
T(SFIXED32, f, uint32_t, int32_t, int32) { *d = (int32_t)s; }
T(SFIXED64, f, uint64_t, int64_t, int64) { *d = (int64_t)s; }
T(BOOL, v, uint32_t, bool, _bool) { *d = (bool)s; }
T(ENUM, v, uint32_t, int32_t, int32) { *d = (int32_t)s; }
/* The generator macros are only needed for the definitions above. */
#undef WVTOV
#undef GET
#undef T
/* Alignment of type t, computed as the offset of a t member placed right
 * after a single char in an anonymous struct. */
#define alignof(t) offsetof(struct { char c; t x; }, x)

/* Table helpers: the descriptor enum constant for a field type, and one row
 * holding {alignment, size, wire type} for values stored as ctype. */
#define UPB_INFO_INDEX(name) GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_ ## name
#define UPB_INFO_ROW(ctype, wire) {alignof(ctype), sizeof(ctype), wire}

/* Per-field-type storage and wire information, indexed by the descriptor
 * field type constant.  GROUP has an all-zero entry.
 * May want to move this to upb.c if enough other things warrant it. */
struct upb_type_info upb_type_info[] = {
  [UPB_INFO_INDEX(DOUBLE)]   = UPB_INFO_ROW(double,            UPB_WIRE_TYPE_64BIT),
  [UPB_INFO_INDEX(FLOAT)]    = UPB_INFO_ROW(float,             UPB_WIRE_TYPE_32BIT),
  [UPB_INFO_INDEX(INT64)]    = UPB_INFO_ROW(int64_t,           UPB_WIRE_TYPE_VARINT),
  [UPB_INFO_INDEX(UINT64)]   = UPB_INFO_ROW(uint64_t,          UPB_WIRE_TYPE_VARINT),
  [UPB_INFO_INDEX(INT32)]    = UPB_INFO_ROW(int32_t,           UPB_WIRE_TYPE_VARINT),
  [UPB_INFO_INDEX(FIXED64)]  = UPB_INFO_ROW(uint64_t,          UPB_WIRE_TYPE_64BIT),
  [UPB_INFO_INDEX(FIXED32)]  = UPB_INFO_ROW(uint32_t,          UPB_WIRE_TYPE_32BIT),
  [UPB_INFO_INDEX(BOOL)]     = UPB_INFO_ROW(bool,              UPB_WIRE_TYPE_VARINT),
  [UPB_INFO_INDEX(STRING)]   = UPB_INFO_ROW(struct upb_string, UPB_WIRE_TYPE_DELIMITED),
  [UPB_INFO_INDEX(BYTES)]    = UPB_INFO_ROW(struct upb_string, UPB_WIRE_TYPE_DELIMITED),
  [UPB_INFO_INDEX(GROUP)]    = {0,0,0},
  [UPB_INFO_INDEX(MESSAGE)]  = UPB_INFO_ROW(void*,             UPB_WIRE_TYPE_DELIMITED),
  [UPB_INFO_INDEX(UINT32)]   = UPB_INFO_ROW(uint32_t,          UPB_WIRE_TYPE_VARINT),
  [UPB_INFO_INDEX(ENUM)]     = UPB_INFO_ROW(uint32_t,          UPB_WIRE_TYPE_VARINT),
  [UPB_INFO_INDEX(SFIXED32)] = UPB_INFO_ROW(int32_t,           UPB_WIRE_TYPE_32BIT),
  [UPB_INFO_INDEX(SFIXED64)] = UPB_INFO_ROW(int64_t,           UPB_WIRE_TYPE_64BIT),
  [UPB_INFO_INDEX(SINT32)]   = UPB_INFO_ROW(int32_t,           UPB_WIRE_TYPE_VARINT),
  [UPB_INFO_INDEX(SINT64)]   = UPB_INFO_ROW(int64_t,           UPB_WIRE_TYPE_VARINT),
};
#undef UPB_INFO_ROW
#undef UPB_INFO_INDEX
16 years ago
/* Reads a field tag (a varint of at most 5 bytes) from *buf and splits it
 * into its parts: the low three bits are the wire type and the remaining
 * bits are the field number.
 *
 * On success *buf is advanced past the tag and UPB_STATUS_OK is returned.
 * On failure the status from get_v_uint32_t() is returned and *tag is left
 * unwritten. */
upb_status_t upb_parse_tag(void **buf, size_t len, struct upb_tag *tag)
{
  uint32_t raw;
  upb_status_t read_status = get_v_uint32_t(buf, len, &raw);
  if(read_status != UPB_STATUS_OK) return read_status;

  tag->wire_type = (upb_wire_type_t)(raw & 0x07);
  tag->field_number = raw >> 3;
  return UPB_STATUS_OK;
}
16 years ago
/* Shared by upb_parse_wire_value() and upb_skip_wire_value().  Runs a
 * reader that advances the local cursor "b", returns its status from the
 * enclosing function if it is not OK, and otherwise adds the number of
 * bytes consumed since "buf" to *offset.  Wrapped in do/while so it behaves
 * as a single statement. */
#define READ(expr) do { \
    CHECK(expr); \
    *offset += (size_t)((char*)b - (char*)buf); \
  } while (0)

/* Parses the wire value of type "wt" found at "buf" into *wv.
 *
 * buf:    start of the value; at most "len" bytes are readable.
 * offset: in/out stream offset; advanced by the bytes consumed.  For
 *         delimited values it is additionally advanced by the decoded
 *         length, i.e. past the payload, which is not itself read here.
 * wv:     receives the varint, 64-bit or 32-bit value; for delimited values
 *         wv->_32bit receives the payload length.
 *
 * Returns UPB_STATUS_OK, a status propagated from the low-level reader, or
 * UPB_ERROR_OVERFLOW if adding the delimited length wraps *offset (in that
 * case *offset has already been advanced past the length prefix).  Group
 * start/end tags carry no value and consume nothing. */
upb_status_t upb_parse_wire_value(void *buf, size_t len, size_t *offset,
                                  upb_wire_type_t wt,
                                  union upb_wire_value *wv)
{
  void *b = buf;
  switch(wt) {
    case UPB_WIRE_TYPE_VARINT:
      READ(get_v_uint64_t(&b, len, &wv->varint));
      break;
    case UPB_WIRE_TYPE_64BIT:
      READ(get_f_uint64_t(&b, len, &wv->_64bit));
      break;
    case UPB_WIRE_TYPE_32BIT:
      READ(get_f_uint32_t(&b, len, &wv->_32bit));
      break;
    case UPB_WIRE_TYPE_DELIMITED: {
      READ(get_v_uint32_t(&b, len, &wv->_32bit));
      size_t new_offset = *offset + wv->_32bit;
      if (new_offset < *offset) return UPB_ERROR_OVERFLOW;
      *offset = new_offset;
      break;
    }
    case UPB_WIRE_TYPE_START_GROUP:
    case UPB_WIRE_TYPE_END_GROUP:
      break;
  }
  return UPB_STATUS_OK;
}

/* Skips the wire value of type "wt" found at "buf" without storing it.
 *
 * buf:    start of the value; at most "len" bytes are readable.
 * offset: in/out stream offset; advanced by the bytes consumed, and for
 *         delimited values also by the payload length.
 *
 * Returns UPB_STATUS_OK, a status propagated from the low-level reader, or
 * UPB_ERROR_OVERFLOW if adding the delimited length wraps *offset.  Group
 * start/end tags consume nothing. */
upb_status_t upb_skip_wire_value(void *buf, size_t len, size_t *offset,
                                 upb_wire_type_t wt)
{
  void *b = buf;
  switch(wt) {
    case UPB_WIRE_TYPE_VARINT:
      READ(skip_v_uint64_t(&b, len));
      break;
    case UPB_WIRE_TYPE_64BIT:
      READ(skip_f_uint64_t(&b, len));
      break;
    case UPB_WIRE_TYPE_32BIT:
      READ(skip_f_uint32_t(&b, len));
      break;
    case UPB_WIRE_TYPE_DELIMITED: {
      /* Have to get (not skip) the length to skip the bytes. */
      uint32_t delim_len;
      READ(get_v_uint32_t(&b, len, &delim_len));
      size_t new_offset = *offset + delim_len;
      if (new_offset < *offset) return UPB_ERROR_OVERFLOW;
      *offset = new_offset;
      break;
    }
    case UPB_WIRE_TYPE_START_GROUP: /* TODO: skip to matching end group. */
    case UPB_WIRE_TYPE_END_GROUP:
      break;
  }
  return UPB_STATUS_OK;
}
#undef READ
/* Parses one value of descriptor field type "ft" from *b into the matching
 * member of *v, advancing *b on success.
 *
 * Scalar types are dispatched to the corresponding get_<TYPE>() reader.
 * For BYTES, STRING and MESSAGE only the length prefix is read, with
 * get_INT32() into v->int32; the payload itself is not touched.  Any other
 * type, including GROUP, reads nothing and returns 0.
 *
 * Returns the status of the reader that was called. */
upb_status_t upb_parse_value(void **b, size_t len, upb_field_type_t ft,
                             union upb_value *v)
{
#define SCALAR_CASE(t, member_name) \
    case GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_ ## t: \
      return get_ ## t(b, len, &v->member_name);
  switch(ft) {
    /* Length-delimited types: only the length is parsed here. */
    case GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_STRING:
    case GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_BYTES:
    case GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_MESSAGE:
      return get_INT32(b, len, &v->int32);

    /* Floating point. */
    SCALAR_CASE(DOUBLE,   _double)
    SCALAR_CASE(FLOAT,    _float)

    /* Varint-encoded integers. */
    SCALAR_CASE(INT32,    int32)
    SCALAR_CASE(INT64,    int64)
    SCALAR_CASE(UINT32,   uint32)
    SCALAR_CASE(UINT64,   uint64)
    SCALAR_CASE(SINT32,   int32)
    SCALAR_CASE(SINT64,   int64)
    SCALAR_CASE(BOOL,     _bool)
    SCALAR_CASE(ENUM,     int32)

    /* Fixed-width integers. */
    SCALAR_CASE(FIXED32,  uint32)
    SCALAR_CASE(FIXED64,  uint64)
    SCALAR_CASE(SFIXED32, int32)
    SCALAR_CASE(SFIXED64, int64)

    default: return 0;  /* Including GROUP -- groups have no value. */
  }
#undef SCALAR_CASE
}
/* Prepares *state for a new parse.
 *
 * Allocates room for UPB_MAX_NESTING stack frames, each followed by
 * udata_size bytes of user data, points both "stack" and "top" at the first
 * frame and "limit" at the end of the allocation, and resets the offset,
 * done flag and packed-field end offset.  Release the stack with
 * upb_parse_state_free().
 *
 * NOTE(review): the malloc() result is not checked, and the allocated
 * frames (including the first one's fields) are left uninitialized here. */
void upb_parse_state_init(struct upb_parse_state *state, size_t udata_size)
{
  const size_t stack_bytes =
      (sizeof(*state->stack) + udata_size) * UPB_MAX_NESTING;

  state->stack = malloc(stack_bytes);
  state->top = state->stack;
  state->limit =
      (struct upb_parse_stack_frame*)((char*)state->stack + stack_bytes);

  state->udata_size = udata_size;
  state->offset = 0;
  state->packed_end_offset = 0;
  state->done = false;
}
/* Releases the frame stack allocated by upb_parse_state_init().
 *
 * The stack, top and limit pointers are cleared afterwards so that no
 * dangling pointers remain in *state and a repeated call is harmless
 * (free(NULL) is a no-op).  The state struct itself is not freed. */
void upb_parse_state_free(struct upb_parse_state *state)
{
  free(state->stack);
  state->stack = NULL;
  state->top = NULL;
  state->limit = NULL;
}
/* Leaves the current submessage: invokes the submsg_end_cb callback, then
 * moves "top" back by one frame, i.e. by the frame struct plus its
 * udata_size bytes of user data.  No check is made that a frame exists
 * below the current one. */
static void pop_stack_frame(struct upb_parse_state *s)
{
  const size_t frame_bytes = sizeof(*s->top) + s->udata_size;

  s->submsg_end_cb(s);
  s->top = (struct upb_parse_stack_frame*)((char*)s->top - frame_bytes);
}
/* Enters a submessage or group: advances "top" by one frame (the frame
 * struct plus its udata_size bytes of user data), records "end" as the
 * frame's end offset and invokes the submsg_start_cb callback with
 * user_field_desc.
 *
 * Returns UPB_ERROR_STACK_OVERFLOW, leaving the state untouched, if no
 * frame is left; otherwise UPB_STATUS_OK. */
static upb_status_t push_stack_frame(struct upb_parse_state *s, size_t end,
                                     void *user_field_desc)
{
  const size_t frame_bytes = sizeof(*s->top) + s->udata_size;
  struct upb_parse_stack_frame *next =
      (struct upb_parse_stack_frame*)((char*)s->top + frame_bytes);

  /* "limit" points one past the last allocated frame, so a frame starting
   * at "limit" is already out of bounds; hence >= rather than >.  The check
   * is made before committing so a failed push does not move "top". */
  if(unlikely(next >= s->limit)) return UPB_ERROR_STACK_OVERFLOW;

  s->top = next;
  s->top->end_offset = end;
  s->submsg_start_cb(s, user_field_desc);
  return UPB_STATUS_OK;
}
#if 0
upb_status_t upb_parse(struct upb_parse_state *s, void *buf, size_t len,
size_t *read)
{
size_t start_offset = s->offset;
size_t end_offset = start_offset + len;
while(!s->done && s->offset < end_offset) {
while(s->offset >= s->top->end_offset) pop_stack_frame(s);
while(s->packed_end_offset > s->offset) {
/* Parse a packed field entry. */
}
struct upb_tag tag;
void *b = buf;
CHECK(upb_parse_tag(&b, len, &tag));
int tag_bytes = ((char*)b - (char*)buf);
s->offset += tag_bytes;
buf = b;
if(unlikely(tag.wire_type == UPB_WIRE_TYPE_END_GROUP)) {
if(unlikely(s->top->end_offset != 0)) return UPB_ERROR_SPURIOUS_END_GROUP;
pop_stack_frame(s);
continue;
}
void *user_field_desc;
//upb_field_type_t ft = s->tag_cb(s, &tag, &user_field_desc);
if(ft == 0) {
CHECK(upb_skip_wire_value(b, &s->offset, tag.wire_type));
} else if(ft == GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_GROUP) {
/* No length specified, an "end group" tag will mark the end. */
push_stack_frame(s, 0, user_field_desc);
} else {
/* For all other cases we parse the next value. */
union upb_value v;
CHECK(upb_parse_value(&b, ft, &v));
if(ft == GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_MESSAGE) {
/* The value we parsed is the length of the submessage. */
push_stack_frame(s, s->offset + v.delim_len, user_field_desc);
} else if(ft == GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_STRING ||
ft == GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_BYTES) {
s->value_cb(s, &v, b, user_field_desc);
b = (char*)b + v.delim_len;
} else if(tag.wire_type == UPB_WIRE_TYPE_DELIMITED) {
/* Delimited data which is not a string, bytes, or a submessage.
* It must be a packed array. */
s->packed_type = ft;
s->packed_end_offset = s->offset + v.delim_len;
} else {
/* The common case: a simple value. */
s->value_cb(s, &v, b, user_field_desc);
}
}
}
*read = s->offset - start_offset;
return UPB_STATUS_OK;
}
#endif