@ -1,14 +1,13 @@
/*
* pbstream - a stream - oriented implementation of protocol buffers .
* upb - a minimalist implementation of protocol buffers.
*
* Copyright ( c ) 2008 - 2009 Joshua Haberman . See LICENSE for details .
*/
# include "upb_parse.h"
# include <assert.h>
# include <stdlib.h>
# include <string.h>
# include "pbstream.h"
# include "pbstream_lowlevel.h"
/* Branch prediction hints for GCC. */
# ifdef __GNUC__
@ -20,16 +19,16 @@
# endif
/* Evaluates `func` (an expression yielding a upb_status_t) exactly once and,
 * if the result is anything other than UPB_STATUS_OK, returns that status
 * from the enclosing function.  The enclosing function must therefore return
 * upb_status_t.  The temporary uses a name that is unlikely to collide with
 * identifiers appearing inside `func`. */
#define CHECK(func) do { \
  upb_status_t check_status_ = (func); \
  if(check_status_ != UPB_STATUS_OK) return check_status_; \
  } while (0)
/* Lowest-level functions -- these read integers from the input buffer.
 * To avoid branches, none of these do bounds checking.  So we force clients
 * to overallocate their buffers by >=9 bytes. */
static pbstream _status_t get_v_uint64_t ( uint8_t * restrict * buf ,
uint64_t * restrict val )
static u pb_status_t get_v_uint64_t ( uint8_t * restrict * buf ,
uint64_t * restrict val )
{
uint8_t * ptr = * buf , b ;
uint32_t part0 = 0 , part1 = 0 , part2 = 0 ;
@ -45,15 +44,15 @@ static pbstream_status_t get_v_uint64_t(uint8_t *restrict *buf,
b = * ( ptr + + ) ; part1 | = ( b & 0x7F ) < < 21 ; if ( ! ( b & 0x80 ) ) goto done ;
b = * ( ptr + + ) ; part2 = ( b & 0x7F ) ; if ( ! ( b & 0x80 ) ) goto done ;
b = * ( ptr + + ) ; part2 | = ( b & 0x7F ) < < 7 ; if ( ! ( b & 0x80 ) ) goto done ;
return PBSTREAM _ERROR_UNTERMINATED_VARINT ;
return U PB_ERROR_UNTERMINATED_VARINT;
done :
* buf = ptr ;
* val = ( uint64_t ) part0 | ( ( uint64_t ) part1 < < 28 ) | ( ( uint64_t ) part2 < < 56 ) ;
return PBSTREAM _STATUS_OK ;
return U PB_STATUS_OK;
}
static pbstream _status_t skip_v_uint64_t ( uint8_t * * buf )
static u pb_status_t skip_v_uint64_t ( uint8_t * * buf )
{
uint8_t * ptr = * buf , b ;
b = * ( ptr + + ) ; if ( ! ( b & 0x80 ) ) goto done ;
@ -66,15 +65,15 @@ static pbstream_status_t skip_v_uint64_t(uint8_t **buf)
b = * ( ptr + + ) ; if ( ! ( b & 0x80 ) ) goto done ;
b = * ( ptr + + ) ; if ( ! ( b & 0x80 ) ) goto done ;
b = * ( ptr + + ) ; if ( ! ( b & 0x80 ) ) goto done ;
return PBSTREAM _ERROR_UNTERMINATED_VARINT ;
return U PB_ERROR_UNTERMINATED_VARINT;
done :
* buf = ( uint8_t * ) ptr ;
return PBSTREAM _STATUS_OK ;
return U PB_STATUS_OK;
}
static pbstream _status_t get_v_uint32_t ( uint8_t * restrict * buf ,
uint32_t * restrict val )
static u pb_status_t get_v_uint32_t ( uint8_t * restrict * buf ,
uint32_t * restrict val )
{
uint8_t * ptr = * buf , b ;
uint32_t result ;
@ -85,33 +84,33 @@ static pbstream_status_t get_v_uint32_t(uint8_t *restrict *buf,
b = * ( ptr + + ) ; result | = ( b & 0x7F ) < < 14 ; if ( ! ( b & 0x80 ) ) goto done ;
b = * ( ptr + + ) ; result | = ( b & 0x7F ) < < 21 ; if ( ! ( b & 0x80 ) ) goto done ;
b = * ( ptr + + ) ; result = ( b & 0x7F ) < < 28 ; if ( ! ( b & 0x80 ) ) goto done ;
return PBSTREAM _ERROR_UNTERMINATED_VARINT ;
return U PB_ERROR_UNTERMINATED_VARINT;
done :
* buf = ptr ;
* val = result ;
return PBSTREAM _STATUS_OK ;
return U PB_STATUS_OK;
}
static pbstream _status_t get_f_uint32_t ( uint8_t * restrict * buf ,
uint32_t * restrict val )
static u pb_status_t get_f_uint32_t ( uint8_t * restrict * buf ,
uint32_t * restrict val )
{
uint8_t * b = * buf ;
# define SHL(val, bits) ((uint32_t)val << bits)
* val = SHL ( b [ 0 ] , 0 ) | SHL ( b [ 1 ] , 8 ) | SHL ( b [ 2 ] , 16 ) | SHL ( b [ 3 ] , 24 ) ;
# undef SHL
* buf + = sizeof ( uint32_t ) ;
return PBSTREAM _STATUS_OK ;
return U PB_STATUS_OK;
}
static pbstream _status_t skip_f_uint32_t ( uint8_t * * buf )
static u pb_status_t skip_f_uint32_t ( uint8_t * * buf )
{
* buf + = sizeof ( uint32_t ) ;
return PBSTREAM _STATUS_OK ;
return U PB_STATUS_OK;
}
static pbstream _status_t get_f_uint64_t ( uint8_t * restrict * buf ,
uint64_t * restrict val )
static u pb_status_t get_f_uint64_t ( uint8_t * restrict * buf ,
uint64_t * restrict val )
{
uint8_t * b = * buf ;
/* TODO: is this worth 32/64 specializing? */
@ -120,13 +119,13 @@ static pbstream_status_t get_f_uint64_t(uint8_t *restrict *buf,
SHL ( b [ 4 ] , 32 ) | SHL ( b [ 5 ] , 40 ) | SHL ( b [ 6 ] , 48 ) | SHL ( b [ 7 ] , 56 ) ;
# undef SHL
* buf + = sizeof ( uint64_t ) ;
return PBSTREAM _STATUS_OK ;
return U PB_STATUS_OK;
}
static pbstream _status_t skip_f_uint64_t ( uint8_t * * buf )
static u pb_status_t skip_f_uint64_t ( uint8_t * * buf )
{
* buf + = sizeof ( uint64_t ) ;
return PBSTREAM _STATUS_OK ;
return U PB_STATUS_OK;
}
/* Decodes a 32-bit ZigZag-encoded integer: the low bit of `n` selects the
 * sign and the remaining bits carry the magnitude, so 0,1,2,3,... map to
 * 0,-1,1,-2,...  The XOR mask is all-ones when the low bit is set and zero
 * otherwise; it is computed in unsigned arithmetic and converted to int32_t
 * only at the end. */
static int32_t zz_decode_32(uint32_t n)
{
  uint32_t mask = (uint32_t)0 - (n & 1);
  return (int32_t)((n >> 1) ^ mask);
}
@ -139,15 +138,15 @@ static int64_t zz_decode_64(uint64_t n) { return (n >> 1) ^ -(int64_t)(n & 1); }
static void wvtov_ # # type ( wire_t s , val_t * d )
/* Defines get_<type>(s, buf, d), which reads one wire value from `buf` and
 * stores the converted result in d->v.<member_name>.
 *
 *   type        - suffix of the generated function and of the wvtov_<type>()
 *                 converter it calls.
 *   v_or_f      - `v` or `f`; together with wire_t selects the low-level
 *                 reader get_<v_or_f>_<wire_t>().
 *   wire_t      - C type of the raw value read off the wire.
 *   val_t       - not used by this macro's expansion.
 *   member_name - member of d->v that receives the converted value.
 *
 * On success the generated function adds the number of bytes consumed to
 * s->offset and returns UPB_STATUS_OK; if the low-level reader fails, its
 * status is returned (via CHECK) and s->offset is left untouched. */
#define GET(type, v_or_f, wire_t, val_t, member_name) \
  static upb_status_t get_ ## type(struct upb_parse_state *s, \
                                   uint8_t *buf, \
                                   struct upb_tagged_value *d) { \
    wire_t tmp; \
    uint8_t *b = buf; \
    CHECK(get_ ## v_or_f ## _ ## wire_t(&b, &tmp)); \
    wvtov_ ## type(tmp, &d->v.member_name); \
    s->offset += (b-buf); \
    return UPB_STATUS_OK; \
  }
# define T(type, v_or_f, wire_t, val_t, member_name) \
@ -173,28 +172,28 @@ T(ENUM, v, uint32_t, int32_t, int32) { *d = (int32_t)s; }
# undef GET
# undef T
static void wvtov_delimited ( uint32_t s , struct pbstream _delimited * d , size_t o )
static void wvtov_delimited ( uint32_t s , struct u pb_delimited * d , size_t o )
{
d - > offset = o ;
d - > len = s ;
}
/* Use BYTES version for both STRING and BYTES, leave UTF-8 checks to client. */
static pbstream _status_t get_BYTES ( struct pbstream _parse_state * s , uint8_t * buf ,
struct pbstream _tagged_value * d ) {
static u pb_status_t get_BYTES ( struct u pb_parse_state * s , uint8_t * buf ,
struct u pb_tagged_value * d ) {
uint32_t tmp ;
uint8_t * b = buf ;
CHECK ( get_v_uint32_t ( & b , & tmp ) ) ;
s - > offset + = ( b - buf ) ; /* advance past length varint. */
wvtov_delimited ( tmp , & d - > v . delimited , s - > offset ) ;
size_t new_offset = s - > offset + d - > v . delimited . len ; /* skip bytes */
if ( unlikely ( new_offset < s - > offset ) ) return PBSTREAM _ERROR_OVERFLOW ;
if ( unlikely ( new_offset < s - > offset ) ) return U PB_ERROR_OVERFLOW;
s - > offset = new_offset ;
return PBSTREAM _STATUS_OK ;
return U PB_STATUS_OK;
}
static pbstream _status_t get_MESSAGE ( struct pbstream _parse_state * s , uint8_t * buf ,
struct pbstream _tagged_value * d ) {
static u pb_status_t get_MESSAGE ( struct u pb_parse_state * s , uint8_t * buf ,
struct u pb_tagged_value * d ) {
/* We're entering a sub-message. */
uint32_t tmp ;
uint8_t * b = buf ;
@ -202,146 +201,139 @@ static pbstream_status_t get_MESSAGE(struct pbstream_parse_state *s, uint8_t *bu
s - > offset + = ( b - buf ) ; /* advance past length varint. */
wvtov_delimited ( tmp , & d - > v . delimited , s - > offset ) ;
/* Unlike STRING and BYTES, we *don't* advance past delimited here. */
if ( unlikely ( + + s - > top = = s - > limit ) ) return PBSTREAM _ERROR_STACK_OVERFLOW ;
if ( unlikely ( + + s - > top = = s - > limit ) ) return U PB_ERROR_STACK_OVERFLOW;
s - > top - > fieldset = d - > field - > fieldset ;
s - > top - > end_offset = d - > v . delimited . offset + d - > v . delimited . len ;
if ( unlikely ( s - > top - > end_offset < s - > offset ) ) return PBSTREAM _ERROR_OVERFLOW ;
return PBSTREAM _STATUS_OK ;
if ( unlikely ( s - > top - > end_offset < s - > offset ) ) return U PB_ERROR_OVERFLOW;
return U PB_STATUS_OK;
}
struct pbstream _type_info {
pbstream _wire_type_t expected_wire_type ;
pbstream _status_t ( * get ) ( struct pbstream _parse_state * s , uint8_t * buf ,
struct pbstream _tagged_value * d ) ;
struct u pb_type_info {
u pb_wire_type_t expected_wire_type ;
u pb_status_t ( * get ) ( struct u pb_parse_state * s , uint8_t * buf ,
struct u pb_tagged_value * d ) ;
} ;
static struct pbstream _type_info type_info [ ] = {
{ PBSTREAM _WIRE_TYPE_64BIT , get_DOUBLE } ,
{ PBSTREAM _WIRE_TYPE_32BIT , get_FLOAT } ,
{ PBSTREAM _WIRE_TYPE_VARINT , get_INT32 } ,
{ PBSTREAM _WIRE_TYPE_VARINT , get_INT64 } ,
{ PBSTREAM _WIRE_TYPE_VARINT , get_UINT32 } ,
{ PBSTREAM _WIRE_TYPE_VARINT , get_UINT64 } ,
{ PBSTREAM _WIRE_TYPE_VARINT , get_SINT32 } ,
{ PBSTREAM _WIRE_TYPE_VARINT , get_SINT64 } ,
{ PBSTREAM _WIRE_TYPE_32BIT , get_FIXED32 } ,
{ PBSTREAM _WIRE_TYPE_64BIT , get_FIXED64 } ,
{ PBSTREAM _WIRE_TYPE_32BIT , get_SFIXED32 } ,
{ PBSTREAM _WIRE_TYPE_64BIT , get_SFIXED64 } ,
{ PBSTREAM _WIRE_TYPE_VARINT , get_BOOL } ,
{ PBSTREAM _WIRE_TYPE_DELIMITED , get_BYTES } ,
{ PBSTREAM _WIRE_TYPE_DELIMITED , get_BYTES } ,
{ PBSTREAM _WIRE_TYPE_VARINT , get_ENUM } ,
{ PBSTREAM _WIRE_TYPE_DELIMITED , get_MESSAGE }
static struct u pb_type_info type_info [ ] = {
{ U PB_WIRE_TYPE_64BIT, get_DOUBLE } ,
{ U PB_WIRE_TYPE_32BIT, get_FLOAT } ,
{ U PB_WIRE_TYPE_VARINT, get_INT32 } ,
{ U PB_WIRE_TYPE_VARINT, get_INT64 } ,
{ U PB_WIRE_TYPE_VARINT, get_UINT32 } ,
{ U PB_WIRE_TYPE_VARINT, get_UINT64 } ,
{ U PB_WIRE_TYPE_VARINT, get_SINT32 } ,
{ U PB_WIRE_TYPE_VARINT, get_SINT64 } ,
{ U PB_WIRE_TYPE_32BIT, get_FIXED32 } ,
{ U PB_WIRE_TYPE_64BIT, get_FIXED64 } ,
{ U PB_WIRE_TYPE_32BIT, get_SFIXED32 } ,
{ U PB_WIRE_TYPE_64BIT, get_SFIXED64 } ,
{ U PB_WIRE_TYPE_VARINT, get_BOOL } ,
{ U PB_WIRE_TYPE_DELIMITED, get_BYTES } ,
{ U PB_WIRE_TYPE_DELIMITED, get_BYTES } ,
{ U PB_WIRE_TYPE_VARINT, get_ENUM } ,
{ U PB_WIRE_TYPE_DELIMITED, get_MESSAGE }
} ;
pbstream _status_t parse_tag ( uint8_t * * buf , struct pbstream _tag * tag )
u pb_status_t parse_tag ( uint8_t * * buf , struct u pb_tag * tag )
{
uint32_t tag_int ;
CHECK ( get_v_uint32_t ( buf , & tag_int ) ) ;
tag - > wire_type = ( pbstream _wire_type_t ) ( tag_int & 0x07 ) ;
tag - > wire_type = ( u pb_wire_type_t) ( tag_int & 0x07 ) ;
tag - > field_number = tag_int > > 3 ;
return PBSTREAM _STATUS_OK ;
return U PB_STATUS_OK;
}
pbstream _status_t parse_wire_value ( uint8_t * buf , size_t * offset ,
pbstream _wire_type_t wt ,
union pbstream _wire_value * wv )
u pb_status_t parse_wire_value ( uint8_t * buf , size_t * offset ,
u pb_wire_type_t wt ,
union u pb_wire_value * wv )
{
# define READ(expr) CHECK(expr); *offset += (b-buf)
uint8_t * b = buf ;
switch ( wt ) {
case PBSTREAM _WIRE_TYPE_VARINT :
case U PB_WIRE_TYPE_VARINT:
READ ( get_v_uint64_t ( & b , & wv - > varint ) ) ; break ;
case PBSTREAM _WIRE_TYPE_64BIT :
case U PB_WIRE_TYPE_64BIT:
READ ( get_f_uint64_t ( & b , & wv - > _64bit ) ) ; break ;
case PBSTREAM _WIRE_TYPE_32BIT :
case U PB_WIRE_TYPE_32BIT:
READ ( get_f_uint32_t ( & b , & wv - > _32bit ) ) ; break ;
case PBSTREAM _WIRE_TYPE_DELIMITED :
case U PB_WIRE_TYPE_DELIMITED:
wv - > delimited . offset = * offset ;
READ ( get_v_uint32_t ( & b , & wv - > delimited . len ) ) ;
size_t new_offset = * offset + wv - > delimited . len ;
if ( new_offset < * offset ) return PBSTREAM _ERROR_OVERFLOW ;
if ( new_offset < * offset ) return U PB_ERROR_OVERFLOW;
* offset + = new_offset ;
break ;
case PBSTREAM _WIRE_TYPE_START_GROUP :
case PBSTREAM _WIRE_TYPE_END_GROUP :
return PBSTREAM _ERROR_GROUP ; /* deprecated, no plans to support. */
case U PB_WIRE_TYPE_START_GROUP:
case U PB_WIRE_TYPE_END_GROUP:
return U PB_ERROR_GROUP; /* deprecated, no plans to support. */
}
return PBSTREAM _STATUS_OK ;
return U PB_STATUS_OK;
}
pbstream _status_t skip_wire_value ( uint8_t * buf , size_t * offset ,
pbstream _wire_type_t wt )
u pb_status_t skip_wire_value ( uint8_t * buf , size_t * offset ,
u pb_wire_type_t wt )
{
uint8_t * b = buf ;
switch ( wt ) {
case PBSTREAM _WIRE_TYPE_VARINT :
case U PB_WIRE_TYPE_VARINT:
READ ( skip_v_uint64_t ( & b ) ) ; break ;
case PBSTREAM _WIRE_TYPE_64BIT :
case U PB_WIRE_TYPE_64BIT:
READ ( skip_f_uint64_t ( & b ) ) ; break ;
case PBSTREAM _WIRE_TYPE_32BIT :
case U PB_WIRE_TYPE_32BIT:
READ ( skip_f_uint32_t ( & b ) ) ; break ;
case PBSTREAM _WIRE_TYPE_DELIMITED : {
case U PB_WIRE_TYPE_DELIMITED: {
/* Have to get (not skip) the length to skip the bytes. */
uint32_t len ;
READ ( get_v_uint32_t ( & b , & len ) ) ;
size_t new_offset = * offset + len ;
if ( new_offset < * offset ) return PBSTREAM _ERROR_OVERFLOW ;
if ( new_offset < * offset ) return U PB_ERROR_OVERFLOW;
* offset + = new_offset ;
break ;
}
case PBSTREAM _WIRE_TYPE_START_GROUP :
case PBSTREAM _WIRE_TYPE_END_GROUP :
return PBSTREAM _ERROR_GROUP ; /* deprecated, no plans to support. */
case U PB_WIRE_TYPE_START_GROUP:
case U PB_WIRE_TYPE_END_GROUP:
return U PB_ERROR_GROUP; /* deprecated, no plans to support. */
}
return PBSTREAM _STATUS_OK ;
return U PB_STATUS_OK;
# undef READ
}
/* Looks up the field with number `num` in `fs`.
 *
 * Field numbers 1..fs->array_size are served from the direct-index array
 * (slot num-1).  Returns NULL when `num` is outside that range or when the
 * slot is empty, so a field number taken from untrusted input can never index
 * outside the array. */
struct pbstream_field *pbstream_find_field(struct pbstream_fieldset *fs,
                                           pbstream_field_number_t num)
{
  /* TODO: the hashtable part. */
  if (num < 1 || num > fs->array_size) return NULL;
  return fs->array[num-1];
}
/* Parses and processes the next value from buf. */
pbstream _status_t pbstream _parse_field ( struct pbstream _parse_state * s ,
uint8_t * buf ,
pbstream _field_number_t * fieldnum ,
struct pbstream _tagged_value * val ,
struct pbstream _tagged_wire_value * wv )
upb_status_t upb_parse_field ( struct upb_parse_state * s ,
uint8_t * buf ,
upb_field_number_t * fieldnum ,
struct upb_tagged_value * val ,
struct upb_tagged_wire_value * wv )
{
/* Check for end-of-message at the current stack depth. */
if ( unlikely ( s - > offset > = s - > top - > end_offset ) ) {
/* If the end offset isn't an exact field boundary, the pb is corrupt. */
if ( unlikely ( s - > offset ! = s - > top - > end_offset ) )
return PBSTREAM _ERROR_BAD_SUBMESSAGE_END ;
return U PB_ERROR_BAD_SUBMESSAGE_END;
s - > top - - ;
return PBSTREAM _STATUS_SUBMESSAGE_END ;
return U PB_STATUS_SUBMESSAGE_END;
}
struct pbstream _tag tag ;
struct u pb_tag tag ;
uint8_t * b = buf ;
CHECK ( parse_tag ( & b , & tag ) ) ;
s - > offset + = ( b - buf ) ;
struct pbstream _field * fd = pbstream _find_field ( s - > top - > fieldset ,
struct u pb_field * fd = u pb_find_field( s - > top - > fieldset ,
tag . field_number ) ;
pbstream _status_t unknown_value_status ;
u pb_status_t unknown_value_status ;
if ( unlikely ( ! fd ) ) {
unknown_value_status = PBSTREAM _ERROR_UNKNOWN_VALUE ;
unknown_value_status = U PB_ERROR_UNKNOWN_VALUE;
goto unknown_value ;
}
struct pbstream _type_info * info = & type_info [ fd - > type ] ;
struct u pb_type_info * info = & type_info [ fd - > type ] ;
if ( unlikely ( tag . wire_type ! = info - > expected_wire_type ) ) {
unknown_value_status = PBSTREAM _ERROR_MISMATCHED_TYPE ;
unknown_value_status = U PB_ERROR_MISMATCHED_TYPE;
goto unknown_value ;
}
* fieldnum = tag . field_number ;
val - > field = fd ;
CHECK ( info - > get ( s , b , val ) ) ;
return PBSTREAM _STATUS_OK ;
return U PB_STATUS_OK;
unknown_value :
wv - > type = tag . wire_type ;
@ -349,55 +341,20 @@ unknown_value:
return unknown_value_status ;
}
void pbstream _init_parser (
struct pbstream _parse_state * state ,
struct pbstream _fieldset * toplevel_fieldset )
void u pb_init_parser(
struct u pb_parse_state * state ,
struct u pb_fieldset * toplevel_fieldset )
{
state - > offset = 0 ;
state - > top = state - > stack ;
state - > limit = state - > top + PBSTREAM _MAX_STACK ;
state - > limit = state - > top + U PB_MAX_STACK;
state - > top - > fieldset = toplevel_fieldset ;
state - > top - > end_offset = SIZE_MAX ;
}
static int compare_fields ( const void * f1 , const void * f2 )
{
return ( ( struct pbstream_field * ) f1 ) - > field_number -
( ( struct pbstream_field * ) f2 ) - > field_number ;
}
/* Builds `fieldset` from the `num_fields` entries of `fields`.
 *
 * `fields` is sorted in place by field_number (via compare_fields), then
 * copied into a freshly malloc'ed fieldset->fields.  A second malloc'ed
 * table, fieldset->array, maps field_number-1 to a pointer into that copy
 * for every field number up to the chosen array size; unused slots are
 * zeroed.  Both allocations are released by pbstream_free_fieldset().
 *
 * NOTE(review): neither malloc() result is checked before it is written to.
 * NOTE(review): with num_fields == 0 the final assert reads fields[-1]. */
void pbstream_init_fieldset ( struct pbstream_fieldset * fieldset ,
struct pbstream_field * fields ,
int num_fields )
{
qsort ( fields , num_fields , sizeof ( * fields ) , compare_fields ) ;
/* Find the largest n for which at least half the fieldnums <n are used.
* Start at 8 to avoid noise of small numbers . */
pbstream_field_number_t n = 0 , maybe_n ;
/* Walk the sorted fields; stop at the first field number above 8 that is at
 * least twice the count of fields seen so far (integer division).  n keeps
 * the last field number accepted before that point. */
for ( int i = 0 ; i < num_fields ; i + + ) {
maybe_n = fields [ i ] . field_number ;
if ( maybe_n > 8 & & maybe_n / ( i + 1 ) > = 2 ) break ;
n = maybe_n ;
}
fieldset - > num_fields = num_fields ;
/* Private copy of the (now sorted) field descriptors. */
fieldset - > fields = malloc ( sizeof ( * fieldset - > fields ) * num_fields ) ;
memcpy ( fieldset - > fields , fields , sizeof ( * fields ) * num_fields ) ;
fieldset - > array_size = n ;
/* Direct-index table with n slots, all initially empty. */
fieldset - > array = malloc ( sizeof ( * fieldset - > array ) * n ) ;
memset ( fieldset - > array , 0 , sizeof ( * fieldset - > array ) * n ) ;
/* Point slot field_number-1 at the copied descriptor, not the caller's. */
for ( int i = 0 ; i < num_fields & & fields [ i ] . field_number < = n ; i + + )
fieldset - > array [ fields [ i ] . field_number - 1 ] = & fieldset - > fields [ i ] ;
/* Until we support the hashtable part... */
/* Every field must have landed in the array: the largest field number has to
 * equal the array size. */
assert ( n = = fields [ num_fields - 1 ] . field_number ) ;
/* NOTE(review): the return statement below names f1, f2 and struct upb_field,
 * none of which are in scope in this void function.  It looks like residue
 * of a compare_fields body from a merged diff -- confirm and remove. */
return ( ( struct upb_field * ) f1 ) - > field_number -
( ( struct upb_field * ) f2 ) - > field_number ;
}
void pbstream_free_fieldset ( struct pbstream_fieldset * fieldset )
{
free ( fieldset - > fields ) ;
free ( fieldset - > array ) ;
}