From 932753d91e183ab25cf1a0f420d9488295002297 Mon Sep 17 00:00:00 2001 From: Josh Haberman Date: Tue, 2 Apr 2019 07:30:30 -0700 Subject: [PATCH 1/6] WIP. --- upb/decode.c | 6 ++++-- upb/msg.c | 15 ++++----------- upb/msg.h | 6 ++---- 3 files changed, 10 insertions(+), 17 deletions(-) diff --git a/upb/decode.c b/upb/decode.c index 96622f9450..79bda54d2c 100644 --- a/upb/decode.c +++ b/upb/decode.c @@ -33,6 +33,7 @@ typedef struct { /* Current decoding pointer. Points to the beginning of a field until we * have finished decoding the whole field. */ const char *ptr; + upb_arena *arena; } upb_decstate; /* Data pertaining to a single message frame. */ @@ -557,10 +558,11 @@ static bool upb_decode_message(upb_decstate *d, const char *limit, return true; } -bool upb_decode(const char *buf, size_t size, void *msg, - const upb_msglayout *l) { +bool upb_decode(const char *buf, size_t size, void *msg, const upb_msglayout *l, + upb_arena *arena) { upb_decstate state; state.ptr = buf; + state.arena = arena; return upb_decode_message(&state, buf + size, 0, msg, l); } diff --git a/upb/msg.c b/upb/msg.c index ca2766cacc..31607b75e8 100644 --- a/upb/msg.c +++ b/upb/msg.c @@ -13,9 +13,6 @@ /* Used when a message is not extendable. */ typedef struct { - /* TODO(haberman): use pointer tagging so we we are slim when known unknown - * fields are not present. */ - upb_arena *arena; char *unknown; size_t unknown_len; size_t unknown_size; @@ -49,10 +46,6 @@ static upb_msg_internal_withext *upb_msg_getinternalwithext( return VOIDPTR_AT(msg, -sizeof(upb_msg_internal_withext)); } -upb_arena *upb_msg_arena(const upb_msg *msg) { - return upb_msg_getinternal_const(msg)->arena; -} - upb_msg *upb_msg_new(const upb_msglayout *l, upb_arena *a) { upb_alloc *alloc = upb_arena_alloc(a); void *mem = upb_malloc(alloc, upb_msg_sizeof(l)); @@ -70,7 +63,6 @@ upb_msg *upb_msg_new(const upb_msglayout *l, upb_arena *a) { /* Initialize internal members. */ in = upb_msg_getinternal(msg); - in->arena = a; in->unknown = NULL; in->unknown_len = 0; in->unknown_size = 0; @@ -99,10 +91,11 @@ upb_array *upb_array_new(upb_fieldtype_t type, upb_arena *a) { return ret; } -void upb_msg_addunknown(upb_msg *msg, const char *data, size_t len) { - upb_msg_internal* in = upb_msg_getinternal(msg); +void upb_msg_addunknown(upb_msg *msg, const char *data, size_t len, + upb_arena *arena) { + upb_msg_internal *in = upb_msg_getinternal(msg); if (len > in->unknown_size - in->unknown_len) { - upb_alloc *alloc = upb_arena_alloc(in->arena); + upb_alloc *alloc = upb_arena_alloc(arena); size_t need = in->unknown_size + len; size_t newsize = UPB_MAX(in->unknown_size * 2, need); in->unknown = upb_realloc(alloc, in->unknown, in->unknown_size, newsize); diff --git a/upb/msg.h b/upb/msg.h index e46733f4f6..7f1a6e7b7e 100644 --- a/upb/msg.h +++ b/upb/msg.h @@ -58,10 +58,8 @@ typedef struct { upb_msg *upb_msg_new(const upb_msglayout *l, upb_arena *a); upb_msg *upb_msg_new(const upb_msglayout *l, upb_arena *a); -/* Returns the arena for the given message. */ -upb_arena *upb_msg_arena(const upb_msg *msg); - -void upb_msg_addunknown(upb_msg *msg, const char *data, size_t len); +void upb_msg_addunknown(upb_msg *msg, const char *data, size_t len, + upb_arena *arena); const char *upb_msg_getunknown(const upb_msg *msg, size_t *len); upb_array *upb_array_new(upb_fieldtype_t type, upb_arena *a); From 7485261370ad0bb691c801a4128712d962cdd7d3 Mon Sep 17 00:00:00 2001 From: Joshua Haberman Date: Sun, 2 Jun 2019 21:09:33 -0700 Subject: [PATCH 2/6] WIP. --- upb/decode.c | 24 +++++++++++++++--------- 1 file changed, 15 insertions(+), 9 deletions(-) diff --git a/upb/decode.c b/upb/decode.c index 17733d33d1..8d3b3985a7 100644 --- a/upb/decode.c +++ b/upb/decode.c @@ -30,20 +30,26 @@ const uint8_t upb_desctype_to_fieldtype[] = { /* Data pertaining to the parse. */ typedef struct { - /* Current decoding pointer. Points to the beginning of a field until we - * have finished decoding the whole field. */ - const char *ptr; + /* Parsing limit: either end of delimited region or end of buffer. */ + const char *limit; + + /* Signals how the parse ended: + * - when 0: parse ended at delimited limit. + * - when 1: parse ended due to end-of-stream. + * - otherwise: parse ended due to a terminating tag (either 0 or END_GROUP). + * + * In the last case, tag-1 is stored, to avoid conflicting with case 0. */ + uint32_t parse_status; + upb_arena *arena; } upb_decstate; -/* Data pertaining to a single message frame. */ +/* Data passed by value to each parsing function. */ typedef struct { - const char *limit; - int32_t group_number; /* 0 if we are not parsing a group. */ - - /* These members are unset for an unknown group frame. */ + const char *ptr, char *msg; const upb_msglayout *m; + upb_decstate *state; } upb_decframe; #define CHK(x) if (!(x)) { return false; } @@ -133,7 +139,7 @@ static void upb_set32(void *msg, size_t ofs, uint32_t val) { static bool upb_append_unknown(upb_decstate *d, upb_decframe *frame, const char *start) { - upb_msg_addunknown(frame->msg, start, d->ptr - start); + upb_msg_addunknown(frame->msg, start, d->ptr - start, d->arena); return true; } From ba0a2fb955450e339f01d58018b34b094115f7cf Mon Sep 17 00:00:00 2001 From: Joshua Haberman Date: Mon, 3 Jun 2019 13:50:35 -0700 Subject: [PATCH 3/6] Compiles, doesn't work yet. --- upb/decode.c | 305 +++++++++++++++++++++++++--------------------- upb/decode.h | 2 +- upbc/generator.cc | 2 +- 3 files changed, 169 insertions(+), 140 deletions(-) diff --git a/upb/decode.c b/upb/decode.c index 8d3b3985a7..5719d12aec 100644 --- a/upb/decode.c +++ b/upb/decode.c @@ -30,8 +30,10 @@ const uint8_t upb_desctype_to_fieldtype[] = { /* Data pertaining to the parse. */ typedef struct { - /* Parsing limit: either end of delimited region or end of buffer. */ - const char *limit; + const char *ptr; /* Current parsing position. */ + const char *field_start; /* Start of this field. */ + const char *limit; /* End of delimited region or end of buffer. */ + int depth; /* Signals how the parse ended: * - when 0: parse ended at delimited limit. @@ -46,18 +48,15 @@ typedef struct { /* Data passed by value to each parsing function. */ typedef struct { - const char *ptr, char *msg; - const upb_msglayout *m; + const upb_msglayout *layout; upb_decstate *state; } upb_decframe; -#define CHK(x) if (!(x)) { return false; } +#define CHK(x) if (!(x)) { return 0; } -static bool upb_skip_unknowngroup(upb_decstate *d, int field_number, - const char *limit); -static bool upb_decode_message(upb_decstate *d, const char *limit, - int group_number, char *msg, +static bool upb_skip_unknowngroup(upb_decstate *d, int field_number); +static bool upb_decode_message(upb_decstate *d, char *msg, const upb_msglayout *l); static bool upb_decode_varint(const char **ptr, const char *limit, @@ -103,15 +102,6 @@ static bool upb_decode_32bit(const char **ptr, const char *limit, return true; } -static bool upb_decode_tag(const char **ptr, const char *limit, - int *field_number, int *wire_type) { - uint32_t tag = 0; - CHK(upb_decode_varint32(ptr, limit, &tag)); - *field_number = tag >> 3; - *wire_type = tag & 7; - return true; -} - static int32_t upb_zzdecode_32(uint32_t n) { return (n >> 1) ^ -(int32_t)(n & 1); } @@ -121,15 +111,14 @@ static int64_t upb_zzdecode_64(uint64_t n) { } static bool upb_decode_string(const char **ptr, const char *limit, - upb_strview *val) { + int *outlen) { uint32_t len; CHK(upb_decode_varint32(ptr, limit, &len) && len < INT32_MAX && limit - *ptr >= (int32_t)len); - *val = upb_strview_make(*ptr, len); - *ptr += len; + *outlen = len; return true; } @@ -137,41 +126,54 @@ static void upb_set32(void *msg, size_t ofs, uint32_t val) { memcpy((char*)msg + ofs, &val, sizeof(val)); } -static bool upb_append_unknown(upb_decstate *d, upb_decframe *frame, - const char *start) { - upb_msg_addunknown(frame->msg, start, d->ptr - start, d->arena); +static bool upb_append_unknown(upb_decstate *d, upb_decframe *frame) { + upb_msg_addunknown(frame->msg, d->field_start, d->ptr - d->field_start, + d->arena); return true; } -static bool upb_skip_unknownfielddata(upb_decstate *d, upb_decframe *frame, - int field_number, int wire_type) { - switch (wire_type) { + +static bool upb_skip_unknownfielddata(upb_decstate *d, uint32_t tag, + uint32_t group_fieldnum) { + switch (tag & 7) { case UPB_WIRE_TYPE_VARINT: { uint64_t val; - return upb_decode_varint(&d->ptr, frame->limit, &val); + return upb_decode_varint(&d->ptr, d->limit, &val); } case UPB_WIRE_TYPE_32BIT: { uint32_t val; - return upb_decode_32bit(&d->ptr, frame->limit, &val); + return upb_decode_32bit(&d->ptr, d->limit, &val); } case UPB_WIRE_TYPE_64BIT: { uint64_t val; - return upb_decode_64bit(&d->ptr, frame->limit, &val); + return upb_decode_64bit(&d->ptr, d->limit, &val); } case UPB_WIRE_TYPE_DELIMITED: { - upb_strview val; - return upb_decode_string(&d->ptr, frame->limit, &val); + int len; + return upb_decode_string(&d->ptr, d->limit, &len); + d->ptr += len; } case UPB_WIRE_TYPE_START_GROUP: - return upb_skip_unknowngroup(d, field_number, frame->limit); + return upb_skip_unknowngroup(d, tag >> 3); case UPB_WIRE_TYPE_END_GROUP: - CHK(field_number == frame->group_number); - frame->limit = d->ptr; + CHK((tag >> 3) == group_fieldnum); return true; } return false; } +static bool upb_skip_unknowngroup(upb_decstate *d, int field_number) { + while (d->ptr < d->limit && d->parse_status == 0) { + uint32_t tag = 0; + CHK(upb_decode_varint32(&d->ptr, d->limit, &tag)); + CHK(upb_skip_unknownfielddata(d, tag, field_number)); + } + + CHK(d->parse_status == field_number); + d->parse_status = 0; + return true; +} + static bool upb_array_grow(upb_array *arr, size_t elements, size_t elem_size) { size_t needed = arr->len + elements; size_t new_size = UPB_MAX(arr->size, 8); @@ -197,7 +199,7 @@ static bool upb_array_grow(upb_array *arr, size_t elements, size_t elem_size) { static void *upb_array_reserve(upb_array *arr, size_t elements, size_t elem_size) { if (arr->size - arr->len < elements) { - if (!upb_array_grow(arr, elements, elem_size)) return NULL; + CHK(upb_array_grow(arr, elements, elem_size)); } return (char*)arr->data + (arr->len * elem_size); } @@ -225,16 +227,44 @@ static upb_array *upb_getorcreatearr(upb_decframe *frame, if (!arr) { upb_fieldtype_t type = upb_desctype_to_fieldtype[field->descriptortype]; - arr = upb_array_new(type, upb_msg_arena(frame->msg)); - if (!arr) { - return NULL; - } + arr = upb_array_new(type, frame->state->arena); + CHK(arr); *(upb_array**)&frame->msg[field->offset] = arr; } return arr; } +static upb_msg *upb_getorcreatemsg(upb_decframe *frame, + const upb_msglayout_field *field, + const upb_msglayout **subm) { + upb_msg **submsg = (void*)(frame->msg + field->offset); + *subm = frame->layout->submsgs[field->submsg_index]; + + UPB_ASSERT(field->label != UPB_LABEL_REPEATED); + + if (!*submsg) { + *submsg = upb_msg_new(*subm, frame->state->arena); + CHK(*submsg); + } + + return *submsg; +} + +static upb_msg *upb_addmsg(upb_decframe *frame, + const upb_msglayout_field *field, + const upb_msglayout **subm) { + upb_msg *submsg; + upb_array *arr = upb_getorcreatearr(frame, field); + + *subm = frame->layout->submsgs[field->submsg_index]; + submsg = upb_msg_new(*subm, frame->state->arena); + CHK(submsg); + upb_array_add(arr, 1, sizeof(submsg), &submsg); + + return submsg; +} + static void upb_sethasbit(upb_decframe *frame, const upb_msglayout_field *field) { int32_t hasbit = field->presence; @@ -278,27 +308,33 @@ static void upb_decode_setpresent(upb_decframe *frame, } } -static bool upb_decode_submsg(upb_decstate *d, upb_decframe *frame, - const char *limit, - const upb_msglayout_field *field, - int group_number) { - upb_msg **submsg = (void*)(frame->msg + field->offset); - const upb_msglayout *subm = frame->m->submsgs[field->submsg_index]; - - if (!*submsg) { - *submsg = upb_msg_new(subm, upb_msg_arena(frame->msg)); - CHK(*submsg); - } +static bool upb_decode_msgfield(upb_decstate *d, upb_msg *msg, + const upb_msglayout *layout, int limit) { + const char* saved_limit = d->limit; + d->limit = d->ptr + limit; + CHK(--d->depth >= 0); + upb_decode_message(d, msg, layout); + d->depth++; + d->limit = saved_limit; + CHK(d->parse_status == 0); + return true; +} - upb_decode_message(d, limit, group_number, *submsg, subm); +static bool upb_decode_groupfield(upb_decstate *d, upb_msg *msg, + const upb_msglayout *layout, + int field_number) { + CHK(--d->depth >= 0); + upb_decode_message(d, msg, layout); + d->depth++; + CHK(d->parse_status == field_number); + d->parse_status = 0; return true; } static bool upb_decode_varintfield(upb_decstate *d, upb_decframe *frame, - const char *field_start, const upb_msglayout_field *field) { uint64_t val; - CHK(upb_decode_varint(&d->ptr, frame->limit, &val)); + CHK(upb_decode_varint(&d->ptr, d->limit, &val)); switch (field->descriptortype) { case UPB_DESCRIPTOR_TYPE_INT64: @@ -328,7 +364,7 @@ static bool upb_decode_varintfield(upb_decstate *d, upb_decframe *frame, break; } default: - return upb_append_unknown(d, frame, field_start); + return upb_append_unknown(d, frame); } upb_decode_setpresent(frame, field); @@ -336,10 +372,9 @@ static bool upb_decode_varintfield(upb_decstate *d, upb_decframe *frame, } static bool upb_decode_64bitfield(upb_decstate *d, upb_decframe *frame, - const char *field_start, const upb_msglayout_field *field) { uint64_t val; - CHK(upb_decode_64bit(&d->ptr, frame->limit, &val)); + CHK(upb_decode_64bit(&d->ptr, d->limit, &val)); switch (field->descriptortype) { case UPB_DESCRIPTOR_TYPE_DOUBLE: @@ -348,7 +383,7 @@ static bool upb_decode_64bitfield(upb_decstate *d, upb_decframe *frame, CHK(upb_decode_addval(frame, field, &val, sizeof(val))); break; default: - return upb_append_unknown(d, frame, field_start); + return upb_append_unknown(d, frame); } upb_decode_setpresent(frame, field); @@ -356,10 +391,9 @@ static bool upb_decode_64bitfield(upb_decstate *d, upb_decframe *frame, } static bool upb_decode_32bitfield(upb_decstate *d, upb_decframe *frame, - const char *field_start, const upb_msglayout_field *field) { uint32_t val; - CHK(upb_decode_32bit(&d->ptr, frame->limit, &val)); + CHK(upb_decode_32bit(&d->ptr, d->limit, &val)); switch (field->descriptortype) { case UPB_DESCRIPTOR_TYPE_FLOAT: @@ -368,34 +402,41 @@ static bool upb_decode_32bitfield(upb_decstate *d, upb_decframe *frame, CHK(upb_decode_addval(frame, field, &val, sizeof(val))); break; default: - return upb_append_unknown(d, frame, field_start); + return upb_append_unknown(d, frame); } upb_decode_setpresent(frame, field); return true; } -static bool upb_decode_fixedpacked(upb_array *arr, upb_strview data, - int elem_size) { - size_t elements = data.size / elem_size; +static bool upb_decode_fixedpacked(upb_decstate *d, upb_array *arr, + uint32_t len, int elem_size) { + size_t elements = len / elem_size; - CHK((size_t)(elements * elem_size) == data.size); - CHK(upb_array_add(arr, elements, elem_size, data.data)); + CHK((size_t)(elements * elem_size) == len); + CHK(upb_array_add(arr, elements, elem_size, d->ptr)); + d->ptr += len; return true; } +static upb_strview upb_decode_strfield(upb_decstate *d, uint32_t len) { + upb_strview ret; + ret.data = d->ptr; + ret.size = len; + d->ptr += len; + return ret; +} + static bool upb_decode_toarray(upb_decstate *d, upb_decframe *frame, - const char *field_start, - const upb_msglayout_field *field, - upb_strview val) { + const upb_msglayout_field *field, int len) { upb_array *arr = upb_getorcreatearr(frame, field); CHK(arr); #define VARINT_CASE(ctype, decode) \ { \ - const char *ptr = val.data; \ - const char *limit = ptr + val.size; \ + const char *ptr = d->ptr; \ + const char *limit = ptr + len; \ while (ptr < limit) { \ uint64_t val; \ ctype decoded; \ @@ -403,22 +444,24 @@ static bool upb_decode_toarray(upb_decstate *d, upb_decframe *frame, decoded = (decode)(val); \ CHK(upb_array_add(arr, 1, sizeof(decoded), &decoded)); \ } \ + d->ptr = ptr; \ return true; \ } switch (field->descriptortype) { case UPB_DESCRIPTOR_TYPE_STRING: case UPB_DESCRIPTOR_TYPE_BYTES: { - return upb_array_add(arr, 1, sizeof(val), &val); + upb_strview str = upb_decode_strfield(d, len); + return upb_array_add(arr, 1, sizeof(str), &str); } case UPB_DESCRIPTOR_TYPE_FLOAT: case UPB_DESCRIPTOR_TYPE_FIXED32: case UPB_DESCRIPTOR_TYPE_SFIXED32: - return upb_decode_fixedpacked(arr, val, sizeof(int32_t)); + return upb_decode_fixedpacked(d, arr, len, sizeof(int32_t)); case UPB_DESCRIPTOR_TYPE_DOUBLE: case UPB_DESCRIPTOR_TYPE_FIXED64: case UPB_DESCRIPTOR_TYPE_SFIXED64: - return upb_decode_fixedpacked(arr, val, sizeof(int64_t)); + return upb_decode_fixedpacked(d, arr, len, sizeof(int64_t)); case UPB_DESCRIPTOR_TYPE_INT32: case UPB_DESCRIPTOR_TYPE_UINT32: case UPB_DESCRIPTOR_TYPE_ENUM: @@ -433,48 +476,45 @@ static bool upb_decode_toarray(upb_decstate *d, upb_decframe *frame, case UPB_DESCRIPTOR_TYPE_SINT64: VARINT_CASE(int64_t, upb_zzdecode_64); case UPB_DESCRIPTOR_TYPE_MESSAGE: { - const upb_msglayout *subm = frame->m->submsgs[field->submsg_index]; - upb_msg *submsg = upb_msg_new(subm, upb_msg_arena(frame->msg)); - + const upb_msglayout *subm; + upb_msg *submsg = upb_addmsg(frame, field, &subm); CHK(submsg); - CHK(val.size <= (size_t)(frame->limit - val.data)); - upb_array_add(arr, 1, sizeof(submsg), &submsg); - - d->ptr -= val.size; - return upb_decode_message( - d, val.data + val.size, frame->group_number, submsg, subm); + return upb_decode_message(d, submsg, subm); } case UPB_DESCRIPTOR_TYPE_GROUP: - return upb_append_unknown(d, frame, field_start); + return upb_append_unknown(d, frame); } #undef VARINT_CASE UPB_UNREACHABLE(); } static bool upb_decode_delimitedfield(upb_decstate *d, upb_decframe *frame, - const char *field_start, const upb_msglayout_field *field) { - upb_strview val; + int len; - CHK(upb_decode_string(&d->ptr, frame->limit, &val)); + CHK(upb_decode_string(&d->ptr, d->limit, &len)); if (field->label == UPB_LABEL_REPEATED) { - return upb_decode_toarray(d, frame, field_start, field, val); + return upb_decode_toarray(d, frame, field, len); } else { switch (field->descriptortype) { case UPB_DESCRIPTOR_TYPE_STRING: case UPB_DESCRIPTOR_TYPE_BYTES: { - CHK(upb_decode_addval(frame, field, &val, sizeof(val))); + upb_strview str = upb_decode_strfield(d, len); + CHK(upb_decode_addval(frame, field, &str, sizeof(str))); break; } - case UPB_DESCRIPTOR_TYPE_MESSAGE: - CHK(val.size <= (size_t)(frame->limit - val.data)); - d->ptr -= val.size; - CHK(upb_decode_submsg(d, frame, val.data + val.size, field, 0)); + case UPB_DESCRIPTOR_TYPE_MESSAGE: { + const upb_msglayout *subm; + upb_msg *submsg = upb_getorcreatemsg(frame, field, &subm); + CHK(submsg); + CHK(upb_decode_msgfield(d, submsg, subm, len)); break; + } default: /* TODO(haberman): should we accept the last element of a packed? */ - return upb_append_unknown(d, frame, field_start); + d->ptr += len; + return upb_append_unknown(d, frame); } upb_decode_setpresent(frame, field); return true; @@ -495,71 +535,58 @@ static const upb_msglayout_field *upb_find_field(const upb_msglayout *l, } static bool upb_decode_field(upb_decstate *d, upb_decframe *frame) { - int field_number; - int wire_type; - const char *field_start = d->ptr; + uint32_t tag; const upb_msglayout_field *field; + int field_number; - CHK(upb_decode_tag(&d->ptr, frame->limit, &field_number, &wire_type)); - field = upb_find_field(frame->m, field_number); + d->field_start = d->ptr; + CHK(upb_decode_varint32(&d->ptr, d->limit, &tag)); + field_number = tag >> 3; + field = upb_find_field(frame->layout, field_number); if (field) { - switch (wire_type) { + switch (tag & 7) { case UPB_WIRE_TYPE_VARINT: - return upb_decode_varintfield(d, frame, field_start, field); + return upb_decode_varintfield(d, frame, field); case UPB_WIRE_TYPE_32BIT: - return upb_decode_32bitfield(d, frame, field_start, field); + return upb_decode_32bitfield(d, frame, field); case UPB_WIRE_TYPE_64BIT: - return upb_decode_64bitfield(d, frame, field_start, field); + return upb_decode_64bitfield(d, frame, field); case UPB_WIRE_TYPE_DELIMITED: - return upb_decode_delimitedfield(d, frame, field_start, field); - case UPB_WIRE_TYPE_START_GROUP: - CHK(field->descriptortype == UPB_DESCRIPTOR_TYPE_GROUP); - return upb_decode_submsg(d, frame, frame->limit, field, field_number); + return upb_decode_delimitedfield(d, frame, field); + case UPB_WIRE_TYPE_START_GROUP: { + const upb_msglayout *layout; + upb_msg *group; + + if (field->label == UPB_LABEL_REPEATED) { + group = upb_addmsg(frame, field, &layout); + } else { + group = upb_getorcreatemsg(frame, field, &layout); + } + + return upb_decode_groupfield(d, group, layout, field_number); + } case UPB_WIRE_TYPE_END_GROUP: - CHK(frame->group_number == field_number) - frame->limit = d->ptr; + d->parse_status = field_number; return true; default: CHK(false); } } else { CHK(field_number != 0); - CHK(upb_skip_unknownfielddata(d, frame, field_number, wire_type)); - CHK(upb_append_unknown(d, frame, field_start)); + CHK(upb_skip_unknownfielddata(d, tag, -1)); + CHK(upb_append_unknown(d, frame)); return true; } } -static bool upb_skip_unknowngroup(upb_decstate *d, int field_number, - const char *limit) { - upb_decframe frame; - frame.msg = NULL; - frame.m = NULL; - frame.group_number = field_number; - frame.limit = limit; - - while (d->ptr < frame.limit) { - int wire_type; - int field_number; - - CHK(upb_decode_tag(&d->ptr, frame.limit, &field_number, &wire_type)); - CHK(upb_skip_unknownfielddata(d, &frame, field_number, wire_type)); - } - - return true; -} - -static bool upb_decode_message(upb_decstate *d, const char *limit, - int group_number, char *msg, - const upb_msglayout *l) { +static bool upb_decode_message(upb_decstate *d, char *msg, const upb_msglayout *l) { upb_decframe frame; - frame.group_number = group_number; - frame.limit = limit; frame.msg = msg; - frame.m = l; + frame.layout = l; + frame.state = d; - while (d->ptr < frame.limit) { + while (d->ptr < d->limit) { CHK(upb_decode_field(d, &frame)); } @@ -570,9 +597,11 @@ bool upb_decode(const char *buf, size_t size, void *msg, const upb_msglayout *l, upb_arena *arena) { upb_decstate state; state.ptr = buf; + state.limit = buf + size; state.arena = arena; - return upb_decode_message(&state, buf + size, 0, msg, l); + CHK(upb_decode_message(&state, msg, l)); + return state.parse_status == 0; } #undef CHK diff --git a/upb/decode.h b/upb/decode.h index d21b6599f8..9de8638de5 100644 --- a/upb/decode.h +++ b/upb/decode.h @@ -12,7 +12,7 @@ extern "C" { #endif bool upb_decode(const char *buf, size_t size, upb_msg *msg, - const upb_msglayout *l); + const upb_msglayout *l, upb_arena *arena); #ifdef __cplusplus } /* extern "C" */ diff --git a/upbc/generator.cc b/upbc/generator.cc index 53d849e364..7096278180 100644 --- a/upbc/generator.cc +++ b/upbc/generator.cc @@ -338,7 +338,7 @@ void GenerateMessageInHeader(const protobuf::Descriptor* message, Output& output "UPB_INLINE $0 *$0_parse(const char *buf, size_t size,\n" " upb_arena *arena) {\n" " $0 *ret = $0_new(arena);\n" - " return (ret && upb_decode(buf, size, ret, &$1)) ? ret : NULL;\n" + " return (ret && upb_decode(buf, size, ret, &$1, arena)) ? ret : NULL;\n" "}\n" "UPB_INLINE char *$0_serialize(const $0 *msg, upb_arena *arena, size_t " "*len) {\n" From 56779f09eb993bcde5b7bc1c7de9ad943d6cd5ff Mon Sep 17 00:00:00 2001 From: Joshua Haberman Date: Mon, 3 Jun 2019 14:44:52 -0700 Subject: [PATCH 4/6] All tests pass again. --- .../google/protobuf/descriptor.upb.h | 54 +++++++++---------- upb/bindings/lua/upb/pb.c | 2 +- upb/decode.c | 10 ++-- 3 files changed, 34 insertions(+), 32 deletions(-) diff --git a/generated_for_cmake/google/protobuf/descriptor.upb.h b/generated_for_cmake/google/protobuf/descriptor.upb.h index 3016c9170d..681614910e 100644 --- a/generated_for_cmake/google/protobuf/descriptor.upb.h +++ b/generated_for_cmake/google/protobuf/descriptor.upb.h @@ -162,7 +162,7 @@ UPB_INLINE google_protobuf_FileDescriptorSet *google_protobuf_FileDescriptorSet_ UPB_INLINE google_protobuf_FileDescriptorSet *google_protobuf_FileDescriptorSet_parse(const char *buf, size_t size, upb_arena *arena) { google_protobuf_FileDescriptorSet *ret = google_protobuf_FileDescriptorSet_new(arena); - return (ret && upb_decode(buf, size, ret, &google_protobuf_FileDescriptorSet_msginit)) ? ret : NULL; + return (ret && upb_decode(buf, size, ret, &google_protobuf_FileDescriptorSet_msginit, arena)) ? ret : NULL; } UPB_INLINE char *google_protobuf_FileDescriptorSet_serialize(const google_protobuf_FileDescriptorSet *msg, upb_arena *arena, size_t *len) { return upb_encode(msg, &google_protobuf_FileDescriptorSet_msginit, arena, len); @@ -192,7 +192,7 @@ UPB_INLINE google_protobuf_FileDescriptorProto *google_protobuf_FileDescriptorPr UPB_INLINE google_protobuf_FileDescriptorProto *google_protobuf_FileDescriptorProto_parse(const char *buf, size_t size, upb_arena *arena) { google_protobuf_FileDescriptorProto *ret = google_protobuf_FileDescriptorProto_new(arena); - return (ret && upb_decode(buf, size, ret, &google_protobuf_FileDescriptorProto_msginit)) ? ret : NULL; + return (ret && upb_decode(buf, size, ret, &google_protobuf_FileDescriptorProto_msginit, arena)) ? ret : NULL; } UPB_INLINE char *google_protobuf_FileDescriptorProto_serialize(const google_protobuf_FileDescriptorProto *msg, upb_arena *arena, size_t *len) { return upb_encode(msg, &google_protobuf_FileDescriptorProto_msginit, arena, len); @@ -345,7 +345,7 @@ UPB_INLINE google_protobuf_DescriptorProto *google_protobuf_DescriptorProto_new( UPB_INLINE google_protobuf_DescriptorProto *google_protobuf_DescriptorProto_parse(const char *buf, size_t size, upb_arena *arena) { google_protobuf_DescriptorProto *ret = google_protobuf_DescriptorProto_new(arena); - return (ret && upb_decode(buf, size, ret, &google_protobuf_DescriptorProto_msginit)) ? ret : NULL; + return (ret && upb_decode(buf, size, ret, &google_protobuf_DescriptorProto_msginit, arena)) ? ret : NULL; } UPB_INLINE char *google_protobuf_DescriptorProto_serialize(const google_protobuf_DescriptorProto *msg, upb_arena *arena, size_t *len) { return upb_encode(msg, &google_protobuf_DescriptorProto_msginit, arena, len); @@ -491,7 +491,7 @@ UPB_INLINE google_protobuf_DescriptorProto_ExtensionRange *google_protobuf_Descr UPB_INLINE google_protobuf_DescriptorProto_ExtensionRange *google_protobuf_DescriptorProto_ExtensionRange_parse(const char *buf, size_t size, upb_arena *arena) { google_protobuf_DescriptorProto_ExtensionRange *ret = google_protobuf_DescriptorProto_ExtensionRange_new(arena); - return (ret && upb_decode(buf, size, ret, &google_protobuf_DescriptorProto_ExtensionRange_msginit)) ? ret : NULL; + return (ret && upb_decode(buf, size, ret, &google_protobuf_DescriptorProto_ExtensionRange_msginit, arena)) ? ret : NULL; } UPB_INLINE char *google_protobuf_DescriptorProto_ExtensionRange_serialize(const google_protobuf_DescriptorProto_ExtensionRange *msg, upb_arena *arena, size_t *len) { return upb_encode(msg, &google_protobuf_DescriptorProto_ExtensionRange_msginit, arena, len); @@ -534,7 +534,7 @@ UPB_INLINE google_protobuf_DescriptorProto_ReservedRange *google_protobuf_Descri UPB_INLINE google_protobuf_DescriptorProto_ReservedRange *google_protobuf_DescriptorProto_ReservedRange_parse(const char *buf, size_t size, upb_arena *arena) { google_protobuf_DescriptorProto_ReservedRange *ret = google_protobuf_DescriptorProto_ReservedRange_new(arena); - return (ret && upb_decode(buf, size, ret, &google_protobuf_DescriptorProto_ReservedRange_msginit)) ? ret : NULL; + return (ret && upb_decode(buf, size, ret, &google_protobuf_DescriptorProto_ReservedRange_msginit, arena)) ? ret : NULL; } UPB_INLINE char *google_protobuf_DescriptorProto_ReservedRange_serialize(const google_protobuf_DescriptorProto_ReservedRange *msg, upb_arena *arena, size_t *len) { return upb_encode(msg, &google_protobuf_DescriptorProto_ReservedRange_msginit, arena, len); @@ -562,7 +562,7 @@ UPB_INLINE google_protobuf_ExtensionRangeOptions *google_protobuf_ExtensionRange UPB_INLINE google_protobuf_ExtensionRangeOptions *google_protobuf_ExtensionRangeOptions_parse(const char *buf, size_t size, upb_arena *arena) { google_protobuf_ExtensionRangeOptions *ret = google_protobuf_ExtensionRangeOptions_new(arena); - return (ret && upb_decode(buf, size, ret, &google_protobuf_ExtensionRangeOptions_msginit)) ? ret : NULL; + return (ret && upb_decode(buf, size, ret, &google_protobuf_ExtensionRangeOptions_msginit, arena)) ? ret : NULL; } UPB_INLINE char *google_protobuf_ExtensionRangeOptions_serialize(const google_protobuf_ExtensionRangeOptions *msg, upb_arena *arena, size_t *len) { return upb_encode(msg, &google_protobuf_ExtensionRangeOptions_msginit, arena, len); @@ -592,7 +592,7 @@ UPB_INLINE google_protobuf_FieldDescriptorProto *google_protobuf_FieldDescriptor UPB_INLINE google_protobuf_FieldDescriptorProto *google_protobuf_FieldDescriptorProto_parse(const char *buf, size_t size, upb_arena *arena) { google_protobuf_FieldDescriptorProto *ret = google_protobuf_FieldDescriptorProto_new(arena); - return (ret && upb_decode(buf, size, ret, &google_protobuf_FieldDescriptorProto_msginit)) ? ret : NULL; + return (ret && upb_decode(buf, size, ret, &google_protobuf_FieldDescriptorProto_msginit, arena)) ? ret : NULL; } UPB_INLINE char *google_protobuf_FieldDescriptorProto_serialize(const google_protobuf_FieldDescriptorProto *msg, upb_arena *arena, size_t *len) { return upb_encode(msg, &google_protobuf_FieldDescriptorProto_msginit, arena, len); @@ -677,7 +677,7 @@ UPB_INLINE google_protobuf_OneofDescriptorProto *google_protobuf_OneofDescriptor UPB_INLINE google_protobuf_OneofDescriptorProto *google_protobuf_OneofDescriptorProto_parse(const char *buf, size_t size, upb_arena *arena) { google_protobuf_OneofDescriptorProto *ret = google_protobuf_OneofDescriptorProto_new(arena); - return (ret && upb_decode(buf, size, ret, &google_protobuf_OneofDescriptorProto_msginit)) ? ret : NULL; + return (ret && upb_decode(buf, size, ret, &google_protobuf_OneofDescriptorProto_msginit, arena)) ? ret : NULL; } UPB_INLINE char *google_protobuf_OneofDescriptorProto_serialize(const google_protobuf_OneofDescriptorProto *msg, upb_arena *arena, size_t *len) { return upb_encode(msg, &google_protobuf_OneofDescriptorProto_msginit, arena, len); @@ -714,7 +714,7 @@ UPB_INLINE google_protobuf_EnumDescriptorProto *google_protobuf_EnumDescriptorPr UPB_INLINE google_protobuf_EnumDescriptorProto *google_protobuf_EnumDescriptorProto_parse(const char *buf, size_t size, upb_arena *arena) { google_protobuf_EnumDescriptorProto *ret = google_protobuf_EnumDescriptorProto_new(arena); - return (ret && upb_decode(buf, size, ret, &google_protobuf_EnumDescriptorProto_msginit)) ? ret : NULL; + return (ret && upb_decode(buf, size, ret, &google_protobuf_EnumDescriptorProto_msginit, arena)) ? ret : NULL; } UPB_INLINE char *google_protobuf_EnumDescriptorProto_serialize(const google_protobuf_EnumDescriptorProto *msg, upb_arena *arena, size_t *len) { return upb_encode(msg, &google_protobuf_EnumDescriptorProto_msginit, arena, len); @@ -790,7 +790,7 @@ UPB_INLINE google_protobuf_EnumDescriptorProto_EnumReservedRange *google_protobu UPB_INLINE google_protobuf_EnumDescriptorProto_EnumReservedRange *google_protobuf_EnumDescriptorProto_EnumReservedRange_parse(const char *buf, size_t size, upb_arena *arena) { google_protobuf_EnumDescriptorProto_EnumReservedRange *ret = google_protobuf_EnumDescriptorProto_EnumReservedRange_new(arena); - return (ret && upb_decode(buf, size, ret, &google_protobuf_EnumDescriptorProto_EnumReservedRange_msginit)) ? ret : NULL; + return (ret && upb_decode(buf, size, ret, &google_protobuf_EnumDescriptorProto_EnumReservedRange_msginit, arena)) ? ret : NULL; } UPB_INLINE char *google_protobuf_EnumDescriptorProto_EnumReservedRange_serialize(const google_protobuf_EnumDescriptorProto_EnumReservedRange *msg, upb_arena *arena, size_t *len) { return upb_encode(msg, &google_protobuf_EnumDescriptorProto_EnumReservedRange_msginit, arena, len); @@ -818,7 +818,7 @@ UPB_INLINE google_protobuf_EnumValueDescriptorProto *google_protobuf_EnumValueDe UPB_INLINE google_protobuf_EnumValueDescriptorProto *google_protobuf_EnumValueDescriptorProto_parse(const char *buf, size_t size, upb_arena *arena) { google_protobuf_EnumValueDescriptorProto *ret = google_protobuf_EnumValueDescriptorProto_new(arena); - return (ret && upb_decode(buf, size, ret, &google_protobuf_EnumValueDescriptorProto_msginit)) ? ret : NULL; + return (ret && upb_decode(buf, size, ret, &google_protobuf_EnumValueDescriptorProto_msginit, arena)) ? ret : NULL; } UPB_INLINE char *google_protobuf_EnumValueDescriptorProto_serialize(const google_protobuf_EnumValueDescriptorProto *msg, upb_arena *arena, size_t *len) { return upb_encode(msg, &google_protobuf_EnumValueDescriptorProto_msginit, arena, len); @@ -861,7 +861,7 @@ UPB_INLINE google_protobuf_ServiceDescriptorProto *google_protobuf_ServiceDescri UPB_INLINE google_protobuf_ServiceDescriptorProto *google_protobuf_ServiceDescriptorProto_parse(const char *buf, size_t size, upb_arena *arena) { google_protobuf_ServiceDescriptorProto *ret = google_protobuf_ServiceDescriptorProto_new(arena); - return (ret && upb_decode(buf, size, ret, &google_protobuf_ServiceDescriptorProto_msginit)) ? ret : NULL; + return (ret && upb_decode(buf, size, ret, &google_protobuf_ServiceDescriptorProto_msginit, arena)) ? ret : NULL; } UPB_INLINE char *google_protobuf_ServiceDescriptorProto_serialize(const google_protobuf_ServiceDescriptorProto *msg, upb_arena *arena, size_t *len) { return upb_encode(msg, &google_protobuf_ServiceDescriptorProto_msginit, arena, len); @@ -912,7 +912,7 @@ UPB_INLINE google_protobuf_MethodDescriptorProto *google_protobuf_MethodDescript UPB_INLINE google_protobuf_MethodDescriptorProto *google_protobuf_MethodDescriptorProto_parse(const char *buf, size_t size, upb_arena *arena) { google_protobuf_MethodDescriptorProto *ret = google_protobuf_MethodDescriptorProto_new(arena); - return (ret && upb_decode(buf, size, ret, &google_protobuf_MethodDescriptorProto_msginit)) ? ret : NULL; + return (ret && upb_decode(buf, size, ret, &google_protobuf_MethodDescriptorProto_msginit, arena)) ? ret : NULL; } UPB_INLINE char *google_protobuf_MethodDescriptorProto_serialize(const google_protobuf_MethodDescriptorProto *msg, upb_arena *arena, size_t *len) { return upb_encode(msg, &google_protobuf_MethodDescriptorProto_msginit, arena, len); @@ -973,7 +973,7 @@ UPB_INLINE google_protobuf_FileOptions *google_protobuf_FileOptions_new(upb_aren UPB_INLINE google_protobuf_FileOptions *google_protobuf_FileOptions_parse(const char *buf, size_t size, upb_arena *arena) { google_protobuf_FileOptions *ret = google_protobuf_FileOptions_new(arena); - return (ret && upb_decode(buf, size, ret, &google_protobuf_FileOptions_msginit)) ? ret : NULL; + return (ret && upb_decode(buf, size, ret, &google_protobuf_FileOptions_msginit, arena)) ? ret : NULL; } UPB_INLINE char *google_protobuf_FileOptions_serialize(const google_protobuf_FileOptions *msg, upb_arena *arena, size_t *len) { return upb_encode(msg, &google_protobuf_FileOptions_msginit, arena, len); @@ -1123,7 +1123,7 @@ UPB_INLINE google_protobuf_MessageOptions *google_protobuf_MessageOptions_new(up UPB_INLINE google_protobuf_MessageOptions *google_protobuf_MessageOptions_parse(const char *buf, size_t size, upb_arena *arena) { google_protobuf_MessageOptions *ret = google_protobuf_MessageOptions_new(arena); - return (ret && upb_decode(buf, size, ret, &google_protobuf_MessageOptions_msginit)) ? ret : NULL; + return (ret && upb_decode(buf, size, ret, &google_protobuf_MessageOptions_msginit, arena)) ? ret : NULL; } UPB_INLINE char *google_protobuf_MessageOptions_serialize(const google_protobuf_MessageOptions *msg, upb_arena *arena, size_t *len) { return upb_encode(msg, &google_protobuf_MessageOptions_msginit, arena, len); @@ -1177,7 +1177,7 @@ UPB_INLINE google_protobuf_FieldOptions *google_protobuf_FieldOptions_new(upb_ar UPB_INLINE google_protobuf_FieldOptions *google_protobuf_FieldOptions_parse(const char *buf, size_t size, upb_arena *arena) { google_protobuf_FieldOptions *ret = google_protobuf_FieldOptions_new(arena); - return (ret && upb_decode(buf, size, ret, &google_protobuf_FieldOptions_msginit)) ? ret : NULL; + return (ret && upb_decode(buf, size, ret, &google_protobuf_FieldOptions_msginit, arena)) ? ret : NULL; } UPB_INLINE char *google_protobuf_FieldOptions_serialize(const google_protobuf_FieldOptions *msg, upb_arena *arena, size_t *len) { return upb_encode(msg, &google_protobuf_FieldOptions_msginit, arena, len); @@ -1243,7 +1243,7 @@ UPB_INLINE google_protobuf_OneofOptions *google_protobuf_OneofOptions_new(upb_ar UPB_INLINE google_protobuf_OneofOptions *google_protobuf_OneofOptions_parse(const char *buf, size_t size, upb_arena *arena) { google_protobuf_OneofOptions *ret = google_protobuf_OneofOptions_new(arena); - return (ret && upb_decode(buf, size, ret, &google_protobuf_OneofOptions_msginit)) ? ret : NULL; + return (ret && upb_decode(buf, size, ret, &google_protobuf_OneofOptions_msginit, arena)) ? ret : NULL; } UPB_INLINE char *google_protobuf_OneofOptions_serialize(const google_protobuf_OneofOptions *msg, upb_arena *arena, size_t *len) { return upb_encode(msg, &google_protobuf_OneofOptions_msginit, arena, len); @@ -1273,7 +1273,7 @@ UPB_INLINE google_protobuf_EnumOptions *google_protobuf_EnumOptions_new(upb_aren UPB_INLINE google_protobuf_EnumOptions *google_protobuf_EnumOptions_parse(const char *buf, size_t size, upb_arena *arena) { google_protobuf_EnumOptions *ret = google_protobuf_EnumOptions_new(arena); - return (ret && upb_decode(buf, size, ret, &google_protobuf_EnumOptions_msginit)) ? ret : NULL; + return (ret && upb_decode(buf, size, ret, &google_protobuf_EnumOptions_msginit, arena)) ? ret : NULL; } UPB_INLINE char *google_protobuf_EnumOptions_serialize(const google_protobuf_EnumOptions *msg, upb_arena *arena, size_t *len) { return upb_encode(msg, &google_protobuf_EnumOptions_msginit, arena, len); @@ -1315,7 +1315,7 @@ UPB_INLINE google_protobuf_EnumValueOptions *google_protobuf_EnumValueOptions_ne UPB_INLINE google_protobuf_EnumValueOptions *google_protobuf_EnumValueOptions_parse(const char *buf, size_t size, upb_arena *arena) { google_protobuf_EnumValueOptions *ret = google_protobuf_EnumValueOptions_new(arena); - return (ret && upb_decode(buf, size, ret, &google_protobuf_EnumValueOptions_msginit)) ? ret : NULL; + return (ret && upb_decode(buf, size, ret, &google_protobuf_EnumValueOptions_msginit, arena)) ? ret : NULL; } UPB_INLINE char *google_protobuf_EnumValueOptions_serialize(const google_protobuf_EnumValueOptions *msg, upb_arena *arena, size_t *len) { return upb_encode(msg, &google_protobuf_EnumValueOptions_msginit, arena, len); @@ -1351,7 +1351,7 @@ UPB_INLINE google_protobuf_ServiceOptions *google_protobuf_ServiceOptions_new(up UPB_INLINE google_protobuf_ServiceOptions *google_protobuf_ServiceOptions_parse(const char *buf, size_t size, upb_arena *arena) { google_protobuf_ServiceOptions *ret = google_protobuf_ServiceOptions_new(arena); - return (ret && upb_decode(buf, size, ret, &google_protobuf_ServiceOptions_msginit)) ? ret : NULL; + return (ret && upb_decode(buf, size, ret, &google_protobuf_ServiceOptions_msginit, arena)) ? ret : NULL; } UPB_INLINE char *google_protobuf_ServiceOptions_serialize(const google_protobuf_ServiceOptions *msg, upb_arena *arena, size_t *len) { return upb_encode(msg, &google_protobuf_ServiceOptions_msginit, arena, len); @@ -1387,7 +1387,7 @@ UPB_INLINE google_protobuf_MethodOptions *google_protobuf_MethodOptions_new(upb_ UPB_INLINE google_protobuf_MethodOptions *google_protobuf_MethodOptions_parse(const char *buf, size_t size, upb_arena *arena) { google_protobuf_MethodOptions *ret = google_protobuf_MethodOptions_new(arena); - return (ret && upb_decode(buf, size, ret, &google_protobuf_MethodOptions_msginit)) ? ret : NULL; + return (ret && upb_decode(buf, size, ret, &google_protobuf_MethodOptions_msginit, arena)) ? ret : NULL; } UPB_INLINE char *google_protobuf_MethodOptions_serialize(const google_protobuf_MethodOptions *msg, upb_arena *arena, size_t *len) { return upb_encode(msg, &google_protobuf_MethodOptions_msginit, arena, len); @@ -1429,7 +1429,7 @@ UPB_INLINE google_protobuf_UninterpretedOption *google_protobuf_UninterpretedOpt UPB_INLINE google_protobuf_UninterpretedOption *google_protobuf_UninterpretedOption_parse(const char *buf, size_t size, upb_arena *arena) { google_protobuf_UninterpretedOption *ret = google_protobuf_UninterpretedOption_new(arena); - return (ret && upb_decode(buf, size, ret, &google_protobuf_UninterpretedOption_msginit)) ? ret : NULL; + return (ret && upb_decode(buf, size, ret, &google_protobuf_UninterpretedOption_msginit, arena)) ? ret : NULL; } UPB_INLINE char *google_protobuf_UninterpretedOption_serialize(const google_protobuf_UninterpretedOption *msg, upb_arena *arena, size_t *len) { return upb_encode(msg, &google_protobuf_UninterpretedOption_msginit, arena, len); @@ -1495,7 +1495,7 @@ UPB_INLINE google_protobuf_UninterpretedOption_NamePart *google_protobuf_Uninter UPB_INLINE google_protobuf_UninterpretedOption_NamePart *google_protobuf_UninterpretedOption_NamePart_parse(const char *buf, size_t size, upb_arena *arena) { google_protobuf_UninterpretedOption_NamePart *ret = google_protobuf_UninterpretedOption_NamePart_new(arena); - return (ret && upb_decode(buf, size, ret, &google_protobuf_UninterpretedOption_NamePart_msginit)) ? ret : NULL; + return (ret && upb_decode(buf, size, ret, &google_protobuf_UninterpretedOption_NamePart_msginit, arena)) ? ret : NULL; } UPB_INLINE char *google_protobuf_UninterpretedOption_NamePart_serialize(const google_protobuf_UninterpretedOption_NamePart *msg, upb_arena *arena, size_t *len) { return upb_encode(msg, &google_protobuf_UninterpretedOption_NamePart_msginit, arena, len); @@ -1523,7 +1523,7 @@ UPB_INLINE google_protobuf_SourceCodeInfo *google_protobuf_SourceCodeInfo_new(up UPB_INLINE google_protobuf_SourceCodeInfo *google_protobuf_SourceCodeInfo_parse(const char *buf, size_t size, upb_arena *arena) { google_protobuf_SourceCodeInfo *ret = google_protobuf_SourceCodeInfo_new(arena); - return (ret && upb_decode(buf, size, ret, &google_protobuf_SourceCodeInfo_msginit)) ? ret : NULL; + return (ret && upb_decode(buf, size, ret, &google_protobuf_SourceCodeInfo_msginit, arena)) ? ret : NULL; } UPB_INLINE char *google_protobuf_SourceCodeInfo_serialize(const google_protobuf_SourceCodeInfo *msg, upb_arena *arena, size_t *len) { return upb_encode(msg, &google_protobuf_SourceCodeInfo_msginit, arena, len); @@ -1553,7 +1553,7 @@ UPB_INLINE google_protobuf_SourceCodeInfo_Location *google_protobuf_SourceCodeIn UPB_INLINE google_protobuf_SourceCodeInfo_Location *google_protobuf_SourceCodeInfo_Location_parse(const char *buf, size_t size, upb_arena *arena) { google_protobuf_SourceCodeInfo_Location *ret = google_protobuf_SourceCodeInfo_Location_new(arena); - return (ret && upb_decode(buf, size, ret, &google_protobuf_SourceCodeInfo_Location_msginit)) ? ret : NULL; + return (ret && upb_decode(buf, size, ret, &google_protobuf_SourceCodeInfo_Location_msginit, arena)) ? ret : NULL; } UPB_INLINE char *google_protobuf_SourceCodeInfo_Location_serialize(const google_protobuf_SourceCodeInfo_Location *msg, upb_arena *arena, size_t *len) { return upb_encode(msg, &google_protobuf_SourceCodeInfo_Location_msginit, arena, len); @@ -1614,7 +1614,7 @@ UPB_INLINE google_protobuf_GeneratedCodeInfo *google_protobuf_GeneratedCodeInfo_ UPB_INLINE google_protobuf_GeneratedCodeInfo *google_protobuf_GeneratedCodeInfo_parse(const char *buf, size_t size, upb_arena *arena) { google_protobuf_GeneratedCodeInfo *ret = google_protobuf_GeneratedCodeInfo_new(arena); - return (ret && upb_decode(buf, size, ret, &google_protobuf_GeneratedCodeInfo_msginit)) ? ret : NULL; + return (ret && upb_decode(buf, size, ret, &google_protobuf_GeneratedCodeInfo_msginit, arena)) ? ret : NULL; } UPB_INLINE char *google_protobuf_GeneratedCodeInfo_serialize(const google_protobuf_GeneratedCodeInfo *msg, upb_arena *arena, size_t *len) { return upb_encode(msg, &google_protobuf_GeneratedCodeInfo_msginit, arena, len); @@ -1644,7 +1644,7 @@ UPB_INLINE google_protobuf_GeneratedCodeInfo_Annotation *google_protobuf_Generat UPB_INLINE google_protobuf_GeneratedCodeInfo_Annotation *google_protobuf_GeneratedCodeInfo_Annotation_parse(const char *buf, size_t size, upb_arena *arena) { google_protobuf_GeneratedCodeInfo_Annotation *ret = google_protobuf_GeneratedCodeInfo_Annotation_new(arena); - return (ret && upb_decode(buf, size, ret, &google_protobuf_GeneratedCodeInfo_Annotation_msginit)) ? ret : NULL; + return (ret && upb_decode(buf, size, ret, &google_protobuf_GeneratedCodeInfo_Annotation_msginit, arena)) ? ret : NULL; } UPB_INLINE char *google_protobuf_GeneratedCodeInfo_Annotation_serialize(const google_protobuf_GeneratedCodeInfo_Annotation *msg, upb_arena *arena, size_t *len) { return upb_encode(msg, &google_protobuf_GeneratedCodeInfo_Annotation_msginit, arena, len); diff --git a/upb/bindings/lua/upb/pb.c b/upb/bindings/lua/upb/pb.c index b02fa3efef..266bd974c9 100644 --- a/upb/bindings/lua/upb/pb.c +++ b/upb/bindings/lua/upb/pb.c @@ -17,7 +17,7 @@ static int lupb_pb_decode(lua_State *L) { upb_msg *msg = lupb_msg_checkmsg2(L, 1, &layout); const char *pb = lua_tolstring(L, 2, &len); - upb_decode(pb, len, msg, layout); + upb_decode(pb, len, msg, layout, lupb_arena_get(L)); /* TODO(haberman): check for error. */ return 0; diff --git a/upb/decode.c b/upb/decode.c index 5719d12aec..eb3306f98d 100644 --- a/upb/decode.c +++ b/upb/decode.c @@ -150,14 +150,14 @@ static bool upb_skip_unknownfielddata(upb_decstate *d, uint32_t tag, } case UPB_WIRE_TYPE_DELIMITED: { int len; - return upb_decode_string(&d->ptr, d->limit, &len); + CHK(upb_decode_string(&d->ptr, d->limit, &len)); d->ptr += len; + return true; } case UPB_WIRE_TYPE_START_GROUP: return upb_skip_unknowngroup(d, tag >> 3); case UPB_WIRE_TYPE_END_GROUP: - CHK((tag >> 3) == group_fieldnum); - return true; + return (tag >> 3) == group_fieldnum; } return false; } @@ -479,7 +479,7 @@ static bool upb_decode_toarray(upb_decstate *d, upb_decframe *frame, const upb_msglayout *subm; upb_msg *submsg = upb_addmsg(frame, field, &subm); CHK(submsg); - return upb_decode_message(d, submsg, subm); + return upb_decode_msgfield(d, submsg, subm, len); } case UPB_DESCRIPTOR_TYPE_GROUP: return upb_append_unknown(d, frame); @@ -599,6 +599,8 @@ bool upb_decode(const char *buf, size_t size, void *msg, const upb_msglayout *l, state.ptr = buf; state.limit = buf + size; state.arena = arena; + state.depth = 64; + state.parse_status = 0; CHK(upb_decode_message(&state, msg, l)); return state.parse_status == 0; From da4e616860f303da11dd63da1472fa2a49f2160b Mon Sep 17 00:00:00 2001 From: Joshua Haberman Date: Mon, 3 Jun 2019 15:05:30 -0700 Subject: [PATCH 5/6] Removed type from upb_array also. --- upb/bindings/lua/msg.c | 11 +++++--- upb/bindings/lua/upb.h | 1 + upb/decode.c | 50 ++++++++++++++++++------------------- upb/encode.c | 25 ------------------- upb/generated_util.h | 2 +- upb/legacy_msg_reflection.c | 16 +++++------- upb/legacy_msg_reflection.h | 5 ++-- upb/msg.c | 7 ++---- upb/msg.h | 4 +-- 9 files changed, 46 insertions(+), 75 deletions(-) diff --git a/upb/bindings/lua/msg.c b/upb/bindings/lua/msg.c index 4b8a854437..5e769b2f0d 100644 --- a/upb/bindings/lua/msg.c +++ b/upb/bindings/lua/msg.c @@ -129,7 +129,7 @@ char lupb_arena_cache_key; * Callers can be guaranteed that it will be alive as long as |L| is. * TODO(haberman): we shouldn't use a global arena! We should have * one arena for a parse, or per independently-created message. */ -static upb_arena *lupb_arena_get(lua_State *L) { +upb_arena *lupb_arena_get(lua_State *L) { upb_arena *arena; lua_pushlightuserdata(L, &lupb_arena_cache_key); @@ -473,6 +473,7 @@ typedef struct { * case but simplifies the code. Could optimize away if desired. */ const lupb_msgclass *lmsgclass; upb_array *arr; + upb_fieldtype_t type; } lupb_array; #define ARRAY_MSGCLASS_INDEX 0 @@ -540,8 +541,9 @@ static int lupb_array_new(lua_State *L) { } larray = lupb_newuserdata(L, sizeof(*larray), LUPB_ARRAY); + larray->type = type; larray->lmsgclass = lmsgclass; - larray->arr = upb_array_new(type, lupb_arena_get(L)); + larray->arr = upb_array_new(lupb_arena_get(L)); return 1; } @@ -552,7 +554,7 @@ static int lupb_array_newindex(lua_State *L) { uint32_t n = lupb_array_checkindex(L, 2, upb_array_size(larray->arr) + 1); upb_msgval msgval = lupb_tomsgval(L, type, 3, larray->lmsgclass); - upb_array_set(larray->arr, n, msgval); + upb_array_set(larray->arr, larray->type, n, msgval, lupb_arena_get(L)); if (lupb_istypewrapped(type)) { lupb_uservalseti(L, 1, n, 3); @@ -570,7 +572,8 @@ static int lupb_array_index(lua_State *L) { if (lupb_istypewrapped(type)) { lupb_uservalgeti(L, 1, n); } else { - lupb_pushmsgval(L, upb_array_type(array), upb_array_get(array, n)); + lupb_pushmsgval(L, upb_array_type(array), + upb_array_get(array, larray->type, n)); } return 1; diff --git a/upb/bindings/lua/upb.h b/upb/bindings/lua/upb.h index 6861286c75..51d8acf9e4 100644 --- a/upb/bindings/lua/upb.h +++ b/upb/bindings/lua/upb.h @@ -111,6 +111,7 @@ typedef struct lupb_msgclass lupb_msgclass; upb_arena *lupb_arena_check(lua_State *L, int narg); int lupb_arena_new(lua_State *L); +upb_arena *lupb_arena_get(lua_State *L); int lupb_msg_pushref(lua_State *L, int msgclass, void *msg); const upb_msg *lupb_msg_checkmsg(lua_State *L, int narg, const lupb_msgclass *lmsgclass); diff --git a/upb/decode.c b/upb/decode.c index eb3306f98d..559e640a8a 100644 --- a/upb/decode.c +++ b/upb/decode.c @@ -174,13 +174,14 @@ static bool upb_skip_unknowngroup(upb_decstate *d, int field_number) { return true; } -static bool upb_array_grow(upb_array *arr, size_t elements, size_t elem_size) { +static bool upb_array_grow(upb_array *arr, size_t elements, size_t elem_size, + upb_arena *arena) { size_t needed = arr->len + elements; size_t new_size = UPB_MAX(arr->size, 8); size_t new_bytes; size_t old_bytes; void *new_data; - upb_alloc *alloc = upb_arena_alloc(arr->arena); + upb_alloc *alloc = upb_arena_alloc(arena); while (new_size < needed) { new_size *= 2; @@ -197,16 +198,16 @@ static bool upb_array_grow(upb_array *arr, size_t elements, size_t elem_size) { } static void *upb_array_reserve(upb_array *arr, size_t elements, - size_t elem_size) { + size_t elem_size, upb_arena *arena) { if (arr->size - arr->len < elements) { - CHK(upb_array_grow(arr, elements, elem_size)); + CHK(upb_array_grow(arr, elements, elem_size, arena)); } return (char*)arr->data + (arr->len * elem_size); } bool upb_array_add(upb_array *arr, size_t elements, size_t elem_size, - const void *data) { - void *dest = upb_array_reserve(arr, elements, elem_size); + const void *data, upb_arena *arena) { + void *dest = upb_array_reserve(arr, elements, elem_size, arena); CHK(dest); arr->len += elements; @@ -226,8 +227,7 @@ static upb_array *upb_getorcreatearr(upb_decframe *frame, upb_array *arr = upb_getarr(frame, field); if (!arr) { - upb_fieldtype_t type = upb_desctype_to_fieldtype[field->descriptortype]; - arr = upb_array_new(type, frame->state->arena); + arr = upb_array_new(frame->state->arena); CHK(arr); *(upb_array**)&frame->msg[field->offset] = arr; } @@ -260,7 +260,7 @@ static upb_msg *upb_addmsg(upb_decframe *frame, *subm = frame->layout->submsgs[field->submsg_index]; submsg = upb_msg_new(*subm, frame->state->arena); CHK(submsg); - upb_array_add(arr, 1, sizeof(submsg), &submsg); + upb_array_add(arr, 1, sizeof(submsg), &submsg, frame->state->arena); return submsg; } @@ -287,7 +287,7 @@ static bool upb_decode_addval(upb_decframe *frame, if (field->label == UPB_LABEL_REPEATED) { arr = upb_getorcreatearr(frame, field); CHK(arr); - field_mem = upb_array_reserve(arr, 1, size); + field_mem = upb_array_reserve(arr, 1, size, frame->state->arena); CHK(field_mem); } @@ -414,7 +414,7 @@ static bool upb_decode_fixedpacked(upb_decstate *d, upb_array *arr, size_t elements = len / elem_size; CHK((size_t)(elements * elem_size) == len); - CHK(upb_array_add(arr, elements, elem_size, d->ptr)); + CHK(upb_array_add(arr, elements, elem_size, d->ptr, d->arena)); d->ptr += len; return true; @@ -433,26 +433,26 @@ static bool upb_decode_toarray(upb_decstate *d, upb_decframe *frame, upb_array *arr = upb_getorcreatearr(frame, field); CHK(arr); -#define VARINT_CASE(ctype, decode) \ - { \ - const char *ptr = d->ptr; \ - const char *limit = ptr + len; \ - while (ptr < limit) { \ - uint64_t val; \ - ctype decoded; \ - CHK(upb_decode_varint(&ptr, limit, &val)); \ - decoded = (decode)(val); \ - CHK(upb_array_add(arr, 1, sizeof(decoded), &decoded)); \ - } \ - d->ptr = ptr; \ - return true; \ +#define VARINT_CASE(ctype, decode) \ + { \ + const char *ptr = d->ptr; \ + const char *limit = ptr + len; \ + while (ptr < limit) { \ + uint64_t val; \ + ctype decoded; \ + CHK(upb_decode_varint(&ptr, limit, &val)); \ + decoded = (decode)(val); \ + CHK(upb_array_add(arr, 1, sizeof(decoded), &decoded, d->arena)); \ + } \ + d->ptr = ptr; \ + return true; \ } switch (field->descriptortype) { case UPB_DESCRIPTOR_TYPE_STRING: case UPB_DESCRIPTOR_TYPE_BYTES: { upb_strview str = upb_decode_strfield(d, len); - return upb_array_add(arr, 1, sizeof(str), &str); + return upb_array_add(arr, 1, sizeof(str), &str, d->arena); } case UPB_DESCRIPTOR_TYPE_FLOAT: case UPB_DESCRIPTOR_TYPE_FIXED32: diff --git a/upb/encode.c b/upb/encode.c index 576d23893b..0f1580374a 100644 --- a/upb/encode.c +++ b/upb/encode.c @@ -12,29 +12,6 @@ #define UPB_PB_VARINT_MAX_LEN 10 #define CHK(x) do { if (!(x)) { return false; } } while(0) -/* Maps descriptor type -> upb field type. */ -static const uint8_t upb_desctype_to_fieldtype2[] = { - UPB_WIRE_TYPE_END_GROUP, /* ENDGROUP */ - UPB_TYPE_DOUBLE, /* DOUBLE */ - UPB_TYPE_FLOAT, /* FLOAT */ - UPB_TYPE_INT64, /* INT64 */ - UPB_TYPE_UINT64, /* UINT64 */ - UPB_TYPE_INT32, /* INT32 */ - UPB_TYPE_UINT64, /* FIXED64 */ - UPB_TYPE_UINT32, /* FIXED32 */ - UPB_TYPE_BOOL, /* BOOL */ - UPB_TYPE_STRING, /* STRING */ - UPB_TYPE_MESSAGE, /* GROUP */ - UPB_TYPE_MESSAGE, /* MESSAGE */ - UPB_TYPE_BYTES, /* BYTES */ - UPB_TYPE_UINT32, /* UINT32 */ - UPB_TYPE_ENUM, /* ENUM */ - UPB_TYPE_INT32, /* SFIXED32 */ - UPB_TYPE_INT64, /* SFIXED64 */ - UPB_TYPE_INT32, /* SINT32 */ - UPB_TYPE_INT64, /* SINT64 */ -}; - static size_t upb_encode_varint(uint64_t val, char *buf) { size_t i; if (val < 128) { buf[0] = val; return 1; } @@ -166,8 +143,6 @@ static bool upb_encode_array(upb_encstate *e, const char *field_mem, return true; } - UPB_ASSERT(arr->type == upb_desctype_to_fieldtype2[f->descriptortype]); - #define VARINT_CASE(ctype, encode) { \ ctype *start = arr->data; \ ctype *ptr = start + arr->len; \ diff --git a/upb/generated_util.h b/upb/generated_util.h index 314a6d5b04..a0b3e8df4c 100644 --- a/upb/generated_util.h +++ b/upb/generated_util.h @@ -46,7 +46,7 @@ UPB_INLINE void *_upb_array_resize_accessor(void *msg, size_t ofs, size_t size, upb_array *arr = *PTR_AT(msg, ofs, upb_array*); if (!arr) { - arr = upb_array_new(type, arena); + arr = upb_array_new(arena); if (!arr) return NULL; *PTR_AT(msg, ofs, upb_array*) = arr; } diff --git a/upb/legacy_msg_reflection.c b/upb/legacy_msg_reflection.c index 0683dde545..81a878bb54 100644 --- a/upb/legacy_msg_reflection.c +++ b/upb/legacy_msg_reflection.c @@ -167,18 +167,15 @@ size_t upb_array_size(const upb_array *arr) { return arr->len; } -upb_fieldtype_t upb_array_type(const upb_array *arr) { - return arr->type; -} - -upb_msgval upb_array_get(const upb_array *arr, size_t i) { - size_t element_size = upb_msgval_sizeof(arr->type); +upb_msgval upb_array_get(const upb_array *arr, upb_fieldtype_t type, size_t i) { + size_t element_size = upb_msgval_sizeof(type); UPB_ASSERT(i < arr->len); return upb_msgval_read(arr->data, i * element_size, element_size); } -bool upb_array_set(upb_array *arr, size_t i, upb_msgval val) { - size_t element_size = upb_msgval_sizeof(arr->type); +bool upb_array_set(upb_array *arr, upb_fieldtype_t type, size_t i, + upb_msgval val, upb_arena *arena) { + size_t element_size = upb_msgval_sizeof(type); UPB_ASSERT(i <= arr->len); if (i == arr->len) { @@ -189,7 +186,7 @@ bool upb_array_set(upb_array *arr, size_t i, upb_msgval val) { size_t new_size = UPB_MAX(arr->size * 2, 8); size_t new_bytes = new_size * element_size; size_t old_bytes = arr->size * element_size; - upb_alloc *alloc = upb_arena_alloc(arr->arena); + upb_alloc *alloc = upb_arena_alloc(arena); upb_msgval *new_data = upb_realloc(alloc, arr->data, old_bytes, new_bytes); @@ -208,7 +205,6 @@ bool upb_array_set(upb_array *arr, size_t i, upb_msgval val) { return true; } - /** upb_map *******************************************************************/ struct upb_map { diff --git a/upb/legacy_msg_reflection.h b/upb/legacy_msg_reflection.h index 32a621b4b7..b33ec6804f 100644 --- a/upb/legacy_msg_reflection.h +++ b/upb/legacy_msg_reflection.h @@ -126,12 +126,13 @@ upb_fieldtype_t upb_array_type(const upb_array *arr); /* Read-only interface. Safe for anyone to call. */ size_t upb_array_size(const upb_array *arr); -upb_msgval upb_array_get(const upb_array *arr, size_t i); +upb_msgval upb_array_get(const upb_array *arr, upb_fieldtype_t type, size_t i); /* Write interface. May only be called by the message's owner who can enforce * its memory management invariants. */ -bool upb_array_set(upb_array *arr, size_t i, upb_msgval val); +bool upb_array_set(upb_array *arr, upb_fieldtype_t type, size_t i, + upb_msgval val, upb_arena *arena); /** upb_map *******************************************************************/ diff --git a/upb/msg.c b/upb/msg.c index 31607b75e8..93d89a5afa 100644 --- a/upb/msg.c +++ b/upb/msg.c @@ -74,19 +74,16 @@ upb_msg *upb_msg_new(const upb_msglayout *l, upb_arena *a) { return msg; } -upb_array *upb_array_new(upb_fieldtype_t type, upb_arena *a) { - upb_alloc *alloc = upb_arena_alloc(a); - upb_array *ret = upb_malloc(alloc, sizeof(upb_array)); +upb_array *upb_array_new(upb_arena *a) { + upb_array *ret = upb_arena_malloc(a, sizeof(upb_array)); if (!ret) { return NULL; } - ret->type = type; ret->data = NULL; ret->len = 0; ret->size = 0; - ret->arena = a; return ret; } diff --git a/upb/msg.h b/upb/msg.h index 7f1a6e7b7e..4bec023bd3 100644 --- a/upb/msg.h +++ b/upb/msg.h @@ -48,11 +48,9 @@ typedef struct upb_msglayout { /* Our internal representation for repeated fields. */ typedef struct { - upb_fieldtype_t type; void *data; /* Each element is element_size. */ size_t len; /* Measured in elements. */ size_t size; /* Measured in elements. */ - upb_arena *arena; } upb_array; upb_msg *upb_msg_new(const upb_msglayout *l, upb_arena *a); @@ -62,7 +60,7 @@ void upb_msg_addunknown(upb_msg *msg, const char *data, size_t len, upb_arena *arena); const char *upb_msg_getunknown(const upb_msg *msg, size_t *len); -upb_array *upb_array_new(upb_fieldtype_t type, upb_arena *a); +upb_array *upb_array_new(upb_arena *a); #ifdef __cplusplus } /* extern "C" */ From 9960d7c772381fef7a51097091faba71459e7169 Mon Sep 17 00:00:00 2001 From: Joshua Haberman Date: Mon, 3 Jun 2019 15:08:45 -0700 Subject: [PATCH 6/6] Renamed end_group field number and updated documentation. --- upb/decode.c | 30 +++++++++++------------------- 1 file changed, 11 insertions(+), 19 deletions(-) diff --git a/upb/decode.c b/upb/decode.c index 559e640a8a..5ba1f05d77 100644 --- a/upb/decode.c +++ b/upb/decode.c @@ -33,17 +33,9 @@ typedef struct { const char *ptr; /* Current parsing position. */ const char *field_start; /* Start of this field. */ const char *limit; /* End of delimited region or end of buffer. */ - int depth; - - /* Signals how the parse ended: - * - when 0: parse ended at delimited limit. - * - when 1: parse ended due to end-of-stream. - * - otherwise: parse ended due to a terminating tag (either 0 or END_GROUP). - * - * In the last case, tag-1 is stored, to avoid conflicting with case 0. */ - uint32_t parse_status; - upb_arena *arena; + int depth; + uint32_t end_group; /* Set to field number of END_GROUP tag, if any. */ } upb_decstate; /* Data passed by value to each parsing function. */ @@ -163,14 +155,14 @@ static bool upb_skip_unknownfielddata(upb_decstate *d, uint32_t tag, } static bool upb_skip_unknowngroup(upb_decstate *d, int field_number) { - while (d->ptr < d->limit && d->parse_status == 0) { + while (d->ptr < d->limit && d->end_group == 0) { uint32_t tag = 0; CHK(upb_decode_varint32(&d->ptr, d->limit, &tag)); CHK(upb_skip_unknownfielddata(d, tag, field_number)); } - CHK(d->parse_status == field_number); - d->parse_status = 0; + CHK(d->end_group == field_number); + d->end_group = 0; return true; } @@ -316,7 +308,7 @@ static bool upb_decode_msgfield(upb_decstate *d, upb_msg *msg, upb_decode_message(d, msg, layout); d->depth++; d->limit = saved_limit; - CHK(d->parse_status == 0); + CHK(d->end_group == 0); return true; } @@ -326,8 +318,8 @@ static bool upb_decode_groupfield(upb_decstate *d, upb_msg *msg, CHK(--d->depth >= 0); upb_decode_message(d, msg, layout); d->depth++; - CHK(d->parse_status == field_number); - d->parse_status = 0; + CHK(d->end_group == field_number); + d->end_group = 0; return true; } @@ -567,7 +559,7 @@ static bool upb_decode_field(upb_decstate *d, upb_decframe *frame) { return upb_decode_groupfield(d, group, layout, field_number); } case UPB_WIRE_TYPE_END_GROUP: - d->parse_status = field_number; + d->end_group = field_number; return true; default: CHK(false); @@ -600,10 +592,10 @@ bool upb_decode(const char *buf, size_t size, void *msg, const upb_msglayout *l, state.limit = buf + size; state.arena = arena; state.depth = 64; - state.parse_status = 0; + state.end_group = 0; CHK(upb_decode_message(&state, msg, l)); - return state.parse_status == 0; + return state.end_group == 0; } #undef CHK