Repeated string and primitive support.

Much of the code was adapted from Gerben's code in:
6333031195
pull/13171/head
Joshua Haberman 4 years ago
parent e9103eda9e
commit e2c709e047
  1. 14
      cmake/google/protobuf/descriptor.upb.c
  2. 6
      tests/test_generated_code.c
  3. 1
      upb/decode.int.h
  4. 581
      upb/decode_fast.c
  5. 32
      upb/decode_fast.h
  6. 6
      upb/upb.c
  7. 8
      upb/upb.h
  8. 17
      upbc/generator.cc

@ -89,15 +89,15 @@ const upb_msglayout google_protobuf_FileDescriptorProto_msginit = {
{&fastdecode_generic, UPB_SIZE(0, 0)}, {&fastdecode_generic, UPB_SIZE(0, 0)},
{&upb_pss_1bt, UPB_SIZE(1125899906973706, 2251799813816330)}, {&upb_pss_1bt, UPB_SIZE(1125899906973706, 2251799813816330)},
{&upb_pss_1bt, UPB_SIZE(3377699720790034, 6755399441317906)}, {&upb_pss_1bt, UPB_SIZE(3377699720790034, 6755399441317906)},
{&fastdecode_generic, UPB_SIZE(0, 0)}, {&upb_prs_1bt, UPB_SIZE(10133099161583642, 20266198323167258)},
{&upb_prm_1bt_max128b, UPB_SIZE(11258999068426274, 22517998136852514)}, {&upb_prm_1bt_max128b, UPB_SIZE(11258999068426274, 22517998136852514)},
{&upb_prm_1bt_max128b, UPB_SIZE(12384898975334442, 24769797950603306)}, {&upb_prm_1bt_max128b, UPB_SIZE(12384898975334442, 24769797950603306)},
{&upb_prm_1bt_max64b, UPB_SIZE(13510798882373682, 27021597764485170)}, {&upb_prm_1bt_max64b, UPB_SIZE(13510798882373682, 27021597764485170)},
{&upb_prm_1bt_max128b, UPB_SIZE(14636698789085242, 29273397578039354)}, {&upb_prm_1bt_max128b, UPB_SIZE(14636698789085242, 29273397578039354)},
{&upb_psm_1bt_max256b, UPB_SIZE(7881385247440962, 15762684595339330)}, {&upb_psm_1bt_max256b, UPB_SIZE(7881385247440962, 15762684595339330)},
{&upb_psm_1bt_max64b, UPB_SIZE(9007289449381962, 18014488704122954)}, {&upb_psm_1bt_max64b, UPB_SIZE(9007289449381962, 18014488704122954)},
{&fastdecode_generic, UPB_SIZE(0, 0)}, {&upb_prv4_1bt, UPB_SIZE(15762598695796816, 31525197391593552)},
{&fastdecode_generic, UPB_SIZE(0, 0)}, {&upb_prv4_1bt, UPB_SIZE(16888498602639448, 33776997205278808)},
{&upb_pss_1bt, UPB_SIZE(5629499534737506, 11258999068950626)}, {&upb_pss_1bt, UPB_SIZE(5629499534737506, 11258999068950626)},
{&fastdecode_generic, UPB_SIZE(0, 0)}, {&fastdecode_generic, UPB_SIZE(0, 0)},
{&fastdecode_generic, UPB_SIZE(0, 0)}, {&fastdecode_generic, UPB_SIZE(0, 0)},
@ -159,7 +159,7 @@ const upb_msglayout google_protobuf_DescriptorProto_msginit = {
{&upb_psm_1bt_max64b, UPB_SIZE(3377777030266938, 6755476750794810)}, {&upb_psm_1bt_max64b, UPB_SIZE(3377777030266938, 6755476750794810)},
{&upb_prm_1bt_max64b, UPB_SIZE(10133099161976898, 20266198323560514)}, {&upb_prm_1bt_max64b, UPB_SIZE(10133099161976898, 20266198323560514)},
{&upb_prm_1bt_max64b, UPB_SIZE(11258999068557386, 22517998136983626)}, {&upb_prm_1bt_max64b, UPB_SIZE(11258999068557386, 22517998136983626)},
{&fastdecode_generic, UPB_SIZE(0, 0)}, {&upb_prs_1bt, UPB_SIZE(12384898975268946, 24769797950537810)},
{&fastdecode_generic, UPB_SIZE(0, 0)}, {&fastdecode_generic, UPB_SIZE(0, 0)},
{&fastdecode_generic, UPB_SIZE(0, 0)}, {&fastdecode_generic, UPB_SIZE(0, 0)},
{&fastdecode_generic, UPB_SIZE(0, 0)}, {&fastdecode_generic, UPB_SIZE(0, 0)},
@ -458,7 +458,7 @@ const upb_msglayout google_protobuf_EnumDescriptorProto_msginit = {
{&upb_prm_1bt_max64b, UPB_SIZE(4503599627501586, 9007199254872082)}, {&upb_prm_1bt_max64b, UPB_SIZE(4503599627501586, 9007199254872082)},
{&upb_psm_1bt_max64b, UPB_SIZE(3377777030004762, 6755476750532634)}, {&upb_psm_1bt_max64b, UPB_SIZE(3377777030004762, 6755476750532634)},
{&upb_prm_1bt_max64b, UPB_SIZE(5629499534213154, 11258999068426274)}, {&upb_prm_1bt_max64b, UPB_SIZE(5629499534213154, 11258999068426274)},
{&fastdecode_generic, UPB_SIZE(0, 0)}, {&upb_prs_1bt, UPB_SIZE(6755399441055786, 13510798882111530)},
{&fastdecode_generic, UPB_SIZE(0, 0)}, {&fastdecode_generic, UPB_SIZE(0, 0)},
{&fastdecode_generic, UPB_SIZE(0, 0)}, {&fastdecode_generic, UPB_SIZE(0, 0)},
{&fastdecode_generic, UPB_SIZE(0, 0)}, {&fastdecode_generic, UPB_SIZE(0, 0)},
@ -1132,7 +1132,7 @@ const upb_msglayout google_protobuf_UninterpretedOption_msginit = {
{&upb_pss_1bt, UPB_SIZE(9007199255789594, 9007199255789594)}, {&upb_pss_1bt, UPB_SIZE(9007199255789594, 9007199255789594)},
{&upb_psv8_1bt, UPB_SIZE(2251799813816352, 2251799813816352)}, {&upb_psv8_1bt, UPB_SIZE(2251799813816352, 2251799813816352)},
{&upb_psv8_1bt, UPB_SIZE(4503599627632680, 4503599627632680)}, {&upb_psv8_1bt, UPB_SIZE(4503599627632680, 4503599627632680)},
{&fastdecode_generic, UPB_SIZE(0, 0)}, {&upb_psf8_1bt, UPB_SIZE(6755399441580080, 6755399441580080)},
{&upb_pss_1bt, UPB_SIZE(11258999070523450, 13510798884208698)}, {&upb_pss_1bt, UPB_SIZE(11258999070523450, 13510798884208698)},
{&upb_pss_1bt, UPB_SIZE(13510798886305858, 18014398513676354)}, {&upb_pss_1bt, UPB_SIZE(13510798886305858, 18014398513676354)},
{&fastdecode_generic, UPB_SIZE(0, 0)}, {&fastdecode_generic, UPB_SIZE(0, 0)},
@ -1273,7 +1273,7 @@ const upb_msglayout google_protobuf_SourceCodeInfo_Location_msginit = {
{&upb_pss_1bt, UPB_SIZE(1125899906973722, 2251799813816346)}, {&upb_pss_1bt, UPB_SIZE(1125899906973722, 2251799813816346)},
{&upb_pss_1bt, UPB_SIZE(3377699720790050, 6755399441317922)}, {&upb_pss_1bt, UPB_SIZE(3377699720790050, 6755399441317922)},
{&fastdecode_generic, UPB_SIZE(0, 0)}, {&fastdecode_generic, UPB_SIZE(0, 0)},
{&fastdecode_generic, UPB_SIZE(0, 0)}, {&upb_prs_1bt, UPB_SIZE(7881299347898418, 15762598695796786)},
{&fastdecode_generic, UPB_SIZE(0, 0)}, {&fastdecode_generic, UPB_SIZE(0, 0)},
{&fastdecode_generic, UPB_SIZE(0, 0)}, {&fastdecode_generic, UPB_SIZE(0, 0)},
{&fastdecode_generic, UPB_SIZE(0, 0)}, {&fastdecode_generic, UPB_SIZE(0, 0)},

@ -30,6 +30,7 @@ static void test_scalars() {
protobuf_test_messages_proto3_TestAllTypesProto3_new(arena); protobuf_test_messages_proto3_TestAllTypesProto3_new(arena);
protobuf_test_messages_proto3_TestAllTypesProto3 *msg2; protobuf_test_messages_proto3_TestAllTypesProto3 *msg2;
upb_strview serialized; upb_strview serialized;
upb_strview val;
protobuf_test_messages_proto3_TestAllTypesProto3_set_optional_int32(msg, 10); protobuf_test_messages_proto3_TestAllTypesProto3_set_optional_int32(msg, 10);
protobuf_test_messages_proto3_TestAllTypesProto3_set_optional_int64(msg, 20); protobuf_test_messages_proto3_TestAllTypesProto3_set_optional_int64(msg, 20);
@ -61,9 +62,8 @@ static void test_scalars() {
msg2) == 60.6); msg2) == 60.6);
ASSERT(protobuf_test_messages_proto3_TestAllTypesProto3_optional_bool( ASSERT(protobuf_test_messages_proto3_TestAllTypesProto3_optional_bool(
msg2) == 1); msg2) == 1);
ASSERT(upb_strview_eql( val = protobuf_test_messages_proto3_TestAllTypesProto3_optional_string(msg2);
protobuf_test_messages_proto3_TestAllTypesProto3_optional_string(msg2), ASSERT(upb_strview_eql(val, test_str_view));
test_str_view));
upb_arena_free(arena); upb_arena_free(arena);
} }

@ -58,6 +58,7 @@ const char *decode_isdonefallback_inl(upb_decstate *d, const char *ptr,
UPB_ASSERT(ptr < d->limit_ptr); UPB_ASSERT(ptr < d->limit_ptr);
return ptr; return ptr;
} else { } else {
/* Parse error: we read past our limit. */
return NULL; return NULL;
} }
} }

@ -29,25 +29,6 @@ typedef enum {
CARD_r = 2 /* Repeated */ CARD_r = 2 /* Repeated */
} upb_card; } upb_card;
/* Allocates a zero-filled message for layout `l` from the decoder's arena and
 * returns a pointer just past its upb_msg_internal header.
 *
 * d:              decoder state whose arena supplies the memory.
 * l:              layout of the message; l->size is the payload size.
 * msg_ceil_bytes: upper bound on the total allocation size, or <= 0 to force
 *                 the generic upb_arena_malloc() path. */
UPB_INLINE
upb_msg *decode_newmsg_ceil(upb_decstate *d, const upb_msglayout *l,
int msg_ceil_bytes) {
size_t size = l->size + sizeof(upb_msg_internal);
char *msg_data;
/* Fast path: the arena can supply the whole ceiling, so bump the head pointer
 * directly instead of calling the allocator. */
if (UPB_LIKELY(msg_ceil_bytes > 0 && _upb_arenahas(&d->arena, msg_ceil_bytes))) {
UPB_ASSERT(size <= (size_t)msg_ceil_bytes);
msg_data = d->arena.head.ptr;
/* Only `size` bytes are claimed from the arena... */
d->arena.head.ptr += size;
UPB_UNPOISON_MEMORY_REGION(msg_data, msg_ceil_bytes);
/* ...but the full ceiling is cleared; the tail past `size` is poisoned again
 * right after. Presumably this keeps the memset length a constant -- confirm. */
memset(msg_data, 0, msg_ceil_bytes);
UPB_POISON_MEMORY_REGION(msg_data + size, msg_ceil_bytes - size);
} else {
/* NOTE(review): the upb_arena_malloc() result is not checked before memset. */
msg_data = (char*)upb_arena_malloc(&d->arena, size);
memset(msg_data, 0, size);
}
return msg_data + sizeof(upb_msg_internal);
}
UPB_FORCEINLINE UPB_FORCEINLINE
static const char *fastdecode_tagdispatch(upb_decstate *d, const char *ptr, static const char *fastdecode_tagdispatch(upb_decstate *d, const char *ptr,
upb_msg *msg, upb_msg *msg,
@ -113,12 +94,105 @@ static bool fastdecode_checktag(uint64_t data, int tagbytes) {
} }
} }
/* Finishes decoding a multi-byte size varint.
 *
 * On entry *size holds the first byte of the varint (continuation bit set) and
 * `ptr` points at the second byte.  Up to four more bytes are consumed.
 *
 * Returns the pointer just past the varint, or NULL if the fifth byte is >= 8
 * (the size would not fit in 31 bits). */
UPB_FORCEINLINE
static const char *fastdecode_longsize(const char *ptr, int *size) {
  int shift;
  size_t cur;

  UPB_ASSERT(*size & 0x80);
  *size &= 0xff;

  /* Bytes 2..4 land at bit offsets 7, 14 and 21.  Adding (cur - 1) << shift
   * both merges the new byte and cancels the continuation bit contributed by
   * the previous byte (which sits exactly at bit `shift`). */
  for (shift = 7; shift <= 21; shift += 7) {
    cur = (uint8_t)*ptr++;
    *size += (cur - 1) << shift;
    if (UPB_LIKELY(!(cur & 0x80))) return ptr;
  }

  /* Fifth and final byte.  Sizes are capped at 2GB rather than 4GB, so only
   * three payload bits are acceptable here (limit 8, not the 16 a full 32-bit
   * varint would allow). */
  cur = (uint8_t)*ptr++;
  if (UPB_UNLIKELY(cur >= 8)) return NULL;
  *size += (cur - 1) << 28;
  return ptr;
}
/* singular, oneof, repeated field handling ***********************************/
/* Cursor state for appending to a repeated field while fast-decoding. */
typedef struct {
upb_array *arr;  /* Array being appended to. */
void *end;       /* One past the last allocated element slot (data + size * elem bytes). */
} fastdecode_arr;
/* What follows the element that was just parsed in a repeated field. */
typedef enum {
FD_NEXT_ATLIMIT,     /* decode_isdone() reported that the input/limit was reached. */
FD_NEXT_SAMEFIELD,   /* The next tag matches this field: parse another element. */
FD_NEXT_OTHERFIELD   /* The next tag belongs to a different field. */
} fastdecode_next;
/* Result of fastdecode_nextrepeated(). */
typedef struct {
void *dst;             /* Slot for the next element (previous dst advanced by one element). */
fastdecode_next next;  /* Which case applies; see fastdecode_next. */
uint32_t tag;          /* Tag loaded from the input; not set when next == FD_NEXT_ATLIMIT. */
} fastdecode_nextret;
/* Ensures there is room for one more element at `dst` in a repeated field.
 *
 * d:        decoder state; its arena backs the array storage.
 * dst:      slot the caller is about to write.
 * farr:     array cursor; farr->arr and farr->end are updated on growth.
 * valbytes: size in bytes of one element (expected to be a power of two).
 *
 * If `dst` has reached farr->end the backing storage is doubled via
 * upb_arena_realloc() and the returned pointer refers to the same logical slot
 * in the new storage; otherwise `dst` is returned unchanged. */
UPB_FORCEINLINE
static void *fastdecode_resizearr(upb_decstate *d, void *dst,
                                  fastdecode_arr *farr, int valbytes) {
  if (UPB_UNLIKELY(dst == farr->end)) {
    size_t old_size = farr->arr->size;
    size_t old_bytes = old_size * valbytes;
    size_t new_size = old_size * 2;
    size_t new_bytes = new_size * valbytes;
    /* The tag stored with the data pointer must describe this array's element
     * size.  It was previously hard-coded to 3 (8-byte elements), which is
     * wrong for 4-byte scalars and for upb_strview elements. */
    uint8_t elem_size_lg2 = __builtin_ctz(valbytes);
    char *old_ptr = _upb_array_ptr(farr->arr);
    /* NOTE(review): a NULL result from upb_arena_realloc() is not handled. */
    char *new_ptr = upb_arena_realloc(&d->arena, old_ptr, old_bytes, new_bytes);
    farr->arr->size = new_size;
    farr->arr->data = _upb_array_tagptr(new_ptr, elem_size_lg2);
    dst = (void*)(new_ptr + (old_size * valbytes));
    farr->end = (void*)(new_ptr + (new_size * valbytes));
  }
  return dst;
}
/* Returns true if the low `tagbytes` bytes (1 or 2) of `tag` equal the low
 * bytes of `data`.  Any tagbytes value other than 1 compares two bytes. */
UPB_FORCEINLINE
static bool fastdecode_tagmatch(uint32_t tag, uint64_t data, int tagbytes) {
  return tagbytes == 1 ? (uint8_t)tag == (uint8_t)data
                       : (uint16_t)tag == (uint16_t)data;
}
/* Advances past the repeated-field element just written at `dst` and decides
 * what to parse next.
 *
 * d:        decoder state.
 * dst:      slot that was just filled.
 * ptr:      in/out input cursor (handed to decode_isdone()).
 * farr:     array cursor for the field.
 * data:     expected tag for this field, in its low `tagbytes` bytes.
 * tagbytes: tag width in bytes (1 or 2).
 * valbytes: size in bytes of one array element.
 *
 * The array's `len` is written back only when the run of same-field elements
 * ends (other field, or limit reached).  ret.tag is left unset in the
 * at-limit case. */
UPB_FORCEINLINE
static fastdecode_nextret fastdecode_nextrepeated(upb_decstate *d, void *dst,
                                                  const char **ptr,
                                                  fastdecode_arr *farr,
                                                  uint64_t data, int tagbytes,
                                                  int valbytes) {
  fastdecode_nextret result;
  char *next_slot = (char *)dst + valbytes;

  result.dst = next_slot;

  if (UPB_LIKELY(!decode_isdone(d, ptr))) {
    result.tag = fastdecode_loadtag(*ptr);
    if (fastdecode_tagmatch(result.tag, data, tagbytes)) {
      /* Same field again: keep going without touching arr->len. */
      result.next = FD_NEXT_SAMEFIELD;
      return result;
    }
    result.next = FD_NEXT_OTHERFIELD;
  } else {
    result.next = FD_NEXT_ATLIMIT;
  }

  /* The run is over: publish how many elements the array now holds. */
  farr->arr->len =
      (size_t)(next_slot - (char *)_upb_array_ptr(farr->arr)) / valbytes;
  return result;
}
UPB_FORCEINLINE UPB_FORCEINLINE
static void *fastdecode_getfield_ofs(upb_decstate *d, const char *ptr, static void *fastdecode_getfield_ofs(upb_decstate *d, const char *ptr,
upb_msg *msg, uint64_t *data, upb_msg *msg, uint64_t *data,
uint64_t *hasbits, upb_array **outarr, uint64_t *hasbits, fastdecode_arr *farr,
void **end, int valbytes, int valbytes, upb_card card,
upb_card card, bool hasbit_is_idx) { bool hasbit_is_idx) {
size_t ofs = *data >> 48; size_t ofs = *data >> 48;
void *field = (char *)msg + ofs; void *field = (char *)msg + ofs;
@ -135,22 +209,20 @@ static void *fastdecode_getfield_ofs(upb_decstate *d, const char *ptr,
// Get pointer to upb_array and allocate/expand if necessary. // Get pointer to upb_array and allocate/expand if necessary.
uint8_t elem_size_lg2 = __builtin_ctz(valbytes); uint8_t elem_size_lg2 = __builtin_ctz(valbytes);
upb_array **arr_p = field; upb_array **arr_p = field;
upb_array *arr;
char *begin; char *begin;
*hasbits >>= 16; *hasbits >>= 16;
*(uint32_t*)msg |= *hasbits; *(uint32_t*)msg |= *hasbits;
*hasbits = 0; *hasbits = 0;
if (UPB_LIKELY(!*arr_p)) { if (UPB_LIKELY(!*arr_p)) {
arr = _upb_array_new(&d->arena, 8, elem_size_lg2); farr->arr = _upb_array_new(&d->arena, 8, elem_size_lg2);
*arr_p = arr; *arr_p = farr->arr;
} else { } else {
arr = *arr_p; farr->arr = *arr_p;
} }
begin = _upb_array_ptr(arr); begin = _upb_array_ptr(farr->arr);
field = begin + (arr->len * valbytes); field = begin + (farr->arr->len * valbytes);
*end = begin + (arr->size * valbytes); farr->end = begin + (farr->arr->size * valbytes);
*data = fastdecode_loadtag(ptr); *data = fastdecode_loadtag(ptr);
*outarr = arr;
return field; return field;
} }
default: default:
@ -158,14 +230,6 @@ static void *fastdecode_getfield_ofs(upb_decstate *d, const char *ptr,
} }
} }
/* Convenience wrapper around fastdecode_getfield_ofs() for callers that do not
 * need the array out-parameters: passes NULL for both of them and false for
 * hasbit_is_idx. */
UPB_FORCEINLINE
static void *fastdecode_getfield(upb_decstate *d, const char *ptr, upb_msg *msg,
uint64_t *data, uint64_t *hasbits,
int valbytes, upb_card card) {
return fastdecode_getfield_ofs(d, ptr, msg, data, hasbits, NULL, NULL,
valbytes, card, false);
}
/* varint fields **************************************************************/ /* varint fields **************************************************************/
UPB_FORCEINLINE UPB_FORCEINLINE
@ -189,11 +253,25 @@ static const char *fastdecode_varint(UPB_PARSE_PARAMS, int tagbytes,
int valbytes, upb_card card, bool zigzag) { int valbytes, upb_card card, bool zigzag) {
uint64_t val; uint64_t val;
void *dst; void *dst;
fastdecode_arr farr;
if (UPB_UNLIKELY(!fastdecode_checktag(data, tagbytes))) { if (UPB_UNLIKELY(!fastdecode_checktag(data, tagbytes))) {
RETURN_GENERIC("varint field tag mismatch\n"); RETURN_GENERIC("varint field tag mismatch\n");
} }
dst = fastdecode_getfield(d, ptr, msg, &data, &hasbits, valbytes,
card); dst = fastdecode_getfield_ofs(d, ptr, msg, &data, &hasbits, &farr, valbytes,
card, false);
if (card == CARD_r) {
if (UPB_UNLIKELY(!dst)) {
RETURN_GENERIC("need array resize\n");
}
}
again:
if (card == CARD_r) {
dst = fastdecode_resizearr(d, dst, &farr, valbytes);
}
ptr += tagbytes + 1; ptr += tagbytes + 1;
val = (uint8_t)ptr[-1]; val = (uint8_t)ptr[-1];
if (UPB_UNLIKELY(val & 0x80)) { if (UPB_UNLIKELY(val & 0x80)) {
@ -206,12 +284,30 @@ static const char *fastdecode_varint(UPB_PARSE_PARAMS, int tagbytes,
} }
ptr++; ptr++;
uint64_t byte = (uint8_t)ptr[-1]; uint64_t byte = (uint8_t)ptr[-1];
if (byte > 1) return fastdecode_err(d); if (byte > 1) {
return fastdecode_err(d);
}
val += (byte - 1) << 63; val += (byte - 1) << 63;
} }
done: done:
val = fastdecode_munge(val, valbytes, zigzag); val = fastdecode_munge(val, valbytes, zigzag);
memcpy(dst, &val, valbytes); memcpy(dst, &val, valbytes);
if (card == CARD_r) {
fastdecode_nextret ret =
fastdecode_nextrepeated(d, dst, &ptr, &farr, data, tagbytes, valbytes);
switch (ret.next) {
case FD_NEXT_SAMEFIELD:
dst = ret.dst;
goto again;
case FD_NEXT_OTHERFIELD:
return fastdecode_tagdispatch(d, ptr, msg, table, hasbits, ret.tag);
case FD_NEXT_ATLIMIT:
return ptr;
}
}
return fastdecode_dispatch(d, ptr, msg, table, hasbits); return fastdecode_dispatch(d, ptr, msg, table, hasbits);
} }
@ -219,7 +315,7 @@ done:
#define b_ZZ false #define b_ZZ false
#define v_ZZ false #define v_ZZ false
/* Generate all varint functions. /* Generate all combinations:
* {s,o,r} x {b1,v4,z4,v8,z8} x {1bt,2bt} */ * {s,o,r} x {b1,v4,z4,v8,z8} x {1bt,2bt} */
#define F(card, type, valbytes, tagbytes) \ #define F(card, type, valbytes, tagbytes) \
@ -241,7 +337,7 @@ done:
TAGBYTES(s) TAGBYTES(s)
TAGBYTES(o) TAGBYTES(o)
/* TAGBYTES(r) */ TAGBYTES(r)
#undef z_ZZ #undef z_ZZ
#undef b_ZZ #undef b_ZZ
@ -253,6 +349,78 @@ TAGBYTES(o)
#undef TYPES #undef TYPES
#undef TAGBYTES #undef TAGBYTES
/* fixed fields ***************************************************************/
/* Fast-path parser for fixed-width fields.
 *
 * tagbytes: width of the field tag on the wire (1 or 2 bytes).
 * valbytes: width of the value (the generators below instantiate 4 and 8).
 * card:     field cardinality (CARD_s / CARD_o / CARD_r).
 *
 * The value bytes are copied verbatim from the input with memcpy().  For
 * repeated fields the function keeps consuming elements for as long as the
 * next tag matches this field.  Falls back to the generic decoder when the
 * tag does not match or the destination could not be obtained. */
UPB_FORCEINLINE
static const char *fastdecode_fixed(UPB_PARSE_PARAMS, int tagbytes,
                                    int valbytes, upb_card card) {
  void *dst;
  fastdecode_arr farr;

  if (UPB_UNLIKELY(!fastdecode_checktag(data, tagbytes))) {
    RETURN_GENERIC("fixed field tag mismatch\n");
  }

  dst = fastdecode_getfield_ofs(d, ptr, msg, &data, &hasbits, &farr, valbytes,
                                card, false);
  if (card == CARD_r) {
    if (UPB_UNLIKELY(!dst)) {
      RETURN_GENERIC("couldn't allocate array in arena\n");
    }
  }

again:
  if (card == CARD_r) {
    dst = fastdecode_resizearr(d, dst, &farr, valbytes);
  }

  /* Skip the tag, then copy the raw value bytes. */
  ptr += tagbytes;
  memcpy(dst, ptr, valbytes);
  ptr += valbytes;

  if (card == CARD_r) {
    /* The element stride is valbytes, matching the resize call above.  This
     * previously passed sizeof(upb_msg *), which mis-advances dst and
     * miscomputes the array length whenever valbytes differs from the pointer
     * size (e.g. 4-byte fixed fields). */
    fastdecode_nextret ret = fastdecode_nextrepeated(
        d, dst, &ptr, &farr, data, tagbytes, valbytes);
    switch (ret.next) {
      case FD_NEXT_SAMEFIELD:
        dst = ret.dst;
        goto again;
      case FD_NEXT_OTHERFIELD:
        return fastdecode_tagdispatch(d, ptr, msg, table, hasbits, ret.tag);
      case FD_NEXT_ATLIMIT:
        return ptr;
    }
  }

  return fastdecode_dispatch(d, ptr, msg, table, hasbits);
}
/* Generate all combinations:
* {s,o,r} x {f4,f8} x {1bt,2bt}
*
* Each F() expansion defines one exported parser named
* upb_p<card>f<valbytes>_<tagbytes>bt (for example upb_psf4_1bt) that forwards
* to fastdecode_fixed() with the matching tag width, value width and CARD_*
* constant.  TYPES() fans out over the value widths, TAGBYTES() over the tag
* widths, and the three TAGBYTES() invocations over the cardinalities.  The
* helper macros are undefined again afterwards so other sections can reuse
* the names. */
#define F(card, valbytes, tagbytes) \
const char *upb_p##card##f##valbytes##_##tagbytes##bt(UPB_PARSE_PARAMS) { \
return fastdecode_fixed(UPB_PARSE_ARGS, tagbytes, valbytes, CARD_##card); \
}
#define TYPES(card, tagbytes) \
F(card, 4, tagbytes) \
F(card, 8, tagbytes)
#define TAGBYTES(card) \
TYPES(card, 1) \
TYPES(card, 2)
TAGBYTES(s)
TAGBYTES(o)
TAGBYTES(r)
#undef F
#undef TYPES
#undef TAGBYTES
/* string fields **************************************************************/ /* string fields **************************************************************/
UPB_FORCEINLINE UPB_FORCEINLINE
@ -269,110 +437,230 @@ typedef const char *fastdecode_copystr_func(struct upb_decstate *d,
const upb_msglayout *table, const upb_msglayout *table,
uint64_t hasbits, upb_strview *dst); uint64_t hasbits, upb_strview *dst);
UPB_FORCEINLINE UPB_NOINLINE
static const char *fastdecode_copystring(struct upb_decstate *d, static const char *fastdecode_longstring(struct upb_decstate *d,
const char *ptr, upb_msg *msg, const char *ptr, upb_msg *msg,
const upb_msglayout *table, const upb_msglayout *table,
uint64_t hasbits, upb_strview *dst, uint64_t hasbits, upb_strview *dst) {
int tagbytes) { int size = (uint8_t)ptr[0]; // Could plumb through hasbits.
int64_t len = (int8_t)*ptr;
ptr++; ptr++;
if (UPB_UNLIKELY(fastdecode_boundscheck(ptr, len, d->limit_ptr))) { if (size & 0x80) {
ptr -= tagbytes + 1; ptr = fastdecode_longsize(ptr, &size);
RETURN_GENERIC("string field len >1 byte\n");
} }
char *data = upb_arena_malloc(&d->arena, len);
if (!data) { if (UPB_UNLIKELY(fastdecode_boundscheck(ptr, size, d->limit_ptr))) {
dst->size = 0;
return fastdecode_err(d); return fastdecode_err(d);
} }
memcpy(data, ptr, len);
if (d->alias) {
dst->data = ptr;
dst->size = size;
} else {
char *data = upb_arena_malloc(&d->arena, size);
if (!data) {
return fastdecode_err(d);
}
memcpy(data, ptr, size);
dst->data = data;
dst->size = size;
}
return fastdecode_dispatch(d, ptr + size, msg, table, hasbits);
}
UPB_FORCEINLINE
static void fastdecode_docopy(upb_decstate *d, const char *ptr, uint32_t size,
int copy, char *data, upb_strview *dst) {
UPB_UNPOISON_MEMORY_REGION(data, copy);
memcpy(data, ptr, copy);
UPB_POISON_MEMORY_REGION(data + size, copy - size);
dst->data = data; dst->data = data;
dst->size = len; d->arena.head.ptr += copy;
return fastdecode_dispatch(d, ptr + len, msg, table, hasbits); }
UPB_FORCEINLINE
static const char *fastdecode_copystring(UPB_PARSE_PARAMS, int tagbytes,
upb_card card) {
upb_strview *dst;
fastdecode_arr farr;
int64_t size;
size_t arena_has;
size_t common_has;
char *buf;
UPB_ASSERT(!d->alias);
UPB_ASSERT(fastdecode_checktag(data, tagbytes));
dst = fastdecode_getfield_ofs(d, ptr, msg, &data, &hasbits, &farr,
sizeof(upb_strview), card, false);
again:
if (card == CARD_r) {
dst = fastdecode_resizearr(d, dst, &farr, sizeof(upb_strview));
}
size = (uint8_t)ptr[tagbytes];
ptr += tagbytes + 1;
dst->size = size;
if (UPB_UNLIKELY(size == 0)) {
goto done;
} else if (UPB_UNLIKELY(size > 127)) {
goto longstr;
}
buf = d->arena.head.ptr;
arena_has = _upb_arenahas(&d->arena);
common_has = UPB_MIN(arena_has, (d->end - ptr) + 16);
if (UPB_LIKELY(size <= 15 - tagbytes)) {
if (arena_has < 16) goto longstr;
memcpy(buf, ptr - tagbytes - 1, 16);
dst->data = buf + tagbytes + 1;
d->arena.head.ptr += 16;
} else if (UPB_LIKELY(size <= 32)) {
if (UPB_UNLIKELY(common_has < 32)) goto longstr;
fastdecode_docopy(d, ptr, size, 32, buf, dst);
} else if (UPB_LIKELY(size <= 64 && common_has >= 64)) {
if (UPB_UNLIKELY(common_has < 64)) goto longstr;
fastdecode_docopy(d, ptr, size, 64, buf, dst);
} else {
if (UPB_UNLIKELY(common_has < 128)) goto longstr;
fastdecode_docopy(d, ptr, size, 128, buf, dst);
}
ptr += size;
done:
if (card == CARD_r) {
fastdecode_nextret ret = fastdecode_nextrepeated(
d, dst, &ptr, &farr, data, tagbytes, sizeof(upb_strview));
switch (ret.next) {
case FD_NEXT_SAMEFIELD:
dst = ret.dst;
goto again;
case FD_NEXT_OTHERFIELD:
return fastdecode_tagdispatch(d, ptr, msg, table, hasbits, ret.tag);
case FD_NEXT_ATLIMIT:
return ptr;
}
}
return fastdecode_dispatch(d, ptr, msg, table, hasbits);
longstr:
ptr--;
return fastdecode_longstring(d, ptr, msg, table, hasbits, dst);
} }
UPB_FORCEINLINE UPB_FORCEINLINE
static const char *fastdecode_string(UPB_PARSE_PARAMS, int tagbytes, static const char *fastdecode_string(UPB_PARSE_PARAMS, int tagbytes,
upb_card card, fastdecode_copystr_func *func) { upb_card card,
_upb_field_parser *copyfunc) {
upb_strview *dst; upb_strview *dst;
int64_t len; fastdecode_arr farr;
int64_t size;
if (UPB_UNLIKELY(!fastdecode_checktag(data, tagbytes))) { if (UPB_UNLIKELY(!fastdecode_checktag(data, tagbytes))) {
RETURN_GENERIC("string field tag mismatch\n"); RETURN_GENERIC("string field tag mismatch\n");
} }
dst = fastdecode_getfield(d, ptr, msg, &data, &hasbits,
sizeof(upb_strview), card);
if (UPB_UNLIKELY(!d->alias)) { if (UPB_UNLIKELY(!d->alias)) {
len = (uint8_t)ptr[tagbytes]; return copyfunc(UPB_PARSE_ARGS);
if (UPB_UNLIKELY(len > 15 - tagbytes || !_upb_arenahas(&d->arena, 16))) { }
return func(d, ptr + tagbytes, msg, table, hasbits, dst);
} dst = fastdecode_getfield_ofs(d, ptr, msg, &data, &hasbits, &farr,
char *data = d->arena.head.ptr; sizeof(upb_strview), card, false);
d->arena.head.ptr += 16;
UPB_UNPOISON_MEMORY_REGION(data, 16); again:
memcpy(data, ptr, 16); if (card == CARD_r) {
UPB_ASSERT(tagbytes + 1 + len <= 16); dst = fastdecode_resizearr(d, dst, &farr, sizeof(upb_strview));
ptr += tagbytes + 1; }
dst->data = data + tagbytes + 1;
dst->size = len; size = (int8_t)ptr[tagbytes];
UPB_POISON_MEMORY_REGION(data, 1);
UPB_POISON_MEMORY_REGION(data + 1 + len, 16 - len - 1);
return fastdecode_dispatch(d, ptr + len, msg, table, hasbits);
}
len = (int8_t)ptr[tagbytes];
ptr += tagbytes + 1; ptr += tagbytes + 1;
dst->data = ptr; dst->data = ptr;
dst->size = len; dst->size = size;
if (UPB_UNLIKELY(fastdecode_boundscheck(ptr, len, d->limit_ptr))) {
dst->size = 0; if (UPB_UNLIKELY(fastdecode_boundscheck(ptr, size, d->end))) {
ptr -= tagbytes + 1; return fastdecode_longstring(d, ptr, msg, table, hasbits, dst);
RETURN_GENERIC("string field len >1 byte\n");
} }
return fastdecode_dispatch(d, ptr + len, msg, table, hasbits);
}
UPB_NOINLINE ptr += size;
static const char *upb_copystr_1bt(upb_decstate *d, const char *ptr,
upb_msg *msg, const upb_msglayout *table,
uint64_t hasbits, upb_strview *dst) {
return fastdecode_copystring(d, ptr, msg, table, hasbits, dst, 1);
}
UPB_NOINLINE if (card == CARD_r) {
static const char *upb_copystr_2bt(upb_decstate *d, const char *ptr, upb_msg *msg, fastdecode_nextret ret = fastdecode_nextrepeated(
const upb_msglayout *table, uint64_t hasbits, d, dst, &ptr, &farr, data, tagbytes, sizeof(upb_strview));
upb_strview *dst) { switch (ret.next) {
return fastdecode_copystring(d, ptr, msg, table, hasbits, dst, 2); case FD_NEXT_SAMEFIELD:
} dst = ret.dst;
goto again;
case FD_NEXT_OTHERFIELD:
return fastdecode_tagdispatch(d, ptr, msg, table, hasbits, ret.tag);
case FD_NEXT_ATLIMIT:
return ptr;
}
}
const char *upb_pss_1bt(UPB_PARSE_PARAMS) { return fastdecode_dispatch(d, ptr, msg, table, hasbits);
return fastdecode_string(UPB_PARSE_ARGS, 1, CARD_s, &upb_copystr_1bt);
} }
const char *upb_pos_1bt(UPB_PARSE_PARAMS) { /* Generate all combinations:
return fastdecode_string(UPB_PARSE_ARGS, 1, CARD_o, &upb_copystr_1bt); * {p,c} x {s,o,r} x {1bt,2bt} */
}
#define F(card, tagbytes) \
UPB_NOINLINE \
const char *upb_c##card##s_##tagbytes##bt(UPB_PARSE_PARAMS) { \
return fastdecode_copystring(UPB_PARSE_ARGS, tagbytes, CARD_##card); \
} \
const char *upb_p##card##s_##tagbytes##bt(UPB_PARSE_PARAMS) { \
return fastdecode_string(UPB_PARSE_ARGS, tagbytes, CARD_##card, \
&upb_c##card##s_##tagbytes##bt); \
}
const char *upb_pss_2bt(UPB_PARSE_PARAMS) { #define TAGBYTES(card) \
return fastdecode_string(UPB_PARSE_ARGS, 2, CARD_s, &upb_copystr_2bt); F(card, 1) \
} F(card, 2)
const char *upb_pos_2bt(UPB_PARSE_PARAMS) { TAGBYTES(s)
return fastdecode_string(UPB_PARSE_ARGS, 2, CARD_o, &upb_copystr_2bt); TAGBYTES(o)
} TAGBYTES(r)
#undef F
#undef TAGBYTES
/* message fields *************************************************************/ /* message fields *************************************************************/
UPB_FORCEINLINE UPB_FORCEINLINE
static bool fastdecode_boundscheck2(const char *ptr, size_t len, static bool fastdecode_boundscheck2(const char *ptr, size_t len,
const char *end) { const char *end) {
uintptr_t uptr = (uintptr_t)ptr; uintptr_t uptr = (uintptr_t)ptr;
uintptr_t uend = (uintptr_t)end; uintptr_t uend = (uintptr_t)end;
uintptr_t res = uptr + len; uintptr_t res = uptr + len;
return res < uptr || res > uend; return res < uptr || res > uend;
} }
/* Creates a zeroed message for layout `l` in the decoder's arena and returns
 * the address just past its upb_msg_internal header.
 *
 * msg_ceil_bytes is an upper bound on the allocation size; a value <= 0, or an
 * arena with fewer free bytes than the ceiling, selects the generic
 * upb_arena_malloc() path. */
UPB_INLINE
upb_msg *decode_newmsg_ceil(upb_decstate *d, const upb_msglayout *l,
                            int msg_ceil_bytes) {
  const size_t alloc_bytes = l->size + sizeof(upb_msg_internal);
  char *mem;

  if (UPB_UNLIKELY(msg_ceil_bytes <= 0 ||
                   _upb_arenahas(&d->arena) < msg_ceil_bytes)) {
    /* Slow path: go through the regular arena allocator. */
    mem = (char*)upb_arena_malloc(&d->arena, alloc_bytes);
    memset(mem, 0, alloc_bytes);
    return mem + sizeof(upb_msg_internal);
  }

  /* Fast path: carve the message straight off the arena head.  Only
   * alloc_bytes are claimed, but the whole ceiling is cleared and the unused
   * tail is poisoned again afterwards. */
  UPB_ASSERT(alloc_bytes <= (size_t)msg_ceil_bytes);
  mem = d->arena.head.ptr;
  d->arena.head.ptr += alloc_bytes;
  UPB_UNPOISON_MEMORY_REGION(mem, msg_ceil_bytes);
  memset(mem, 0, msg_ceil_bytes);
  UPB_POISON_MEMORY_REGION(mem + alloc_bytes, msg_ceil_bytes - alloc_bytes);
  return mem + sizeof(upb_msg_internal);
}
UPB_FORCEINLINE UPB_FORCEINLINE
static const char *fastdecode_submsg(UPB_PARSE_PARAMS, int tagbytes, static const char *fastdecode_submsg(UPB_PARSE_PARAMS, int tagbytes,
int msg_ceil_bytes, upb_card card) { int msg_ceil_bytes, upb_card card) {
@ -384,12 +672,12 @@ static const char *fastdecode_submsg(UPB_PARSE_PARAMS, int tagbytes,
if (--d->depth == 0) return fastdecode_err(d); if (--d->depth == 0) return fastdecode_err(d);
upb_msg **submsg; upb_msg **submsg;
upb_array *arr;
void *end;
uint32_t submsg_idx = data; uint32_t submsg_idx = data;
submsg_idx >>= 16; submsg_idx >>= 16;
const upb_msglayout *subl = table->submsgs[submsg_idx]; const upb_msglayout *subl = table->submsgs[submsg_idx];
submsg = fastdecode_getfield_ofs(d, ptr, msg, &data, &hasbits, &arr, &end, fastdecode_arr farr;
submsg = fastdecode_getfield_ofs(d, ptr, msg, &data, &hasbits, &farr,
sizeof(upb_msg *), card, true); sizeof(upb_msg *), card, true);
if (card == CARD_s) { if (card == CARD_s) {
@ -399,18 +687,7 @@ static const char *fastdecode_submsg(UPB_PARSE_PARAMS, int tagbytes,
again: again:
if (card == CARD_r) { if (card == CARD_r) {
if (UPB_UNLIKELY(submsg == end)) { submsg = fastdecode_resizearr(d, submsg, &farr, sizeof(upb_msg*));
size_t old_size = arr->size;
size_t old_bytes = old_size * sizeof(upb_msg*);
size_t new_size = old_size * 2;
size_t new_bytes = new_size * sizeof(upb_msg*);
char *old_ptr = _upb_array_ptr(arr);
char *new_ptr = upb_arena_realloc(&d->arena, old_ptr, old_bytes, new_bytes);
arr->size = new_size;
arr->data = _upb_array_tagptr(new_ptr, 3);
submsg = (void*)(new_ptr + (old_size * sizeof(upb_msg*)));
end = (void*)(new_ptr + (new_size * sizeof(upb_msg*)));
}
} }
upb_msg* child = *submsg; upb_msg* child = *submsg;
@ -420,34 +697,29 @@ again:
} }
ptr += tagbytes + 1; ptr += tagbytes + 1;
int64_t len = (int8_t)ptr[-1]; int len = (int8_t)ptr[-1];
if (fastdecode_boundscheck2(ptr, len, d->limit_ptr)) { if (fastdecode_boundscheck2(ptr, len, d->limit_ptr)) {
// Slow case: Sub-message is >=128 bytes and/or exceeds the current buffer.
// If it exceeds the buffer limit, limit/limit_ptr will change during
// sub-message parsing, so we need to preserve delta, not limit.
if (UPB_UNLIKELY(len & 0x80)) { if (UPB_UNLIKELY(len & 0x80)) {
len &= 0xff; // Size varint >1 byte (length >= 128).
int i; ptr = fastdecode_longsize(ptr, &len);
for (i = 0; i < 3; i++) { if (!ptr) {
ptr++; // Corrupt wire format: size exceeded INT_MAX.
size_t byte = (uint8_t)ptr[-1]; return fastdecode_err(d);
len += (byte - 1) << (7 + 7 * i);
if (UPB_LIKELY((byte & 0x80) == 0)) goto done;
} }
ptr++;
size_t byte = (uint8_t)ptr[-1];
// len is limited by 2gb not 4gb, hence 8 and not 16 as normally expected
// for a 32 bit varint.
if (UPB_UNLIKELY(byte >= 8)) return fastdecode_err(d);
len += (byte - 1) << 28;
} }
done:
if (ptr - d->end + (int)len > d->limit) { if (ptr - d->end + (int)len > d->limit) {
// Corrupt wire format: invalid limit.
return fastdecode_err(d); return fastdecode_err(d);
} }
int delta = decode_pushlimit(d, ptr, len); int delta = decode_pushlimit(d, ptr, len);
ptr = fastdecode_dispatch(d, ptr, child, subl, 0); ptr = fastdecode_dispatch(d, ptr, child, subl, 0);
decode_poplimit(d, delta); decode_poplimit(d, delta);
} else { } else {
UPB_ASSERT(d->limit_ptr - ptr >= len); // Fast case: Sub-message is <128 bytes and fits in the current buffer.
UPB_ASSERT(d->limit_ptr == d->end + UPB_MIN(0, d->limit)); // This means we can preserve limit/limit_ptr verbatim.
const char *saved_limit_ptr = d->limit_ptr; const char *saved_limit_ptr = d->limit_ptr;
int saved_limit = d->limit; int saved_limit = d->limit;
d->limit_ptr = ptr + len; d->limit_ptr = ptr + len;
@ -463,21 +735,18 @@ again:
} }
if (card == CARD_r) { if (card == CARD_r) {
submsg++; fastdecode_nextret ret = fastdecode_nextrepeated(
if (UPB_LIKELY(!decode_isdone(d, &ptr))) { d, submsg, &ptr, &farr, data, tagbytes, sizeof(upb_msg *));
uint32_t tag = fastdecode_loadtag(ptr); switch (ret.next) {
if (tagbytes == 1) { case FD_NEXT_SAMEFIELD:
if ((uint8_t)tag == (uint8_t)data) goto again; submsg = ret.dst;
} else { goto again;
if ((uint16_t)tag == (uint16_t)data) goto again; case FD_NEXT_OTHERFIELD:
} d->depth++;
arr->len = submsg - (upb_msg**)_upb_array_ptr(arr); return fastdecode_tagdispatch(d, ptr, msg, table, hasbits, ret.tag);
d->depth++; case FD_NEXT_ATLIMIT:
return fastdecode_tagdispatch(d, ptr, msg, table, hasbits, tag); d->depth++;
} else { return ptr;
arr->len = submsg - (upb_msg**)_upb_array_ptr(arr);
d->depth++;
return ptr;
} }
} }

@ -21,6 +21,8 @@ const char *fastdecode_generic(struct upb_decstate *d, const char *ptr,
struct upb_decstate *d, const char *ptr, upb_msg *msg, \ struct upb_decstate *d, const char *ptr, upb_msg *msg, \
const upb_msglayout *table, uint64_t hasbits, uint64_t data const upb_msglayout *table, uint64_t hasbits, uint64_t data
/* primitive fields ***********************************************************/
#define F(card, type, valbytes, tagbytes) \ #define F(card, type, valbytes, tagbytes) \
const char *upb_p##card##type##valbytes##_##tagbytes##bt(UPB_PARSE_PARAMS); const char *upb_p##card##type##valbytes##_##tagbytes##bt(UPB_PARSE_PARAMS);
@ -29,7 +31,9 @@ const char *fastdecode_generic(struct upb_decstate *d, const char *ptr,
F(card, v, 4, tagbytes) \ F(card, v, 4, tagbytes) \
F(card, v, 8, tagbytes) \ F(card, v, 8, tagbytes) \
F(card, z, 4, tagbytes) \ F(card, z, 4, tagbytes) \
F(card, z, 8, tagbytes) F(card, z, 8, tagbytes) \
F(card, f, 4, tagbytes) \
F(card, f, 8, tagbytes)
#define TAGBYTES(card) \ #define TAGBYTES(card) \
TYPES(card, 1) \ TYPES(card, 1) \
@ -37,17 +41,31 @@ const char *fastdecode_generic(struct upb_decstate *d, const char *ptr,
TAGBYTES(s) TAGBYTES(s)
TAGBYTES(o) TAGBYTES(o)
/* TAGBYTES(r) */ TAGBYTES(r)
const char *upb_pss_1bt(UPB_PARSE_PARAMS);
const char *upb_pss_2bt(UPB_PARSE_PARAMS);
const char *upb_pos_1bt(UPB_PARSE_PARAMS);
const char *upb_pos_2bt(UPB_PARSE_PARAMS);
#undef F #undef F
#undef TYPES #undef TYPES
#undef TAGBYTES #undef TAGBYTES
/* string fields **************************************************************/
/* Declares, for every cardinality in {s,o,r} and tag width in {1,2} bytes, the
 * two string parsers upb_p<card>s_<tagbytes>bt and upb_c<card>s_<tagbytes>bt
 * (for example upb_pss_1bt / upb_css_1bt).  F and TAGBYTES are undefined again
 * afterwards so the next section can redefine them. */
#define F(card, tagbytes) \
const char *upb_p##card##s_##tagbytes##bt(UPB_PARSE_PARAMS); \
const char *upb_c##card##s_##tagbytes##bt(UPB_PARSE_PARAMS);
#define TAGBYTES(card) \
F(card, 1) \
F(card, 2)
TAGBYTES(s)
TAGBYTES(o)
TAGBYTES(r)
#undef F
#undef TAGBYTES
/* sub-message fields *********************************************************/
#define F(card, tagbytes, size_ceil, ceil_arg) \ #define F(card, tagbytes, size_ceil, ceil_arg) \
const char *upb_p##card##m_##tagbytes##bt_max##size_ceil##b(UPB_PARSE_PARAMS); const char *upb_p##card##m_##tagbytes##bt_max##size_ceil##b(UPB_PARSE_PARAMS);

@ -129,7 +129,7 @@ static bool upb_arena_allocblock(upb_arena *a, size_t size) {
void *_upb_arena_slowmalloc(upb_arena *a, size_t size) { void *_upb_arena_slowmalloc(upb_arena *a, size_t size) {
if (!upb_arena_allocblock(a, size)) return NULL; /* Out of memory. */ if (!upb_arena_allocblock(a, size)) return NULL; /* Out of memory. */
UPB_ASSERT(_upb_arenahas(a, size)); UPB_ASSERT(_upb_arenahas(a) >= size);
return upb_arena_malloc(a, size); return upb_arena_malloc(a, size);
} }
@ -224,9 +224,9 @@ void upb_arena_free(upb_arena *a) {
bool upb_arena_addcleanup(upb_arena *a, void *ud, upb_cleanup_func *func) { bool upb_arena_addcleanup(upb_arena *a, void *ud, upb_cleanup_func *func) {
cleanup_ent *ent; cleanup_ent *ent;
if (!a->cleanups || !_upb_arenahas(a, sizeof(cleanup_ent))) { if (!a->cleanups || _upb_arenahas(a) < sizeof(cleanup_ent)) {
if (!upb_arena_allocblock(a, 128)) return false; /* Out of memory. */ if (!upb_arena_allocblock(a, 128)) return false; /* Out of memory. */
UPB_ASSERT(_upb_arenahas(a, sizeof(cleanup_ent))); UPB_ASSERT(_upb_arenahas(a) >= sizeof(cleanup_ent));
} }
a->head.end -= sizeof(cleanup_ent); a->head.end -= sizeof(cleanup_ent);

@ -161,9 +161,9 @@ void *_upb_arena_slowmalloc(upb_arena *a, size_t size);
UPB_INLINE upb_alloc *upb_arena_alloc(upb_arena *a) { return (upb_alloc*)a; } UPB_INLINE upb_alloc *upb_arena_alloc(upb_arena *a) { return (upb_alloc*)a; }
UPB_INLINE bool _upb_arenahas(upb_arena *a, size_t size) { UPB_INLINE size_t _upb_arenahas(upb_arena *a) {
_upb_arena_head *h = (_upb_arena_head*)a; _upb_arena_head *h = (_upb_arena_head*)a;
return (size_t)(h->end - h->ptr) >= size; return (size_t)(h->end - h->ptr);
} }
UPB_INLINE void *upb_arena_malloc(upb_arena *a, size_t size) { UPB_INLINE void *upb_arena_malloc(upb_arena *a, size_t size) {
@ -171,7 +171,7 @@ UPB_INLINE void *upb_arena_malloc(upb_arena *a, size_t size) {
void* ret; void* ret;
size = UPB_ALIGN_MALLOC(size); size = UPB_ALIGN_MALLOC(size);
if (UPB_UNLIKELY(!_upb_arenahas(a, size))) { if (UPB_UNLIKELY(_upb_arenahas(a) < size)) {
return _upb_arena_slowmalloc(a, size); return _upb_arena_slowmalloc(a, size);
} }
@ -182,7 +182,7 @@ UPB_INLINE void *upb_arena_malloc(upb_arena *a, size_t size) {
#if UPB_ASAN #if UPB_ASAN
{ {
size_t guard_size = 32; size_t guard_size = 32;
if (_upb_arenahas(a, guard_size)) { if (_upb_arenahas(a) >= guard_size) {
h->ptr += guard_size; h->ptr += guard_size;
} else { } else {
h->ptr = h->end; h->ptr = h->end;

@ -759,6 +759,16 @@ void TryFillTableEntry(const protobuf::Descriptor* message,
case protobuf::FieldDescriptor::TYPE_UINT64: case protobuf::FieldDescriptor::TYPE_UINT64:
type = "v8"; type = "v8";
break; break;
case protobuf::FieldDescriptor::TYPE_FIXED32:
case protobuf::FieldDescriptor::TYPE_SFIXED32:
case protobuf::FieldDescriptor::TYPE_FLOAT:
type = "f4";
break;
case protobuf::FieldDescriptor::TYPE_FIXED64:
case protobuf::FieldDescriptor::TYPE_SFIXED64:
case protobuf::FieldDescriptor::TYPE_DOUBLE:
type = "f8";
break;
case protobuf::FieldDescriptor::TYPE_SINT32: case protobuf::FieldDescriptor::TYPE_SINT32:
type = "z4"; type = "z4";
break; break;
@ -783,12 +793,11 @@ void TryFillTableEntry(const protobuf::Descriptor* message,
switch (field->label()) { switch (field->label()) {
case protobuf::FieldDescriptor::LABEL_REPEATED: case protobuf::FieldDescriptor::LABEL_REPEATED:
if (field->type() == protobuf::FieldDescriptor::TYPE_MESSAGE) { cardinality = "r";
cardinality = "r"; if (field->is_packed()) {
break;
} else {
return; // Not supported yet. return; // Not supported yet.
} }
break;
case protobuf::FieldDescriptor::LABEL_OPTIONAL: case protobuf::FieldDescriptor::LABEL_OPTIONAL:
case protobuf::FieldDescriptor::LABEL_REQUIRED: case protobuf::FieldDescriptor::LABEL_REQUIRED:
if (field->real_containing_oneof()) { if (field->real_containing_oneof()) {

Loading…
Cancel
Save