|
|
|
/*
 * Copyright (c) 2009-2021, Google LLC
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions are met:
 *     * Redistributions of source code must retain the above copyright
 *       notice, this list of conditions and the following disclaimer.
 *     * Redistributions in binary form must reproduce the above copyright
 *       notice, this list of conditions and the following disclaimer in the
 *       documentation and/or other materials provided with the distribution.
 *     * Neither the name of Google LLC nor the
 *       names of its contributors may be used to endorse or promote products
 *       derived from this software without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
 * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
 * DISCLAIMED. IN NO EVENT SHALL Google LLC BE LIABLE FOR ANY
 * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
 * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
 * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
 * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */
|
|
|
|
|
|
|
|
#include "upb/decode.h"
|
|
|
|
|
|
|
|
#include <setjmp.h>
|
|
|
|
#include <string.h>
|
|
|
|
|
|
|
|
#include "upb/decode_internal.h"
|
|
|
|
#include "upb/upb.h"
|
|
|
|
#include "upb/upb_internal.h"
|
|
|
|
|
|
|
|
/* Must be last. */
|
|
|
|
#include "upb/port_def.inc"
|
|
|
|
|
|
|
|
/* Maps descriptor type -> elem_size_lg2. */
|
|
|
|
static const uint8_t desctype_to_elem_size_lg2[] = {
|
|
|
|
-1, /* invalid descriptor type */
|
|
|
|
3, /* DOUBLE */
|
|
|
|
2, /* FLOAT */
|
|
|
|
3, /* INT64 */
|
|
|
|
3, /* UINT64 */
|
|
|
|
2, /* INT32 */
|
|
|
|
3, /* FIXED64 */
|
|
|
|
2, /* FIXED32 */
|
|
|
|
0, /* BOOL */
|
|
|
|
UPB_SIZE(3, 4), /* STRING */
|
|
|
|
UPB_SIZE(2, 3), /* GROUP */
|
|
|
|
UPB_SIZE(2, 3), /* MESSAGE */
|
|
|
|
UPB_SIZE(3, 4), /* BYTES */
|
|
|
|
2, /* UINT32 */
|
|
|
|
2, /* ENUM */
|
|
|
|
2, /* SFIXED32 */
|
|
|
|
3, /* SFIXED64 */
|
|
|
|
2, /* SINT32 */
|
|
|
|
3, /* SINT64 */
|
|
|
|
};
|
|
|
|
|
|
|
|
/* Maps descriptor type -> upb map size. */
|
|
|
|
static const uint8_t desctype_to_mapsize[] = {
|
|
|
|
-1, /* invalid descriptor type */
|
|
|
|
8, /* DOUBLE */
|
|
|
|
4, /* FLOAT */
|
|
|
|
8, /* INT64 */
|
|
|
|
8, /* UINT64 */
|
|
|
|
4, /* INT32 */
|
|
|
|
8, /* FIXED64 */
|
|
|
|
4, /* FIXED32 */
|
|
|
|
1, /* BOOL */
|
|
|
|
UPB_MAPTYPE_STRING, /* STRING */
|
|
|
|
sizeof(void *), /* GROUP */
|
|
|
|
sizeof(void *), /* MESSAGE */
|
|
|
|
UPB_MAPTYPE_STRING, /* BYTES */
|
|
|
|
4, /* UINT32 */
|
|
|
|
4, /* ENUM */
|
|
|
|
4, /* SFIXED32 */
|
|
|
|
8, /* SFIXED64 */
|
|
|
|
4, /* SINT32 */
|
|
|
|
8, /* SINT64 */
|
|
|
|
};
|
|
|
|
|
|
|
|
static const unsigned FIXED32_OK_MASK = (1 << UPB_DTYPE_FLOAT) |
|
|
|
|
(1 << UPB_DTYPE_FIXED32) |
|
|
|
|
(1 << UPB_DTYPE_SFIXED32);
|
|
|
|
|
|
|
|
static const unsigned FIXED64_OK_MASK = (1 << UPB_DTYPE_DOUBLE) |
|
|
|
|
(1 << UPB_DTYPE_FIXED64) |
|
|
|
|
(1 << UPB_DTYPE_SFIXED64);
|
|
|
|
|
|
|
|
/* Fake field types for MessageSet, numbered directly after the last real
 * descriptor type (SINT64 occupies slot 18 in the op tables).
 * NOTE(review): TYPE_COUNT matches the distance between a type's scalar slot
 * and its "REPEATED" slot in delim_ops -- confirm against the code that
 * indexes that table. */
#define TYPE_MSGSET_ITEM 19
#define TYPE_MSGSET_TYPE_ID 20
#define TYPE_COUNT 20
|
|
|
|
|
|
|
|
/* Op: an action to be performed for a wire-type/field-type combination.
 * Negative values are special cases; non-negative values select a storage
 * action. Macro arguments and negative expansions are parenthesised so the
 * macros stay correct inside larger expressions. */
#define OP_UNKNOWN (-1) /* Unknown field. */
#define OP_MSGSET_ITEM (-2)
#define OP_MSGSET_TYPEID (-3)
#define OP_SCALAR_LG2(n) (n) /* n in [0, 2, 3] => op in [0, 2, 3] */
#define OP_ENUM 1
#define OP_STRING 4
#define OP_BYTES 5
#define OP_SUBMSG 6
/* Scalar fields use only ops above. Repeated fields can use any op. */
#define OP_FIXPCK_LG2(n) ((n) + 5) /* n in [2, 3] => op in [7, 8] */
#define OP_VARPCK_LG2(n) ((n) + 9) /* n in [0, 2, 3] => op in [9, 11, 12] */
#define OP_PACKED_ENUM 13
|
|
|
|
|
|
|
|
static const int8_t varint_ops[] = {
|
|
|
|
OP_UNKNOWN, /* field not found */
|
|
|
|
OP_UNKNOWN, /* DOUBLE */
|
|
|
|
OP_UNKNOWN, /* FLOAT */
|
|
|
|
OP_SCALAR_LG2(3), /* INT64 */
|
|
|
|
OP_SCALAR_LG2(3), /* UINT64 */
|
|
|
|
OP_SCALAR_LG2(2), /* INT32 */
|
|
|
|
OP_UNKNOWN, /* FIXED64 */
|
|
|
|
OP_UNKNOWN, /* FIXED32 */
|
|
|
|
OP_SCALAR_LG2(0), /* BOOL */
|
|
|
|
OP_UNKNOWN, /* STRING */
|
|
|
|
OP_UNKNOWN, /* GROUP */
|
|
|
|
OP_UNKNOWN, /* MESSAGE */
|
|
|
|
OP_UNKNOWN, /* BYTES */
|
|
|
|
OP_SCALAR_LG2(2), /* UINT32 */
|
|
|
|
OP_ENUM, /* ENUM */
|
|
|
|
OP_UNKNOWN, /* SFIXED32 */
|
|
|
|
OP_UNKNOWN, /* SFIXED64 */
|
|
|
|
OP_SCALAR_LG2(2), /* SINT32 */
|
|
|
|
OP_SCALAR_LG2(3), /* SINT64 */
|
|
|
|
OP_UNKNOWN, /* MSGSET_ITEM */
|
|
|
|
OP_MSGSET_TYPEID, /* MSGSET TYPEID */
|
|
|
|
};
|
|
|
|
|
|
|
|
static const int8_t delim_ops[] = {
|
|
|
|
/* For non-repeated field type. */
|
|
|
|
OP_UNKNOWN, /* field not found */
|
|
|
|
OP_UNKNOWN, /* DOUBLE */
|
|
|
|
OP_UNKNOWN, /* FLOAT */
|
|
|
|
OP_UNKNOWN, /* INT64 */
|
|
|
|
OP_UNKNOWN, /* UINT64 */
|
|
|
|
OP_UNKNOWN, /* INT32 */
|
|
|
|
OP_UNKNOWN, /* FIXED64 */
|
|
|
|
OP_UNKNOWN, /* FIXED32 */
|
|
|
|
OP_UNKNOWN, /* BOOL */
|
|
|
|
OP_STRING, /* STRING */
|
|
|
|
OP_UNKNOWN, /* GROUP */
|
|
|
|
OP_SUBMSG, /* MESSAGE */
|
|
|
|
OP_BYTES, /* BYTES */
|
|
|
|
OP_UNKNOWN, /* UINT32 */
|
|
|
|
OP_UNKNOWN, /* ENUM */
|
|
|
|
OP_UNKNOWN, /* SFIXED32 */
|
|
|
|
OP_UNKNOWN, /* SFIXED64 */
|
|
|
|
OP_UNKNOWN, /* SINT32 */
|
|
|
|
OP_UNKNOWN, /* SINT64 */
|
|
|
|
OP_UNKNOWN, /* MSGSET_ITEM */
|
|
|
|
OP_UNKNOWN, /* MSGSET TYPEID */
|
|
|
|
/* For repeated field type. */
|
|
|
|
OP_FIXPCK_LG2(3), /* REPEATED DOUBLE */
|
|
|
|
OP_FIXPCK_LG2(2), /* REPEATED FLOAT */
|
|
|
|
OP_VARPCK_LG2(3), /* REPEATED INT64 */
|
|
|
|
OP_VARPCK_LG2(3), /* REPEATED UINT64 */
|
|
|
|
OP_VARPCK_LG2(2), /* REPEATED INT32 */
|
|
|
|
OP_FIXPCK_LG2(3), /* REPEATED FIXED64 */
|
|
|
|
OP_FIXPCK_LG2(2), /* REPEATED FIXED32 */
|
|
|
|
OP_VARPCK_LG2(0), /* REPEATED BOOL */
|
|
|
|
OP_STRING, /* REPEATED STRING */
|
|
|
|
OP_SUBMSG, /* REPEATED GROUP */
|
|
|
|
OP_SUBMSG, /* REPEATED MESSAGE */
|
|
|
|
OP_BYTES, /* REPEATED BYTES */
|
|
|
|
OP_VARPCK_LG2(2), /* REPEATED UINT32 */
|
|
|
|
OP_PACKED_ENUM, /* REPEATED ENUM */
|
|
|
|
OP_FIXPCK_LG2(2), /* REPEATED SFIXED32 */
|
|
|
|
OP_FIXPCK_LG2(3), /* REPEATED SFIXED64 */
|
|
|
|
OP_VARPCK_LG2(2), /* REPEATED SINT32 */
|
|
|
|
OP_VARPCK_LG2(3), /* REPEATED SINT64 */
|
|
|
|
/* Omitting MSGSET_*, because we never emit a repeated msgset type */
|
|
|
|
};
|
|
|
|
|
|
|
|
/* A value read from the wire, viewed as whichever member suits the field
 * being decoded. All members share the same storage, so later stages can
 * memcpy the leading bytes of the union into the destination. */
typedef union {
  bool bool_val;
  uint32_t uint32_val;
  uint64_t uint64_val;
  uint32_t size; /* Byte length, for length-delimited values. */
} wireval;
|
|
|
|
|
|
|
|
static const char *decode_msg(upb_decstate *d, const char *ptr, upb_msg *msg,
|
|
|
|
const upb_msglayout *layout);
|
|
|
|
|
|
|
|
/* Aborts the current decode by long-jumping to d->err with `status`, which
 * must not be kUpb_DecodeStatus_Ok. Never returns; the void* return type
 * only exists so callers can write `return decode_err(...)` from functions
 * that return a pointer. */
UPB_NORETURN static void *decode_err(upb_decstate *d, upb_DecodeStatus status) {
  assert(status != kUpb_DecodeStatus_Ok);
  UPB_LONGJMP(d->err, status);
}
|
|
|
|
|
|
|
|
/* Externally visible counterpart of decode_err() that takes the status as a
 * plain int. Long-jumps to d->err; `status` must not be
 * kUpb_DecodeStatus_Ok. The trailing return only satisfies the signature. */
const char *fastdecode_err(upb_decstate *d, int status) {
  assert(status != kUpb_DecodeStatus_Ok);
  UPB_LONGJMP(d->err, status);
  return NULL;
}
|
|
|
|
/* Fails the decode with kUpb_DecodeStatus_BadUtf8 unless
 * decode_verifyutf8_inl() accepts the `len` bytes at `buf`. */
static void decode_verifyutf8(upb_decstate *d, const char *buf, int len) {
  if (!decode_verifyutf8_inl(buf, len)) {
    decode_err(d, kUpb_DecodeStatus_BadUtf8);
  }
}
|
|
|
|
|
|
|
|
/* Makes sure `arr` has room for `elem` more elements, growing it from the
 * decoder's arena when the spare capacity (size - len) is too small.
 * Fails the decode with kUpb_DecodeStatus_OutOfMemory if growing fails.
 *
 * Returns true when a reallocation took place, in which case any pointer
 * the caller computed into the array's storage must be recomputed. */
static bool decode_reserve(upb_decstate *d, upb_array *arr, size_t elem) {
  bool need_realloc = arr->size - arr->len < elem;
  if (need_realloc && !_upb_array_realloc(arr, arr->len + elem, &d->arena)) {
    decode_err(d, kUpb_DecodeStatus_OutOfMemory);
  }
  return need_realloc;
}
|
|
|
|
|
|
|
|
/* Result of decoding a multi-byte varint: the position just past the varint
 * and its value. ptr == NULL signals that decoding failed. */
typedef struct {
  const char *ptr;
  uint64_t val;
} decode_vret;
|
|
|
|
|
|
|
|
UPB_NOINLINE
|
|
|
|
static decode_vret decode_longvarint64(const char *ptr, uint64_t val) {
|
|
|
|
decode_vret ret = {NULL, 0};
|
|
|
|
uint64_t byte;
|
|
|
|
int i;
|
|
|
|
for (i = 1; i < 10; i++) {
|
|
|
|
byte = (uint8_t)ptr[i];
|
|
|
|
val += (byte - 1) << (i * 7);
|
|
|
|
if (!(byte & 0x80)) {
|
|
|
|
ret.ptr = ptr + i + 1;
|
|
|
|
ret.val = val;
|
|
|
|
return ret;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
return ret;
|
|
|
|
}
|
|
|
|
|
|
|
|
UPB_FORCEINLINE
|
|
|
|
static const char *decode_varint64(upb_decstate *d, const char *ptr,
|
|
|
|
uint64_t *val) {
|
|
|
|
uint64_t byte = (uint8_t)*ptr;
|
|
|
|
if (UPB_LIKELY((byte & 0x80) == 0)) {
|
|
|
|
*val = byte;
|
|
|
|
return ptr + 1;
|
|
|
|
} else {
|
|
|
|
decode_vret res = decode_longvarint64(ptr, byte);
|
|
|
|
if (!res.ptr) return decode_err(d, kUpb_DecodeStatus_Malformed);
|
|
|
|
*val = res.val;
|
|
|
|
return res.ptr;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
UPB_FORCEINLINE
|
|
|
|
static const char *decode_tag(upb_decstate *d, const char *ptr, uint32_t *val) {
|
|
|
|
uint64_t byte = (uint8_t)*ptr;
|
|
|
|
if (UPB_LIKELY((byte & 0x80) == 0)) {
|
|
|
|
*val = byte;
|
|
|
|
return ptr + 1;
|
|
|
|
} else {
|
|
|
|
const char *start = ptr;
|
|
|
|
decode_vret res = decode_longvarint64(ptr, byte);
|
|
|
|
if (!res.ptr || res.ptr - start > 5 || res.val > UINT32_MAX) {
|
|
|
|
return decode_err(d, kUpb_DecodeStatus_Malformed);
|
|
|
|
}
|
|
|
|
*val = res.val;
|
|
|
|
return res.ptr;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
/* Prepares a 64-bit wire value for storage as a 32-bit field. When
 * _upb_isle() is false (presumably: the host is not little-endian), the low
 * 32 bits are copied into uint32_val so that they sit at the start of the
 * union, where the later 4-byte memcpy reads from. When _upb_isle() is true
 * nothing needs to move. */
static void decode_munge_int32(wireval *val) {
  if (!_upb_isle()) {
    /* The next stage will memcpy(dst, &val, 4) */
    val->uint32_val = val->uint64_val;
  }
}
|
|
|
|
|
|
|
|
/* Converts a raw varint in val->uint64_val into the in-memory form for the
 * given descriptor type:
 *   - BOOL:   collapsed to true/false.
 *   - SINT32: zig-zag decoded on the low 32 bits.
 *   - SINT64: zig-zag decoded on all 64 bits.
 *   - INT32 / UINT32 / ENUM: narrowed via decode_munge_int32().
 * Any other type is left untouched. */
static void decode_munge(int type, wireval *val) {
  switch (type) {
    case UPB_DESCRIPTOR_TYPE_BOOL:
      val->bool_val = val->uint64_val != 0;
      break;
    case UPB_DESCRIPTOR_TYPE_SINT32: {
      uint32_t n = val->uint64_val;
      /* -(n & 1) is all-ones for odd n, so the XOR flips every bit. */
      val->uint32_val = (n >> 1) ^ -(int32_t)(n & 1);
      break;
    }
    case UPB_DESCRIPTOR_TYPE_SINT64: {
      uint64_t n = val->uint64_val;
      val->uint64_val = (n >> 1) ^ -(int64_t)(n & 1);
      break;
    }
    case UPB_DESCRIPTOR_TYPE_INT32:
    case UPB_DESCRIPTOR_TYPE_UINT32:
    case UPB_DESCRIPTOR_TYPE_ENUM:
      decode_munge_int32(val);
      break;
    default:
      /* 64-bit varint types need no adjustment. */
      break;
  }
}
|
|
|
|
|
|
|
|
static upb_msg *decode_newsubmsg(upb_decstate *d, const upb_msglayout_sub *subs,
|
|
|
|
const upb_msglayout_field *field) {
|
|
|
|
const upb_msglayout *subl = subs[field->submsg_index].submsg;
|
|
|
|
return _upb_msg_new_inl(subl, &d->arena);
|
|
|
|
}
|
|
|
|
|
|
|
|
UPB_NOINLINE
|
|
|
|
const char *decode_isdonefallback(upb_decstate *d, const char *ptr,
|
|
|
|
int overrun) {
|
|
|
|
int status;
|
|
|
|
ptr = decode_isdonefallback_inl(d, ptr, overrun, &status);
|
|
|
|
if (ptr == NULL) {
|
|
|
|
return decode_err(d, status);
|
|
|
|
}
|
|
|
|
return ptr;
|
|
|
|
}
|
|
|
|
|
|
|
|
static const char *decode_readstr(upb_decstate *d, const char *ptr, int size,
|
|
|
|
upb_strview *str) {
|
|
|
|
if (d->options & kUpb_DecodeOption_AliasString) {
|
|
|
|
str->data = ptr;
|
|
|
|
} else {
|
|
|
|
char *data = upb_arena_malloc(&d->arena, size);
|
|
|
|
if (!data) return decode_err(d, kUpb_DecodeStatus_OutOfMemory);
|
|
|
|
memcpy(data, ptr, size);
|
|
|
|
str->data = data;
|
|
|
|
}
|
|
|
|
str->size = size;
|
|
|
|
return ptr + size;
|
|
|
|
}
|
|
|
|
|
|
|
|
UPB_FORCEINLINE
|
|
|
|
static const char *decode_tosubmsg2(upb_decstate *d, const char *ptr,
|
|
|
|
upb_msg *submsg, const upb_msglayout *subl,
|
|
|
|
int size) {
|
|
|
|
int saved_delta = decode_pushlimit(d, ptr, size);
|
|
|
|
if (--d->depth < 0) return decode_err(d, kUpb_DecodeStatus_MaxDepthExceeded);
|
|
|
|
ptr = decode_msg(d, ptr, submsg, subl);
|
|
|
|
if (d->end_group != DECODE_NOGROUP) return decode_err(d, kUpb_DecodeStatus_Malformed);
|
|
|
|
decode_poplimit(d, ptr, saved_delta);
|
|
|
|
d->depth++;
|
|
|
|
return ptr;
|
|
|
|
}
|
|
|
|
|
|
|
|
UPB_FORCEINLINE
|
|
|
|
static const char *decode_tosubmsg(upb_decstate *d, const char *ptr,
|
|
|
|
upb_msg *submsg,
|
|
|
|
const upb_msglayout_sub *subs,
|
|
|
|
const upb_msglayout_field *field, int size) {
|
|
|
|
return decode_tosubmsg2(d, ptr, submsg, subs[field->submsg_index].submsg,
|
|
|
|
size);
|
|
|
|
}
|
|
|
|
|
|
|
|
UPB_FORCEINLINE
|
|
|
|
static const char *decode_group(upb_decstate *d, const char *ptr,
|
|
|
|
upb_msg *submsg, const upb_msglayout *subl,
|
|
|
|
uint32_t number) {
|
|
|
|
if (--d->depth < 0) return decode_err(d, kUpb_DecodeStatus_MaxDepthExceeded);
|
|
|
|
if (decode_isdone(d, &ptr)) {
|
|
|
|
return decode_err(d, kUpb_DecodeStatus_Malformed);
|
|
|
|
}
|
|
|
|
ptr = decode_msg(d, ptr, submsg, subl);
|
|
|
|
if (d->end_group != number) return decode_err(d, kUpb_DecodeStatus_Malformed);
|
|
|
|
d->end_group = DECODE_NOGROUP;
|
|
|
|
d->depth++;
|
|
|
|
return ptr;
|
|
|
|
}
|
|
|
|
|
|
|
|
UPB_FORCEINLINE
|
|
|
|
static const char *decode_togroup(upb_decstate *d, const char *ptr,
|
Optimized decoder and paved the way for parsing extensions.
The primary motivation for this change is to avoid referring to the
`upb_msglayout` object when we are trying to fetch the `upb_msglayout`
object for a sub-message. This will help pave the way for parsing
extensions. We also implement several optimizations so that we can
make this change without regressing performance.
Normally we compute the layout for a sub-message field like so:
```
const upb_msglayout *get_submsg_layout(
const upb_msglayout *layout,
const upb_msglayout_field *field) {
return layout->submsgs[field->submsg_index]
}
```
The reason for this indirection is to avoid storing a pointer directly
in `upb_msglayout_field`, as this would double its size (from 12 to 24
bytes on 64-bit architectures) which is wasteful as this pointer is
only needed for message typed fields.
However `get_submsg_layout` as written above does not work for
extensions, as they will not have entries in the message's
`layout->submsgs` array by nature, and we want to avoid creating
an entire fake `upb_msglayout` for each such extension since that
would also be wasteful.
This change removes the dependency on `upb_msglayout` by passing down
the `submsgs` array instead:
```
const upb_msglayout *get_submsg_layout(
const upb_msglayout *const *submsgs,
const upb_msglayout_field *field) {
return submsgs[field->submsg_index]
}
```
This will pave the way for parsing extensions, as we can more easily
create an alternative `submsgs` array for extension fields without
extra overhead or waste.
Along the way several optimizations presented themselves that allow
a nice increase in performance:
1. Passing the parsed `wireval` by address instead of by value ended
up avoiding an expensive and useless stack copy (this is on Clang,
which was used for all measurements).
2. When field numbers are densely packed, we can find a field by number
with a single indexed lookup instead of linear search. At codegen
time we can compute the maximum field number that will allow such
an indexed lookup.
3. For fields that do require linear search, we can start the linear
search at the location where we found the previous field, taking
advantage of the fact that field numbers are generally increasing.
4. When the hasbit index is less than 32 (the common case) we can use
a less expensive code sequence to set it.
5. We check for the hasbit case before the oneof case, as optional
fields are more common than oneof fields.
Benchmark results indicate a 20% improvement in parse speed with a
small code size increase:
```
name old time/op new time/op delta
ArenaOneAlloc 21.3ns ± 0% 21.5ns ± 0% +0.96% (p=0.000 n=12+12)
ArenaInitialBlockOneAlloc 6.32ns ± 0% 6.32ns ± 0% +0.03% (p=0.000 n=12+10)
LoadDescriptor_Upb 53.5µs ± 1% 51.5µs ± 2% -3.70% (p=0.000 n=12+12)
LoadAdsDescriptor_Upb 2.78ms ± 2% 2.68ms ± 0% -3.57% (p=0.000 n=12+12)
LoadDescriptor_Proto2 240µs ± 0% 240µs ± 0% +0.12% (p=0.001 n=12+12)
LoadAdsDescriptor_Proto2 12.8ms ± 0% 12.7ms ± 0% -1.15% (p=0.000 n=12+10)
Parse_Upb_FileDesc<UseArena,Copy> 13.2µs ± 2% 10.7µs ± 0% -18.49% (p=0.000 n=10+12)
Parse_Upb_FileDesc<UseArena,Alias> 11.3µs ± 0% 9.6µs ± 0% -15.11% (p=0.000 n=12+11)
Parse_Upb_FileDesc<InitBlock,Copy> 12.7µs ± 0% 10.3µs ± 0% -19.00% (p=0.000 n=10+12)
Parse_Upb_FileDesc<InitBlock,Alias> 10.9µs ± 0% 9.2µs ± 0% -15.82% (p=0.000 n=12+12)
Parse_Proto2<FileDesc,NoArena,Copy> 29.4µs ± 0% 29.5µs ± 0% +0.61% (p=0.000 n=12+12)
Parse_Proto2<FileDesc,UseArena,Copy> 20.7µs ± 2% 20.6µs ± 2% ~ (p=0.260 n=12+11)
Parse_Proto2<FileDesc,InitBlock,Copy> 16.7µs ± 1% 16.7µs ± 0% -0.25% (p=0.036 n=12+10)
Parse_Proto2<FileDescSV,InitBlock,Alias> 16.5µs ± 0% 16.5µs ± 0% +0.20% (p=0.016 n=12+11)
SerializeDescriptor_Proto2 5.30µs ± 1% 5.36µs ± 1% +1.09% (p=0.000 n=12+11)
SerializeDescriptor_Upb 12.9µs ± 0% 13.0µs ± 0% +0.90% (p=0.000 n=12+11)
FILE SIZE VM SIZE
-------------- --------------
+1.5% +176 +1.6% +176 upb/decode.c
+1.8% +176 +1.9% +176 decode_msg
+0.4% +64 +0.4% +64 upb/def.c
+1.4% +64 +1.4% +64 _upb_symtab_addfile
+1.2% +48 +1.4% +48 upb/reflection.c
+15% +32 +18% +32 upb_msg_set
+2.9% +16 +3.1% +16 upb_msg_mutable
-9.3% -288 [ = ] 0 [Unmapped]
[ = ] 0 +0.2% +288 TOTAL
```
4 years ago
|
|
|
upb_msg *submsg,
|
|
|
|
const upb_msglayout_sub *subs,
|
|
|
|
const upb_msglayout_field *field) {
|
|
|
|
const upb_msglayout *subl = subs[field->submsg_index].submsg;
|
|
|
|
return decode_group(d, ptr, submsg, subl, field->number);
|
|
|
|
}
|
|
|
|
|
|
|
|
/* Writes `val` at `ptr` as a varint (7 payload bits per byte, least
 * significant group first, 0x80 set on every byte except the last) and
 * returns the position just past the last byte written. A 32-bit value
 * takes between one and five bytes; the caller must provide that much room. */
static char *encode_varint32(uint32_t val, char *ptr) {
  do {
    uint8_t byte = val & 0x7fU;
    val >>= 7;
    if (val) byte |= 0x80U; /* More groups follow. */
    *(ptr++) = byte;
  } while (val);
  return ptr;
}
|
|
|
|
|
|
|
|
UPB_NOINLINE
|
|
|
|
static bool decode_checkenum_slow(upb_decstate *d, const char *ptr, upb_msg *msg,
|
|
|
|
const upb_enumlayout *e,
|
|
|
|
const upb_msglayout_field *field, uint32_t v) {
|
|
|
|
// OPT: binary search long lists?
|
|
|
|
int n = e->value_count;
|
|
|
|
for (int i = 0; i < n; i++) {
|
|
|
|
if ((uint32_t)e->values[i] == v) return true;
|
|
|
|
}
|
|
|
|
|
|
|
|
// Unrecognized enum goes into unknown fields.
|
|
|
|
// For packed fields the tag could be arbitrarily far in the past, so we
|
|
|
|
// just re-encode the tag here.
|
|
|
|
char buf[20];
|
|
|
|
char *end = buf;
|
|
|
|
uint32_t tag = ((uint32_t)field->number << 3) | UPB_WIRE_TYPE_VARINT;
|
|
|
|
end = encode_varint32(tag, end);
|
|
|
|
end = encode_varint32(v, end);
|
|
|
|
|
|
|
|
if (!_upb_msg_addunknown(msg, buf, end - buf, &d->arena)) {
|
|
|
|
decode_err(d, kUpb_DecodeStatus_OutOfMemory);
|
|
|
|
}
|
|
|
|
|
|
|
|
return false;
|
|
|
|
}
|
|
|
|
|
|
|
|
UPB_FORCEINLINE
|
|
|
|
static bool decode_checkenum(upb_decstate *d, const char *ptr, upb_msg *msg,
|
|
|
|
const upb_enumlayout *e,
|
|
|
|
const upb_msglayout_field *field, wireval *val) {
|
|
|
|
uint32_t v = val->uint32_val;
|
|
|
|
|
|
|
|
if (UPB_LIKELY(v < 64) && UPB_LIKELY(((1ULL << v) & e->mask))) return true;
|
|
|
|
|
|
|
|
return decode_checkenum_slow(d, ptr, msg, e, field, v);
|
|
|
|
}
|
|
|
|
|
|
|
|
UPB_NOINLINE
|
|
|
|
static const char *decode_enum_toarray(upb_decstate *d, const char *ptr,
|
|
|
|
upb_msg *msg, upb_array *arr,
|
|
|
|
const upb_msglayout_sub *subs,
|
|
|
|
const upb_msglayout_field *field,
|
|
|
|
wireval *val) {
|
|
|
|
const upb_enumlayout *e = subs[field->submsg_index].subenum;
|
|
|
|
if (!decode_checkenum(d, ptr, msg, e, field, val)) return ptr;
|
|
|
|
void *mem = UPB_PTR_AT(_upb_array_ptr(arr), arr->len * 4, void);
|
|
|
|
arr->len++;
|
|
|
|
memcpy(mem, val, 4);
|
|
|
|
return ptr;
|
|
|
|
}
|
|
|
|
|
|
|
|
UPB_FORCEINLINE
|
|
|
|
static const char *decode_fixed_packed(upb_decstate *d, const char *ptr,
|
|
|
|
upb_array *arr, wireval *val,
|
|
|
|
const upb_msglayout_field *field,
|
|
|
|
int lg2) {
|
|
|
|
int mask = (1 << lg2) - 1;
|
|
|
|
size_t count = val->size >> lg2;
|
|
|
|
if ((val->size & mask) != 0) {
|
|
|
|
// Length isn't a round multiple of elem size.
|
|
|
|
return decode_err(d, kUpb_DecodeStatus_Malformed);
|
|
|
|
}
|
|
|
|
decode_reserve(d, arr, count);
|
|
|
|
void *mem = UPB_PTR_AT(_upb_array_ptr(arr), arr->len << lg2, void);
|
|
|
|
arr->len += count;
|
|
|
|
// Note: if/when the decoder supports multi-buffer input, we will need to
|
|
|
|
// handle buffer seams here.
|
|
|
|
if (_upb_isle()) {
|
|
|
|
memcpy(mem, ptr, val->size);
|
|
|
|
ptr += val->size;
|
|
|
|
} else {
|
|
|
|
const char *end = ptr + val->size;
|
|
|
|
char *dst = mem;
|
|
|
|
while (ptr < end) {
|
|
|
|
if (lg2 == 2) {
|
|
|
|
uint32_t val;
|
|
|
|
memcpy(&val, ptr, sizeof(val));
|
|
|
|
val = _upb_be_swap32(val);
|
|
|
|
memcpy(dst, &val, sizeof(val));
|
|
|
|
} else {
|
|
|
|
UPB_ASSERT(lg2 == 3);
|
|
|
|
uint64_t val;
|
|
|
|
memcpy(&val, ptr, sizeof(val));
|
|
|
|
val = _upb_be_swap64(val);
|
|
|
|
memcpy(dst, &val, sizeof(val));
|
|
|
|
}
|
|
|
|
ptr += 1 << lg2;
|
|
|
|
dst += 1 << lg2;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
return ptr;
|
|
|
|
}
|
|
|
|
|
|
|
|
UPB_FORCEINLINE
|
|
|
|
static const char *decode_varint_packed(upb_decstate *d, const char *ptr,
|
|
|
|
upb_array *arr, wireval *val,
|
|
|
|
const upb_msglayout_field *field,
|
|
|
|
int lg2) {
|
|
|
|
int scale = 1 << lg2;
|
|
|
|
int saved_limit = decode_pushlimit(d, ptr, val->size);
|
|
|
|
char *out = UPB_PTR_AT(_upb_array_ptr(arr), arr->len << lg2, void);
|
|
|
|
while (!decode_isdone(d, &ptr)) {
|
|
|
|
wireval elem;
|
|
|
|
ptr = decode_varint64(d, ptr, &elem.uint64_val);
|
|
|
|
decode_munge(field->descriptortype, &elem);
|
|
|
|
if (decode_reserve(d, arr, 1)) {
|
|
|
|
out = UPB_PTR_AT(_upb_array_ptr(arr), arr->len << lg2, void);
|
|
|
|
}
|
|
|
|
arr->len++;
|
|
|
|
memcpy(out, &elem, scale);
|
|
|
|
out += scale;
|
|
|
|
}
|
|
|
|
decode_poplimit(d, ptr, saved_limit);
|
|
|
|
return ptr;
|
|
|
|
}
|
|
|
|
|
|
|
|
UPB_NOINLINE
|
|
|
|
static const char *decode_enum_packed(upb_decstate *d, const char *ptr,
|
|
|
|
upb_msg *msg, upb_array *arr,
|
|
|
|
const upb_msglayout_sub *subs,
|
|
|
|
const upb_msglayout_field *field,
|
|
|
|
wireval *val) {
|
|
|
|
const upb_enumlayout *e = subs[field->submsg_index].subenum;
|
|
|
|
int saved_limit = decode_pushlimit(d, ptr, val->size);
|
|
|
|
char *out = UPB_PTR_AT(_upb_array_ptr(arr), arr->len * 4, void);
|
|
|
|
while (!decode_isdone(d, &ptr)) {
|
|
|
|
wireval elem;
|
|
|
|
ptr = decode_varint64(d, ptr, &elem.uint64_val);
|
|
|
|
decode_munge_int32(&elem);
|
|
|
|
if (!decode_checkenum(d, ptr, msg, e, field, &elem)) {
|
|
|
|
continue;
|
|
|
|
}
|
|
|
|
if (decode_reserve(d, arr, 1)) {
|
|
|
|
out = UPB_PTR_AT(_upb_array_ptr(arr), arr->len * 4, void);
|
|
|
|
}
|
|
|
|
arr->len++;
|
|
|
|
memcpy(out, &elem, 4);
|
|
|
|
out += 4;
|
|
|
|
}
|
|
|
|
decode_poplimit(d, ptr, saved_limit);
|
|
|
|
return ptr;
|
|
|
|
}
|
|
|
|
|
|
|
|
static const char *decode_toarray(upb_decstate *d, const char *ptr,
|
Optimized decoder and paved the way for parsing extensions.
The primary motivation for this change is to avoid referring to the
`upb_msglayout` object when we are trying to fetch the `upb_msglayout`
object for a sub-message. This will help pave the way for parsing
extensions. We also implement several optimizations so that we can
make this change without regressing performance.
Normally we compute the layout for a sub-message field like so:
```
const upb_msglayout *get_submsg_layout(
const upb_msglayout *layout,
const upb_msglayout_field *field) {
return layout->submsgs[field->submsg_index]
}
```
The reason for this indirection is to avoid storing a pointer directly
in `upb_msglayout_field`, as this would double its size (from 12 to 24
bytes on 64-bit architectures) which is wasteful as this pointer is
only needed for message typed fields.
However `get_submsg_layout` as written above does not work for
extensions, as they will not have entries in the message's
`layout->submsgs` array by nature, and we want to avoid creating
an entire fake `upb_msglayout` for each such extension since that
would also be wasteful.
This change removes the dependency on `upb_msglayout` by passing down
the `submsgs` array instead:
```
const upb_msglayout *get_submsg_layout(
const upb_msglayout *const *submsgs,
const upb_msglayout_field *field) {
return submsgs[field->submsg_index]
}
```
This will pave the way for parsing extensions, as we can more easily
create an alternative `submsgs` array for extension fields without
extra overhead or waste.
Along the way several optimizations presented themselves that allow
a nice increase in performance:
1. Passing the parsed `wireval` by address instead of by value ended
up avoiding an expensive and useless stack copy (this is on Clang,
which was used for all measurements).
2. When field numbers are densely packed, we can find a field by number
with a single indexed lookup instead of linear search. At codegen
time we can compute the maximum field number that will allow such
an indexed lookup.
3. For fields that do require linear search, we can start the linear
search at the location where we found the previous field, taking
advantage of the fact that field numbers are generally increasing.
4. When the hasbit index is less than 32 (the common case) we can use
a less expensive code sequence to set it.
5. We check for the hasbit case before the oneof case, as optional
fields are more common than oneof fields.
Benchmark results indicate a 20% improvement in parse speed with a
small code size increase:
```
name old time/op new time/op delta
ArenaOneAlloc 21.3ns ± 0% 21.5ns ± 0% +0.96% (p=0.000 n=12+12)
ArenaInitialBlockOneAlloc 6.32ns ± 0% 6.32ns ± 0% +0.03% (p=0.000 n=12+10)
LoadDescriptor_Upb 53.5µs ± 1% 51.5µs ± 2% -3.70% (p=0.000 n=12+12)
LoadAdsDescriptor_Upb 2.78ms ± 2% 2.68ms ± 0% -3.57% (p=0.000 n=12+12)
LoadDescriptor_Proto2 240µs ± 0% 240µs ± 0% +0.12% (p=0.001 n=12+12)
LoadAdsDescriptor_Proto2 12.8ms ± 0% 12.7ms ± 0% -1.15% (p=0.000 n=12+10)
Parse_Upb_FileDesc<UseArena,Copy> 13.2µs ± 2% 10.7µs ± 0% -18.49% (p=0.000 n=10+12)
Parse_Upb_FileDesc<UseArena,Alias> 11.3µs ± 0% 9.6µs ± 0% -15.11% (p=0.000 n=12+11)
Parse_Upb_FileDesc<InitBlock,Copy> 12.7µs ± 0% 10.3µs ± 0% -19.00% (p=0.000 n=10+12)
Parse_Upb_FileDesc<InitBlock,Alias> 10.9µs ± 0% 9.2µs ± 0% -15.82% (p=0.000 n=12+12)
Parse_Proto2<FileDesc,NoArena,Copy> 29.4µs ± 0% 29.5µs ± 0% +0.61% (p=0.000 n=12+12)
Parse_Proto2<FileDesc,UseArena,Copy> 20.7µs ± 2% 20.6µs ± 2% ~ (p=0.260 n=12+11)
Parse_Proto2<FileDesc,InitBlock,Copy> 16.7µs ± 1% 16.7µs ± 0% -0.25% (p=0.036 n=12+10)
Parse_Proto2<FileDescSV,InitBlock,Alias> 16.5µs ± 0% 16.5µs ± 0% +0.20% (p=0.016 n=12+11)
SerializeDescriptor_Proto2 5.30µs ± 1% 5.36µs ± 1% +1.09% (p=0.000 n=12+11)
SerializeDescriptor_Upb 12.9µs ± 0% 13.0µs ± 0% +0.90% (p=0.000 n=12+11)
FILE SIZE VM SIZE
-------------- --------------
+1.5% +176 +1.6% +176 upb/decode.c
+1.8% +176 +1.9% +176 decode_msg
+0.4% +64 +0.4% +64 upb/def.c
+1.4% +64 +1.4% +64 _upb_symtab_addfile
+1.2% +48 +1.4% +48 upb/reflection.c
+15% +32 +18% +32 upb_msg_set
+2.9% +16 +3.1% +16 upb_msg_mutable
-9.3% -288 [ = ] 0 [Unmapped]
[ = ] 0 +0.2% +288 TOTAL
```
4 years ago
|
|
|
upb_msg *msg,
|
|
|
|
const upb_msglayout_sub *subs,
|
Optimized decoder and paved the way for parsing extensions.
The primary motivation for this change is to avoid referring to the
`upb_msglayout` object when we are trying to fetch the `upb_msglayout`
object for a sub-message. This will help pave the way for parsing
extensions. We also implement several optimizations so that we can
make this change without regressing performance.
Normally we compute the layout for a sub-message field like so:
```
const upb_msglayout *get_submsg_layout(
const upb_msglayout *layout,
const upb_msglayout_field *field) {
return layout->submsgs[field->submsg_index]
}
```
The reason for this indirection is to avoid storing a pointer directly
in `upb_msglayout_field`, as this would double its size (from 12 to 24
bytes on 64-bit architectures) which is wasteful as this pointer is
only needed for message typed fields.
However `get_submsg_layout` as written above does not work for
extensions, as they will not have entries in the message's
`layout->submsgs` array by nature, and we want to avoid creating
an entire fake `upb_msglayout` for each such extension since that
would also be wasteful.
This change removes the dependency on `upb_msglayout` by passing down
the `submsgs` array instead:
```
const upb_msglayout *get_submsg_layout(
const upb_msglayout *const *submsgs,
const upb_msglayout_field *field) {
return submsgs[field->submsg_index]
}
```
This will pave the way for parsing extensions, as we can more easily
create an alternative `submsgs` array for extension fields without
extra overhead or waste.
Along the way several optimizations presented themselves that allow
a nice increase in performance:
1. Passing the parsed `wireval` by address instead of by value ended
up avoiding an expensive and useless stack copy (this is on Clang,
which was used for all measurements).
2. When field numbers are densely packed, we can find a field by number
with a single indexed lookup instead of linear search. At codegen
time we can compute the maximum field number that will allow such
an indexed lookup.
3. For fields that do require linear search, we can start the linear
search at the location where we found the previous field, taking
advantage of the fact that field numbers are generally increasing.
4. When the hasbit index is less than 32 (the common case) we can use
a less expensive code sequence to set it.
5. We check for the hasbit case before the oneof case, as optional
fields are more common than oneof fields.
Benchmark results indicate a 20% improvement in parse speed with a
small code size increase:
```
name old time/op new time/op delta
ArenaOneAlloc 21.3ns ± 0% 21.5ns ± 0% +0.96% (p=0.000 n=12+12)
ArenaInitialBlockOneAlloc 6.32ns ± 0% 6.32ns ± 0% +0.03% (p=0.000 n=12+10)
LoadDescriptor_Upb 53.5µs ± 1% 51.5µs ± 2% -3.70% (p=0.000 n=12+12)
LoadAdsDescriptor_Upb 2.78ms ± 2% 2.68ms ± 0% -3.57% (p=0.000 n=12+12)
LoadDescriptor_Proto2 240µs ± 0% 240µs ± 0% +0.12% (p=0.001 n=12+12)
LoadAdsDescriptor_Proto2 12.8ms ± 0% 12.7ms ± 0% -1.15% (p=0.000 n=12+10)
Parse_Upb_FileDesc<UseArena,Copy> 13.2µs ± 2% 10.7µs ± 0% -18.49% (p=0.000 n=10+12)
Parse_Upb_FileDesc<UseArena,Alias> 11.3µs ± 0% 9.6µs ± 0% -15.11% (p=0.000 n=12+11)
Parse_Upb_FileDesc<InitBlock,Copy> 12.7µs ± 0% 10.3µs ± 0% -19.00% (p=0.000 n=10+12)
Parse_Upb_FileDesc<InitBlock,Alias> 10.9µs ± 0% 9.2µs ± 0% -15.82% (p=0.000 n=12+12)
Parse_Proto2<FileDesc,NoArena,Copy> 29.4µs ± 0% 29.5µs ± 0% +0.61% (p=0.000 n=12+12)
Parse_Proto2<FileDesc,UseArena,Copy> 20.7µs ± 2% 20.6µs ± 2% ~ (p=0.260 n=12+11)
Parse_Proto2<FileDesc,InitBlock,Copy> 16.7µs ± 1% 16.7µs ± 0% -0.25% (p=0.036 n=12+10)
Parse_Proto2<FileDescSV,InitBlock,Alias> 16.5µs ± 0% 16.5µs ± 0% +0.20% (p=0.016 n=12+11)
SerializeDescriptor_Proto2 5.30µs ± 1% 5.36µs ± 1% +1.09% (p=0.000 n=12+11)
SerializeDescriptor_Upb 12.9µs ± 0% 13.0µs ± 0% +0.90% (p=0.000 n=12+11)
FILE SIZE VM SIZE
-------------- --------------
+1.5% +176 +1.6% +176 upb/decode.c
+1.8% +176 +1.9% +176 decode_msg
+0.4% +64 +0.4% +64 upb/def.c
+1.4% +64 +1.4% +64 _upb_symtab_addfile
+1.2% +48 +1.4% +48 upb/reflection.c
+15% +32 +18% +32 upb_msg_set
+2.9% +16 +3.1% +16 upb_msg_mutable
-9.3% -288 [ = ] 0 [Unmapped]
[ = ] 0 +0.2% +288 TOTAL
```
4 years ago
|
|
|
const upb_msglayout_field *field, wireval *val,
|
|
|
|
int op) {
|
|
|
|
upb_array **arrp = UPB_PTR_AT(msg, field->offset, void);
|
|
|
|
upb_array *arr = *arrp;
|
|
|
|
void *mem;
|
|
|
|
|
|
|
|
if (arr) {
|
|
|
|
decode_reserve(d, arr, 1);
|
|
|
|
} else {
|
|
|
|
size_t lg2 = desctype_to_elem_size_lg2[field->descriptortype];
|
|
|
|
arr = _upb_array_new(&d->arena, 4, lg2);
|
|
|
|
if (!arr) return decode_err(d, kUpb_DecodeStatus_OutOfMemory);
|
|
|
|
*arrp = arr;
|
|
|
|
}
|
|
|
|
|
|
|
|
switch (op) {
|
|
|
|
case OP_SCALAR_LG2(0):
|
|
|
|
case OP_SCALAR_LG2(2):
|
|
|
|
case OP_SCALAR_LG2(3):
|
|
|
|
/* Append scalar value. */
|
|
|
|
mem = UPB_PTR_AT(_upb_array_ptr(arr), arr->len << op, void);
|
|
|
|
arr->len++;
|
Optimized decoder and paved the way for parsing extensions.
The primary motivation for this change is to avoid referring to the
`upb_msglayout` object when we are trying to fetch the `upb_msglayout`
object for a sub-message. This will help pave the way for parsing
extensions. We also implement several optimizations so that we can
make this change without regressing performance.
Normally we compute the layout for a sub-message field like so:
```
const upb_msglayout *get_submsg_layout(
const upb_msglayout *layout,
const upb_msglayout_field *field) {
return layout->submsgs[field->submsg_index]
}
```
The reason for this indirection is to avoid storing a pointer directly
in `upb_msglayout_field`, as this would double its size (from 12 to 24
bytes on 64-bit architectures) which is wasteful as this pointer is
only needed for message typed fields.
However `get_submsg_layout` as written above does not work for
extensions, as they will not have entries in the message's
`layout->submsgs` array by nature, and we want to avoid creating
an entire fake `upb_msglayout` for each such extension since that
would also be wasteful.
This change removes the dependency on `upb_msglayout` by passing down
the `submsgs` array instead:
```
const upb_msglayout *get_submsg_layout(
const upb_msglayout *const *submsgs,
const upb_msglayout_field *field) {
return submsgs[field->submsg_index]
}
```
This will pave the way for parsing extensions, as we can more easily
create an alternative `submsgs` array for extension fields without
extra overhead or waste.
Along the way several optimizations presented themselves that allow
a nice increase in performance:
1. Passing the parsed `wireval` by address instead of by value ended
up avoiding an expensive and useless stack copy (this is on Clang,
which was used for all measurements).
2. When field numbers are densely packed, we can find a field by number
with a single indexed lookup instead of linear search. At codegen
time we can compute the maximum field number that will allow such
an indexed lookup.
3. For fields that do require linear search, we can start the linear
search at the location where we found the previous field, taking
advantage of the fact that field numbers are generally increasing.
4. When the hasbit index is less than 32 (the common case) we can use
a less expensive code sequence to set it.
5. We check for the hasbit case before the oneof case, as optional
fields are more common than oneof fields.
Benchmark results indicate a 20% improvement in parse speed with a
small code size increase:
```
name old time/op new time/op delta
ArenaOneAlloc 21.3ns ± 0% 21.5ns ± 0% +0.96% (p=0.000 n=12+12)
ArenaInitialBlockOneAlloc 6.32ns ± 0% 6.32ns ± 0% +0.03% (p=0.000 n=12+10)
LoadDescriptor_Upb 53.5µs ± 1% 51.5µs ± 2% -3.70% (p=0.000 n=12+12)
LoadAdsDescriptor_Upb 2.78ms ± 2% 2.68ms ± 0% -3.57% (p=0.000 n=12+12)
LoadDescriptor_Proto2 240µs ± 0% 240µs ± 0% +0.12% (p=0.001 n=12+12)
LoadAdsDescriptor_Proto2 12.8ms ± 0% 12.7ms ± 0% -1.15% (p=0.000 n=12+10)
Parse_Upb_FileDesc<UseArena,Copy> 13.2µs ± 2% 10.7µs ± 0% -18.49% (p=0.000 n=10+12)
Parse_Upb_FileDesc<UseArena,Alias> 11.3µs ± 0% 9.6µs ± 0% -15.11% (p=0.000 n=12+11)
Parse_Upb_FileDesc<InitBlock,Copy> 12.7µs ± 0% 10.3µs ± 0% -19.00% (p=0.000 n=10+12)
Parse_Upb_FileDesc<InitBlock,Alias> 10.9µs ± 0% 9.2µs ± 0% -15.82% (p=0.000 n=12+12)
Parse_Proto2<FileDesc,NoArena,Copy> 29.4µs ± 0% 29.5µs ± 0% +0.61% (p=0.000 n=12+12)
Parse_Proto2<FileDesc,UseArena,Copy> 20.7µs ± 2% 20.6µs ± 2% ~ (p=0.260 n=12+11)
Parse_Proto2<FileDesc,InitBlock,Copy> 16.7µs ± 1% 16.7µs ± 0% -0.25% (p=0.036 n=12+10)
Parse_Proto2<FileDescSV,InitBlock,Alias> 16.5µs ± 0% 16.5µs ± 0% +0.20% (p=0.016 n=12+11)
SerializeDescriptor_Proto2 5.30µs ± 1% 5.36µs ± 1% +1.09% (p=0.000 n=12+11)
SerializeDescriptor_Upb 12.9µs ± 0% 13.0µs ± 0% +0.90% (p=0.000 n=12+11)
FILE SIZE VM SIZE
-------------- --------------
+1.5% +176 +1.6% +176 upb/decode.c
+1.8% +176 +1.9% +176 decode_msg
+0.4% +64 +0.4% +64 upb/def.c
+1.4% +64 +1.4% +64 _upb_symtab_addfile
+1.2% +48 +1.4% +48 upb/reflection.c
+15% +32 +18% +32 upb_msg_set
+2.9% +16 +3.1% +16 upb_msg_mutable
-9.3% -288 [ = ] 0 [Unmapped]
[ = ] 0 +0.2% +288 TOTAL
```
4 years ago
|
|
|
memcpy(mem, val, 1 << op);
|
|
|
|
return ptr;
|
|
|
|
case OP_STRING:
|
Optimized decoder and paved the way for parsing extensions.
The primary motivation for this change is to avoid referring to the
`upb_msglayout` object when we are trying to fetch the `upb_msglayout`
object for a sub-message. This will help pave the way for parsing
extensions. We also implement several optimizations so that we can
make this change without regressing performance.
Normally we compute the layout for a sub-message field like so:
```
const upb_msglayout *get_submsg_layout(
const upb_msglayout *layout,
const upb_msglayout_field *field) {
return layout->submsgs[field->submsg_index]
}
```
The reason for this indirection is to avoid storing a pointer directly
in `upb_msglayout_field`, as this would double its size (from 12 to 24
bytes on 64-bit architectures) which is wasteful as this pointer is
only needed for message typed fields.
However `get_submsg_layout` as written above does not work for
extensions, as they will not have entries in the message's
`layout->submsgs` array by nature, and we want to avoid creating
an entire fake `upb_msglayout` for each such extension since that
would also be wasteful.
This change removes the dependency on `upb_msglayout` by passing down
the `submsgs` array instead:
```
const upb_msglayout *get_submsg_layout(
const upb_msglayout *const *submsgs,
const upb_msglayout_field *field) {
return submsgs[field->submsg_index]
}
```
This will pave the way for parsing extensions, as we can more easily
create an alternative `submsgs` array for extension fields without
extra overhead or waste.
Along the way several optimizations presented themselves that allow
a nice increase in performance:
1. Passing the parsed `wireval` by address instead of by value ended
up avoiding an expensive and useless stack copy (this is on Clang,
which was used for all measurements).
2. When field numbers are densely packed, we can find a field by number
with a single indexed lookup instead of linear search. At codegen
time we can compute the maximum field number that will allow such
an indexed lookup.
3. For fields that do require linear search, we can start the linear
search at the location where we found the previous field, taking
advantage of the fact that field numbers are generally increasing.
4. When the hasbit index is less than 32 (the common case) we can use
a less expensive code sequence to set it.
5. We check for the hasbit case before the oneof case, as optional
fields are more common than oneof fields.
Benchmark results indicate a 20% improvement in parse speed with a
small code size increase:
```
name old time/op new time/op delta
ArenaOneAlloc 21.3ns ± 0% 21.5ns ± 0% +0.96% (p=0.000 n=12+12)
ArenaInitialBlockOneAlloc 6.32ns ± 0% 6.32ns ± 0% +0.03% (p=0.000 n=12+10)
LoadDescriptor_Upb 53.5µs ± 1% 51.5µs ± 2% -3.70% (p=0.000 n=12+12)
LoadAdsDescriptor_Upb 2.78ms ± 2% 2.68ms ± 0% -3.57% (p=0.000 n=12+12)
LoadDescriptor_Proto2 240µs ± 0% 240µs ± 0% +0.12% (p=0.001 n=12+12)
LoadAdsDescriptor_Proto2 12.8ms ± 0% 12.7ms ± 0% -1.15% (p=0.000 n=12+10)
Parse_Upb_FileDesc<UseArena,Copy> 13.2µs ± 2% 10.7µs ± 0% -18.49% (p=0.000 n=10+12)
Parse_Upb_FileDesc<UseArena,Alias> 11.3µs ± 0% 9.6µs ± 0% -15.11% (p=0.000 n=12+11)
Parse_Upb_FileDesc<InitBlock,Copy> 12.7µs ± 0% 10.3µs ± 0% -19.00% (p=0.000 n=10+12)
Parse_Upb_FileDesc<InitBlock,Alias> 10.9µs ± 0% 9.2µs ± 0% -15.82% (p=0.000 n=12+12)
Parse_Proto2<FileDesc,NoArena,Copy> 29.4µs ± 0% 29.5µs ± 0% +0.61% (p=0.000 n=12+12)
Parse_Proto2<FileDesc,UseArena,Copy> 20.7µs ± 2% 20.6µs ± 2% ~ (p=0.260 n=12+11)
Parse_Proto2<FileDesc,InitBlock,Copy> 16.7µs ± 1% 16.7µs ± 0% -0.25% (p=0.036 n=12+10)
Parse_Proto2<FileDescSV,InitBlock,Alias> 16.5µs ± 0% 16.5µs ± 0% +0.20% (p=0.016 n=12+11)
SerializeDescriptor_Proto2 5.30µs ± 1% 5.36µs ± 1% +1.09% (p=0.000 n=12+11)
SerializeDescriptor_Upb 12.9µs ± 0% 13.0µs ± 0% +0.90% (p=0.000 n=12+11)
FILE SIZE VM SIZE
-------------- --------------
+1.5% +176 +1.6% +176 upb/decode.c
+1.8% +176 +1.9% +176 decode_msg
+0.4% +64 +0.4% +64 upb/def.c
+1.4% +64 +1.4% +64 _upb_symtab_addfile
+1.2% +48 +1.4% +48 upb/reflection.c
+15% +32 +18% +32 upb_msg_set
+2.9% +16 +3.1% +16 upb_msg_mutable
-9.3% -288 [ = ] 0 [Unmapped]
[ = ] 0 +0.2% +288 TOTAL
```
4 years ago
|
|
|
decode_verifyutf8(d, ptr, val->size);
|
|
|
|
/* Fallthrough. */
|
|
|
|
case OP_BYTES: {
|
|
|
|
/* Append bytes. */
|
|
|
|
upb_strview *str = (upb_strview*)_upb_array_ptr(arr) + arr->len;
|
|
|
|
arr->len++;
|
Optimized decoder and paved the way for parsing extensions.
The primary motivation for this change is to avoid referring to the
`upb_msglayout` object when we are trying to fetch the `upb_msglayout`
object for a sub-message. This will help pave the way for parsing
extensions. We also implement several optimizations so that we can
make this change without regressing performance.
Normally we compute the layout for a sub-message field like so:
```
const upb_msglayout *get_submsg_layout(
const upb_msglayout *layout,
const upb_msglayout_field *field) {
return layout->submsgs[field->submsg_index]
}
```
The reason for this indirection is to avoid storing a pointer directly
in `upb_msglayout_field`, as this would double its size (from 12 to 24
bytes on 64-bit architectures) which is wasteful as this pointer is
only needed for message typed fields.
However `get_submsg_layout` as written above does not work for
extensions, as they will not have entries in the message's
`layout->submsgs` array by nature, and we want to avoid creating
an entire fake `upb_msglayout` for each such extension since that
would also be wasteful.
This change removes the dependency on `upb_msglayout` by passing down
the `submsgs` array instead:
```
const upb_msglayout *get_submsg_layout(
const upb_msglayout *const *submsgs,
const upb_msglayout_field *field) {
return submsgs[field->submsg_index]
}
```
This will pave the way for parsing extensions, as we can more easily
create an alternative `submsgs` array for extension fields without
extra overhead or waste.
Along the way several optimizations presented themselves that allow
a nice increase in performance:
1. Passing the parsed `wireval` by address instead of by value ended
up avoiding an expensive and useless stack copy (this is on Clang,
which was used for all measurements).
2. When field numbers are densely packed, we can find a field by number
with a single indexed lookup instead of linear search. At codegen
time we can compute the maximum field number that will allow such
an indexed lookup.
3. For fields that do require linear search, we can start the linear
search at the location where we found the previous field, taking
advantage of the fact that field numbers are generally increasing.
4. When the hasbit index is less than 32 (the common case) we can use
a less expensive code sequence to set it.
5. We check for the hasbit case before the oneof case, as optional
fields are more common than oneof fields.
Benchmark results indicate a 20% improvement in parse speed with a
small code size increase:
```
name old time/op new time/op delta
ArenaOneAlloc 21.3ns ± 0% 21.5ns ± 0% +0.96% (p=0.000 n=12+12)
ArenaInitialBlockOneAlloc 6.32ns ± 0% 6.32ns ± 0% +0.03% (p=0.000 n=12+10)
LoadDescriptor_Upb 53.5µs ± 1% 51.5µs ± 2% -3.70% (p=0.000 n=12+12)
LoadAdsDescriptor_Upb 2.78ms ± 2% 2.68ms ± 0% -3.57% (p=0.000 n=12+12)
LoadDescriptor_Proto2 240µs ± 0% 240µs ± 0% +0.12% (p=0.001 n=12+12)
LoadAdsDescriptor_Proto2 12.8ms ± 0% 12.7ms ± 0% -1.15% (p=0.000 n=12+10)
Parse_Upb_FileDesc<UseArena,Copy> 13.2µs ± 2% 10.7µs ± 0% -18.49% (p=0.000 n=10+12)
Parse_Upb_FileDesc<UseArena,Alias> 11.3µs ± 0% 9.6µs ± 0% -15.11% (p=0.000 n=12+11)
Parse_Upb_FileDesc<InitBlock,Copy> 12.7µs ± 0% 10.3µs ± 0% -19.00% (p=0.000 n=10+12)
Parse_Upb_FileDesc<InitBlock,Alias> 10.9µs ± 0% 9.2µs ± 0% -15.82% (p=0.000 n=12+12)
Parse_Proto2<FileDesc,NoArena,Copy> 29.4µs ± 0% 29.5µs ± 0% +0.61% (p=0.000 n=12+12)
Parse_Proto2<FileDesc,UseArena,Copy> 20.7µs ± 2% 20.6µs ± 2% ~ (p=0.260 n=12+11)
Parse_Proto2<FileDesc,InitBlock,Copy> 16.7µs ± 1% 16.7µs ± 0% -0.25% (p=0.036 n=12+10)
Parse_Proto2<FileDescSV,InitBlock,Alias> 16.5µs ± 0% 16.5µs ± 0% +0.20% (p=0.016 n=12+11)
SerializeDescriptor_Proto2 5.30µs ± 1% 5.36µs ± 1% +1.09% (p=0.000 n=12+11)
SerializeDescriptor_Upb 12.9µs ± 0% 13.0µs ± 0% +0.90% (p=0.000 n=12+11)
FILE SIZE VM SIZE
-------------- --------------
+1.5% +176 +1.6% +176 upb/decode.c
+1.8% +176 +1.9% +176 decode_msg
+0.4% +64 +0.4% +64 upb/def.c
+1.4% +64 +1.4% +64 _upb_symtab_addfile
+1.2% +48 +1.4% +48 upb/reflection.c
+15% +32 +18% +32 upb_msg_set
+2.9% +16 +3.1% +16 upb_msg_mutable
-9.3% -288 [ = ] 0 [Unmapped]
[ = ] 0 +0.2% +288 TOTAL
```
4 years ago
|
|
|
return decode_readstr(d, ptr, val->size, str);
|
|
|
|
}
|
|
|
|
case OP_SUBMSG: {
|
|
|
|
/* Append submessage / group. */
|
|
|
|
upb_msg *submsg = decode_newsubmsg(d, subs, field);
|
|
|
|
*UPB_PTR_AT(_upb_array_ptr(arr), arr->len * sizeof(void *), upb_msg *) =
|
|
|
|
submsg;
|
|
|
|
arr->len++;
|
|
|
|
if (UPB_UNLIKELY(field->descriptortype == UPB_DTYPE_GROUP)) {
|
|
|
|
return decode_togroup(d, ptr, submsg, subs, field);
|
|
|
|
} else {
|
|
|
|
return decode_tosubmsg(d, ptr, submsg, subs, field, val->size);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
case OP_FIXPCK_LG2(2):
|
|
|
|
case OP_FIXPCK_LG2(3):
|
|
|
|
return decode_fixed_packed(d, ptr, arr, val, field,
|
|
|
|
op - OP_FIXPCK_LG2(0));
|
|
|
|
case OP_VARPCK_LG2(0):
|
|
|
|
case OP_VARPCK_LG2(2):
|
|
|
|
case OP_VARPCK_LG2(3):
|
|
|
|
return decode_varint_packed(d, ptr, arr, val, field,
|
|
|
|
op - OP_VARPCK_LG2(0));
|
|
|
|
case OP_ENUM:
|
|
|
|
return decode_enum_toarray(d, ptr, msg, arr, subs, field, val);
|
|
|
|
case OP_PACKED_ENUM:
|
|
|
|
return decode_enum_packed(d, ptr, msg, arr, subs, field, val);
|
|
|
|
default:
|
|
|
|
UPB_UNREACHABLE();
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
static const char *decode_tomap(upb_decstate *d, const char *ptr, upb_msg *msg,
|
|
|
|
const upb_msglayout_sub *subs,
|
|
|
|
const upb_msglayout_field *field,
|
|
|
|
wireval *val) {
|
|
|
|
upb_map **map_p = UPB_PTR_AT(msg, field->offset, upb_map *);
|
|
|
|
upb_map *map = *map_p;
|
|
|
|
upb_map_entry ent;
|
|
|
|
const upb_msglayout *entry = subs[field->submsg_index].submsg;
|
|
|
|
|
|
|
|
if (!map) {
|
|
|
|
/* Lazily create map. */
|
|
|
|
const upb_msglayout_field *key_field = &entry->fields[0];
|
|
|
|
const upb_msglayout_field *val_field = &entry->fields[1];
|
|
|
|
char key_size = desctype_to_mapsize[key_field->descriptortype];
|
|
|
|
char val_size = desctype_to_mapsize[val_field->descriptortype];
|
|
|
|
UPB_ASSERT(key_field->offset == 0);
|
|
|
|
UPB_ASSERT(val_field->offset == sizeof(upb_strview));
|
|
|
|
map = _upb_map_new(&d->arena, key_size, val_size);
|
|
|
|
*map_p = map;
|
|
|
|
}
|
|
|
|
|
|
|
|
/* Parse map entry. */
|
|
|
|
memset(&ent, 0, sizeof(ent));
|
|
|
|
|
|
|
|
if (entry->fields[1].descriptortype == UPB_DESCRIPTOR_TYPE_MESSAGE ||
|
|
|
|
entry->fields[1].descriptortype == UPB_DESCRIPTOR_TYPE_GROUP) {
|
|
|
|
/* Create proactively to handle the case where it doesn't appear. */
|
|
|
|
ent.v.val = upb_value_ptr(_upb_msg_new(entry->subs[0].submsg, &d->arena));
|
|
|
|
}
|
|
|
|
|
|
|
|
ptr = decode_tosubmsg(d, ptr, &ent.k, subs, field, val->size);
|
|
|
|
_upb_map_set(map, &ent.k, map->key_size, &ent.v, map->val_size, &d->arena);
|
|
|
|
return ptr;
|
|
|
|
}
|
|
|
|
|
|
|
|
static const char *decode_tomsg(upb_decstate *d, const char *ptr, upb_msg *msg,
|
|
|
|
const upb_msglayout_sub *subs,
|
Optimized decoder and paved the way for parsing extensions.
The primary motivation for this change is to avoid referring to the
`upb_msglayout` object when we are trying to fetch the `upb_msglayout`
object for a sub-message. This will help pave the way for parsing
extensions. We also implement several optimizations so that we can
make this change without regressing performance.
Normally we compute the layout for a sub-message field like so:
```
const upb_msglayout *get_submsg_layout(
const upb_msglayout *layout,
const upb_msglayout_field *field) {
return layout->submsgs[field->submsg_index]
}
```
The reason for this indirection is to avoid storing a pointer directly
in `upb_msglayout_field`, as this would double its size (from 12 to 24
bytes on 64-bit architectures) which is wasteful as this pointer is
only needed for message typed fields.
However `get_submsg_layout` as written above does not work for
extensions, as they will not have entries in the message's
`layout->submsgs` array by nature, and we want to avoid creating
an entire fake `upb_msglayout` for each such extension since that
would also be wasteful.
This change removes the dependency on `upb_msglayout` by passing down
the `submsgs` array instead:
```
const upb_msglayout *get_submsg_layout(
const upb_msglayout *const *submsgs,
const upb_msglayout_field *field) {
return submsgs[field->submsg_index]
}
```
This will pave the way for parsing extensions, as we can more easily
create an alternative `submsgs` array for extension fields without
extra overhead or waste.
Along the way several optimizations presented themselves that allow
a nice increase in performance:
1. Passing the parsed `wireval` by address instead of by value ended
up avoiding an expensive and useless stack copy (this is on Clang,
which was used for all measurements).
2. When field numbers are densely packed, we can find a field by number
with a single indexed lookup instead of linear search. At codegen
time we can compute the maximum field number that will allow such
an indexed lookup.
3. For fields that do require linear search, we can start the linear
search at the location where we found the previous field, taking
advantage of the fact that field numbers are generally increasing.
4. When the hasbit index is less than 32 (the common case) we can use
a less expensive code sequence to set it.
5. We check for the hasbit case before the oneof case, as optional
fields are more common than oneof fields.
Benchmark results indicate a 20% improvement in parse speed with a
small code size increase:
```
name old time/op new time/op delta
ArenaOneAlloc 21.3ns ± 0% 21.5ns ± 0% +0.96% (p=0.000 n=12+12)
ArenaInitialBlockOneAlloc 6.32ns ± 0% 6.32ns ± 0% +0.03% (p=0.000 n=12+10)
LoadDescriptor_Upb 53.5µs ± 1% 51.5µs ± 2% -3.70% (p=0.000 n=12+12)
LoadAdsDescriptor_Upb 2.78ms ± 2% 2.68ms ± 0% -3.57% (p=0.000 n=12+12)
LoadDescriptor_Proto2 240µs ± 0% 240µs ± 0% +0.12% (p=0.001 n=12+12)
LoadAdsDescriptor_Proto2 12.8ms ± 0% 12.7ms ± 0% -1.15% (p=0.000 n=12+10)
Parse_Upb_FileDesc<UseArena,Copy> 13.2µs ± 2% 10.7µs ± 0% -18.49% (p=0.000 n=10+12)
Parse_Upb_FileDesc<UseArena,Alias> 11.3µs ± 0% 9.6µs ± 0% -15.11% (p=0.000 n=12+11)
Parse_Upb_FileDesc<InitBlock,Copy> 12.7µs ± 0% 10.3µs ± 0% -19.00% (p=0.000 n=10+12)
Parse_Upb_FileDesc<InitBlock,Alias> 10.9µs ± 0% 9.2µs ± 0% -15.82% (p=0.000 n=12+12)
Parse_Proto2<FileDesc,NoArena,Copy> 29.4µs ± 0% 29.5µs ± 0% +0.61% (p=0.000 n=12+12)
Parse_Proto2<FileDesc,UseArena,Copy> 20.7µs ± 2% 20.6µs ± 2% ~ (p=0.260 n=12+11)
Parse_Proto2<FileDesc,InitBlock,Copy> 16.7µs ± 1% 16.7µs ± 0% -0.25% (p=0.036 n=12+10)
Parse_Proto2<FileDescSV,InitBlock,Alias> 16.5µs ± 0% 16.5µs ± 0% +0.20% (p=0.016 n=12+11)
SerializeDescriptor_Proto2 5.30µs ± 1% 5.36µs ± 1% +1.09% (p=0.000 n=12+11)
SerializeDescriptor_Upb 12.9µs ± 0% 13.0µs ± 0% +0.90% (p=0.000 n=12+11)
FILE SIZE VM SIZE
-------------- --------------
+1.5% +176 +1.6% +176 upb/decode.c
+1.8% +176 +1.9% +176 decode_msg
+0.4% +64 +0.4% +64 upb/def.c
+1.4% +64 +1.4% +64 _upb_symtab_addfile
+1.2% +48 +1.4% +48 upb/reflection.c
+15% +32 +18% +32 upb_msg_set
+2.9% +16 +3.1% +16 upb_msg_mutable
-9.3% -288 [ = ] 0 [Unmapped]
[ = ] 0 +0.2% +288 TOTAL
```
4 years ago
|
|
|
const upb_msglayout_field *field, wireval *val,
|
|
|
|
int op) {
|
|
|
|
void *mem = UPB_PTR_AT(msg, field->offset, void);
|
|
|
|
int type = field->descriptortype;
|
|
|
|
|
|
|
|
if (UPB_UNLIKELY(op == OP_ENUM) &&
|
|
|
|
!decode_checkenum(d, ptr, msg, subs[field->submsg_index].subenum, field,
|
|
|
|
val)) {
|
|
|
|
return ptr;
|
|
|
|
}
|
|
|
|
|
|
|
|
/* Set presence if necessary. */
|
Optimized decoder and paved the way for parsing extensions.
The primary motivation for this change is to avoid referring to the
`upb_msglayout` object when we are trying to fetch the `upb_msglayout`
object for a sub-message. This will help pave the way for parsing
extensions. We also implement several optimizations so that we can
make this change without regressing performance.
Normally we compute the layout for a sub-message field like so:
```
const upb_msglayout *get_submsg_layout(
const upb_msglayout *layout,
const upb_msglayout_field *field) {
return layout->submsgs[field->submsg_index]
}
```
The reason for this indirection is to avoid storing a pointer directly
in `upb_msglayout_field`, as this would double its size (from 12 to 24
bytes on 64-bit architectures) which is wasteful as this pointer is
only needed for message typed fields.
However `get_submsg_layout` as written above does not work for
extensions, as they will not have entries in the message's
`layout->submsgs` array by nature, and we want to avoid creating
an entire fake `upb_msglayout` for each such extension since that
would also be wasteful.
This change removes the dependency on `upb_msglayout` by passing down
the `submsgs` array instead:
```
const upb_msglayout *get_submsg_layout(
const upb_msglayout *const *submsgs,
const upb_msglayout_field *field) {
return submsgs[field->submsg_index]
}
```
This will pave the way for parsing extensions, as we can more easily
create an alternative `submsgs` array for extension fields without
extra overhead or waste.
Along the way several optimizations presented themselves that allow
a nice increase in performance:
1. Passing the parsed `wireval` by address instead of by value ended
up avoiding an expensive and useless stack copy (this is on Clang,
which was used for all measurements).
2. When field numbers are densely packed, we can find a field by number
with a single indexed lookup instead of linear search. At codegen
time we can compute the maximum field number that will allow such
an indexed lookup.
3. For fields that do require linear search, we can start the linear
search at the location where we found the previous field, taking
advantage of the fact that field numbers are generally increasing.
4. When the hasbit index is less than 32 (the common case) we can use
a less expensive code sequence to set it.
5. We check for the hasbit case before the oneof case, as optional
fields are more common than oneof fields.
Benchmark results indicate a 20% improvement in parse speed with a
small code size increase:
```
name old time/op new time/op delta
ArenaOneAlloc 21.3ns ± 0% 21.5ns ± 0% +0.96% (p=0.000 n=12+12)
ArenaInitialBlockOneAlloc 6.32ns ± 0% 6.32ns ± 0% +0.03% (p=0.000 n=12+10)
LoadDescriptor_Upb 53.5µs ± 1% 51.5µs ± 2% -3.70% (p=0.000 n=12+12)
LoadAdsDescriptor_Upb 2.78ms ± 2% 2.68ms ± 0% -3.57% (p=0.000 n=12+12)
LoadDescriptor_Proto2 240µs ± 0% 240µs ± 0% +0.12% (p=0.001 n=12+12)
LoadAdsDescriptor_Proto2 12.8ms ± 0% 12.7ms ± 0% -1.15% (p=0.000 n=12+10)
Parse_Upb_FileDesc<UseArena,Copy> 13.2µs ± 2% 10.7µs ± 0% -18.49% (p=0.000 n=10+12)
Parse_Upb_FileDesc<UseArena,Alias> 11.3µs ± 0% 9.6µs ± 0% -15.11% (p=0.000 n=12+11)
Parse_Upb_FileDesc<InitBlock,Copy> 12.7µs ± 0% 10.3µs ± 0% -19.00% (p=0.000 n=10+12)
Parse_Upb_FileDesc<InitBlock,Alias> 10.9µs ± 0% 9.2µs ± 0% -15.82% (p=0.000 n=12+12)
Parse_Proto2<FileDesc,NoArena,Copy> 29.4µs ± 0% 29.5µs ± 0% +0.61% (p=0.000 n=12+12)
Parse_Proto2<FileDesc,UseArena,Copy> 20.7µs ± 2% 20.6µs ± 2% ~ (p=0.260 n=12+11)
Parse_Proto2<FileDesc,InitBlock,Copy> 16.7µs ± 1% 16.7µs ± 0% -0.25% (p=0.036 n=12+10)
Parse_Proto2<FileDescSV,InitBlock,Alias> 16.5µs ± 0% 16.5µs ± 0% +0.20% (p=0.016 n=12+11)
SerializeDescriptor_Proto2 5.30µs ± 1% 5.36µs ± 1% +1.09% (p=0.000 n=12+11)
SerializeDescriptor_Upb 12.9µs ± 0% 13.0µs ± 0% +0.90% (p=0.000 n=12+11)
FILE SIZE VM SIZE
-------------- --------------
+1.5% +176 +1.6% +176 upb/decode.c
+1.8% +176 +1.9% +176 decode_msg
+0.4% +64 +0.4% +64 upb/def.c
+1.4% +64 +1.4% +64 _upb_symtab_addfile
+1.2% +48 +1.4% +48 upb/reflection.c
+15% +32 +18% +32 upb_msg_set
+2.9% +16 +3.1% +16 upb_msg_mutable
-9.3% -288 [ = ] 0 [Unmapped]
[ = ] 0 +0.2% +288 TOTAL
```
4 years ago
|
|
|
if (field->presence > 0) {
|
|
|
|
_upb_sethas_field(msg, field);
|
|
|
|
} else if (field->presence < 0) {
|
|
|
|
/* Oneof case */
|
|
|
|
uint32_t *oneof_case = _upb_oneofcase_field(msg, field);
|
|
|
|
if (op == OP_SUBMSG && *oneof_case != field->number) {
|
|
|
|
memset(mem, 0, sizeof(void*));
|
|
|
|
}
|
|
|
|
*oneof_case = field->number;
|
|
|
|
}
|
|
|
|
|
|
|
|
/* Store into message. */
|
|
|
|
switch (op) {
|
|
|
|
case OP_SUBMSG: {
|
|
|
|
upb_msg **submsgp = mem;
|
|
|
|
upb_msg *submsg = *submsgp;
|
|
|
|
if (!submsg) {
|
|
|
|
submsg = decode_newsubmsg(d, subs, field);
|
|
|
|
*submsgp = submsg;
|
|
|
|
}
|
|
|
|
if (UPB_UNLIKELY(type == UPB_DTYPE_GROUP)) {
|
|
|
|
ptr = decode_togroup(d, ptr, submsg, subs, field);
|
|
|
|
} else {
|
|
|
|
ptr = decode_tosubmsg(d, ptr, submsg, subs, field, val->size);
|
|
|
|
}
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
case OP_STRING:
|
Optimized decoder and paved the way for parsing extensions.
The primary motivation for this change is to avoid referring to the
`upb_msglayout` object when we are trying to fetch the `upb_msglayout`
object for a sub-message. This will help pave the way for parsing
extensions. We also implement several optimizations so that we can
make this change without regressing performance.
Normally we compute the layout for a sub-message field like so:
```
const upb_msglayout *get_submsg_layout(
const upb_msglayout *layout,
const upb_msglayout_field *field) {
return layout->submsgs[field->submsg_index]
}
```
The reason for this indirection is to avoid storing a pointer directly
in `upb_msglayout_field`, as this would double its size (from 12 to 24
bytes on 64-bit architectures) which is wasteful as this pointer is
only needed for message typed fields.
However `get_submsg_layout` as written above does not work for
extensions, as they will not have entries in the message's
`layout->submsgs` array by nature, and we want to avoid creating
an entire fake `upb_msglayout` for each such extension since that
would also be wasteful.
This change removes the dependency on `upb_msglayout` by passing down
the `submsgs` array instead:
```
const upb_msglayout *get_submsg_layout(
const upb_msglayout *const *submsgs,
const upb_msglayout_field *field) {
return submsgs[field->submsg_index]
}
```
This will pave the way for parsing extensions, as we can more easily
create an alternative `submsgs` array for extension fields without
extra overhead or waste.
Along the way several optimizations presented themselves that allow
a nice increase in performance:
1. Passing the parsed `wireval` by address instead of by value ended
up avoiding an expensive and useless stack copy (this is on Clang,
which was used for all measurements).
2. When field numbers are densely packed, we can find a field by number
with a single indexed lookup instead of linear search. At codegen
time we can compute the maximum field number that will allow such
an indexed lookup.
3. For fields that do require linear search, we can start the linear
search at the location where we found the previous field, taking
advantage of the fact that field numbers are generally increasing.
4. When the hasbit index is less than 32 (the common case) we can use
a less expensive code sequence to set it.
5. We check for the hasbit case before the oneof case, as optional
fields are more common than oneof fields.
Benchmark results indicate a 20% improvement in parse speed with a
small code size increase:
```
name old time/op new time/op delta
ArenaOneAlloc 21.3ns ± 0% 21.5ns ± 0% +0.96% (p=0.000 n=12+12)
ArenaInitialBlockOneAlloc 6.32ns ± 0% 6.32ns ± 0% +0.03% (p=0.000 n=12+10)
LoadDescriptor_Upb 53.5µs ± 1% 51.5µs ± 2% -3.70% (p=0.000 n=12+12)
LoadAdsDescriptor_Upb 2.78ms ± 2% 2.68ms ± 0% -3.57% (p=0.000 n=12+12)
LoadDescriptor_Proto2 240µs ± 0% 240µs ± 0% +0.12% (p=0.001 n=12+12)
LoadAdsDescriptor_Proto2 12.8ms ± 0% 12.7ms ± 0% -1.15% (p=0.000 n=12+10)
Parse_Upb_FileDesc<UseArena,Copy> 13.2µs ± 2% 10.7µs ± 0% -18.49% (p=0.000 n=10+12)
Parse_Upb_FileDesc<UseArena,Alias> 11.3µs ± 0% 9.6µs ± 0% -15.11% (p=0.000 n=12+11)
Parse_Upb_FileDesc<InitBlock,Copy> 12.7µs ± 0% 10.3µs ± 0% -19.00% (p=0.000 n=10+12)
Parse_Upb_FileDesc<InitBlock,Alias> 10.9µs ± 0% 9.2µs ± 0% -15.82% (p=0.000 n=12+12)
Parse_Proto2<FileDesc,NoArena,Copy> 29.4µs ± 0% 29.5µs ± 0% +0.61% (p=0.000 n=12+12)
Parse_Proto2<FileDesc,UseArena,Copy> 20.7µs ± 2% 20.6µs ± 2% ~ (p=0.260 n=12+11)
Parse_Proto2<FileDesc,InitBlock,Copy> 16.7µs ± 1% 16.7µs ± 0% -0.25% (p=0.036 n=12+10)
Parse_Proto2<FileDescSV,InitBlock,Alias> 16.5µs ± 0% 16.5µs ± 0% +0.20% (p=0.016 n=12+11)
SerializeDescriptor_Proto2 5.30µs ± 1% 5.36µs ± 1% +1.09% (p=0.000 n=12+11)
SerializeDescriptor_Upb 12.9µs ± 0% 13.0µs ± 0% +0.90% (p=0.000 n=12+11)
FILE SIZE VM SIZE
-------------- --------------
+1.5% +176 +1.6% +176 upb/decode.c
+1.8% +176 +1.9% +176 decode_msg
+0.4% +64 +0.4% +64 upb/def.c
+1.4% +64 +1.4% +64 _upb_symtab_addfile
+1.2% +48 +1.4% +48 upb/reflection.c
+15% +32 +18% +32 upb_msg_set
+2.9% +16 +3.1% +16 upb_msg_mutable
-9.3% -288 [ = ] 0 [Unmapped]
[ = ] 0 +0.2% +288 TOTAL
```
4 years ago
|
|
|
decode_verifyutf8(d, ptr, val->size);
|
|
|
|
/* Fallthrough. */
|
|
|
|
case OP_BYTES:
|
Optimized decoder and paved the way for parsing extensions.
The primary motivation for this change is to avoid referring to the
`upb_msglayout` object when we are trying to fetch the `upb_msglayout`
object for a sub-message. This will help pave the way for parsing
extensions. We also implement several optimizations so that we can
make this change without regressing performance.
Normally we compute the layout for a sub-message field like so:
```
const upb_msglayout *get_submsg_layout(
const upb_msglayout *layout,
const upb_msglayout_field *field) {
return layout->submsgs[field->submsg_index]
}
```
The reason for this indirection is to avoid storing a pointer directly
in `upb_msglayout_field`, as this would double its size (from 12 to 24
bytes on 64-bit architectures) which is wasteful as this pointer is
only needed for message typed fields.
However `get_submsg_layout` as written above does not work for
extensions, as they will not have entries in the message's
`layout->submsgs` array by nature, and we want to avoid creating
an entire fake `upb_msglayout` for each such extension since that
would also be wasteful.
This change removes the dependency on `upb_msglayout` by passing down
the `submsgs` array instead:
```
const upb_msglayout *get_submsg_layout(
const upb_msglayout *const *submsgs,
const upb_msglayout_field *field) {
return submsgs[field->submsg_index]
}
```
This will pave the way for parsing extensions, as we can more easily
create an alternative `submsgs` array for extension fields without
extra overhead or waste.
Along the way several optimizations presented themselves that allow
a nice increase in performance:
1. Passing the parsed `wireval` by address instead of by value ended
up avoiding an expensive and useless stack copy (this is on Clang,
which was used for all measurements).
2. When field numbers are densely packed, we can find a field by number
with a single indexed lookup instead of linear search. At codegen
time we can compute the maximum field number that will allow such
an indexed lookup.
3. For fields that do require linear search, we can start the linear
search at the location where we found the previous field, taking
advantage of the fact that field numbers are generally increasing.
4. When the hasbit index is less than 32 (the common case) we can use
a less expensive code sequence to set it.
5. We check for the hasbit case before the oneof case, as optional
fields are more common than oneof fields.
Benchmark results indicate a 20% improvement in parse speed with a
small code size increase:
```
name old time/op new time/op delta
ArenaOneAlloc 21.3ns ± 0% 21.5ns ± 0% +0.96% (p=0.000 n=12+12)
ArenaInitialBlockOneAlloc 6.32ns ± 0% 6.32ns ± 0% +0.03% (p=0.000 n=12+10)
LoadDescriptor_Upb 53.5µs ± 1% 51.5µs ± 2% -3.70% (p=0.000 n=12+12)
LoadAdsDescriptor_Upb 2.78ms ± 2% 2.68ms ± 0% -3.57% (p=0.000 n=12+12)
LoadDescriptor_Proto2 240µs ± 0% 240µs ± 0% +0.12% (p=0.001 n=12+12)
LoadAdsDescriptor_Proto2 12.8ms ± 0% 12.7ms ± 0% -1.15% (p=0.000 n=12+10)
Parse_Upb_FileDesc<UseArena,Copy> 13.2µs ± 2% 10.7µs ± 0% -18.49% (p=0.000 n=10+12)
Parse_Upb_FileDesc<UseArena,Alias> 11.3µs ± 0% 9.6µs ± 0% -15.11% (p=0.000 n=12+11)
Parse_Upb_FileDesc<InitBlock,Copy> 12.7µs ± 0% 10.3µs ± 0% -19.00% (p=0.000 n=10+12)
Parse_Upb_FileDesc<InitBlock,Alias> 10.9µs ± 0% 9.2µs ± 0% -15.82% (p=0.000 n=12+12)
Parse_Proto2<FileDesc,NoArena,Copy> 29.4µs ± 0% 29.5µs ± 0% +0.61% (p=0.000 n=12+12)
Parse_Proto2<FileDesc,UseArena,Copy> 20.7µs ± 2% 20.6µs ± 2% ~ (p=0.260 n=12+11)
Parse_Proto2<FileDesc,InitBlock,Copy> 16.7µs ± 1% 16.7µs ± 0% -0.25% (p=0.036 n=12+10)
Parse_Proto2<FileDescSV,InitBlock,Alias> 16.5µs ± 0% 16.5µs ± 0% +0.20% (p=0.016 n=12+11)
SerializeDescriptor_Proto2 5.30µs ± 1% 5.36µs ± 1% +1.09% (p=0.000 n=12+11)
SerializeDescriptor_Upb 12.9µs ± 0% 13.0µs ± 0% +0.90% (p=0.000 n=12+11)
FILE SIZE VM SIZE
-------------- --------------
+1.5% +176 +1.6% +176 upb/decode.c
+1.8% +176 +1.9% +176 decode_msg
+0.4% +64 +0.4% +64 upb/def.c
+1.4% +64 +1.4% +64 _upb_symtab_addfile
+1.2% +48 +1.4% +48 upb/reflection.c
+15% +32 +18% +32 upb_msg_set
+2.9% +16 +3.1% +16 upb_msg_mutable
-9.3% -288 [ = ] 0 [Unmapped]
[ = ] 0 +0.2% +288 TOTAL
```
4 years ago
|
|
|
return decode_readstr(d, ptr, val->size, mem);
|
|
|
|
case OP_SCALAR_LG2(3):
|
Optimized decoder and paved the way for parsing extensions.
The primary motivation for this change is to avoid referring to the
`upb_msglayout` object when we are trying to fetch the `upb_msglayout`
object for a sub-message. This will help pave the way for parsing
extensions. We also implement several optimizations so that we can
make this change without regressing performance.
Normally we compute the layout for a sub-message field like so:
```
const upb_msglayout *get_submsg_layout(
const upb_msglayout *layout,
const upb_msglayout_field *field) {
return layout->submsgs[field->submsg_index]
}
```
The reason for this indirection is to avoid storing a pointer directly
in `upb_msglayout_field`, as this would double its size (from 12 to 24
bytes on 64-bit architectures) which is wasteful as this pointer is
only needed for message typed fields.
However `get_submsg_layout` as written above does not work for
extensions, as they will not have entries in the message's
`layout->submsgs` array by nature, and we want to avoid creating
an entire fake `upb_msglayout` for each such extension since that
would also be wasteful.
This change removes the dependency on `upb_msglayout` by passing down
the `submsgs` array instead:
```
const upb_msglayout *get_submsg_layout(
const upb_msglayout *const *submsgs,
const upb_msglayout_field *field) {
return submsgs[field->submsg_index]
}
```
This will pave the way for parsing extensions, as we can more easily
create an alternative `submsgs` array for extension fields without
extra overhead or waste.
Along the way several optimizations presented themselves that allow
a nice increase in performance:
1. Passing the parsed `wireval` by address instead of by value ended
up avoiding an expensive and useless stack copy (this is on Clang,
which was used for all measurements).
2. When field numbers are densely packed, we can find a field by number
with a single indexed lookup instead of linear search. At codegen
time we can compute the maximum field number that will allow such
an indexed lookup.
3. For fields that do require linear search, we can start the linear
search at the location where we found the previous field, taking
advantage of the fact that field numbers are generally increasing.
4. When the hasbit index is less than 32 (the common case) we can use
a less expensive code sequence to set it.
5. We check for the hasbit case before the oneof case, as optional
fields are more common than oneof fields.
Benchmark results indicate a 20% improvement in parse speed with a
small code size increase:
```
name old time/op new time/op delta
ArenaOneAlloc 21.3ns ± 0% 21.5ns ± 0% +0.96% (p=0.000 n=12+12)
ArenaInitialBlockOneAlloc 6.32ns ± 0% 6.32ns ± 0% +0.03% (p=0.000 n=12+10)
LoadDescriptor_Upb 53.5µs ± 1% 51.5µs ± 2% -3.70% (p=0.000 n=12+12)
LoadAdsDescriptor_Upb 2.78ms ± 2% 2.68ms ± 0% -3.57% (p=0.000 n=12+12)
LoadDescriptor_Proto2 240µs ± 0% 240µs ± 0% +0.12% (p=0.001 n=12+12)
LoadAdsDescriptor_Proto2 12.8ms ± 0% 12.7ms ± 0% -1.15% (p=0.000 n=12+10)
Parse_Upb_FileDesc<UseArena,Copy> 13.2µs ± 2% 10.7µs ± 0% -18.49% (p=0.000 n=10+12)
Parse_Upb_FileDesc<UseArena,Alias> 11.3µs ± 0% 9.6µs ± 0% -15.11% (p=0.000 n=12+11)
Parse_Upb_FileDesc<InitBlock,Copy> 12.7µs ± 0% 10.3µs ± 0% -19.00% (p=0.000 n=10+12)
Parse_Upb_FileDesc<InitBlock,Alias> 10.9µs ± 0% 9.2µs ± 0% -15.82% (p=0.000 n=12+12)
Parse_Proto2<FileDesc,NoArena,Copy> 29.4µs ± 0% 29.5µs ± 0% +0.61% (p=0.000 n=12+12)
Parse_Proto2<FileDesc,UseArena,Copy> 20.7µs ± 2% 20.6µs ± 2% ~ (p=0.260 n=12+11)
Parse_Proto2<FileDesc,InitBlock,Copy> 16.7µs ± 1% 16.7µs ± 0% -0.25% (p=0.036 n=12+10)
Parse_Proto2<FileDescSV,InitBlock,Alias> 16.5µs ± 0% 16.5µs ± 0% +0.20% (p=0.016 n=12+11)
SerializeDescriptor_Proto2 5.30µs ± 1% 5.36µs ± 1% +1.09% (p=0.000 n=12+11)
SerializeDescriptor_Upb 12.9µs ± 0% 13.0µs ± 0% +0.90% (p=0.000 n=12+11)
FILE SIZE VM SIZE
-------------- --------------
+1.5% +176 +1.6% +176 upb/decode.c
+1.8% +176 +1.9% +176 decode_msg
+0.4% +64 +0.4% +64 upb/def.c
+1.4% +64 +1.4% +64 _upb_symtab_addfile
+1.2% +48 +1.4% +48 upb/reflection.c
+15% +32 +18% +32 upb_msg_set
+2.9% +16 +3.1% +16 upb_msg_mutable
-9.3% -288 [ = ] 0 [Unmapped]
[ = ] 0 +0.2% +288 TOTAL
```
4 years ago
|
|
|
memcpy(mem, val, 8);
|
|
|
|
break;
|
|
|
|
case OP_ENUM:
|
|
|
|
case OP_SCALAR_LG2(2):
|
Optimized decoder and paved the way for parsing extensions.
The primary motivation for this change is to avoid referring to the
`upb_msglayout` object when we are trying to fetch the `upb_msglayout`
object for a sub-message. This will help pave the way for parsing
extensions. We also implement several optimizations so that we can
make this change without regressing performance.
Normally we compute the layout for a sub-message field like so:
```
const upb_msglayout *get_submsg_layout(
const upb_msglayout *layout,
const upb_msglayout_field *field) {
return layout->submsgs[field->submsg_index]
}
```
The reason for this indirection is to avoid storing a pointer directly
in `upb_msglayout_field`, as this would double its size (from 12 to 24
bytes on 64-bit architectures) which is wasteful as this pointer is
only needed for message typed fields.
However `get_submsg_layout` as written above does not work for
extensions, as they will not have entries in the message's
`layout->submsgs` array by nature, and we want to avoid creating
an entire fake `upb_msglayout` for each such extension since that
would also be wasteful.
This change removes the dependency on `upb_msglayout` by passing down
the `submsgs` array instead:
```
const upb_msglayout *get_submsg_layout(
const upb_msglayout *const *submsgs,
const upb_msglayout_field *field) {
return submsgs[field->submsg_index]
}
```
This will pave the way for parsing extensions, as we can more easily
create an alternative `submsgs` array for extension fields without
extra overhead or waste.
Along the way several optimizations presented themselves that allow
a nice increase in performance:
1. Passing the parsed `wireval` by address instead of by value ended
up avoiding an expensive and useless stack copy (this is on Clang,
which was used for all measurements).
2. When field numbers are densely packed, we can find a field by number
with a single indexed lookup instead of linear search. At codegen
time we can compute the maximum field number that will allow such
an indexed lookup.
3. For fields that do require linear search, we can start the linear
search at the location where we found the previous field, taking
advantage of the fact that field numbers are generally increasing.
4. When the hasbit index is less than 32 (the common case) we can use
a less expensive code sequence to set it.
5. We check for the hasbit case before the oneof case, as optional
fields are more common than oneof fields.
Benchmark results indicate a 20% improvement in parse speed with a
small code size increase:
```
name old time/op new time/op delta
ArenaOneAlloc 21.3ns ± 0% 21.5ns ± 0% +0.96% (p=0.000 n=12+12)
ArenaInitialBlockOneAlloc 6.32ns ± 0% 6.32ns ± 0% +0.03% (p=0.000 n=12+10)
LoadDescriptor_Upb 53.5µs ± 1% 51.5µs ± 2% -3.70% (p=0.000 n=12+12)
LoadAdsDescriptor_Upb 2.78ms ± 2% 2.68ms ± 0% -3.57% (p=0.000 n=12+12)
LoadDescriptor_Proto2 240µs ± 0% 240µs ± 0% +0.12% (p=0.001 n=12+12)
LoadAdsDescriptor_Proto2 12.8ms ± 0% 12.7ms ± 0% -1.15% (p=0.000 n=12+10)
Parse_Upb_FileDesc<UseArena,Copy> 13.2µs ± 2% 10.7µs ± 0% -18.49% (p=0.000 n=10+12)
Parse_Upb_FileDesc<UseArena,Alias> 11.3µs ± 0% 9.6µs ± 0% -15.11% (p=0.000 n=12+11)
Parse_Upb_FileDesc<InitBlock,Copy> 12.7µs ± 0% 10.3µs ± 0% -19.00% (p=0.000 n=10+12)
Parse_Upb_FileDesc<InitBlock,Alias> 10.9µs ± 0% 9.2µs ± 0% -15.82% (p=0.000 n=12+12)
Parse_Proto2<FileDesc,NoArena,Copy> 29.4µs ± 0% 29.5µs ± 0% +0.61% (p=0.000 n=12+12)
Parse_Proto2<FileDesc,UseArena,Copy> 20.7µs ± 2% 20.6µs ± 2% ~ (p=0.260 n=12+11)
Parse_Proto2<FileDesc,InitBlock,Copy> 16.7µs ± 1% 16.7µs ± 0% -0.25% (p=0.036 n=12+10)
Parse_Proto2<FileDescSV,InitBlock,Alias> 16.5µs ± 0% 16.5µs ± 0% +0.20% (p=0.016 n=12+11)
SerializeDescriptor_Proto2 5.30µs ± 1% 5.36µs ± 1% +1.09% (p=0.000 n=12+11)
SerializeDescriptor_Upb 12.9µs ± 0% 13.0µs ± 0% +0.90% (p=0.000 n=12+11)
FILE SIZE VM SIZE
-------------- --------------
+1.5% +176 +1.6% +176 upb/decode.c
+1.8% +176 +1.9% +176 decode_msg
+0.4% +64 +0.4% +64 upb/def.c
+1.4% +64 +1.4% +64 _upb_symtab_addfile
+1.2% +48 +1.4% +48 upb/reflection.c
+15% +32 +18% +32 upb_msg_set
+2.9% +16 +3.1% +16 upb_msg_mutable
-9.3% -288 [ = ] 0 [Unmapped]
[ = ] 0 +0.2% +288 TOTAL
```
4 years ago
|
|
|
memcpy(mem, val, 4);
|
|
|
|
break;
|
|
|
|
case OP_SCALAR_LG2(0):
|
Optimized decoder and paved the way for parsing extensions.
The primary motivation for this change is to avoid referring to the
`upb_msglayout` object when we are trying to fetch the `upb_msglayout`
object for a sub-message. This will help pave the way for parsing
extensions. We also implement several optimizations so that we can
make this change without regressing performance.
Normally we compute the layout for a sub-message field like so:
```
const upb_msglayout *get_submsg_layout(
const upb_msglayout *layout,
const upb_msglayout_field *field) {
return layout->submsgs[field->submsg_index]
}
```
The reason for this indirection is to avoid storing a pointer directly
in `upb_msglayout_field`, as this would double its size (from 12 to 24
bytes on 64-bit architectures) which is wasteful as this pointer is
only needed for message typed fields.
However `get_submsg_layout` as written above does not work for
extensions, as they will not have entries in the message's
`layout->submsgs` array by nature, and we want to avoid creating
an entire fake `upb_msglayout` for each such extension since that
would also be wasteful.
This change removes the dependency on `upb_msglayout` by passing down
the `submsgs` array instead:
```
const upb_msglayout *get_submsg_layout(
const upb_msglayout *const *submsgs,
const upb_msglayout_field *field) {
return submsgs[field->submsg_index]
}
```
This will pave the way for parsing extensions, as we can more easily
create an alternative `submsgs` array for extension fields without
extra overhead or waste.
Along the way several optimizations presented themselves that allow
a nice increase in performance:
1. Passing the parsed `wireval` by address instead of by value ended
up avoiding an expensive and useless stack copy (this is on Clang,
which was used for all measurements).
2. When field numbers are densely packed, we can find a field by number
with a single indexed lookup instead of linear search. At codegen
time we can compute the maximum field number that will allow such
an indexed lookup.
3. For fields that do require linear search, we can start the linear
search at the location where we found the previous field, taking
advantage of the fact that field numbers are generally increasing.
4. When the hasbit index is less than 32 (the common case) we can use
a less expensive code sequence to set it.
5. We check for the hasbit case before the oneof case, as optional
fields are more common than oneof fields.
Benchmark results indicate a 20% improvement in parse speed with a
small code size increase:
```
name old time/op new time/op delta
ArenaOneAlloc 21.3ns ± 0% 21.5ns ± 0% +0.96% (p=0.000 n=12+12)
ArenaInitialBlockOneAlloc 6.32ns ± 0% 6.32ns ± 0% +0.03% (p=0.000 n=12+10)
LoadDescriptor_Upb 53.5µs ± 1% 51.5µs ± 2% -3.70% (p=0.000 n=12+12)
LoadAdsDescriptor_Upb 2.78ms ± 2% 2.68ms ± 0% -3.57% (p=0.000 n=12+12)
LoadDescriptor_Proto2 240µs ± 0% 240µs ± 0% +0.12% (p=0.001 n=12+12)
LoadAdsDescriptor_Proto2 12.8ms ± 0% 12.7ms ± 0% -1.15% (p=0.000 n=12+10)
Parse_Upb_FileDesc<UseArena,Copy> 13.2µs ± 2% 10.7µs ± 0% -18.49% (p=0.000 n=10+12)
Parse_Upb_FileDesc<UseArena,Alias> 11.3µs ± 0% 9.6µs ± 0% -15.11% (p=0.000 n=12+11)
Parse_Upb_FileDesc<InitBlock,Copy> 12.7µs ± 0% 10.3µs ± 0% -19.00% (p=0.000 n=10+12)
Parse_Upb_FileDesc<InitBlock,Alias> 10.9µs ± 0% 9.2µs ± 0% -15.82% (p=0.000 n=12+12)
Parse_Proto2<FileDesc,NoArena,Copy> 29.4µs ± 0% 29.5µs ± 0% +0.61% (p=0.000 n=12+12)
Parse_Proto2<FileDesc,UseArena,Copy> 20.7µs ± 2% 20.6µs ± 2% ~ (p=0.260 n=12+11)
Parse_Proto2<FileDesc,InitBlock,Copy> 16.7µs ± 1% 16.7µs ± 0% -0.25% (p=0.036 n=12+10)
Parse_Proto2<FileDescSV,InitBlock,Alias> 16.5µs ± 0% 16.5µs ± 0% +0.20% (p=0.016 n=12+11)
SerializeDescriptor_Proto2 5.30µs ± 1% 5.36µs ± 1% +1.09% (p=0.000 n=12+11)
SerializeDescriptor_Upb 12.9µs ± 0% 13.0µs ± 0% +0.90% (p=0.000 n=12+11)
FILE SIZE VM SIZE
-------------- --------------
+1.5% +176 +1.6% +176 upb/decode.c
+1.8% +176 +1.9% +176 decode_msg
+0.4% +64 +0.4% +64 upb/def.c
+1.4% +64 +1.4% +64 _upb_symtab_addfile
+1.2% +48 +1.4% +48 upb/reflection.c
+15% +32 +18% +32 upb_msg_set
+2.9% +16 +3.1% +16 upb_msg_mutable
-9.3% -288 [ = ] 0 [Unmapped]
[ = ] 0 +0.2% +288 TOTAL
```
4 years ago
|
|
|
memcpy(mem, val, 1);
|
|
|
|
break;
|
|
|
|
default:
|
|
|
|
UPB_UNREACHABLE();
|
|
|
|
}
|
|
|
|
|
|
|
|
return ptr;
|
|
|
|
}
|
|
|
|
|
|
|
|
// Computes a bitmask in which the |n| lowest bits are set, except that we
// skip the lowest bit (because upb never uses hasbit 0).
//
// The shift is performed on a 64-bit unsigned operand.  |n| may be as large
// as 62, and shifting a plain `int` by 31 or more bits is undefined behavior
// (and could never produce more than 32 bits of mask anyway).
//
// Sample output:
//    decode_requiredmask(1) => 0b10 (0x2)
//    decode_requiredmask(5) => 0b111110 (0x3e)
uint64_t decode_requiredmask(int n) {
  assert(0 < n && n < 63);
  return (((uint64_t)1 << n) - 1) << 1;
}
|
|
|
|
|
|
|
|
UPB_NOINLINE
|
|
|
|
const char *decode_checkrequired(upb_decstate *d, const char *ptr,
|
|
|
|
const upb_msg *msg, const upb_msglayout *l) {
|
|
|
|
assert(l->required_count);
|
|
|
|
if (UPB_LIKELY((d->options & kUpb_DecodeOption_CheckRequired) == 0)) {
|
|
|
|
return ptr;
|
|
|
|
}
|
|
|
|
uint64_t msg_head;
|
|
|
|
memcpy(&msg_head, msg, 8);
|
|
|
|
msg_head = _upb_be_swap64(msg_head);
|
|
|
|
if (decode_requiredmask(l->required_count) & ~msg_head) {
|
|
|
|
d->missing_required = true;
|
|
|
|
}
|
|
|
|
return ptr;
|
|
|
|
}
|
|
|
|
|
|
|
|
UPB_FORCEINLINE
|
|
|
|
static bool decode_tryfastdispatch(upb_decstate *d, const char **ptr,
|
|
|
|
upb_msg *msg, const upb_msglayout *layout) {
|
|
|
|
#if UPB_FASTTABLE
|
|
|
|
if (layout && layout->table_mask != (unsigned char)-1) {
|
|
|
|
uint16_t tag = fastdecode_loadtag(*ptr);
|
|
|
|
intptr_t table = decode_totable(layout);
|
|
|
|
*ptr = fastdecode_tagdispatch(d, *ptr, msg, table, 0, tag);
|
|
|
|
return true;
|
|
|
|
}
|
|
|
|
#endif
|
|
|
|
return false;
|
|
|
|
}
|
|
|
|
|
|
|
|
static const char *decode_msgset(upb_decstate *d, const char *ptr, upb_msg *msg,
|
|
|
|
const upb_msglayout *layout) {
|
|
|
|
// We create a temporary upb_msglayout here and abuse its fields as temporary
|
|
|
|
// storage, to avoid creating lots of MessageSet-specific parsing code-paths:
|
|
|
|
// 1. We store 'layout' in item_layout.subs. We will need this later as
|
|
|
|
// a key to look up extensions for this MessageSet.
|
|
|
|
// 2. We use item_layout.fields as temporary storage to store the extension we
|
|
|
|
// found when parsing the type id.
|
|
|
|
upb_msglayout item_layout = {
|
|
|
|
.subs = (const upb_msglayout_sub[]){{.submsg = layout}},
|
|
|
|
.fields = NULL,
|
|
|
|
.size = 0,
|
|
|
|
.field_count = 0,
|
|
|
|
.ext = _UPB_MSGEXT_MSGSET_ITEM,
|
|
|
|
.dense_below = 0,
|
|
|
|
.table_mask = -1};
|
|
|
|
return decode_group(d, ptr, msg, &item_layout, 1);
|
|
|
|
}
|
|
|
|
|
|
|
|
static const upb_msglayout_field *decode_findfield(upb_decstate *d,
|
|
|
|
const upb_msglayout *l,
|
|
|
|
uint32_t field_number,
|
|
|
|
int *last_field_index) {
|
|
|
|
static upb_msglayout_field none = {0, 0, 0, 0, 0, 0};
|
|
|
|
if (l == NULL) return &none;
|
|
|
|
|
|
|
|
size_t idx = ((size_t)field_number) - 1; // 0 wraps to SIZE_MAX
|
|
|
|
if (idx < l->dense_below) {
|
|
|
|
/* Fastest case: index into dense fields. */
|
|
|
|
goto found;
|
|
|
|
}
|
|
|
|
|
|
|
|
if (l->dense_below < l->field_count) {
|
|
|
|
/* Linear search non-dense fields. Resume scanning from last_field_index
|
|
|
|
* since fields are usually in order. */
|
|
|
|
int last = *last_field_index;
|
|
|
|
for (idx = last; idx < l->field_count; idx++) {
|
|
|
|
if (l->fields[idx].number == field_number) {
|
|
|
|
goto found;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
for (idx = l->dense_below; idx < last; idx++) {
|
|
|
|
if (l->fields[idx].number == field_number) {
|
|
|
|
goto found;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
if (d->extreg) {
|
|
|
|
switch (l->ext) {
|
|
|
|
case _UPB_MSGEXT_EXTENDABLE: {
|
|
|
|
const upb_msglayout_ext *ext =
|
|
|
|
_upb_extreg_get(d->extreg, l, field_number);
|
|
|
|
if (ext) return &ext->field;
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
case _UPB_MSGEXT_MSGSET:
|
|
|
|
if (field_number == _UPB_MSGSET_ITEM) {
|
|
|
|
static upb_msglayout_field item = {0, 0, 0, 0, TYPE_MSGSET_ITEM, 0};
|
|
|
|
return &item;
|
|
|
|
}
|
|
|
|
break;
|
|
|
|
case _UPB_MSGEXT_MSGSET_ITEM:
|
|
|
|
switch (field_number) {
|
|
|
|
case _UPB_MSGSET_TYPEID: {
|
|
|
|
static upb_msglayout_field type_id = {
|
|
|
|
0, 0, 0, 0, TYPE_MSGSET_TYPE_ID, 0};
|
|
|
|
return &type_id;
|
|
|
|
}
|
|
|
|
case _UPB_MSGSET_MESSAGE:
|
|
|
|
if (l->fields) {
|
|
|
|
// We saw type_id previously and succeeded in looking up msg.
|
|
|
|
return l->fields;
|
|
|
|
} else {
|
|
|
|
// TODO: out of order MessageSet.
|
|
|
|
// This is a very rare case: all serializers will emit in-order
|
|
|
|
// MessageSets. To hit this case there has to be some kind of
|
|
|
|
// re-ordering proxy. We should eventually handle this case, but
|
|
|
|
// not today.
|
|
|
|
}
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
return &none; /* Unknown field. */
|
|
|
|
|
|
|
|
found:
|
|
|
|
UPB_ASSERT(l->fields[idx].number == field_number);
|
|
|
|
*last_field_index = idx;
|
|
|
|
return &l->fields[idx];
|
|
|
|
}
|
|
|
|
|
|
|
|
UPB_FORCEINLINE
|
|
|
|
static const char *decode_wireval(upb_decstate *d, const char *ptr,
|
|
|
|
const upb_msglayout_field *field,
|
|
|
|
int wire_type, wireval *val, int *op) {
|
|
|
|
switch (wire_type) {
|
|
|
|
case UPB_WIRE_TYPE_VARINT:
|
|
|
|
ptr = decode_varint64(d, ptr, &val->uint64_val);
|
|
|
|
*op = varint_ops[field->descriptortype];
|
|
|
|
decode_munge(field->descriptortype, val);
|
|
|
|
return ptr;
|
|
|
|
case UPB_WIRE_TYPE_32BIT:
|
|
|
|
memcpy(&val->uint32_val, ptr, 4);
|
|
|
|
val->uint32_val = _upb_be_swap32(val->uint32_val);
|
|
|
|
*op = OP_SCALAR_LG2(2);
|
|
|
|
if (((1 << field->descriptortype) & FIXED32_OK_MASK) == 0) {
|
|
|
|
*op = OP_UNKNOWN;
|
|
|
|
}
|
|
|
|
return ptr + 4;
|
|
|
|
case UPB_WIRE_TYPE_64BIT:
|
|
|
|
memcpy(&val->uint64_val, ptr, 8);
|
|
|
|
val->uint64_val = _upb_be_swap64(val->uint64_val);
|
|
|
|
*op = OP_SCALAR_LG2(3);
|
|
|
|
if (((1 << field->descriptortype) & FIXED64_OK_MASK) == 0) {
|
|
|
|
*op = OP_UNKNOWN;
|
|
|
|
}
|
|
|
|
return ptr + 8;
|
|
|
|
case UPB_WIRE_TYPE_DELIMITED: {
|
|
|
|
int ndx = field->descriptortype;
|
|
|
|
uint64_t size;
|
|
|
|
if (_upb_getmode(field) == _UPB_MODE_ARRAY) ndx += TYPE_COUNT;
|
|
|
|
ptr = decode_varint64(d, ptr, &size);
|
|
|
|
if (size >= INT32_MAX || ptr - d->end + (int32_t)size > d->limit) {
|
|
|
|
break; /* Length overflow. */
|
|
|
|
}
|
|
|
|
*op = delim_ops[ndx];
|
|
|
|
val->size = size;
|
|
|
|
return ptr;
|
|
|
|
}
|
|
|
|
case UPB_WIRE_TYPE_START_GROUP:
|
|
|
|
val->uint32_val = field->number;
|
|
|
|
if (field->descriptortype == UPB_DTYPE_GROUP) {
|
|
|
|
*op = OP_SUBMSG;
|
|
|
|
} else if (field->descriptortype == TYPE_MSGSET_ITEM) {
|
|
|
|
*op = OP_MSGSET_ITEM;
|
|
|
|
} else {
|
|
|
|
*op = OP_UNKNOWN;
|
|
|
|
}
|
|
|
|
return ptr;
|
|
|
|
default:
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
return decode_err(d, kUpb_DecodeStatus_Malformed);
|
|
|
|
}
|
|
|
|
|
|
|
|
UPB_FORCEINLINE
|
|
|
|
static const char *decode_known(upb_decstate *d, const char *ptr, upb_msg *msg,
|
|
|
|
const upb_msglayout *layout,
|
|
|
|
const upb_msglayout_field *field, int op,
|
|
|
|
wireval *val) {
|
|
|
|
const upb_msglayout_sub *subs = layout->subs;
|
|
|
|
uint8_t mode = field->mode;
|
|
|
|
|
|
|
|
if (UPB_UNLIKELY(mode & _UPB_MODE_IS_EXTENSION)) {
|
|
|
|
const upb_msglayout_ext *ext_layout = (const upb_msglayout_ext*)field;
|
|
|
|
upb_msg_ext *ext = _upb_msg_getorcreateext(msg, ext_layout, &d->arena);
|
|
|
|
if (UPB_UNLIKELY(!ext)) return decode_err(d, kUpb_DecodeStatus_OutOfMemory);
|
|
|
|
msg = &ext->data;
|
|
|
|
subs = &ext->ext->sub;
|
|
|
|
}
|
|
|
|
|
|
|
|
switch (mode & _UPB_MODE_MASK) {
|
|
|
|
case _UPB_MODE_ARRAY:
|
|
|
|
return decode_toarray(d, ptr, msg, subs, field, val, op);
|
|
|
|
case _UPB_MODE_MAP:
|
|
|
|
return decode_tomap(d, ptr, msg, subs, field, val);
|
|
|
|
case _UPB_MODE_SCALAR:
|
|
|
|
return decode_tomsg(d, ptr, msg, subs, field, val, op);
|
|
|
|
default:
|
|
|
|
UPB_UNREACHABLE();
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
// Walks backwards from |ptr| over the bytes of a varint whose decoded value
// is |val| and returns a pointer to its first byte.
//
// Each step moves one byte back and shifts its low 7 bits into an
// accumulator; the walk stops as soon as the accumulator equals |val|.  There
// is no bounds check, so the caller must guarantee that such a varint really
// does end immediately before |ptr|.
static const char *decode_reverse_skip_varint(const char *ptr, uint32_t val) {
  uint32_t acc = 0;
  for (;;) {
    --ptr;
    acc = (acc << 7) | (uint32_t)(*ptr & 0x7f);
    if (acc == val) return ptr;
  }
}
|
|
|
|
|
|
|
|
static const char *decode_unknown(upb_decstate *d, const char *ptr,
|
|
|
|
upb_msg *msg, int field_number, int wire_type,
|
|
|
|
wireval val) {
|
|
|
|
if (field_number == 0) return decode_err(d, kUpb_DecodeStatus_Malformed);
|
|
|
|
|
|
|
|
// Since unknown fields are the uncommon case, we do a little extra work here
|
|
|
|
// to walk backwards through the buffer to find the field start. This frees
|
|
|
|
// up a register in the fast paths (when the field is known), which leads to
|
|
|
|
// significant speedups in benchmarks.
|
|
|
|
const char *start = ptr;
|
|
|
|
|
|
|
|
if (wire_type == UPB_WIRE_TYPE_DELIMITED) ptr += val.size;
|
|
|
|
if (msg) {
|
|
|
|
switch (wire_type) {
|
|
|
|
case UPB_WIRE_TYPE_VARINT:
|
|
|
|
case UPB_WIRE_TYPE_DELIMITED:
|
|
|
|
start--;
|
|
|
|
while (start[-1] & 0x80) start--;
|
|
|
|
break;
|
|
|
|
case UPB_WIRE_TYPE_32BIT:
|
|
|
|
start -= 4;
|
|
|
|
break;
|
|
|
|
case UPB_WIRE_TYPE_64BIT:
|
|
|
|
start -= 8;
|
|
|
|
break;
|
|
|
|
default:
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
|
|
|
|
assert(start == d->debug_valstart);
|
|
|
|
uint32_t tag = ((uint32_t)field_number << 3) | wire_type;
|
|
|
|
start = decode_reverse_skip_varint(start, tag);
|
|
|
|
assert(start == d->debug_tagstart);
|
|
|
|
|
|
|
|
if (wire_type == UPB_WIRE_TYPE_START_GROUP) {
|
|
|
|
d->unknown = start;
|
|
|
|
d->unknown_msg = msg;
|
|
|
|
ptr = decode_group(d, ptr, NULL, NULL, field_number);
|
|
|
|
start = d->unknown;
|
|
|
|
d->unknown_msg = NULL;
|
|
|
|
d->unknown = NULL;
|
|
|
|
}
|
|
|
|
if (!_upb_msg_addunknown(msg, start, ptr - start, &d->arena)) {
|
|
|
|
return decode_err(d, kUpb_DecodeStatus_OutOfMemory);
|
|
|
|
}
|
|
|
|
} else if (wire_type == UPB_WIRE_TYPE_START_GROUP) {
|
|
|
|
ptr = decode_group(d, ptr, NULL, NULL, field_number);
|
|
|
|
}
|
|
|
|
return ptr;
|
|
|
|
}
|
|
|
|
|
|
|
|
UPB_NOINLINE
|
|
|
|
static const char *decode_msg(upb_decstate *d, const char *ptr, upb_msg *msg,
|
|
|
|
const upb_msglayout *layout) {
|
Optimized decoder and paved the way for parsing extensions.
The primary motivation for this change is to avoid referring to the
`upb_msglayout` object when we are trying to fetch the `upb_msglayout`
object for a sub-message. This will help pave the way for parsing
extensions. We also implement several optimizations so that we can
make this change without regressing performance.
Normally we compute the layout for a sub-message field like so:
```
const upb_msglayout *get_submsg_layout(
const upb_msglayout *layout,
const upb_msglayout_field *field) {
return layout->submsgs[field->submsg_index]
}
```
The reason for this indirection is to avoid storing a pointer directly
in `upb_msglayout_field`, as this would double its size (from 12 to 24
bytes on 64-bit architectures) which is wasteful as this pointer is
only needed for message typed fields.
However `get_submsg_layout` as written above does not work for
extensions, as they will not have entries in the message's
`layout->submsgs` array by nature, and we want to avoid creating
an entire fake `upb_msglayout` for each such extension since that
would also be wasteful.
This change removes the dependency on `upb_msglayout` by passing down
the `submsgs` array instead:
```
const upb_msglayout *get_submsg_layout(
const upb_msglayout *const *submsgs,
const upb_msglayout_field *field) {
return submsgs[field->submsg_index]
}
```
This will pave the way for parsing extensions, as we can more easily
create an alternative `submsgs` array for extension fields without
extra overhead or waste.
Along the way several optimizations presented themselves that allow
a nice increase in performance:
1. Passing the parsed `wireval` by address instead of by value ended
up avoiding an expensive and useless stack copy (this is on Clang,
which was used for all measurements).
2. When field numbers are densely packed, we can find a field by number
with a single indexed lookup instead of linear search. At codegen
time we can compute the maximum field number that will allow such
an indexed lookup.
3. For fields that do require linear search, we can start the linear
search at the location where we found the previous field, taking
advantage of the fact that field numbers are generally increasing.
4. When the hasbit index is less than 32 (the common case) we can use
a less expensive code sequence to set it.
5. We check for the hasbit case before the oneof case, as optional
fields are more common than oneof fields.
Benchmark results indicate a 20% improvement in parse speed with a
small code size increase:
```
name old time/op new time/op delta
ArenaOneAlloc 21.3ns ± 0% 21.5ns ± 0% +0.96% (p=0.000 n=12+12)
ArenaInitialBlockOneAlloc 6.32ns ± 0% 6.32ns ± 0% +0.03% (p=0.000 n=12+10)
LoadDescriptor_Upb 53.5µs ± 1% 51.5µs ± 2% -3.70% (p=0.000 n=12+12)
LoadAdsDescriptor_Upb 2.78ms ± 2% 2.68ms ± 0% -3.57% (p=0.000 n=12+12)
LoadDescriptor_Proto2 240µs ± 0% 240µs ± 0% +0.12% (p=0.001 n=12+12)
LoadAdsDescriptor_Proto2 12.8ms ± 0% 12.7ms ± 0% -1.15% (p=0.000 n=12+10)
Parse_Upb_FileDesc<UseArena,Copy> 13.2µs ± 2% 10.7µs ± 0% -18.49% (p=0.000 n=10+12)
Parse_Upb_FileDesc<UseArena,Alias> 11.3µs ± 0% 9.6µs ± 0% -15.11% (p=0.000 n=12+11)
Parse_Upb_FileDesc<InitBlock,Copy> 12.7µs ± 0% 10.3µs ± 0% -19.00% (p=0.000 n=10+12)
Parse_Upb_FileDesc<InitBlock,Alias> 10.9µs ± 0% 9.2µs ± 0% -15.82% (p=0.000 n=12+12)
Parse_Proto2<FileDesc,NoArena,Copy> 29.4µs ± 0% 29.5µs ± 0% +0.61% (p=0.000 n=12+12)
Parse_Proto2<FileDesc,UseArena,Copy> 20.7µs ± 2% 20.6µs ± 2% ~ (p=0.260 n=12+11)
Parse_Proto2<FileDesc,InitBlock,Copy> 16.7µs ± 1% 16.7µs ± 0% -0.25% (p=0.036 n=12+10)
Parse_Proto2<FileDescSV,InitBlock,Alias> 16.5µs ± 0% 16.5µs ± 0% +0.20% (p=0.016 n=12+11)
SerializeDescriptor_Proto2 5.30µs ± 1% 5.36µs ± 1% +1.09% (p=0.000 n=12+11)
SerializeDescriptor_Upb 12.9µs ± 0% 13.0µs ± 0% +0.90% (p=0.000 n=12+11)
FILE SIZE VM SIZE
-------------- --------------
+1.5% +176 +1.6% +176 upb/decode.c
+1.8% +176 +1.9% +176 decode_msg
+0.4% +64 +0.4% +64 upb/def.c
+1.4% +64 +1.4% +64 _upb_symtab_addfile
+1.2% +48 +1.4% +48 upb/reflection.c
+15% +32 +18% +32 upb_msg_set
+2.9% +16 +3.1% +16 upb_msg_mutable
-9.3% -288 [ = ] 0 [Unmapped]
[ = ] 0 +0.2% +288 TOTAL
```
4 years ago
|
|
|
int last_field_index = 0;
|
|
|
|
|
|
|
|
#if UPB_FASTTABLE
|
|
|
|
// The first time we want to skip fast dispatch, because we may have just been
|
|
|
|
// invoked by the fast parser to handle a case that it bailed on.
|
|
|
|
if (!decode_isdone(d, &ptr)) goto nofast;
|
|
|
|
#endif
|
|
|
|
|
|
|
|
while (!decode_isdone(d, &ptr)) {
|
|
|
|
uint32_t tag;
|
|
|
|
const upb_msglayout_field *field;
|
|
|
|
int field_number;
|
|
|
|
int wire_type;
|
|
|
|
wireval val;
|
|
|
|
int op;
|
|
|
|
|
|
|
|
if (decode_tryfastdispatch(d, &ptr, msg, layout)) break;
|
|
|
|
|
|
|
|
#if UPB_FASTTABLE
|
|
|
|
nofast:
|
|
|
|
#endif
|
|
|
|
|
|
|
|
#ifndef NDEBUG
|
|
|
|
d->debug_tagstart = ptr;
|
|
|
|
#endif
|
|
|
|
|
|
|
|
UPB_ASSERT(ptr < d->limit_ptr);
|
|
|
|
ptr = decode_tag(d, ptr, &tag);
|
|
|
|
field_number = tag >> 3;
|
|
|
|
wire_type = tag & 7;
|
|
|
|
|
|
|
|
#ifndef NDEBUG
|
|
|
|
d->debug_valstart = ptr;
|
|
|
|
#endif
|
|
|
|
|
|
|
|
if (wire_type == UPB_WIRE_TYPE_END_GROUP) {
|
|
|
|
d->end_group = field_number;
|
|
|
|
return ptr;
|
|
|
|
}
|
|
|
|
|
|
|
|
field = decode_findfield(d, layout, field_number, &last_field_index);
|
|
|
|
ptr = decode_wireval(d, ptr, field, wire_type, &val, &op);
|
|
|
|
|
|
|
|
if (op >= 0) {
|
|
|
|
ptr = decode_known(d, ptr, msg, layout, field, op, &val);
|
|
|
|
} else {
|
|
|
|
switch (op) {
|
|
|
|
case OP_UNKNOWN:
|
|
|
|
ptr = decode_unknown(d, ptr, msg, field_number, wire_type, val);
|
|
|
|
break;
|
|
|
|
case OP_MSGSET_ITEM:
|
|
|
|
ptr = decode_msgset(d, ptr, msg, layout);
|
|
|
|
break;
|
|
|
|
case OP_MSGSET_TYPEID: {
|
|
|
|
const upb_msglayout_ext *ext = _upb_extreg_get(
|
|
|
|
d->extreg, layout->subs[0].submsg, val.uint64_val);
|
|
|
|
if (ext) ((upb_msglayout *)layout)->fields = &ext->field;
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
return UPB_UNLIKELY(layout && layout->required_count)
|
|
|
|
? decode_checkrequired(d, ptr, msg, layout)
|
|
|
|
: ptr;
|
|
|
|
}
|
|
|
|
|
|
|
|
const char *fastdecode_generic(struct upb_decstate *d, const char *ptr,
|
|
|
|
upb_msg *msg, intptr_t table, uint64_t hasbits,
|
|
|
|
uint64_t data) {
|
|
|
|
(void)data;
|
|
|
|
*(uint32_t*)msg |= hasbits;
|
|
|
|
return decode_msg(d, ptr, msg, decode_totablep(table));
|
|
|
|
}
|
|
|
|
|
|
|
|
static upb_DecodeStatus decode_top(struct upb_decstate *d, const char *buf,
|
|
|
|
void *msg, const upb_msglayout *l) {
|
|
|
|
if (!decode_tryfastdispatch(d, &buf, msg, l)) {
|
|
|
|
decode_msg(d, buf, msg, l);
|
|
|
|
}
|
|
|
|
if (d->end_group != DECODE_NOGROUP) return kUpb_DecodeStatus_Malformed;
|
|
|
|
if (d->missing_required) return kUpb_DecodeStatus_MissingRequired;
|
|
|
|
return kUpb_DecodeStatus_Ok;
|
|
|
|
}
|
|
|
|
|
|
|
|
upb_DecodeStatus _upb_decode(const char *buf, size_t size, void *msg,
|
|
|
|
const upb_msglayout *l, const upb_extreg *extreg,
|
|
|
|
int options, upb_arena *arena) {
|
|
|
|
upb_decstate state;
|
Added API for copy vs. alias and added benchmarks to test both.
Benchmark output:
$ bazel-bin/benchmarks/benchmark '--benchmark_filter=BM_Parse'
2020-11-11 15:39:04
Running bazel-bin/benchmarks/benchmark
Run on (72 X 3700 MHz CPU s)
CPU Caches:
L1 Data 32K (x36)
L1 Instruction 32K (x36)
L2 Unified 1024K (x36)
L3 Unified 25344K (x2)
-------------------------------------------------------------------------------------
Benchmark Time CPU Iterations
-------------------------------------------------------------------------------------
BM_Parse_Upb_FileDesc<UseArena, Copy> 4134 ns 4134 ns 168714 1.69152GB/s
BM_Parse_Upb_FileDesc<UseArena, Alias> 3487 ns 3487 ns 199509 2.00526GB/s
BM_Parse_Upb_FileDesc<InitBlock, Copy> 3727 ns 3726 ns 187581 1.87643GB/s
BM_Parse_Upb_FileDesc<InitBlock, Alias> 3110 ns 3110 ns 224970 2.24866GB/s
BM_Parse_Proto2<FileDesc, NoArena, Copy> 31132 ns 31132 ns 22437 229.995MB/s
BM_Parse_Proto2<FileDesc, UseArena, Copy> 21011 ns 21009 ns 33922 340.812MB/s
BM_Parse_Proto2<FileDesc, InitBlock, Copy> 17976 ns 17975 ns 38808 398.337MB/s
BM_Parse_Proto2<FileDescSV, InitBlock, Alias> 17357 ns 17356 ns 40244 412.539MB/s
4 years ago
|
|
|
unsigned depth = (unsigned)options >> 16;
|
|
|
|
|
|
|
|
if (size <= 16) {
|
|
|
|
memset(&state.patch, 0, 32);
|
|
|
|
memcpy(&state.patch, buf, size);
|
|
|
|
buf = state.patch;
|
|
|
|
state.end = buf + size;
|
|
|
|
state.limit = 0;
|
|
|
|
options &= ~kUpb_DecodeOption_AliasString; // Can't alias patch buf.
|
|
|
|
} else {
|
|
|
|
state.end = buf + size - 16;
|
|
|
|
state.limit = 16;
|
|
|
|
}
|
|
|
|
|
|
|
|
state.extreg = extreg;
|
|
|
|
state.limit_ptr = state.end;
|
|
|
|
state.unknown_msg = NULL;
|
Added API for copy vs. alias and added benchmarks to test both.
Benchmark output:
$ bazel-bin/benchmarks/benchmark '--benchmark_filter=BM_Parse'
2020-11-11 15:39:04
Running bazel-bin/benchmarks/benchmark
Run on (72 X 3700 MHz CPU s)
CPU Caches:
L1 Data 32K (x36)
L1 Instruction 32K (x36)
L2 Unified 1024K (x36)
L3 Unified 25344K (x2)
-------------------------------------------------------------------------------------
Benchmark Time CPU Iterations
-------------------------------------------------------------------------------------
BM_Parse_Upb_FileDesc<UseArena, Copy> 4134 ns 4134 ns 168714 1.69152GB/s
BM_Parse_Upb_FileDesc<UseArena, Alias> 3487 ns 3487 ns 199509 2.00526GB/s
BM_Parse_Upb_FileDesc<InitBlock, Copy> 3727 ns 3726 ns 187581 1.87643GB/s
BM_Parse_Upb_FileDesc<InitBlock, Alias> 3110 ns 3110 ns 224970 2.24866GB/s
BM_Parse_Proto2<FileDesc, NoArena, Copy> 31132 ns 31132 ns 22437 229.995MB/s
BM_Parse_Proto2<FileDesc, UseArena, Copy> 21011 ns 21009 ns 33922 340.812MB/s
BM_Parse_Proto2<FileDesc, InitBlock, Copy> 17976 ns 17975 ns 38808 398.337MB/s
BM_Parse_Proto2<FileDescSV, InitBlock, Alias> 17357 ns 17356 ns 40244 412.539MB/s
4 years ago
|
|
|
state.depth = depth ? depth : 64;
|
|
|
|
state.end_group = DECODE_NOGROUP;
|
|
|
|
state.options = (uint16_t)options;
|
|
|
|
state.missing_required = false;
|
|
|
|
state.arena.head = arena->head;
|
|
|
|
state.arena.last_size = arena->last_size;
|
|
|
|
state.arena.cleanup_metadata = arena->cleanup_metadata;
|
|
|
|
state.arena.parent = arena;
|
|
|
|
|
|
|
|
upb_DecodeStatus status = UPB_SETJMP(state.err);
|
|
|
|
if (UPB_LIKELY(status == kUpb_DecodeStatus_Ok)) {
|
|
|
|
status = decode_top(&state, buf, msg, l);
|
|
|
|
}
|
|
|
|
|
|
|
|
arena->head.ptr = state.arena.head.ptr;
|
|
|
|
arena->head.end = state.arena.head.end;
|
|
|
|
arena->cleanup_metadata = state.arena.cleanup_metadata;
|
|
|
|
return status;
|
|
|
|
}
|
|
|
|
|
|
|
|
#undef OP_UNKNOWN
|
|
|
|
#undef OP_SKIP
|
|
|
|
#undef OP_SCALAR_LG2
|
|
|
|
#undef OP_FIXPCK_LG2
|
|
|
|
#undef OP_VARPCK_LG2
|
|
|
|
#undef OP_STRING
|
|
|
|
#undef OP_BYTES
|
|
|
|
#undef OP_SUBMSG
|