Protocol Buffers - Google's data interchange format (grpc依赖) https://developers.google.com/protocol-buffers/
You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

397 lines
13 KiB

/*
* Copyright (c) 2009-2021, Google LLC
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
* * Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* * Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* * Neither the name of Google LLC nor the
* names of its contributors may be used to endorse or promote products
* derived from this software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
* WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
* DISCLAIMED. IN NO EVENT SHALL Google LLC BE LIABLE FOR ANY
* DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
* ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
* SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#include "upb/msg.h"
#include "upb/msg_internal.h"
#include "upb/port_def.inc"
#include "upb/table_internal.h"
5 years ago
/** upb_msg *******************************************************************/
/* Size in bytes of the upb_msg_internaldata header. The unknown_end offset
 * stored in that header is measured from the start of the block, so an empty
 * unknown-field region ends exactly at this offset. */
static const size_t overhead = sizeof(upb_msg_internaldata);
/* Returns a read-only pointer to the upb_msg_internal header, which is located
 * sizeof(upb_msg_internal) bytes before the address of `msg`. */
static const upb_msg_internal *upb_msg_getinternal_const(const upb_msg *msg) {
  const char *msg_bytes = (const char *)msg;
  return (const upb_msg_internal *)(msg_bytes - sizeof(upb_msg_internal));
}
/* Out-of-line entry point for _upb_msg_new_inl(): forwards the layout `l` and
 * arena `a` and returns whatever that function returns. */
upb_msg *_upb_msg_new(const upb_msglayout *l, upb_arena *a) {
  upb_msg *msg = _upb_msg_new_inl(l, a);
  return msg;
}
Fixes for PHP. (#286) - A new PHP-specific upb amalgamation. It contains everything related to upb_msg, but leaves out all of the old handlers-related interfaces and encoders/decoders. # Schema/Defs Changes - Changed `upb_fielddef_msgsubdef()` and `upb_fielddef_enumsubdef()` to return `NULL` instead of assert-failing if the field is not a message or enum. - Added `upb_msgdef_iswrapper()`, to test whether this is a wrapper well-known type. # Decoder - Decoder bugfix: when we parse a submessage inside a oneof, we need to clear out any previous data, so we don't misinterpret it as a pointer to an existing submessage. # JSON Decoder - Allowed well-known types at the top level to have their special processing. - Fixed a bug that could occur when parsing nested empty lists/objects, eg `[[]]`. - Made the "ignore unknown" option also be permissive about unknown enumerators by setting them to 0. # JSON Encoder - Allowed well-known types at the top level to have their special processing. - Removed all spaces after `:` and `,` characters, to match the old encoder and pass goldenfile tests. # Message / Reflection - Changed `upb_msg_hasoneof()` -> `upb_msg_whichoneof()`. The new function returns the `upb_fielddef*` of whichever oneof is set. - Implemented `upb_msg_clearfield()` and added/implemented `upb_msg_clear()`. - Added `upb_msg_discardunknown()`. Part of me thinks this should go in a util library instead of core reflection since it is a recursive algorithm. # Compiler - Always emit descriptors as an array instead of as a string, to avoid exceeding maximum string lengths. If this becomes a speed issue later we can go back to two separate paths.
5 years ago
/* Zeroes a message in place, including the upb_msg_internal header that
 * precedes it in memory.
 *
 * The region cleared starts sizeof(upb_msg_internal) bytes before `msg` and is
 * upb_msg_sizeof(l) bytes long. The start address is computed with an explicit
 * pointer subtraction (the same form upb_msg_getinternal_const() uses) instead
 * of adding a negated size_t, which would rely on unsigned wraparound in
 * pointer arithmetic. */
void _upb_msg_clear(upb_msg *msg, const upb_msglayout *l) {
  char *mem = (char *)msg - sizeof(upb_msg_internal);
  memset(mem, 0, upb_msg_sizeof(l));
}
/* Ensures the internal data block attached to `msg` has at least `need` free
 * bytes between the end of the unknown-field data (unknown_end) and the start
 * of the extension data (ext_begin), allocating or growing the block from
 * `arena` as required.
 *
 * Returns false if the arena allocation fails, true otherwise. */
static bool realloc_internal(upb_msg *msg, size_t need, upb_arena *arena) {
  upb_msg_internal *in = upb_msg_getinternal(msg);
  upb_msg_internaldata *cur = in->internal;

  if (cur == NULL) {
    /* Nothing allocated yet: create a block whose unknown region is empty
     * (ends right after the header) and whose extension region is empty
     * (begins at the very end of the block). */
    size_t size = UPB_MAX(128, _upb_lg2ceilsize(need + overhead));
    upb_msg_internaldata *fresh = upb_arena_malloc(arena, size);
    if (fresh == NULL) return false;
    fresh->ext_begin = size;
    fresh->unknown_end = overhead;
    fresh->size = size;
    in->internal = fresh;
  } else if (cur->ext_begin - cur->unknown_end < need) {
    /* The free gap is too small: grow the block. */
    size_t new_size = _upb_lg2ceilsize(cur->size + need);
    size_t ext_bytes = cur->size - cur->ext_begin;
    size_t new_ext_begin = new_size - ext_bytes;
    upb_msg_internaldata *grown =
        upb_arena_realloc(arena, cur, cur->size, new_size);
    if (grown == NULL) return false;
    if (ext_bytes != 0) {
      /* Extension data sits at the tail of the block, so it has to be slid
       * to the tail of the enlarged block. */
      char *base = (char *)grown;
      memmove(base + new_ext_begin, base + grown->ext_begin, ext_bytes);
    }
    grown->size = new_size;
    grown->ext_begin = new_ext_begin;
    in->internal = grown;
  }

  UPB_ASSERT(in->internal->ext_begin - in->internal->unknown_end >= need);
  return true;
}
/* Appends `len` bytes from `data` to the unknown-field region of `msg`.
 *
 * Space is reserved through realloc_internal(), which may allocate or grow the
 * internal block from `arena`. Returns false if that reservation fails (the
 * message is left unchanged in that case), true once the bytes are copied. */
bool _upb_msg_addunknown(upb_msg *msg, const char *data, size_t len,
                         upb_arena *arena) {
  if (!realloc_internal(msg, len, arena)) return false;

  /* Fetch the header only after the reservation: realloc_internal() may have
   * replaced in->internal with a new block. */
  upb_msg_internal *in = upb_msg_getinternal(msg);
  memcpy(UPB_PTR_AT(in->internal, in->internal->unknown_end, char), data, len);
  in->internal->unknown_end += len;
  return true;
}
Fixes for PHP. (#286) - A new PHP-specific upb amalgamation. It contains everything related to upb_msg, but leaves out all of the old handlers-related interfaces and encoders/decoders. # Schema/Defs Changes - Changed `upb_fielddef_msgsubdef()` and `upb_fielddef_enumsubdef()` to return `NULL` instead of assert-failing if the field is not a message or enum. - Added `upb_msgdef_iswrapper()`, to test whether this is a wrapper well-known type. # Decoder - Decoder bugfix: when we parse a submessage inside a oneof, we need to clear out any previous data, so we don't misinterpret it as a pointer to an existing submessage. # JSON Decoder - Allowed well-known types at the top level to have their special processing. - Fixed a bug that could occur when parsing nested empty lists/objects, eg `[[]]`. - Made the "ignore unknown" option also be permissive about unknown enumerators by setting them to 0. # JSON Encoder - Allowed well-known types at the top level to have their special processing. - Removed all spaces after `:` and `,` characters, to match the old encoder and pass goldenfile tests. # Message / Reflection - Changed `upb_msg_hasoneof()` -> `upb_msg_whichoneof()`. The new function returns the `upb_fielddef*` of whichever oneof is set. - Implemented `upb_msg_clearfield()` and added/implemented `upb_msg_clear()`. - Added `upb_msg_discardunknown()`. Part of me thinks this should go in a util library instead of core reflection since it is a recursive algorithm. # Compiler - Always emit descriptors as an array instead of as a string, to avoid exceeding maximum string lengths. If this becomes a speed issue later we can go back to two separate paths.
5 years ago
/* Discards the unknown-field bytes stored in the internal block of `msg` by
 * resetting unknown_end to the header size. Only that offset is modified; the
 * block is not freed or shrunk, and nothing happens if the message has no
 * internal block. */
void _upb_msg_discardunknown_shallow(upb_msg *msg) {
  upb_msg_internal *in = upb_msg_getinternal(msg);
  if (in->internal) {
    in->internal->unknown_end = overhead;
  }
}
5 years ago
/* Returns the unknown-field bytes stored on `msg`.
 *
 * On return `*len` holds the number of unknown bytes. The returned pointer
 * addresses the first byte after the upb_msg_internaldata header. If the
 * message has no internal block, `*len` is set to 0 and NULL is returned. */
const char *upb_msg_getunknown(const upb_msg *msg, size_t *len) {
  const upb_msg_internal *in = upb_msg_getinternal_const(msg);
  if (in->internal) {
    *len = in->internal->unknown_end - overhead;
    /* Keep the pointer const-qualified: the function returns const char*. */
    return (const char *)(in->internal + 1);
  } else {
    *len = 0;
    return NULL;
  }
}
/* Returns the array of extension entries stored at the tail of the internal
 * block of `msg` and writes the number of entries to `*count`. If the message
 * has no internal block, `*count` is 0 and NULL is returned. */
const upb_msg_ext *_upb_msg_getexts(const upb_msg *msg, size_t *count) {
  const upb_msg_internal *in = upb_msg_getinternal_const(msg);

  if (!in->internal) {
    *count = 0;
    return NULL;
  }

  /* Extensions occupy everything from ext_begin to the end of the block. */
  size_t ext_bytes = in->internal->size - in->internal->ext_begin;
  *count = ext_bytes / sizeof(upb_msg_ext);
  return UPB_PTR_AT(in->internal, in->internal->ext_begin, void);
}
/* Looks up the extension entry of `msg` whose `ext` pointer equals `e`.
 * Returns a pointer to the matching entry, or NULL if there is none. */
const upb_msg_ext *_upb_msg_getext(const upb_msg *msg,
                                   const upb_msglayout_ext *e) {
  size_t count;
  const upb_msg_ext *exts = _upb_msg_getexts(msg, &count);
  const upb_msg_ext *found = NULL;
  size_t i = 0;

  /* Plain linear scan; a lookup table could replace this if messages with
   * many extensions ever make it a bottleneck. */
  while (found == NULL && i < count) {
    if (exts[i].ext == e) {
      found = &exts[i];
    }
    i++;
  }
  return found;
}
/* Returns the extension entry of `msg` for `e`, creating a zero-initialised
 * entry (with its `ext` member set to `e`) if none exists yet. Returns NULL if
 * space for a new entry cannot be reserved from `arena`. */
upb_msg_ext *_upb_msg_getorcreateext(upb_msg *msg, const upb_msglayout_ext *e,
                                     upb_arena *arena) {
  upb_msg_ext *ext = (upb_msg_ext *)_upb_msg_getext(msg, e);

  if (ext == NULL) {
    if (!realloc_internal(msg, sizeof(upb_msg_ext), arena)) return NULL;

    /* New entries are carved off the front of the extension region, which
     * grows downward from the end of the internal block. */
    upb_msg_internaldata *data = upb_msg_getinternal(msg)->internal;
    data->ext_begin -= sizeof(upb_msg_ext);
    ext = UPB_PTR_AT(data, data->ext_begin, void);
    memset(ext, 0, sizeof(*ext));
    ext->ext = e;
  }
  return ext;
}
5 years ago
/** upb_array *****************************************************************/
/* Grows the storage of `arr` so that it can hold at least `min_size`
 * elements.
 *
 * The new capacity starts at max(current capacity, 4) and is doubled until it
 * reaches `min_size`. The element size is read as a log2 value from the low
 * three bits of arr->data, and the same value is packed back into arr->data
 * together with the new pointer via _upb_tag_arrptr().
 *
 * Returns false (leaving `arr` untouched) if the arena reallocation fails. */
bool _upb_array_realloc(upb_array *arr, size_t min_size, upb_arena *arena) {
  size_t new_size = UPB_MAX(arr->size, 4);
  int elem_size_lg2 = arr->data & 7;
  size_t old_bytes = arr->size << elem_size_lg2;
  size_t new_bytes;
  void *ptr = _upb_array_ptr(arr);

  /* Double the capacity until it covers min_size. */
  while (new_size < min_size) new_size *= 2;

  new_bytes = new_size << elem_size_lg2;
  ptr = upb_arena_realloc(arena, ptr, old_bytes, new_bytes);
  if (!ptr) {
    return false;
  }

  arr->data = _upb_tag_arrptr(ptr, elem_size_lg2);
  arr->size = new_size;
  return true;
}
/* Returns the array stored in `*arr_ptr`, creating it first if the slot is
 * NULL. A newly created array is requested from _upb_array_new() with an
 * initial size argument of 4 and the given `elem_size_lg2`, and is stored back
 * into `*arr_ptr`. Returns NULL if creation fails (the slot stays NULL). */
static upb_array *getorcreate_array(upb_array **arr_ptr, int elem_size_lg2,
                                    upb_arena *arena) {
  upb_array *arr = *arr_ptr;
  if (!arr) {
    arr = _upb_array_new(arena, 4, elem_size_lg2);
    if (!arr) return NULL;
    *arr_ptr = arr;
  }
  return arr;
}
5 years ago
/* Resizes the array in `*arr_ptr` to `size` elements, creating the array
 * first if the slot is empty. Returns the array's data pointer on success, or
 * NULL if either the creation or the resize fails. */
void *_upb_array_resize_fallback(upb_array **arr_ptr, size_t size,
                                 int elem_size_lg2, upb_arena *arena) {
  upb_array *arr = getorcreate_array(arr_ptr, elem_size_lg2, arena);
  if (!arr) return NULL;
  if (!_upb_array_resize(arr, size, arena)) return NULL;
  return _upb_array_ptr(arr);
}
/* Appends one element to the array in `*arr_ptr`, creating the array first if
 * the slot is empty.
 *
 * `value` points at the element to copy; 1 << elem_size_lg2 bytes are copied
 * into the slot at the previous length. Returns false if the array cannot be
 * created or resized, true otherwise. */
bool _upb_array_append_fallback(upb_array **arr_ptr, const void *value,
                                int elem_size_lg2, upb_arena *arena) {
  upb_array *arr = getorcreate_array(arr_ptr, elem_size_lg2, arena);
  if (!arr) return false;

  size_t elems = arr->len;

  if (!_upb_array_resize(arr, elems + 1, arena)) {
    return false;
  }

  /* Read the data pointer only after resizing, since the resize may have
   * moved the storage. */
  char *data = _upb_array_ptr(arr);
  memcpy(data + (elems << elem_size_lg2), value, 1 << elem_size_lg2);
  return true;
}
/** upb_map *******************************************************************/
/* Creates a new map in arena `a`, recording `key_size` and `value_size` on it.
 *
 * Returns NULL if either the map object or its string table cannot be
 * allocated. The result of upb_strtable_init() is checked here in the same way
 * upb_extreg_new() checks it, so a failed table allocation is no longer
 * silently ignored. */
upb_map *_upb_map_new(upb_arena *a, size_t key_size, size_t value_size) {
  upb_map *map = upb_arena_malloc(a, sizeof(upb_map));

  if (!map) {
    return NULL;
  }

  if (!upb_strtable_init(&map->table, 4, a)) {
    return NULL;
  }
  map->key_size = key_size;
  map->val_size = value_size;

  return map;
}
Added map sorting to binary and text encoders. For the binary encoder, sorting is off by default. For the text encoder, sorting is on by default. Both defaults can be explicitly overridden. This grows code size a bit. I think we could potentially shave this (and other map-related code size) by having the generated code inject a function pointer to the map-related parsing/serialization code if maps are present. FILE SIZE VM SIZE -------------- -------------- +86% +1.07Ki +71% +768 upb/msg.c [NEW] +391 [NEW] +344 _upb_mapsorter_pushmap [NEW] +158 [NEW] +112 _upb_mapsorter_cmpstr [NEW] +111 [NEW] +64 _upb_mapsorter_cmpbool [NEW] +110 [NEW] +64 _upb_mapsorter_cmpi32 [NEW] +110 [NEW] +64 _upb_mapsorter_cmpi64 [NEW] +110 [NEW] +64 _upb_mapsorter_cmpu32 [NEW] +110 [NEW] +64 _upb_mapsorter_cmpu64 -3.6% -8 -4.3% -8 _upb_map_new +9.5% +464 +9.2% +424 upb/text_encode.c [NEW] +656 [NEW] +616 txtenc_mapentry +15% +32 +20% +32 upb_text_encode -20.1% -224 -20.7% -224 txtenc_msg +5.7% +342 +5.3% +296 upb/encode.c [NEW] +344 [NEW] +304 encode_mapentry [NEW] +246 [NEW] +208 upb_encode_ex [NEW] +41 [NEW] +16 upb_encode_ex.ch +0.7% +8 +0.7% +8 encode_scalar -1.0% -32 -1.0% -32 encode_message [DEL] -38 [DEL] -16 upb_encode.ch [DEL] -227 [DEL] -192 upb_encode +2.0% +152 +2.2% +152 upb/decode.c +44% +128 +44% +128 [section .rodata] +3.4% +24 +3.4% +24 _GLOBAL_OFFSET_TABLE_ +0.6% +107 +0.3% +48 upb/def.c [NEW] +100 [NEW] +48 upb_fielddef_descriptortype +7.1% +7 [ = ] 0 upb_fielddef_defaultint32 +2.9% +24 +2.9% +24 [section .dynsym] +1.2% +24 [ = ] 0 [section .symtab] +3.2% +16 +3.2% +16 [section .plt] [NEW] +16 [NEW] +16 memcmp@plt +0.5% +16 +0.6% +16 tests/conformance_upb.c +1.5% +16 +1.6% +16 DoTestIo +0.1% +16 +0.1% +16 upb/json_decode.c +0.4% +16 +0.4% +16 jsondec_wellknown +3.0% +8 +3.0% +8 [section .got.plt] +3.0% +8 +3.0% +8 _GLOBAL_OFFSET_TABLE_ +1.6% +7 +1.6% +7 [section .dynstr] +1.8% +4 +1.8% +4 [section .hash] +0.5% +3 +0.5% +3 [LOAD #2 [RX]] +2.8% +2 +2.8% +2 [section 
.gnu.version] -60.0% -1.74Ki [ = ] 0 [Unmapped] +0.3% +496 +1.4% +1.74Ki TOTAL
4 years ago
/* Shared helper for the map-sorter comparators. `_a` and `_b` each point at a
 * `const upb_tabent *` element; the key of each entry is converted with
 * _upb_map_fromkey() into `a_key` / `b_key`, passing `size` through. */
static void _upb_mapsorter_getkeys(const void *_a, const void *_b, void *a_key,
                                   void *b_key, size_t size) {
  const upb_tabent *ent_a = *(const upb_tabent *const *)_a;
  const upb_tabent *ent_b = *(const upb_tabent *const *)_b;
  upb_strview view_a = upb_tabstrview(ent_a->key);
  upb_strview view_b = upb_tabstrview(ent_b->key);

  _upb_map_fromkey(view_a, a_key, size);
  _upb_map_fromkey(view_b, b_key, size);
}
/* qsort() comparator for map entries with signed 64-bit keys.
 *
 * Returns -1, 0 or 1. The result comes from comparisons, not from `a - b`: the
 * 64-bit difference can overflow and is truncated when converted to int, which
 * can produce the wrong sign and an inconsistent ordering. */
static int _upb_mapsorter_cmpi64(const void *_a, const void *_b) {
  int64_t a, b;
  _upb_mapsorter_getkeys(_a, _b, &a, &b, 8);
  return (a > b) - (a < b);
}
/* qsort() comparator for map entries with unsigned 64-bit keys.
 *
 * Returns -1, 0 or 1. The result comes from comparisons, not from `a - b`: the
 * unsigned difference wraps around when a < b and is then truncated to int, so
 * its sign does not reflect the ordering of the keys. */
static int _upb_mapsorter_cmpu64(const void *_a, const void *_b) {
  uint64_t a, b;
  _upb_mapsorter_getkeys(_a, _b, &a, &b, 8);
  return (a > b) - (a < b);
}
/* qsort() comparator for map entries with signed 32-bit keys.
 *
 * Returns -1, 0 or 1. The result comes from comparisons, not from `a - b`,
 * because the subtraction overflows (undefined behaviour) for keys that are
 * far apart, e.g. INT32_MIN and 1. */
static int _upb_mapsorter_cmpi32(const void *_a, const void *_b) {
  int32_t a, b;
  _upb_mapsorter_getkeys(_a, _b, &a, &b, 4);
  return (a > b) - (a < b);
}
/* qsort() comparator for map entries with unsigned 32-bit keys.
 *
 * Returns -1, 0 or 1. The result comes from comparisons, not from `a - b`: the
 * unsigned difference wraps around, so keys more than INT32_MAX apart would
 * compare with the wrong sign. */
static int _upb_mapsorter_cmpu32(const void *_a, const void *_b) {
  uint32_t a, b;
  _upb_mapsorter_getkeys(_a, _b, &a, &b, 4);
  return (a > b) - (a < b);
}
/* qsort() comparator for map entries with bool keys: false sorts before true.
 * Returns -1, 0 or 1. */
static int _upb_mapsorter_cmpbool(const void *_a, const void *_b) {
  bool key_a, key_b;
  _upb_mapsorter_getkeys(_a, _b, &key_a, &key_b, 1);

  int lhs = key_a;
  int rhs = key_b;
  return lhs - rhs;
}
/* qsort() comparator for map entries with string keys.
 *
 * Keys are ordered bytewise over their common prefix; if the prefixes are
 * equal, the shorter key sorts first. The length tie-break uses comparisons
 * instead of `a.size - b.size`, because that size_t difference wraps around
 * and is truncated when converted to int. */
static int _upb_mapsorter_cmpstr(const void *_a, const void *_b) {
  upb_strview a, b;
  _upb_mapsorter_getkeys(_a, _b, &a, &b, UPB_MAPTYPE_STRING);

  size_t common_size = UPB_MIN(a.size, b.size);
  /* Skip memcmp() for an empty prefix so it is never handed a pointer that
   * might be NULL for an empty key. */
  int cmp = common_size ? memcmp(a.data, b.data, common_size) : 0;
  if (cmp) return cmp;
  return (a.size > b.size) - (a.size < b.size);
}
/* Pushes the entries of `map` onto the sorter `s`, sorted by key.
 *
 * `sorted` receives the [start, end) index range inside s->entries that holds
 * this map's entries, with `pos` initialised to `start`. `key_type` selects
 * the comparator; only the integer, enum, bool and string descriptor types
 * handled by the switch below are expected.
 *
 * Returns false if s->entries cannot be grown. In that case the existing
 * s->entries buffer and s->cap are left intact, so the caller can still
 * release the buffer. */
bool _upb_mapsorter_pushmap(_upb_mapsorter *s, upb_descriptortype_t key_type,
                            const upb_map *map, _upb_sortedmap *sorted) {
  int map_size = _upb_map_size(map);
  sorted->start = s->size;
  sorted->pos = sorted->start;
  sorted->end = sorted->start + map_size;

  /* Grow s->entries if necessary. */
  if (sorted->end > s->cap) {
    int new_cap = _upb_lg2ceilsize(sorted->end);
    /* Go through a temporary so the old buffer is not lost if realloc()
     * fails. */
    void *new_entries =
        realloc(s->entries, (size_t)new_cap * sizeof(*s->entries));
    if (!new_entries) return false;
    s->entries = new_entries;
    s->cap = new_cap;
  }

  s->size = sorted->end;

  /* Copy non-empty entries from the table to s->entries. */
  upb_tabent const **dst = &s->entries[sorted->start];
  const upb_tabent *src = map->table.t.entries;
  const upb_tabent *end = src + upb_table_size(&map->table.t);
  for (; src < end; src++) {
    if (!upb_tabent_isempty(src)) {
      *dst = src;
      dst++;
    }
  }
  UPB_ASSERT(dst == &s->entries[sorted->end]);

  /* Sort entries according to the key type. */
  int (*compar)(const void *, const void *);

  switch (key_type) {
    case UPB_DESCRIPTOR_TYPE_INT64:
    case UPB_DESCRIPTOR_TYPE_SFIXED64:
    case UPB_DESCRIPTOR_TYPE_SINT64:
      compar = _upb_mapsorter_cmpi64;
      break;
    case UPB_DESCRIPTOR_TYPE_UINT64:
    case UPB_DESCRIPTOR_TYPE_FIXED64:
      compar = _upb_mapsorter_cmpu64;
      break;
    case UPB_DESCRIPTOR_TYPE_INT32:
    case UPB_DESCRIPTOR_TYPE_SINT32:
    case UPB_DESCRIPTOR_TYPE_SFIXED32:
    case UPB_DESCRIPTOR_TYPE_ENUM:
      compar = _upb_mapsorter_cmpi32;
      break;
    case UPB_DESCRIPTOR_TYPE_UINT32:
    case UPB_DESCRIPTOR_TYPE_FIXED32:
      compar = _upb_mapsorter_cmpu32;
      break;
    case UPB_DESCRIPTOR_TYPE_BOOL:
      compar = _upb_mapsorter_cmpbool;
      break;
    case UPB_DESCRIPTOR_TYPE_STRING:
      compar = _upb_mapsorter_cmpstr;
      break;
    default:
      UPB_UNREACHABLE();
  }

  qsort(&s->entries[sorted->start], map_size, sizeof(*s->entries), compar);
  return true;
}
/** upb_extreg ****************************************************************/
/* Extension registry: a string table of registered extensions together with
 * the arena used when inserting into that table. */
struct upb_extreg {
/* Arena stored at creation time and passed to upb_strtable_insert(). */
upb_arena *arena;
/* Values are pointers to upb_msglayout_ext, stored via upb_value_constptr(). */
upb_strtable exts; /* Key is upb_msglayout* concatenated with fieldnum. */
};
/* Number of bytes in a registry key: a layout pointer followed by a 32-bit
 * field number. */
#define EXTREG_KEY_SIZE (sizeof(upb_msglayout*) + sizeof(uint32_t))

/* Writes the registry key for (`l`, `fieldnum`) into `buf`, which must have
 * room for EXTREG_KEY_SIZE bytes: first the raw bytes of the pointer `l`,
 * then the raw bytes of `fieldnum`. */
static void extreg_key(char *buf, const upb_msglayout *l, uint32_t fieldnum) {
  char *fieldnum_dst = buf + sizeof(l);

  memcpy(buf, &l, sizeof(l));
  memcpy(fieldnum_dst, &fieldnum, sizeof(fieldnum));
}
/* Creates an extension registry allocated from `arena`. The arena is stored
 * on the registry for later insertions. Returns NULL if the registry object
 * or its string table cannot be allocated. */
upb_extreg *upb_extreg_new(upb_arena *arena) {
  upb_extreg *reg = upb_arena_malloc(arena, sizeof(*reg));

  if (reg == NULL) {
    return NULL;
  }
  reg->arena = arena;
  return upb_strtable_init(&reg->exts, 8, arena) ? reg : NULL;
}
/* Registers the `count` extensions in the array starting at `e`.
 *
 * Each extension is inserted into r->exts under the key built from its
 * extendee layout and field number. The operation is all-or-nothing: if any
 * insertion fails, the entries inserted by this call are removed again and
 * false is returned. Returns true if every extension was inserted.
 *
 * `end` is declared at function scope because the rollback loop after the
 * `failure` label reuses it; declaring it in the first loop's initialiser
 * would put it out of scope there. */
bool _upb_extreg_add(upb_extreg *r, const upb_msglayout_ext *e, size_t count) {
  char buf[EXTREG_KEY_SIZE];
  const upb_msglayout_ext *start = e;
  const upb_msglayout_ext *end = e + count;

  for (; e < end; e++) {
    extreg_key(buf, e->extendee, e->field.number);
    if (!upb_strtable_insert(&r->exts, buf, EXTREG_KEY_SIZE,
                             upb_value_constptr(e), r->arena)) {
      goto failure;
    }
  }
  return true;

failure:
  /* Back out the entries previously added: everything in [start, e). */
  for (end = e, e = start; e < end; e++) {
    extreg_key(buf, e->extendee, e->field.number);
    upb_strtable_remove(&r->exts, buf, EXTREG_KEY_SIZE, NULL);
  }
  return false;
}
/* Looks up the extension registered in `r` for layout `l` and field number
 * `num`. Returns the pointer stored for that key, or NULL if the key is not
 * present in the registry. */
const upb_msglayout_field *_upb_extreg_get(const upb_extreg *r,
                                           const upb_msglayout *l,
                                           uint32_t num) {
  char key[EXTREG_KEY_SIZE];
  upb_value found;

  extreg_key(key, l, num);
  if (!upb_strtable_lookup2(&r->exts, key, EXTREG_KEY_SIZE, &found)) {
    return NULL;
  }
  return upb_value_getconstptr(found);
}