Protocol Buffers - Google's data interchange format (grpc依赖) https://developers.google.com/protocol-buffers/
You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

476 lines
16 KiB

/*
** upb_table
**
** This header is INTERNAL-ONLY! Its interfaces are not public or stable!
** This file defines very fast int->upb_value (inttable) and string->upb_value
** (strtable) hash tables.
**
** The table uses chained scatter with Brent's variation (inspired by the Lua
** implementation of hash tables). The hash function for strings is Austin
** Appleby's "MurmurHash."
**
** The inttable uses uintptr_t as its key, which guarantees it can be used to
** store pointers or integers of at least 32 bits (upb isn't really useful on
** systems where sizeof(void*) < 4).
**
** The table must be homogeneous (all values of the same type). In debug
** mode, we check this on insert and lookup.
*/
#ifndef UPB_TABLE_H_
#define UPB_TABLE_H_
#include <stdint.h>
#include <string.h>
#include "upb/upb.h"
#include "upb/port_def.inc"
#ifdef __cplusplus
extern "C" {
#endif
/* upb_value ******************************************************************/
/* A tagged union (stored untagged inside the table) so that we can check that
* clients calling table accessors are correctly typed without having to have
* an explosion of accessors. */
typedef enum {
UPB_CTYPE_INT32 = 1,
UPB_CTYPE_INT64 = 2,
UPB_CTYPE_UINT32 = 3,
UPB_CTYPE_UINT64 = 4,
UPB_CTYPE_BOOL = 5,
UPB_CTYPE_CSTR = 6,
UPB_CTYPE_PTR = 7,
UPB_CTYPE_CONSTPTR = 8,
UPB_CTYPE_FPTR = 9,
UPB_CTYPE_FLOAT = 10,
UPB_CTYPE_DOUBLE = 11
} upb_ctype_t;
typedef struct {
uint64_t val;
} upb_value;
/* Like strdup(), which isn't always available since it's not ANSI C. */
char *upb_strdup(const char *s, upb_alloc *a);
/* Variant that works with a length-delimited rather than NULL-delimited string,
* as supported by strtable. */
char *upb_strdup2(const char *s, size_t len, upb_alloc *a);
UPB_INLINE char *upb_gstrdup(const char *s) {
return upb_strdup(s, &upb_alloc_global);
}
UPB_INLINE void _upb_value_setval(upb_value *v, uint64_t val) {
v->val = val;
}
UPB_INLINE upb_value _upb_value_val(uint64_t val) {
upb_value ret;
_upb_value_setval(&ret, val);
return ret;
}
/* For each value ctype, define the following set of functions:
*
* // Get/set an int32 from a upb_value.
* int32_t upb_value_getint32(upb_value val);
* void upb_value_setint32(upb_value *val, int32_t cval);
*
* // Construct a new upb_value from an int32.
* upb_value upb_value_int32(int32_t val); */
#define FUNCS(name, membername, type_t, converter, proto_type) \
UPB_INLINE void upb_value_set ## name(upb_value *val, type_t cval) { \
val->val = (converter)cval; \
} \
UPB_INLINE upb_value upb_value_ ## name(type_t val) { \
upb_value ret; \
upb_value_set ## name(&ret, val); \
return ret; \
} \
UPB_INLINE type_t upb_value_get ## name(upb_value val) { \
return (type_t)(converter)val.val; \
}
FUNCS(int32, int32, int32_t, int32_t, UPB_CTYPE_INT32)
FUNCS(int64, int64, int64_t, int64_t, UPB_CTYPE_INT64)
FUNCS(uint32, uint32, uint32_t, uint32_t, UPB_CTYPE_UINT32)
FUNCS(uint64, uint64, uint64_t, uint64_t, UPB_CTYPE_UINT64)
FUNCS(bool, _bool, bool, bool, UPB_CTYPE_BOOL)
FUNCS(cstr, cstr, char*, uintptr_t, UPB_CTYPE_CSTR)
FUNCS(ptr, ptr, void*, uintptr_t, UPB_CTYPE_PTR)
FUNCS(constptr, constptr, const void*, uintptr_t, UPB_CTYPE_CONSTPTR)
FUNCS(fptr, fptr, upb_func*, uintptr_t, UPB_CTYPE_FPTR)
#undef FUNCS
UPB_INLINE void upb_value_setfloat(upb_value *val, float cval) {
memcpy(&val->val, &cval, sizeof(cval));
}
UPB_INLINE void upb_value_setdouble(upb_value *val, double cval) {
memcpy(&val->val, &cval, sizeof(cval));
}
UPB_INLINE upb_value upb_value_float(float cval) {
upb_value ret;
upb_value_setfloat(&ret, cval);
return ret;
}
UPB_INLINE upb_value upb_value_double(double cval) {
upb_value ret;
upb_value_setdouble(&ret, cval);
return ret;
}
#undef SET_TYPE
/* upb_tabkey *****************************************************************/
/* Either:
* 1. an actual integer key, or
* 2. a pointer to a string prefixed by its uint32_t length, owned by us.
*
* ...depending on whether this is a string table or an int table. We would
* make this a union of those two types, but C89 doesn't support statically
* initializing a non-first union member. */
typedef uintptr_t upb_tabkey;
UPB_INLINE char *upb_tabstr(upb_tabkey key, uint32_t *len) {
char* mem = (char*)key;
if (len) memcpy(len, mem, sizeof(*len));
return mem + sizeof(*len);
}
Added map sorting to binary and text encoders. For the binary encoder, sorting is off by default. For the text encoder, sorting is on by default. Both defaults can be explicitly overridden. This grows code size a bit. I think we could potentially shave this (and other map-related code size) by having the generated code inject a function pointer to the map-related parsing/serialization code if maps are present. FILE SIZE VM SIZE -------------- -------------- +86% +1.07Ki +71% +768 upb/msg.c [NEW] +391 [NEW] +344 _upb_mapsorter_pushmap [NEW] +158 [NEW] +112 _upb_mapsorter_cmpstr [NEW] +111 [NEW] +64 _upb_mapsorter_cmpbool [NEW] +110 [NEW] +64 _upb_mapsorter_cmpi32 [NEW] +110 [NEW] +64 _upb_mapsorter_cmpi64 [NEW] +110 [NEW] +64 _upb_mapsorter_cmpu32 [NEW] +110 [NEW] +64 _upb_mapsorter_cmpu64 -3.6% -8 -4.3% -8 _upb_map_new +9.5% +464 +9.2% +424 upb/text_encode.c [NEW] +656 [NEW] +616 txtenc_mapentry +15% +32 +20% +32 upb_text_encode -20.1% -224 -20.7% -224 txtenc_msg +5.7% +342 +5.3% +296 upb/encode.c [NEW] +344 [NEW] +304 encode_mapentry [NEW] +246 [NEW] +208 upb_encode_ex [NEW] +41 [NEW] +16 upb_encode_ex.ch +0.7% +8 +0.7% +8 encode_scalar -1.0% -32 -1.0% -32 encode_message [DEL] -38 [DEL] -16 upb_encode.ch [DEL] -227 [DEL] -192 upb_encode +2.0% +152 +2.2% +152 upb/decode.c +44% +128 +44% +128 [section .rodata] +3.4% +24 +3.4% +24 _GLOBAL_OFFSET_TABLE_ +0.6% +107 +0.3% +48 upb/def.c [NEW] +100 [NEW] +48 upb_fielddef_descriptortype +7.1% +7 [ = ] 0 upb_fielddef_defaultint32 +2.9% +24 +2.9% +24 [section .dynsym] +1.2% +24 [ = ] 0 [section .symtab] +3.2% +16 +3.2% +16 [section .plt] [NEW] +16 [NEW] +16 memcmp@plt +0.5% +16 +0.6% +16 tests/conformance_upb.c +1.5% +16 +1.6% +16 DoTestIo +0.1% +16 +0.1% +16 upb/json_decode.c +0.4% +16 +0.4% +16 jsondec_wellknown +3.0% +8 +3.0% +8 [section .got.plt] +3.0% +8 +3.0% +8 _GLOBAL_OFFSET_TABLE_ +1.6% +7 +1.6% +7 [section .dynstr] +1.8% +4 +1.8% +4 [section .hash] +0.5% +3 +0.5% +3 [LOAD #2 [RX]] +2.8% +2 +2.8% +2 [section .gnu.version] -60.0% -1.74Ki [ = ] 0 [Unmapped] +0.3% +496 +1.4% +1.74Ki TOTAL
4 years ago
UPB_INLINE upb_strview upb_tabstrview(upb_tabkey key) {
upb_strview ret;
uint32_t len;
ret.data = upb_tabstr(key, &len);
ret.size = len;
return ret;
}
/* upb_tabval *****************************************************************/
Added map sorting to binary and text encoders. For the binary encoder, sorting is off by default. For the text encoder, sorting is on by default. Both defaults can be explicitly overridden. This grows code size a bit. I think we could potentially shave this (and other map-related code size) by having the generated code inject a function pointer to the map-related parsing/serialization code if maps are present. FILE SIZE VM SIZE -------------- -------------- +86% +1.07Ki +71% +768 upb/msg.c [NEW] +391 [NEW] +344 _upb_mapsorter_pushmap [NEW] +158 [NEW] +112 _upb_mapsorter_cmpstr [NEW] +111 [NEW] +64 _upb_mapsorter_cmpbool [NEW] +110 [NEW] +64 _upb_mapsorter_cmpi32 [NEW] +110 [NEW] +64 _upb_mapsorter_cmpi64 [NEW] +110 [NEW] +64 _upb_mapsorter_cmpu32 [NEW] +110 [NEW] +64 _upb_mapsorter_cmpu64 -3.6% -8 -4.3% -8 _upb_map_new +9.5% +464 +9.2% +424 upb/text_encode.c [NEW] +656 [NEW] +616 txtenc_mapentry +15% +32 +20% +32 upb_text_encode -20.1% -224 -20.7% -224 txtenc_msg +5.7% +342 +5.3% +296 upb/encode.c [NEW] +344 [NEW] +304 encode_mapentry [NEW] +246 [NEW] +208 upb_encode_ex [NEW] +41 [NEW] +16 upb_encode_ex.ch +0.7% +8 +0.7% +8 encode_scalar -1.0% -32 -1.0% -32 encode_message [DEL] -38 [DEL] -16 upb_encode.ch [DEL] -227 [DEL] -192 upb_encode +2.0% +152 +2.2% +152 upb/decode.c +44% +128 +44% +128 [section .rodata] +3.4% +24 +3.4% +24 _GLOBAL_OFFSET_TABLE_ +0.6% +107 +0.3% +48 upb/def.c [NEW] +100 [NEW] +48 upb_fielddef_descriptortype +7.1% +7 [ = ] 0 upb_fielddef_defaultint32 +2.9% +24 +2.9% +24 [section .dynsym] +1.2% +24 [ = ] 0 [section .symtab] +3.2% +16 +3.2% +16 [section .plt] [NEW] +16 [NEW] +16 memcmp@plt +0.5% +16 +0.6% +16 tests/conformance_upb.c +1.5% +16 +1.6% +16 DoTestIo +0.1% +16 +0.1% +16 upb/json_decode.c +0.4% +16 +0.4% +16 jsondec_wellknown +3.0% +8 +3.0% +8 [section .got.plt] +3.0% +8 +3.0% +8 _GLOBAL_OFFSET_TABLE_ +1.6% +7 +1.6% +7 [section .dynstr] +1.8% +4 +1.8% +4 [section .hash] +0.5% +3 +0.5% +3 [LOAD #2 [RX]] +2.8% +2 +2.8% +2 [section .gnu.version] -60.0% -1.74Ki [ = ] 0 [Unmapped] +0.3% +496 +1.4% +1.74Ki TOTAL
4 years ago
typedef struct upb_tabval {
uint64_t val;
} upb_tabval;
6 years ago
#define UPB_TABVALUE_EMPTY_INIT {-1}
/* upb_table ******************************************************************/
typedef struct _upb_tabent {
upb_tabkey key;
upb_tabval val;
/* Internal chaining. This is const so we can create static initializers for
* tables. We cast away const sometimes, but *only* when the containing
* upb_table is known to be non-const. This requires a bit of care, but
* the subtlety is confined to table.c. */
const struct _upb_tabent *next;
} upb_tabent;
typedef struct {
size_t count; /* Number of entries in the hash part. */
uint32_t mask; /* Mask to turn hash value -> bucket. */
uint32_t max_count; /* Max count before we hit our load limit. */
uint8_t size_lg2; /* Size of the hashtable part is 2^size_lg2 entries. */
/* Hash table entries.
* Making this const isn't entirely accurate; what we really want is for it to
* have the same const-ness as the table it's inside. But there's no way to
* declare that in C. So we have to make it const so that we can statically
* initialize const hash tables. Then we cast away const when we have to.
*/
const upb_tabent *entries;
} upb_table;
typedef struct {
upb_table t;
} upb_strtable;
typedef struct {
upb_table t; /* For entries that don't fit in the array part. */
const upb_tabval *array; /* Array part of the table. See const note above. */
size_t array_size; /* Array part size. */
size_t array_count; /* Array part number of elements. */
} upb_inttable;
#define UPB_ARRAY_EMPTYENT -1
UPB_INLINE size_t upb_table_size(const upb_table *t) {
if (t->size_lg2 == 0)
return 0;
else
return 1 << t->size_lg2;
}
/* Internal-only functions, in .h file only out of necessity. */
UPB_INLINE bool upb_tabent_isempty(const upb_tabent *e) {
return e->key == 0;
}
/* Used by some of the unit tests for generic hashing functionality. */
uint32_t upb_murmur_hash2(const void * key, size_t len, uint32_t seed);
UPB_INLINE uintptr_t upb_intkey(uintptr_t key) {
return key;
}
UPB_INLINE uint32_t upb_inthash(uintptr_t key) {
return (uint32_t)key;
}
static const upb_tabent *upb_getentry(const upb_table *t, uint32_t hash) {
return t->entries + (hash & t->mask);
}
UPB_INLINE bool upb_arrhas(upb_tabval key) {
return key.val != (uint64_t)-1;
}
/* Initialize and uninitialize a table, respectively. If memory allocation
* failed, false is returned that the table is uninitialized. */
bool upb_inttable_init2(upb_inttable *table, upb_ctype_t ctype, upb_alloc *a);
bool upb_strtable_init2(upb_strtable *table, upb_ctype_t ctype,
size_t expected_size, upb_alloc *a);
void upb_inttable_uninit2(upb_inttable *table, upb_alloc *a);
void upb_strtable_uninit2(upb_strtable *table, upb_alloc *a);
UPB_INLINE bool upb_inttable_init(upb_inttable *table, upb_ctype_t ctype) {
return upb_inttable_init2(table, ctype, &upb_alloc_global);
}
UPB_INLINE bool upb_strtable_init(upb_strtable *table, upb_ctype_t ctype) {
return upb_strtable_init2(table, ctype, 4, &upb_alloc_global);
}
UPB_INLINE void upb_inttable_uninit(upb_inttable *table) {
upb_inttable_uninit2(table, &upb_alloc_global);
}
UPB_INLINE void upb_strtable_uninit(upb_strtable *table) {
upb_strtable_uninit2(table, &upb_alloc_global);
}
/* Returns the number of values in the table. */
size_t upb_inttable_count(const upb_inttable *t);
UPB_INLINE size_t upb_strtable_count(const upb_strtable *t) {
return t->t.count;
}
void upb_inttable_packedsize(const upb_inttable *t, size_t *size);
void upb_strtable_packedsize(const upb_strtable *t, size_t *size);
upb_inttable *upb_inttable_pack(const upb_inttable *t, void *p, size_t *ofs,
size_t size);
upb_strtable *upb_strtable_pack(const upb_strtable *t, void *p, size_t *ofs,
size_t size);
void upb_strtable_clear(upb_strtable *t);
/* Inserts the given key into the hashtable with the given value. The key must
* not already exist in the hash table. For string tables, the key must be
* NULL-terminated, and the table will make an internal copy of the key.
* Inttables must not insert a value of UINTPTR_MAX.
*
* If a table resize was required but memory allocation failed, false is
* returned and the table is unchanged. */
bool upb_inttable_insert2(upb_inttable *t, uintptr_t key, upb_value val,
upb_alloc *a);
bool upb_strtable_insert3(upb_strtable *t, const char *key, size_t len,
upb_value val, upb_alloc *a);
UPB_INLINE bool upb_inttable_insert(upb_inttable *t, uintptr_t key,
upb_value val) {
return upb_inttable_insert2(t, key, val, &upb_alloc_global);
}
UPB_INLINE bool upb_strtable_insert2(upb_strtable *t, const char *key,
size_t len, upb_value val) {
return upb_strtable_insert3(t, key, len, val, &upb_alloc_global);
}
/* For NULL-terminated strings. */
UPB_INLINE bool upb_strtable_insert(upb_strtable *t, const char *key,
upb_value val) {
return upb_strtable_insert2(t, key, strlen(key), val);
}
/* Looks up key in this table, returning "true" if the key was found.
* If v is non-NULL, copies the value for this key into *v. */
bool upb_inttable_lookup(const upb_inttable *t, uintptr_t key, upb_value *v);
bool upb_strtable_lookup2(const upb_strtable *t, const char *key, size_t len,
upb_value *v);
/* For NULL-terminated strings. */
UPB_INLINE bool upb_strtable_lookup(const upb_strtable *t, const char *key,
upb_value *v) {
return upb_strtable_lookup2(t, key, strlen(key), v);
}
/* Removes an item from the table. Returns true if the remove was successful,
* and stores the removed item in *val if non-NULL. */
bool upb_inttable_remove(upb_inttable *t, uintptr_t key, upb_value *val);
bool upb_strtable_remove3(upb_strtable *t, const char *key, size_t len,
upb_value *val, upb_alloc *alloc);
UPB_INLINE bool upb_strtable_remove2(upb_strtable *t, const char *key,
size_t len, upb_value *val) {
return upb_strtable_remove3(t, key, len, val, &upb_alloc_global);
}
/* For NULL-terminated strings. */
UPB_INLINE bool upb_strtable_remove(upb_strtable *t, const char *key,
upb_value *v) {
return upb_strtable_remove2(t, key, strlen(key), v);
}
/* Updates an existing entry in an inttable. If the entry does not exist,
* returns false and does nothing. Unlike insert/remove, this does not
* invalidate iterators. */
bool upb_inttable_replace(upb_inttable *t, uintptr_t key, upb_value val);
/* Convenience routines for inttables with pointer keys. */
bool upb_inttable_insertptr2(upb_inttable *t, const void *key, upb_value val,
upb_alloc *a);
bool upb_inttable_removeptr(upb_inttable *t, const void *key, upb_value *val);
bool upb_inttable_lookupptr(
const upb_inttable *t, const void *key, upb_value *val);
UPB_INLINE bool upb_inttable_insertptr(upb_inttable *t, const void *key,
upb_value val) {
return upb_inttable_insertptr2(t, key, val, &upb_alloc_global);
}
/* Optimizes the table for the current set of entries, for both memory use and
* lookup time. Client should call this after all entries have been inserted;
* inserting more entries is legal, but will likely require a table resize. */
void upb_inttable_compact2(upb_inttable *t, upb_alloc *a);
UPB_INLINE void upb_inttable_compact(upb_inttable *t) {
upb_inttable_compact2(t, &upb_alloc_global);
}
/* A special-case inlinable version of the lookup routine for 32-bit
* integers. */
UPB_INLINE bool upb_inttable_lookup32(const upb_inttable *t, uint32_t key,
upb_value *v) {
*v = upb_value_int32(0); /* Silence compiler warnings. */
if (key < t->array_size) {
upb_tabval arrval = t->array[key];
if (upb_arrhas(arrval)) {
_upb_value_setval(v, arrval.val);
return true;
} else {
return false;
}
} else {
const upb_tabent *e;
if (t->t.entries == NULL) return false;
for (e = upb_getentry(&t->t, upb_inthash(key)); true; e = e->next) {
if ((uint32_t)e->key == key) {
_upb_value_setval(v, e->val.val);
return true;
}
if (e->next == NULL) return false;
}
}
}
/* Exposed for testing only. */
bool upb_strtable_resize(upb_strtable *t, size_t size_lg2, upb_alloc *a);
/* Iterators ******************************************************************/
/* Iterators for int and string tables. We are subject to some kind of unusual
* design constraints:
*
* For high-level languages:
* - we must be able to guarantee that we don't crash or corrupt memory even if
* the program accesses an invalidated iterator.
*
* For C++11 range-based for:
* - iterators must be copyable
* - iterators must be comparable
* - it must be possible to construct an "end" value.
*
* Iteration order is undefined.
*
* Modifying the table invalidates iterators. upb_{str,int}table_done() is
* guaranteed to work even on an invalidated iterator, as long as the table it
* is iterating over has not been freed. Calling next() or accessing data from
* an invalidated iterator yields unspecified elements from the table, but it is
* guaranteed not to crash and to return real table elements (except when done()
* is true). */
/* upb_strtable_iter **********************************************************/
/* upb_strtable_iter i;
* upb_strtable_begin(&i, t);
* for(; !upb_strtable_done(&i); upb_strtable_next(&i)) {
* const char *key = upb_strtable_iter_key(&i);
* const upb_value val = upb_strtable_iter_value(&i);
* // ...
* }
*/
typedef struct {
const upb_strtable *t;
size_t index;
} upb_strtable_iter;
void upb_strtable_begin(upb_strtable_iter *i, const upb_strtable *t);
void upb_strtable_next(upb_strtable_iter *i);
bool upb_strtable_done(const upb_strtable_iter *i);
5 years ago
upb_strview upb_strtable_iter_key(const upb_strtable_iter *i);
upb_value upb_strtable_iter_value(const upb_strtable_iter *i);
void upb_strtable_iter_setdone(upb_strtable_iter *i);
bool upb_strtable_iter_isequal(const upb_strtable_iter *i1,
const upb_strtable_iter *i2);
/* upb_inttable_iter **********************************************************/
/* upb_inttable_iter i;
* upb_inttable_begin(&i, t);
* for(; !upb_inttable_done(&i); upb_inttable_next(&i)) {
* uintptr_t key = upb_inttable_iter_key(&i);
* upb_value val = upb_inttable_iter_value(&i);
* // ...
* }
*/
typedef struct {
const upb_inttable *t;
size_t index;
bool array_part;
} upb_inttable_iter;
UPB_INLINE const upb_tabent *str_tabent(const upb_strtable_iter *i) {
return &i->t->t.entries[i->index];
}
void upb_inttable_begin(upb_inttable_iter *i, const upb_inttable *t);
void upb_inttable_next(upb_inttable_iter *i);
bool upb_inttable_done(const upb_inttable_iter *i);
uintptr_t upb_inttable_iter_key(const upb_inttable_iter *i);
upb_value upb_inttable_iter_value(const upb_inttable_iter *i);
void upb_inttable_iter_setdone(upb_inttable_iter *i);
bool upb_inttable_iter_isequal(const upb_inttable_iter *i1,
const upb_inttable_iter *i2);
#ifdef __cplusplus
} /* extern "C" */
#endif
#include "upb/port_undef.inc"
#endif /* UPB_TABLE_H_ */