|
|
|
/*
|
|
|
|
* Copyright (c) 2009-2021, Google LLC
|
|
|
|
* All rights reserved.
|
|
|
|
*
|
|
|
|
* Redistribution and use in source and binary forms, with or without
|
|
|
|
* modification, are permitted provided that the following conditions are met:
|
|
|
|
* * Redistributions of source code must retain the above copyright
|
|
|
|
* notice, this list of conditions and the following disclaimer.
|
|
|
|
* * Redistributions in binary form must reproduce the above copyright
|
|
|
|
* notice, this list of conditions and the following disclaimer in the
|
|
|
|
* documentation and/or other materials provided with the distribution.
|
|
|
|
* * Neither the name of Google LLC nor the
|
|
|
|
* names of its contributors may be used to endorse or promote products
|
|
|
|
* derived from this software without specific prior written permission.
|
|
|
|
*
|
|
|
|
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
|
|
|
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
|
|
|
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
|
|
|
* ARE DISCLAIMED. IN NO EVENT SHALL Google LLC BE LIABLE FOR ANY DIRECT,
|
|
|
|
* INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
|
|
|
|
* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
|
|
|
|
* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
|
|
|
|
* ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
|
|
|
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
|
|
|
* SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|
|
|
*/
|
|
|
|
|
|
|
|
/*
|
|
|
|
* upb_table Implementation
|
|
|
|
*
|
|
|
|
* Implementation is heavily inspired by Lua's ltable.c.
|
|
|
|
*/
|
|
|
|
|
|
|
|
#include <string.h>
|
|
|
|
|
|
|
|
#include "upb/base/log2.h"
|
|
|
|
#include "upb/hash/int_table.h"
|
|
|
|
#include "upb/hash/str_table.h"
|
|
|
|
|
|
|
|
// Must be last.
|
|
|
|
#include "upb/port/def.inc"
|
|
|
|
|
|
|
|
// Cap, expressed as a log2, used when sizing the array part of an inttable:
// 2**16 = 64k.
#define UPB_MAXARRSIZE 16

// Number of elements in a true array (from Chromium).  The outer divisor is 1
// when sizeof(x) is an exact multiple of the element size and 0 otherwise, so
// a mismatched argument turns the expression into a division by zero.
#define ARRAY_SIZE(x)            \
  ((sizeof(x) / sizeof(0 [x])) / \
   ((size_t)(!(sizeof(x) % sizeof(0 [x])))))

// Fraction of the hash slots that may be in use; init() multiplies the table
// size by this to obtain max_count.
static const double MAX_LOAD = 0.85;

// The minimum utilization of the array part of a mixed hash/array table.  This
// is a speed/memory-usage tradeoff (though it's not straightforward because of
// cache effects).  The lower this is, the more memory we'll use.
static const double MIN_DENSITY = 0.1;
|
|
|
|
|
|
|
|
// Returns true when at most one bit of `v` is set, i.e. `v` is zero or an
// exact power of two.
static bool is_pow2(uint64_t v) {
  // v & (v - 1) clears the lowest set bit.  For v == 0 the subtraction wraps
  // to all-ones and the AND is still zero, so zero needs no special case.
  return (v & (v - 1)) == 0;
}
|
|
|
|
|
|
|
|
// Wraps the raw 64-bit payload `val` in a upb_value and returns it by value.
static upb_value _upb_value_val(uint64_t val) {
  upb_value wrapped;
  _upb_value_setval(&wrapped, val);
  return wrapped;
}
|
|
|
|
|
|
|
|
static int log2ceil(uint64_t v) {
|
|
|
|
int ret = 0;
|
|
|
|
bool pow2 = is_pow2(v);
|
|
|
|
while (v >>= 1) ret++;
|
|
|
|
ret = pow2 ? ret : ret + 1; // Ceiling.
|
|
|
|
return UPB_MIN(UPB_MAXARRSIZE, ret);
|
|
|
|
}
|
|
|
|
|
|
|
|
// Copies the `len` bytes at `s` into memory obtained from arena `a` and
// appends a terminating NUL byte.
//
// The input is treated as a raw buffer: it need not be NUL-terminated and may
// contain embedded NULs.  Returns NULL if `len` is SIZE_MAX or if the arena
// allocation fails.
char* upb_strdup2(const char* s, size_t len, upb_Arena* a) {
  // len + 1 below would wrap around to 0.
  if (len == SIZE_MAX) return NULL;

  char* copy = upb_Arena_Malloc(a, len + 1);
  if (copy == NULL) return NULL;

  // Skip memcpy entirely for an empty input.
  if (len > 0) memcpy(copy, s, len);
  copy[len] = 0;
  return copy;
}
|
|
|
|
|
|
|
|
/* The key a caller searches for, in the form needed by either table flavor.
 * Only one member is meaningful at a time: intkey() fills `num`, strkey2()
 * fills `str`. */
typedef union {
  /* Integer key (inttable). */
  uintptr_t num;
  /* String key (strtable): a pointer plus an explicit byte length. */
  struct {
    const char* str;
    size_t len;
  } str;
} lookupkey_t;
|
|
|
|
|
|
|
|
static lookupkey_t strkey2(const char* str, size_t len) {
|
|
|
|
lookupkey_t k;
|
|
|
|
k.str.str = str;
|
|
|
|
k.str.len = len;
|
|
|
|
return k;
|
|
|
|
}
|
|
|
|
|
|
|
|
static lookupkey_t intkey(uintptr_t key) {
|
|
|
|
lookupkey_t k;
|
|
|
|
k.num = key;
|
|
|
|
return k;
|
|
|
|
}
|
|
|
|
|
|
|
|
typedef uint32_t hashfunc_t(upb_tabkey key);
|
|
|
|
typedef bool eqlfunc_t(upb_tabkey k1, lookupkey_t k2);
|
|
|
|
|
|
|
|
/* Base table (shared code) ***************************************************/
|
|
|
|
|
|
|
|
// Hash for integer keys: simply the key truncated to its low 32 bits.
static uint32_t upb_inthash(uintptr_t key) {
  const uint32_t low_bits = (uint32_t)key;
  return low_bits;
}
|
|
|
|
|
|
|
|
static const upb_tabent* upb_getentry(const upb_table* t, uint32_t hash) {
|
|
|
|
return t->entries + (hash & t->mask);
|
|
|
|
}
|
|
|
|
|
|
|
|
// Reports whether an array-part slot holds a value.  An all-ones 64-bit
// pattern is the "empty" marker.
static bool upb_arrhas(upb_tabval key) {
  return key.val != UINT64_MAX;
}
|
|
|
|
|
|
|
|
// True once the hash part holds exactly max_count entries, i.e. the next
// insertion has to grow the table first.
static bool isfull(upb_table* t) {
  return t->max_count == t->count;
}
|
|
|
|
|
|
|
|
// Initializes `t` as an empty hash table whose size is derived from
// `size_lg2`, allocating the zero-filled entry array from arena `a`.
//
// When the resulting size is zero no memory is requested and `entries` is set
// to NULL.  Returns false only if the arena allocation fails.
static bool init(upb_table* t, uint8_t size_lg2, upb_Arena* a) {
  t->count = 0;
  t->size_lg2 = size_lg2;

  // upb_table_size() reads t->size_lg2, so it must be queried after the store.
  const size_t size = upb_table_size(t);
  t->mask = size ? size - 1 : 0;
  t->max_count = size * MAX_LOAD;

  const size_t bytes = size * sizeof(upb_tabent);
  if (bytes == 0) {
    t->entries = NULL;
    return true;
  }

  t->entries = upb_Arena_Malloc(a, bytes);
  if (!t->entries) return false;
  // All-zero entries have key == 0, which is the "empty slot" state.
  memset(t->entries, 0, bytes);
  return true;
}
|
|
|
|
|
|
|
|
// Finds an unused slot, searching forward from the slot after `e` and then
// wrapping around to the start of the table.  The caller must guarantee that
// a free slot exists; if none is found this asserts and returns NULL.
static upb_tabent* emptyent(upb_table* t, upb_tabent* e) {
  upb_tabent* const first = t->entries;
  upb_tabent* const limit = first + upb_table_size(t);
  upb_tabent* cur;

  // Pass 1: everything after `e`.
  cur = e + 1;
  while (cur < limit) {
    if (upb_tabent_isempty(cur)) return cur;
    cur++;
  }

  // Pass 2: wrap around and scan from the beginning.
  cur = first;
  while (cur < limit) {
    if (upb_tabent_isempty(cur)) return cur;
    cur++;
  }

  UPB_ASSERT(false);
  return NULL;
}
|
|
|
|
|
|
|
|
// Same as upb_getentry(), but with the const qualifier cast away for callers
// that own a mutable table.
static upb_tabent* getentry_mutable(upb_table* t, uint32_t hash) {
  const upb_tabent* slot = upb_getentry(t, hash);
  return (upb_tabent*)slot;
}
|
|
|
|
|
|
|
|
// Looks up `key` (with precomputed `hash`) by walking the collision chain
// that starts at the key's main position, comparing with `eql`.
// Returns the matching entry, or NULL if the key is not present.
static const upb_tabent* findentry(const upb_table* t, lookupkey_t key,
                                   uint32_t hash, eqlfunc_t* eql) {
  // A table with size_lg2 == 0 is treated as holding nothing.
  if (t->size_lg2 == 0) return NULL;

  const upb_tabent* cur = upb_getentry(t, hash);
  // An empty main position means no key with this hash slot is stored.
  if (upb_tabent_isempty(cur)) return NULL;

  for (; cur != NULL; cur = cur->next) {
    if (eql(cur->key, key)) return cur;
  }
  return NULL;
}
|
|
|
|
|
|
|
|
// findentry() for a mutable table: identical search, non-const result.
static upb_tabent* findentry_mutable(upb_table* t, lookupkey_t key,
                                     uint32_t hash, eqlfunc_t* eql) {
  const upb_tabent* found = findentry(t, key, hash, eql);
  return (upb_tabent*)found;
}
|
|
|
|
|
|
|
|
// Returns true if `key` is present.  When it is and `v` is non-NULL, the
// stored value is written to `*v`; `*v` is untouched on a miss.
static bool lookup(const upb_table* t, lookupkey_t key, upb_value* v,
                   uint32_t hash, eqlfunc_t* eql) {
  const upb_tabent* hit = findentry(t, key, hash, eql);
  if (hit == NULL) return false;

  if (v != NULL) _upb_value_setval(v, hit->val.val);
  return true;
}
|
|
|
|
|
|
|
|
/* The given key must not already exist in the table. */
|
|
|
|
static void insert(upb_table* t, lookupkey_t key, upb_tabkey tabkey,
|
|
|
|
upb_value val, uint32_t hash, hashfunc_t* hashfunc,
|
|
|
|
eqlfunc_t* eql) {
|
|
|
|
upb_tabent* mainpos_e;
|
|
|
|
upb_tabent* our_e;
|
|
|
|
|
|
|
|
UPB_ASSERT(findentry(t, key, hash, eql) == NULL);
|
|
|
|
|
|
|
|
t->count++;
|
|
|
|
mainpos_e = getentry_mutable(t, hash);
|
|
|
|
our_e = mainpos_e;
|
|
|
|
|
|
|
|
if (upb_tabent_isempty(mainpos_e)) {
|
|
|
|
/* Our main position is empty; use it. */
|
|
|
|
our_e->next = NULL;
|
|
|
|
} else {
|
|
|
|
/* Collision. */
|
|
|
|
upb_tabent* new_e = emptyent(t, mainpos_e);
|
|
|
|
/* Head of collider's chain. */
|
|
|
|
upb_tabent* chain = getentry_mutable(t, hashfunc(mainpos_e->key));
|
|
|
|
if (chain == mainpos_e) {
|
|
|
|
/* Existing ent is in its main position (it has the same hash as us, and
|
|
|
|
* is the head of our chain). Insert to new ent and append to this chain.
|
|
|
|
*/
|
|
|
|
new_e->next = mainpos_e->next;
|
|
|
|
mainpos_e->next = new_e;
|
|
|
|
our_e = new_e;
|
|
|
|
} else {
|
|
|
|
/* Existing ent is not in its main position (it is a node in some other
|
|
|
|
* chain). This implies that no existing ent in the table has our hash.
|
|
|
|
* Evict it (updating its chain) and use its ent for head of our chain. */
|
|
|
|
*new_e = *mainpos_e; /* copies next. */
|
|
|
|
while (chain->next != mainpos_e) {
|
|
|
|
chain = (upb_tabent*)chain->next;
|
|
|
|
UPB_ASSERT(chain);
|
|
|
|
}
|
|
|
|
chain->next = new_e;
|
|
|
|
our_e = mainpos_e;
|
|
|
|
our_e->next = NULL;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
our_e->key = tabkey;
|
|
|
|
our_e->val.val = val.val;
|
|
|
|
UPB_ASSERT(findentry(t, key, hash, eql) == our_e);
|
|
|
|
}
|
|
|
|
|
|
|
|
static bool rm(upb_table* t, lookupkey_t key, upb_value* val,
|
|
|
|
upb_tabkey* removed, uint32_t hash, eqlfunc_t* eql) {
|
|
|
|
upb_tabent* chain = getentry_mutable(t, hash);
|
|
|
|
if (upb_tabent_isempty(chain)) return false;
|
|
|
|
if (eql(chain->key, key)) {
|
|
|
|
/* Element to remove is at the head of its chain. */
|
|
|
|
t->count--;
|
|
|
|
if (val) _upb_value_setval(val, chain->val.val);
|
|
|
|
if (removed) *removed = chain->key;
|
|
|
|
if (chain->next) {
|
|
|
|
upb_tabent* move = (upb_tabent*)chain->next;
|
|
|
|
*chain = *move;
|
|
|
|
move->key = 0; /* Make the slot empty. */
|
|
|
|
} else {
|
|
|
|
chain->key = 0; /* Make the slot empty. */
|
|
|
|
}
|
|
|
|
return true;
|
|
|
|
} else {
|
|
|
|
/* Element to remove is either in a non-head position or not in the
|
|
|
|
* table. */
|
|
|
|
while (chain->next && !eql(chain->next->key, key)) {
|
|
|
|
chain = (upb_tabent*)chain->next;
|
|
|
|
}
|
|
|
|
if (chain->next) {
|
|
|
|
/* Found element to remove. */
|
|
|
|
upb_tabent* rm = (upb_tabent*)chain->next;
|
|
|
|
t->count--;
|
|
|
|
if (val) _upb_value_setval(val, chain->next->val.val);
|
|
|
|
if (removed) *removed = rm->key;
|
|
|
|
rm->key = 0; /* Make the slot empty. */
|
|
|
|
chain->next = rm->next;
|
|
|
|
return true;
|
|
|
|
} else {
|
|
|
|
/* Element to remove is not in the table. */
|
|
|
|
return false;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
static size_t next(const upb_table* t, size_t i) {
|
|
|
|
do {
|
|
|
|
if (++i >= upb_table_size(t)) return SIZE_MAX - 1; /* Distinct from -1. */
|
|
|
|
} while (upb_tabent_isempty(&t->entries[i]));
|
|
|
|
|
|
|
|
return i;
|
|
|
|
}
|
|
|
|
|
|
|
|
// Index of the first occupied slot, or next()'s end marker if the table has
// no entries.
static size_t begin(const upb_table* t) {
  return next(t, (size_t)-1);
}
|
|
|
|
|
|
|
|
/* upb_strtable ***************************************************************/
|
|
|
|
|
|
|
|
/* A simple "subclass" of upb_table that only adds a hash function for strings.
|
|
|
|
*/
|
|
|
|
|
|
|
|
// Copies the string key `k2` into arena `a` in the stored-key layout:
// a uint32_t length prefix, the key bytes, then a terminating NUL.
//
// Returns the address of that buffer as a upb_tabkey, or 0 if the key cannot
// be stored (length does not fit the 32-bit prefix, size computation would
// overflow, or the arena allocation fails).
static upb_tabkey strcopy(lookupkey_t k2, upb_Arena* a) {
  // The prefix is only 32 bits wide; a longer key would be stored with a
  // truncated length and later compare/hash incorrectly.
  if (k2.str.len > UINT32_MAX) return 0;
  // Guard the size arithmetic below against wrap-around.
  if (k2.str.len > SIZE_MAX - sizeof(uint32_t) - 1) return 0;

  uint32_t len = (uint32_t)k2.str.len;
  char* str = upb_Arena_Malloc(a, k2.str.len + sizeof(uint32_t) + 1);
  if (str == NULL) return 0;

  memcpy(str, &len, sizeof(uint32_t));
  // Skip memcpy for an empty key.
  if (k2.str.len) memcpy(str + sizeof(uint32_t), k2.str.str, k2.str.len);
  str[sizeof(uint32_t) + k2.str.len] = '\0';
  return (uintptr_t)str;
}
|
|
|
|
|
|
|
|
/* Adapted from ABSL's wyhash. */
|
|
|
|
|
|
|
|
// Reads 8 bytes starting at `p` as a native-endian uint64_t.  Going through
// memcpy places no alignment requirement on `p`.
static uint64_t UnalignedLoad64(const void* p) {
  uint64_t word;
  memcpy(&word, p, sizeof(word));
  return word;
}
|
|
|
|
|
|
|
|
// Reads 4 bytes starting at `p` as a native-endian uint32_t.  Going through
// memcpy places no alignment requirement on `p`.
static uint32_t UnalignedLoad32(const void* p) {
  uint32_t word;
  memcpy(&word, p, sizeof(word));
  return word;
}
|
|
|
|
|
|
|
|
#if defined(_MSC_VER) && defined(_M_X64)
|
|
|
|
#include <intrin.h>
|
|
|
|
#endif
|
|
|
|
|
|
|
|
/* Multiplies v0 by v1 as a 128-bit product.  Returns the low 64 bits and
 * stores the high 64 bits in |*out_high|.
 *
 * Uses a native 128-bit type or the MSVC x64 intrinsic when available.  The
 * portable fallback drops the carry out of the low half, so its high word may
 * differ from the exact product; that is acceptable for hash mixing. */
static uint64_t upb_umul128(uint64_t v0, uint64_t v1, uint64_t* out_high) {
#ifdef __SIZEOF_INT128__
  const __uint128_t product = (__uint128_t)v0 * v1;
  *out_high = (uint64_t)(product >> 64);
  return (uint64_t)product;
#elif defined(_MSC_VER) && defined(_M_X64)
  return _umul128(v0, v1, out_high);
#else
  /* Schoolbook multiplication on 32-bit halves. */
  const uint64_t v0_hi = v0 >> 32;
  const uint64_t v0_lo = v0 & 0xffffffff;
  const uint64_t v1_hi = v1 >> 32;
  const uint64_t v1_lo = v1 & 0xffffffff;
  const uint64_t cross_a = v0_hi * v1_lo;
  const uint64_t cross_b = v0_lo * v1_hi;
  const uint64_t low = v0_lo * v1_lo + ((cross_a << 32) + (cross_b << 32));
  // Omit carry bit, for mixing we do not care about exact numerical precision.
  *out_high = v0_hi * v1_hi + ((cross_a >> 32) + (cross_b >> 32));
  return low;
#endif
}
|
|
|
|
|
|
|
|
// Mixing step: multiplies v0 by v1 via upb_umul128() and folds the two
// 64-bit halves of the result together with XOR.
static uint64_t WyhashMix(uint64_t v0, uint64_t v1) {
  uint64_t upper;
  const uint64_t lower = upb_umul128(v0, v1, &upper);
  return lower ^ upper;
}
|
|
|
|
|
|
|
|
// Hashes the `len` bytes at `data` with the given `seed`.  `salt` must provide
// at least five 64-bit constants (indices 0..4 are read).
//
// The input is consumed in three stages: 64-byte blocks feeding two parallel
// states, then 16-byte blocks, then a tail of at most 16 bytes.
static uint64_t Wyhash(const void* data, size_t len, uint64_t seed,
                       const uint64_t salt[]) {
  const uint8_t* cursor = (const uint8_t*)data;
  // `len` is consumed below; the original length is mixed in at the very end.
  const uint64_t total_len = (uint64_t)len;
  uint64_t state = seed ^ salt[0];

  if (len > 64) {
    // More than 64 bytes: process 64-byte chunks, building up two independent
    // states that are combined once the chunks are exhausted.
    uint64_t twin = state;

    do {
      const uint64_t w0 = UnalignedLoad64(cursor);
      const uint64_t w1 = UnalignedLoad64(cursor + 8);
      const uint64_t w2 = UnalignedLoad64(cursor + 16);
      const uint64_t w3 = UnalignedLoad64(cursor + 24);
      const uint64_t w4 = UnalignedLoad64(cursor + 32);
      const uint64_t w5 = UnalignedLoad64(cursor + 40);
      const uint64_t w6 = UnalignedLoad64(cursor + 48);
      const uint64_t w7 = UnalignedLoad64(cursor + 56);

      // First 32 bytes update the primary state...
      const uint64_t first_a = WyhashMix(w0 ^ salt[1], w1 ^ state);
      const uint64_t first_b = WyhashMix(w2 ^ salt[2], w3 ^ state);
      state = first_a ^ first_b;

      // ...and the second 32 bytes update the twin state.
      const uint64_t second_a = WyhashMix(w4 ^ salt[3], w5 ^ twin);
      const uint64_t second_b = WyhashMix(w6 ^ salt[4], w7 ^ twin);
      twin = second_a ^ second_b;

      cursor += 64;
      len -= 64;
    } while (len > 64);

    state ^= twin;
  }

  // At most 64 bytes remain; fold them in 16 bytes at a time while more than
  // 16 are left.
  for (; len > 16; cursor += 16, len -= 16) {
    const uint64_t lo = UnalignedLoad64(cursor);
    const uint64_t hi = UnalignedLoad64(cursor + 8);
    state = WyhashMix(lo ^ salt[1], hi ^ state);
  }

  // At most 16 bytes remain.  Both words stay zero for empty input, and `b`
  // stays zero for 1..3 bytes.
  uint64_t a = 0;
  uint64_t b = 0;
  if (len > 8) {
    // 9..16 bytes: A is the first 64 bits and B the last 64 bits of the
    // input.  They overlap in the middle when fewer than 16 bytes are left.
    a = UnalignedLoad64(cursor);
    b = UnalignedLoad64(cursor + len - 8);
  } else if (len > 3) {
    // 4..8 bytes: A is the first 32 bits and B the last 32 bits.
    a = UnalignedLoad32(cursor);
    b = UnalignedLoad32(cursor + len - 4);
  } else if (len > 0) {
    // 1..3 bytes: pack the first, middle and last byte into A.
    a = ((cursor[0] << 16) | (cursor[len >> 1] << 8) | cursor[len - 1]);
  }

  const uint64_t mixed = WyhashMix(a ^ salt[1], b ^ state);
  const uint64_t length_word = salt[1] ^ total_len;
  return WyhashMix(mixed, length_word);
}
|
|
|
|
|
|
|
|
// The five salt words passed to Wyhash() by _upb_Hash().
const uint64_t kWyhashSalt[5] = {
    0x243F6A8885A308D3ULL,
    0x13198A2E03707344ULL,
    0xA4093822299F31D0ULL,
    0x082EFA98EC4E6C89ULL,
    0x452821E638D01377ULL,
};
|
|
|
|
|
|
|
|
uint32_t _upb_Hash(const void* p, size_t n, uint64_t seed) {
|
|
|
|
return Wyhash(p, n, seed, kWyhashSalt);
|
|
|
|
}
|
|
|
|
|
|
|
|
// _upb_Hash() with a fixed seed of 0; this is the hash used for strtable keys.
static uint32_t _upb_Hash_NoSeed(const char* p, size_t n) {
  const uint64_t no_seed = 0;
  return _upb_Hash(p, n, no_seed);
}
|
|
|
|
|
|
|
|
// hashfunc_t for string tables: decodes the stored key into pointer + length
// and hashes those bytes.
static uint32_t strhash(upb_tabkey key) {
  uint32_t key_len;
  char* bytes = upb_tabstr(key, &key_len);
  return _upb_Hash_NoSeed(bytes, key_len);
}
|
|
|
|
|
|
|
|
// eqlfunc_t for string tables: true when stored key `k1` and lookup key `k2`
// have the same length and the same bytes.
static bool streql(upb_tabkey k1, lookupkey_t k2) {
  uint32_t stored_len;
  char* stored = upb_tabstr(k1, &stored_len);

  if (stored_len != k2.str.len) return false;
  // Two empty keys are equal; memcmp is not called for them.
  if (stored_len == 0) return true;
  return memcmp(stored, k2.str.str, stored_len) == 0;
}
|
|
|
|
|
|
|
|
// Initializes `t` with enough hash slots for about `expected_size` entries.
// Returns false if the arena allocation fails.
bool upb_strtable_init(upb_strtable* t, size_t expected_size, upb_Arena* a) {
  // Scale by 1204/1024, an approximation of 1 / MAX_LOAD (0.85) with a
  // power-of-two denominator.
  const size_t need_entries = (expected_size + 1) * 1204 / 1024;
  UPB_ASSERT(need_entries >= expected_size * 0.85);

  const int size_lg2 = upb_Log2Ceiling(need_entries);
  return init(&t->t, size_lg2, a);
}
|
|
|
|
|
|
|
|
// Removes every entry from `t` while keeping its current capacity: the count
// is reset and all slots are zeroed, which marks them empty.
void upb_strtable_clear(upb_strtable* t) {
  size_t bytes = upb_table_size(&t->t) * sizeof(upb_tabent);
  t->t.count = 0;
  // init() leaves `entries` NULL for a zero-sized table, and memset must not
  // be handed a null pointer even when the length is 0.
  if (bytes > 0) memset((char*)t->t.entries, 0, bytes);
}
|
|
|
|
|
|
|
|
// Rebuilds `t` as a table sized by `size_lg2`, re-inserting every existing
// entry into freshly allocated storage from arena `a`.
//
// Returns false if any allocation fails; in that case `*t` is left exactly
// as it was (memory already taken from the arena is not returned).
bool upb_strtable_resize(upb_strtable* t, size_t size_lg2, upb_Arena* a) {
  upb_strtable new_table;
  if (!init(&new_table.t, size_lg2, a)) return false;

  intptr_t iter = UPB_STRTABLE_BEGIN;
  upb_StringView key;
  upb_value val;
  while (upb_strtable_next2(t, &key, &val, &iter)) {
    // Each insert copies the key and can fail; dropping the entry silently
    // would lose data, so abandon the rebuild instead.
    if (!upb_strtable_insert(&new_table, key.data, key.size, val, a)) {
      return false;
    }
  }

  // Commit only after every entry has been carried over.
  *t = new_table;
  return true;
}
|
|
|
|
|
|
|
|
bool upb_strtable_insert(upb_strtable* t, const char* k, size_t len,
|
|
|
|
upb_value v, upb_Arena* a) {
|
|
|
|
lookupkey_t key;
|
|
|
|
upb_tabkey tabkey;
|
|
|
|
uint32_t hash;
|
|
|
|
|
|
|
|
if (isfull(&t->t)) {
|
|
|
|
/* Need to resize. New table of double the size, add old elements to it. */
|
|
|
|
if (!upb_strtable_resize(t, t->t.size_lg2 + 1, a)) {
|
|
|
|
return false;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
key = strkey2(k, len);
|
|
|
|
tabkey = strcopy(key, a);
|
|
|
|
if (tabkey == 0) return false;
|
|
|
|
|
|
|
|
hash = _upb_Hash_NoSeed(key.str.str, key.str.len);
|
|
|
|
insert(&t->t, key, tabkey, v, hash, &strhash, &streql);
|
|
|
|
return true;
|
|
|
|
}
|
|
|
|
|
|
|
|
bool upb_strtable_lookup2(const upb_strtable* t, const char* key, size_t len,
|
|
|
|
upb_value* v) {
|
|
|
|
uint32_t hash = _upb_Hash_NoSeed(key, len);
|
|
|
|
return lookup(&t->t, strkey2(key, len), v, hash, &streql);
|
|
|
|
}
|
|
|
|
|
|
|
|
bool upb_strtable_remove2(upb_strtable* t, const char* key, size_t len,
|
|
|
|
upb_value* val) {
|
|
|
|
uint32_t hash = _upb_Hash_NoSeed(key, len);
|
|
|
|
upb_tabkey tabkey;
|
|
|
|
return rm(&t->t, strkey2(key, len), val, &tabkey, hash, &streql);
|
|
|
|
}
|
|
|
|
|
|
|
|
/* Iteration */
|
|
|
|
|
|
|
|
// Points iterator `i` at the first occupied slot of `t`.
void upb_strtable_begin(upb_strtable_iter* i, const upb_strtable* t) {
  i->index = begin(&t->t);
  i->t = t;
}
|
|
|
|
|
|
|
|
// Advances iterator `i` to the next occupied slot of its table.
void upb_strtable_next(upb_strtable_iter* i) {
  const upb_table* table = &i->t->t;
  i->index = next(table, i->index);
}
|
|
|
|
|
|
|
|
bool upb_strtable_done(const upb_strtable_iter* i) {
|
|
|
|
if (!i->t) return true;
|
|
|
|
return i->index >= upb_table_size(&i->t->t) ||
|
|
|
|
upb_tabent_isempty(str_tabent(i));
|
|
|
|
}
|
|
|
|
|
|
|
|
upb_StringView upb_strtable_iter_key(const upb_strtable_iter* i) {
|
|
|
|
upb_StringView key;
|
|
|
|
uint32_t len;
|
|
|
|
UPB_ASSERT(!upb_strtable_done(i));
|
|
|
|
key.data = upb_tabstr(str_tabent(i)->key, &len);
|
|
|
|
key.size = len;
|
|
|
|
return key;
|
|
|
|
}
|
|
|
|
|
|
|
|
upb_value upb_strtable_iter_value(const upb_strtable_iter* i) {
|
|
|
|
UPB_ASSERT(!upb_strtable_done(i));
|
|
|
|
return _upb_value_val(str_tabent(i)->val.val);
|
|
|
|
}
|
|
|
|
|
|
|
|
// Puts iterator `i` into the "done" state: no table and index SIZE_MAX.
void upb_strtable_iter_setdone(upb_strtable_iter* i) {
  i->index = SIZE_MAX;
  i->t = NULL;
}
|
|
|
|
|
|
|
|
bool upb_strtable_iter_isequal(const upb_strtable_iter* i1,
|
|
|
|
const upb_strtable_iter* i2) {
|
|
|
|
if (upb_strtable_done(i1) && upb_strtable_done(i2)) return true;
|
|
|
|
return i1->t == i2->t && i1->index == i2->index;
|
|
|
|
}
|
|
|
|
|
|
|
|
/* upb_inttable ***************************************************************/
|
|
|
|
|
|
|
|
/* For inttables we use a hybrid structure where small keys are kept in an
|
|
|
|
* array and large keys are put in the hash table. */
|
|
|
|
|
|
|
|
// hashfunc_t adapter for integer tables; forwards to upb_inthash().
static uint32_t inthash(upb_tabkey key) {
  return upb_inthash(key);
}
|
|
|
|
|
|
|
|
// eqlfunc_t for integer tables: the stored key equals the lookup number.
static bool inteql(upb_tabkey k1, lookupkey_t k2) {
  return k2.num == k1;
}
|
|
|
|
|
|
|
|
// Returns the array part of `t` as a writable pointer.
static upb_tabval* mutable_array(upb_inttable* t) {
  upb_tabval* writable = (upb_tabval*)t->array;
  return writable;
}
|
|
|
|
|
|
|
|
// Returns a pointer to the stored value for `key`, or NULL if the key is
// absent.  Keys below array_size live in the array part; all others are
// looked up in the hash part.
static upb_tabval* inttable_val(upb_inttable* t, uintptr_t key) {
  if (key >= t->array_size) {
    upb_tabent* e =
        findentry_mutable(&t->t, intkey(key), upb_inthash(key), &inteql);
    return e ? &e->val : NULL;
  }

  if (!upb_arrhas(t->array[key])) return NULL;
  return &(mutable_array(t)[key]);
}
|
|
|
|
|
|
|
|
static const upb_tabval* inttable_val_const(const upb_inttable* t,
|
|
|
|
uintptr_t key) {
|
|
|
|
return inttable_val((upb_inttable*)t, key);
|
|
|
|
}
|
|
|
|
|
|
|
|
size_t upb_inttable_count(const upb_inttable* t) {
|
|
|
|
return t->t.count + t->array_count;
|
|
|
|
}
|
|
|
|
|
|
|
|
// Debug-only consistency check, compiled in when UPB_DEBUG_TABLE is defined
// and NDEBUG is not.  Verifies that every entry reachable by iteration can be
// looked up, and that the number of iterated entries matches
// upb_inttable_count().  A no-op otherwise.
static void check(upb_inttable* t) {
  UPB_UNUSED(t);
#if defined(UPB_DEBUG_TABLE) && !defined(NDEBUG)
  {
    // This check is very expensive (makes inserts/deletes O(N)).
    size_t count = 0;
    intptr_t iter = UPB_INTTABLE_BEGIN;
    uintptr_t key;
    upb_value val;
    while (upb_inttable_next(t, &key, &val, &iter)) {
      UPB_ASSERT(upb_inttable_lookup(t, key, NULL));
      // Tally the entry so the comparison below is meaningful.
      count++;
    }
    UPB_ASSERT(count == upb_inttable_count(t));
  }
#endif
}
|
|
|
|
|
|
|
|
bool upb_inttable_sizedinit(upb_inttable* t, size_t asize, int hsize_lg2,
|
|
|
|
upb_Arena* a) {
|
|
|
|
size_t array_bytes;
|
|
|
|
|
|
|
|
if (!init(&t->t, hsize_lg2, a)) return false;
|
|
|
|
/* Always make the array part at least 1 long, so that we know key 0
|
|
|
|
* won't be in the hash part, which simplifies things. */
|
|
|
|
t->array_size = UPB_MAX(1, asize);
|
|
|
|
t->array_count = 0;
|
|
|
|
array_bytes = t->array_size * sizeof(upb_value);
|
|
|
|
t->array = upb_Arena_Malloc(a, array_bytes);
|
|
|
|
if (!t->array) {
|
|
|
|
return false;
|
|
|
|
}
|
|
|
|
memset(mutable_array(t), 0xff, array_bytes);
|
|
|
|
check(t);
|
|
|
|
return true;
|
|
|
|
}
|
|
|
|
|
|
|
|
// Initializes `t` with default sizing: no requested array slots and a hash
// part with size_lg2 == 4.  Returns false on allocation failure.
bool upb_inttable_init(upb_inttable* t, upb_Arena* a) {
  const size_t initial_array_size = 0;
  const int initial_hash_size_lg2 = 4;
  return upb_inttable_sizedinit(t, initial_array_size, initial_hash_size_lg2,
                                a);
}
|
|
|
|
|
|
|
|
bool upb_inttable_insert(upb_inttable* t, uintptr_t key, upb_value val,
|
|
|
|
upb_Arena* a) {
|
|
|
|
upb_tabval tabval;
|
|
|
|
tabval.val = val.val;
|
|
|
|
UPB_ASSERT(
|
|
|
|
upb_arrhas(tabval)); /* This will reject (uint64_t)-1. Fix this. */
|
|
|
|
|
|
|
|
if (key < t->array_size) {
|
|
|
|
UPB_ASSERT(!upb_arrhas(t->array[key]));
|
|
|
|
t->array_count++;
|
|
|
|
mutable_array(t)[key].val = val.val;
|
|
|
|
} else {
|
|
|
|
if (isfull(&t->t)) {
|
|
|
|
/* Need to resize the hash part, but we re-use the array part. */
|
|
|
|
size_t i;
|
|
|
|
upb_table new_table;
|
|
|
|
|
|
|
|
if (!init(&new_table, t->t.size_lg2 + 1, a)) {
|
|
|
|
return false;
|
|
|
|
}
|
|
|
|
|
|
|
|
for (i = begin(&t->t); i < upb_table_size(&t->t); i = next(&t->t, i)) {
|
|
|
|
const upb_tabent* e = &t->t.entries[i];
|
|
|
|
uint32_t hash;
|
|
|
|
upb_value v;
|
|
|
|
|
|
|
|
_upb_value_setval(&v, e->val.val);
|
|
|
|
hash = upb_inthash(e->key);
|
|
|
|
insert(&new_table, intkey(e->key), e->key, v, hash, &inthash, &inteql);
|
|
|
|
}
|
|
|
|
|
|
|
|
UPB_ASSERT(t->t.count == new_table.count);
|
|
|
|
|
|
|
|
t->t = new_table;
|
|
|
|
}
|
|
|
|
insert(&t->t, intkey(key), key, val, upb_inthash(key), &inthash, &inteql);
|
|
|
|
}
|
|
|
|
check(t);
|
|
|
|
return true;
|
|
|
|
}
|
|
|
|
|
|
|
|
// Returns true if `key` is present, storing its value in `*v` when `v` is
// non-NULL.
bool upb_inttable_lookup(const upb_inttable* t, uintptr_t key, upb_value* v) {
  const upb_tabval* found = inttable_val_const(t, key);
  if (found == NULL) return false;

  if (v != NULL) _upb_value_setval(v, found->val);
  return true;
}
|
|
|
|
|
|
|
|
// Overwrites the value stored for an existing `key`.  Returns false, changing
// nothing, if the key is not present.
bool upb_inttable_replace(upb_inttable* t, uintptr_t key, upb_value val) {
  upb_tabval* slot = inttable_val(t, key);
  if (slot == NULL) return false;

  slot->val = val.val;
  return true;
}
|
|
|
|
|
|
|
|
// Removes `key`.  Returns true if it was present, storing the removed value
// in `*val` when `val` is non-NULL.
bool upb_inttable_remove(upb_inttable* t, uintptr_t key, upb_value* val) {
  bool removed = false;

  if (key >= t->array_size) {
    /* Hash part. */
    removed = rm(&t->t, intkey(key), val, NULL, upb_inthash(key), &inteql);
  } else if (upb_arrhas(t->array[key])) {
    /* Array part, slot occupied: report the value, then mark it empty. */
    upb_tabval empty = UPB_TABVALUE_EMPTY_INIT;
    t->array_count--;
    if (val) {
      _upb_value_setval(val, t->array[key].val);
    }
    mutable_array(t)[key] = empty;
    removed = true;
  }

  check(t);
  return removed;
}
|
|
|
|
|
|
|
|
// Rebuilds `t` with an array part and hash part sized to its current
// contents, using memory from arena `a`.
//
// If any allocation fails while building the replacement, the function
// returns early and `*t` is left unchanged (still valid, just not compacted).
void upb_inttable_compact(upb_inttable* t, upb_Arena* a) {
  /* A power-of-two histogram of the table keys. */
  size_t counts[UPB_MAXARRSIZE + 1] = {0};

  /* The max key in each bucket. */
  uintptr_t max[UPB_MAXARRSIZE + 1] = {0};

  {
    intptr_t iter = UPB_INTTABLE_BEGIN;
    uintptr_t key;
    upb_value val;
    while (upb_inttable_next(t, &key, &val, &iter)) {
      /* log2ceil() clamps, so every large key lands in the last bucket. */
      int bucket = log2ceil(key);
      max[bucket] = UPB_MAX(max[bucket], key);
      counts[bucket]++;
    }
  }

  /* Find the largest power of two that satisfies the MIN_DENSITY
   * definition (while actually having some keys). */
  size_t arr_count = upb_inttable_count(t);
  int size_lg2;
  upb_inttable new_t;

  for (size_lg2 = ARRAY_SIZE(counts) - 1; size_lg2 > 0; size_lg2--) {
    if (counts[size_lg2] == 0) {
      /* We can halve again without losing any entries. */
      continue;
    } else if (arr_count >= (1 << size_lg2) * MIN_DENSITY) {
      break;
    }

    /* Too sparse: the keys of this bucket will go to the hash part. */
    arr_count -= counts[size_lg2];
  }

  UPB_ASSERT(arr_count <= upb_inttable_count(t));

  {
    /* Insert all elements into new, perfectly-sized table. */
    size_t arr_size = max[size_lg2] + 1; /* +1 so arr[max] will fit. */
    size_t hash_count = upb_inttable_count(t) - arr_count;
    size_t hash_size = hash_count ? (hash_count / MAX_LOAD) + 1 : 0;
    int hashsize_lg2 = log2ceil(hash_size);

    /* On failure new_t is not usable; keep the existing table. */
    if (!upb_inttable_sizedinit(&new_t, arr_size, hashsize_lg2, a)) return;

    {
      intptr_t iter = UPB_INTTABLE_BEGIN;
      uintptr_t key;
      upb_value val;
      while (upb_inttable_next(t, &key, &val, &iter)) {
        /* A failed insert would leave new_t incomplete; abandon it rather
         * than lose entries. */
        if (!upb_inttable_insert(&new_t, key, val, a)) return;
      }
    }

    UPB_ASSERT(new_t.array_size == arr_size);
    UPB_ASSERT(new_t.t.size_lg2 == hashsize_lg2);
  }
  *t = new_t;
}
|
|
|
|
|
|
|
|
// Iteration.
|
|
|
|
|
|
|
|
// Advances the iteration cursor `*iter` to the next entry of `t`.
//
// On success returns true and fills `*key`, `*val`, and `*iter`.  Returns
// false, leaving all three untouched, when no entries remain.  Cursor values
// below array_size index the array part; larger values are a hash-part slot
// index offset by array_size.
bool upb_inttable_next(const upb_inttable* t, uintptr_t* key, upb_value* val,
                       intptr_t* iter) {
  intptr_t pos = *iter;

  if ((size_t)(pos + 1) <= t->array_size) {
    /* Still inside the array part: scan for the next occupied slot. */
    for (++pos; (size_t)pos < t->array_size; ++pos) {
      upb_tabval slot = t->array[pos];
      if (!upb_arrhas(slot)) continue;
      *key = pos;
      *val = _upb_value_val(slot.val);
      *iter = pos;
      return true;
    }
    pos--;  // Back up to exactly one position before the start of the table.
  }

  /* Continue in the hash part, translating the cursor to a slot index. */
  const size_t tab_idx = next(&t->t, pos - t->array_size);
  if (tab_idx >= upb_table_size(&t->t)) return false;

  upb_tabent* ent = &t->t.entries[tab_idx];
  *key = ent->key;
  *val = _upb_value_val(ent->val.val);
  *iter = tab_idx + t->array_size;
  return true;
}
|
|
|
|
|
|
|
|
// Removes the entry that the iteration cursor `*iter` currently refers to.
//
// `*iter` may be moved back by one position so that the next call to
// upb_inttable_next() still visits an entry that had to be relocated into the
// freed slot; callers should simply keep iterating with the same cursor.
void upb_inttable_removeiter(upb_inttable* t, intptr_t* iter) {
  intptr_t i = *iter;
  if ((size_t)i < t->array_size) {
    /* Array part: mark the slot empty with the all-ones pattern. */
    t->array_count--;
    mutable_array(t)[i].val = -1;
    return;
  }

  upb_tabent* ent = &t->t.entries[i - t->array_size];
  upb_tabent* prev = NULL;

  // Linear search for the live entry that links to `ent`, not great.  Empty
  // slots are skipped because removal leaves their `next` field stale, and a
  // stale pointer must not be mistaken for the real predecessor.
  upb_tabent* end = &t->t.entries[upb_table_size(&t->t)];
  for (upb_tabent* e = t->t.entries; e != end; e++) {
    if (!upb_tabent_isempty(e) && e->next == ent) {
      prev = e;
      break;
    }
  }

  t->t.count--;

  if (prev == NULL && ent->next != NULL) {
    // `ent` heads a chain that has more members.  Lookups start at the head
    // slot, so emptying it would make the rest of the chain unreachable.
    // Move the second member into the head slot instead (as rm() does).
    upb_tabent* move = (upb_tabent*)ent->next;
    *ent = *move;
    move->key = 0;
    move->next = NULL;
    // If the relocated entry came from a slot the iteration has not reached
    // yet, step the cursor back so this slot is visited again.
    if (move > ent) *iter = i - 1;
    return;
  }

  if (prev) {
    prev->next = ent->next;
  }

  ent->key = 0;
  ent->next = NULL;
}
|
|
|
|
|
|
|
|
bool upb_strtable_next2(const upb_strtable* t, upb_StringView* key,
|
|
|
|
upb_value* val, intptr_t* iter) {
|
|
|
|
size_t tab_idx = next(&t->t, *iter);
|
|
|
|
if (tab_idx < upb_table_size(&t->t)) {
|
|
|
|
upb_tabent* ent = &t->t.entries[tab_idx];
|
|
|
|
uint32_t len;
|
|
|
|
key->data = upb_tabstr(ent->key, &len);
|
|
|
|
key->size = len;
|
|
|
|
*val = _upb_value_val(ent->val.val);
|
|
|
|
*iter = tab_idx;
|
|
|
|
return true;
|
|
|
|
}
|
|
|
|
|
|
|
|
return false;
|
|
|
|
}
|
|
|
|
|
|
|
|
// Removes the entry that the iteration cursor `*iter` currently refers to.
//
// `*iter` may be moved back by one position so that the next call to
// upb_strtable_next2() still visits an entry that had to be relocated into
// the freed slot; callers should simply keep iterating with the same cursor.
void upb_strtable_removeiter(upb_strtable* t, intptr_t* iter) {
  intptr_t i = *iter;
  upb_tabent* ent = &t->t.entries[i];
  upb_tabent* prev = NULL;

  // Linear search for the live entry that links to `ent`, not great.  Empty
  // slots are skipped because removal leaves their `next` field stale, and a
  // stale pointer must not be mistaken for the real predecessor.
  upb_tabent* end = &t->t.entries[upb_table_size(&t->t)];
  for (upb_tabent* e = t->t.entries; e != end; e++) {
    if (!upb_tabent_isempty(e) && e->next == ent) {
      prev = e;
      break;
    }
  }

  t->t.count--;

  if (prev == NULL && ent->next != NULL) {
    // `ent` heads a chain that has more members.  Lookups start at the head
    // slot, so emptying it would make the rest of the chain unreachable.
    // Move the second member into the head slot instead (as rm() does).
    upb_tabent* move = (upb_tabent*)ent->next;
    *ent = *move;
    move->key = 0;
    move->next = NULL;
    // If the relocated entry came from a slot the iteration has not reached
    // yet, step the cursor back so this slot is visited again.
    if (move > ent) *iter = i - 1;
    return;
  }

  if (prev) {
    prev->next = ent->next;
  }

  ent->key = 0;
  ent->next = NULL;
}
|
Added a new dynamic tree shaking model to upb, with the intention of removing the old model once YouTube has migrated.
The `kUpb_DecodeOption_ExperimentalAllowUnlinked` flag to the decoder will enable the new behavior. When that flag is not passed, tree shaking with the old model will still be possible.
"Dynamic tree shaking" in upb is a feature that allows messages to be parsed even if the MiniTables have not been fully linked. Unlinked sub-message fields can be parsed by preserving their data in the unknown fields. If the application later discovers that the message field is actually needed, the MiniTable can be patched to properly link that field, and existing message instances can "promote" the data from the unknown fields to an actual message of the correct type.
Before this change, dynamic tree shaking stored unparsed message data in the unknown fields of the *parent*. In effect, we were treating the field as if it did not exist at all. This meant that parsing an unlinked field did not affect the hasbits or oneof cases of the parent, nor did it create a `upb_Array` or `upb_Map` for array/map fields. Only when a message was linked and promoted did any of these things occur.
While this model had some amount of conceptual simplicity, it caused significant problems with oneofs. When multiple fields inside a single oneof are parsed from the wire, order matters, because later oneof fields must overwrite earlier ones. Dynamic tree shaking can mean that some fields in a oneof are linked while others are not. It is essential that we preserve this ordering semantic even when dynamic tree shaking is being used, but it is difficult to do if the oneof's data can be split between linked fields (which have been reified into parsed field data) and unlinked fields (whose data lives in the unknown fields of the parent).
To solve this problem, this CL changes the representation for unlinked fields. Instead of being placed in the parent's unknown fields, we create an actual message instance for each unlinked message we parse, but we use a placeholder "empty message" MiniTable as the message's type. All of the message's data will therefore be placed into the "empty message's" unknown fields. But unlike before, this "empty message" is actually present according to the hasbits, oneof case, and `upb_Array`/`upb_Map` of the parent. This means that all of the oneof presence logic works as normal.
Since the MiniTable can be patched at any time, we need a bit in the message instance itself to signal whether a pointer to a sub-message is an "empty message" or not. When dynamic tree shaking is in use, all users must be capable of recognizing an empty message and acting accordingly (promoting, etc) even if the MiniTable itself says that the field is linked.
Because dynamic tree shaking imposes this extra requirement on users, we require that users pass an extra option to the decoder to allow parsing of unlinked sub-messages. Many existing users of upb (Ruby, PHP, Python, etc) will always have fully-linked MiniTables, so there is no reason for them to add extra logic to handle empty messages. By omitting the `kUpb_DecodeOption_ExperimentalAllowUnlinked` option, they will be relieved of the duty to check the tagged pointer that would indicate an empty, unlinked message.
For existing users of dynamic tree shaking, there are three main changes:
1. The APIs in message/promote.h have changed, and users will need to update to the new interfaces.
2. The model for maps has changed slightly. Before, we required that map entries always had their values linked; for dynamic tree shaking to apply to maps, we required that the *entry* was left unlinked, not the entry's value. In the new model, that is reversed: map entries must always be linked, but a map entry's value can be unlinked.
3. The presence model for unlinked fields has changed. Unlinked fields will now register as "present" from the perspective of hasbits, oneof cases, and array/map entries. Users must test the tagged pointer to know if a message is of the correct, linked type or whether it is a placeholder "empty" message. There is a new function `upb_Message_GetTaggedMessagePtr()`, as well as a new accessor `upb_MessageValue.tagged_msg_val` that can be used to read and test the tagged pointer directly.
PiperOrigin-RevId: 535288031
2 years ago
|
|
|
|
|
|
|
// Overwrites the value of the entry at iteration position `iter`, which must
// refer to an occupied slot.  The key is left unchanged.
void upb_strtable_setentryvalue(upb_strtable* t, intptr_t iter, upb_value v) {
  t->t.entries[iter].val.val = v.val;
}
|