Merge pull request #328 from haberman/optdefload

Optimized the loading of descriptors
pull/13171/head
Joshua Haberman 4 years ago committed by GitHub
commit 6ae3b66cd2
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
  1. 2
      BUILD
  2. 1
      cmake/CMakeLists.txt
  3. 2
      cmake/upb/json/parser.c
  4. 55
      tests/pb/test_decoder.cc
  5. 10
      tests/test_table.cc
  6. 25
      third_party/wyhash/LICENSE
  7. 142
      third_party/wyhash/wyhash.h
  8. 2
      tools/amalgamate.py
  9. 31
      upb/def.c
  10. 2
      upb/json/parser.rl
  11. 2
      upb/msg.c
  12. 229
      upb/table.c
  13. 8
      upb/table.int.h
  14. 11
      upb/upb.h

@ -68,6 +68,7 @@ cc_library(
"upb/table.int.h",
"upb/upb.c",
"upb/upb.int.h",
"third_party/wyhash/wyhash.h",
],
hdrs = [
"upb/decode.h",
@ -369,6 +370,7 @@ filegroup(
"upbc/**/*",
"upb/**/*",
"tests/**/*",
"third_party/**/*",
]),
visibility = ["//cmake:__pkg__"],
)

@ -72,6 +72,7 @@ add_library(upb
../upb/table.int.h
../upb/upb.c
../upb/upb.int.h
../third_party/wyhash/wyhash.h
../upb/decode.h
../upb/encode.h
../upb/upb.h

@ -3306,7 +3306,7 @@ static upb_json_parsermethod *parsermethod_new(upb_json_codecache *c,
upb_byteshandler_setstring(&m->input_handler_, parse, m);
upb_byteshandler_setendstr(&m->input_handler_, end, m);
upb_strtable_init2(&m->name_table, UPB_CTYPE_CONSTPTR, alloc);
upb_strtable_init2(&m->name_table, UPB_CTYPE_CONSTPTR, 4, alloc);
/* Build name_table */

@ -52,17 +52,6 @@
#define PRINT_FAILURE(expr) \
fprintf(stderr, "Assertion failed: %s:%d\n", __FILE__, __LINE__); \
fprintf(stderr, "expr: %s\n", #expr); \
if (testhash) { \
fprintf(stderr, "assertion failed running test %x.\n", testhash); \
if (!filter_hash) { \
fprintf(stderr, \
"Run with the arg %x to run only this test. " \
"(This will also turn on extra debugging output)\n", \
testhash); \
} \
fprintf(stderr, "Failed at %02.2f%% through tests.\n", \
(float)completed * 100 / total); \
}
#define MAX_NESTING 64
@ -467,17 +456,6 @@ upb::pb::DecoderPtr CreateDecoder(upb::Arena* arena,
return ret;
}
uint32_t Hash(const string& proto, const string* expected_output, size_t seam1,
size_t seam2, bool may_skip) {
uint32_t hash = upb_murmur_hash2(proto.c_str(), proto.size(), 0);
if (expected_output)
hash = upb_murmur_hash2(expected_output->c_str(), expected_output->size(), hash);
hash = upb_murmur_hash2(&seam1, sizeof(seam1), hash);
hash = upb_murmur_hash2(&seam2, sizeof(seam2), hash);
hash = upb_murmur_hash2(&may_skip, sizeof(may_skip), hash);
return hash;
}
void CheckBytesParsed(upb::pb::DecoderPtr decoder, size_t ofs) {
// We can't have parsed more data than the decoder callback is telling us it
// parsed.
@ -506,13 +484,11 @@ void do_run_decoder(VerboseParserEnvironment* env, upb::pb::DecoderPtr decoder,
env->Reset(proto.c_str(), proto.size(), may_skip, expected_output == NULL);
decoder.Reset();
testhash = Hash(proto, expected_output, i, j, may_skip);
if (filter_hash && testhash != filter_hash) return;
if (test_mode != COUNT_ONLY) {
output.clear();
if (filter_hash) {
fprintf(stderr, "RUNNING TEST CASE, hash=%x\n", testhash);
fprintf(stderr, "RUNNING TEST CASE\n");
fprintf(stderr, "Input (len=%u): ", (unsigned)proto.size());
PrintBinary(proto);
fprintf(stderr, "\n");
@ -571,7 +547,6 @@ void run_decoder(const string& proto, const string* expected_output) {
}
}
}
testhash = 0;
}
const static string thirty_byte_nop = cat(
@ -871,23 +846,17 @@ void test_valid() {
// Empty protobuf where we never call PutString between
// StartString/EndString.
// Randomly generated hash for this test, hope it doesn't conflict with others
// by chance.
const uint32_t emptyhash = 0x5709be8e;
if (!filter_hash || filter_hash == testhash) {
testhash = emptyhash;
upb::Status status;
upb::Arena arena;
upb::Sink sink(global_handlers, &closures[0]);
upb::pb::DecoderPtr decoder =
CreateDecoder(&arena, global_method, sink, &status);
output.clear();
bool ok = upb::PutBuffer(std::string(), decoder.input());
ASSERT(ok);
ASSERT(status.ok());
if (test_mode == ALL_HANDLERS) {
ASSERT(output == string("<\n>\n"));
}
upb::Status status;
upb::Arena arena;
upb::Sink sink(global_handlers, &closures[0]);
upb::pb::DecoderPtr decoder =
CreateDecoder(&arena, global_method, sink, &status);
output.clear();
bool ok = upb::PutBuffer(std::string(), decoder.input());
ASSERT(ok);
ASSERT(status.ok());
if (test_mode == ALL_HANDLERS) {
ASSERT(output == string("<\n>\n"));
}
test_valid_data_for_signed_type(UPB_DESCRIPTOR_TYPE_DOUBLE,

@ -618,6 +618,16 @@ void test_delete() {
upb_inttable_uninit(&t);
}
void test_init() {
for (int i = 0; i < 2048; i++) {
/* Tests that the size calculations in init() (lg2 size for target load)
* work for all expected sizes. */
upb_strtable t;
upb_strtable_init2(&t, UPB_CTYPE_BOOL, i, &upb_alloc_global);
upb_strtable_uninit(&t);
}
}
extern "C" {
int run_tests(int argc, char *argv[]) {

@ -0,0 +1,25 @@
This is free and unencumbered software released into the public domain.
Anyone is free to copy, modify, publish, use, compile, sell, or
distribute this software, either in source code form or as a compiled
binary, for any purpose, commercial or non-commercial, and by any
means.
In jurisdictions that recognize copyright laws, the author or authors
of this software dedicate any and all copyright interest in the
software to the public domain. We make this dedication for the benefit
of the public at large and to the detriment of our heirs and
successors. We intend this dedication to be an overt act of
relinquishment in perpetuity of all present and future rights to this
software under copyright law.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR
OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
OTHER DEALINGS IN THE SOFTWARE.
For more information, please refer to <http://unlicense.org/>

@ -0,0 +1,142 @@
//Author: Wang Yi <godspeed_china@yeah.net>
#ifndef wyhash_final_version
#define wyhash_final_version
//defines that change behavior
#ifndef WYHASH_CONDOM
#define WYHASH_CONDOM 1 //0: read 8 bytes before and after boudaries, dangerous but fastest. 1: normal valid behavior 2: extra protection against entropy loss (probability=2^-63), aka. "blind multiplication"
#endif
#define WYHASH_32BIT_MUM 0 //faster on 32 bit system
//includes
#include <stdint.h>
#include <string.h>
#if defined(_MSC_VER) && defined(_M_X64)
#include <intrin.h>
#pragma intrinsic(_umul128)
#endif
#if defined(__GNUC__) || defined(__INTEL_COMPILER) || defined(__clang__)
#define _likely_(x) __builtin_expect(x,1)
#define _unlikely_(x) __builtin_expect(x,0)
#else
#define _likely_(x) (x)
#define _unlikely_(x) (x)
#endif
//mum function
static inline uint64_t _wyrot(uint64_t x) { return (x>>32)|(x<<32); }
static inline void _wymum(uint64_t *A, uint64_t *B){
#if(WYHASH_32BIT_MUM)
uint64_t hh=(*A>>32)*(*B>>32), hl=(*A>>32)*(unsigned)*B, lh=(unsigned)*A*(*B>>32), ll=(uint64_t)(unsigned)*A*(unsigned)*B;
#if(WYHASH_CONDOM>1)
*A^=_wyrot(hl)^hh; *B^=_wyrot(lh)^ll;
#else
*A=_wyrot(hl)^hh; *B=_wyrot(lh)^ll;
#endif
#elif defined(__SIZEOF_INT128__)
__uint128_t r=*A; r*=*B;
#if(WYHASH_CONDOM>1)
*A^=(uint64_t)r; *B^=(uint64_t)(r>>64);
#else
*A=(uint64_t)r; *B=(uint64_t)(r>>64);
#endif
#elif defined(_MSC_VER) && defined(_M_X64)
#if(WYHASH_CONDOM>1)
uint64_t a, b;
a=_umul128(*A,*B,&b);
*A^=a; *B^=b;
#else
*A=_umul128(*A,*B,B);
#endif
#else
uint64_t ha=*A>>32, hb=*B>>32, la=(uint32_t)*A, lb=(uint32_t)*B, hi, lo;
uint64_t rh=ha*hb, rm0=ha*lb, rm1=hb*la, rl=la*lb, t=rl+(rm0<<32), c=t<rl;
lo=t+(rm1<<32); c+=lo<t; hi=rh+(rm0>>32)+(rm1>>32)+c;
#if(WYHASH_CONDOM>1)
*A^=lo; *B^=hi;
#else
*A=lo; *B=hi;
#endif
#endif
}
static inline uint64_t _wymix(uint64_t A, uint64_t B){ _wymum(&A,&B); return A^B; }
//read functions
#ifndef WYHASH_LITTLE_ENDIAN
#if defined(_WIN32) || defined(__LITTLE_ENDIAN__) || (defined(__BYTE_ORDER__) && __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__)
#define WYHASH_LITTLE_ENDIAN 1
#elif defined(__BIG_ENDIAN__) || (defined(__BYTE_ORDER__) && __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__)
#define WYHASH_LITTLE_ENDIAN 0
#endif
#endif
#if (WYHASH_LITTLE_ENDIAN)
static inline uint64_t _wyr8(const uint8_t *p) { uint64_t v; memcpy(&v, p, 8); return v;}
static inline uint64_t _wyr4(const uint8_t *p) { unsigned v; memcpy(&v, p, 4); return v;}
#elif defined(__GNUC__) || defined(__INTEL_COMPILER) || defined(__clang__)
static inline uint64_t _wyr8(const uint8_t *p) { uint64_t v; memcpy(&v, p, 8); return __builtin_bswap64(v);}
static inline uint64_t _wyr4(const uint8_t *p) { unsigned v; memcpy(&v, p, 4); return __builtin_bswap32(v);}
#elif defined(_MSC_VER)
static inline uint64_t _wyr8(const uint8_t *p) { uint64_t v; memcpy(&v, p, 8); return _byteswap_uint64(v);}
static inline uint64_t _wyr4(const uint8_t *p) { unsigned v; memcpy(&v, p, 4); return _byteswap_ulong(v);}
#endif
static inline uint64_t _wyr3(const uint8_t *p, unsigned k) { return (((uint64_t)p[0])<<16)|(((uint64_t)p[k>>1])<<8)|p[k-1];}
//wyhash function
static inline uint64_t _wyfinish16(const uint8_t *p, uint64_t len, uint64_t seed, const uint64_t *secret, uint64_t i){
#if(WYHASH_CONDOM>0)
uint64_t a, b;
if(_likely_(i<=8)){
if(_likely_(i>=4)){ a=_wyr4(p); b=_wyr4(p+i-4); }
else if (_likely_(i)){ a=_wyr3(p,i); b=0; }
else a=b=0;
}
else{ a=_wyr8(p); b=_wyr8(p+i-8); }
return _wymix(secret[1]^len,_wymix(a^secret[1], b^seed));
#else
#define oneshot_shift ((i<8)*((8-i)<<3))
return _wymix(secret[1]^len,_wymix((_wyr8(p)<<oneshot_shift)^secret[1],(_wyr8(p+i-8)>>oneshot_shift)^seed));
#endif
}
static inline uint64_t _wyfinish(const uint8_t *p, uint64_t len, uint64_t seed, const uint64_t *secret, uint64_t i){
if(_likely_(i<=16)) return _wyfinish16(p,len,seed,secret,i);
return _wyfinish(p+16,len,_wymix(_wyr8(p)^secret[1],_wyr8(p+8)^seed),secret,i-16);
}
static inline uint64_t wyhash(const void *key, uint64_t len, uint64_t seed, const uint64_t *secret){
const uint8_t *p=(const uint8_t *)key;
uint64_t i=len; seed^=*secret;
if(_unlikely_(i>64)){
uint64_t see1=seed;
do{
seed=_wymix(_wyr8(p)^secret[1],_wyr8(p+8)^seed)^_wymix(_wyr8(p+16)^secret[2],_wyr8(p+24)^seed);
see1=_wymix(_wyr8(p+32)^secret[3],_wyr8(p+40)^see1)^_wymix(_wyr8(p+48)^secret[4],_wyr8(p+56)^see1);
p+=64; i-=64;
}while(i>64);
seed^=see1;
}
return _wyfinish(p,len,seed,secret,i);
}
//utility functions
const uint64_t _wyp[5] = {0xa0761d6478bd642full, 0xe7037ed1a0b428dbull, 0x8ebc6af09c88c6e3ull, 0x589965cc75374cc3ull, 0x1d8e4e27c47d124full};
static inline uint64_t wyhash64(uint64_t A, uint64_t B){ A^=_wyp[0]; B^=_wyp[1]; _wymum(&A,&B); return _wymix(A^_wyp[0],B^_wyp[1]);}
static inline uint64_t wyrand(uint64_t *seed){ *seed+=_wyp[0]; return _wymix(*seed,*seed^_wyp[1]);}
static inline double wy2u01(uint64_t r){ const double _wynorm=1.0/(1ull<<52); return (r>>12)*_wynorm;}
static inline double wy2gau(uint64_t r){ const double _wynorm=1.0/(1ull<<20); return ((r&0x1fffff)+((r>>21)&0x1fffff)+((r>>42)&0x1fffff))*_wynorm-3.0;}
static inline uint64_t wy2u0k(uint64_t r, uint64_t k){ _wymum(&r,&k); return k; }
static inline void make_secret(uint64_t seed, uint64_t *secret){
uint8_t c[] = {15, 23, 27, 29, 30, 39, 43, 45, 46, 51, 53, 54, 57, 58, 60, 71, 75, 77, 78, 83, 85, 86, 89, 90, 92, 99, 101, 102, 105, 106, 108, 113, 114, 116, 120, 135, 139, 141, 142, 147, 149, 150, 153, 154, 156, 163, 165, 166, 169, 170, 172, 177, 178, 180, 184, 195, 197, 198, 201, 202, 204, 209, 210, 212, 216, 225, 226, 228, 232, 240 };
for(size_t i=0;i<5;i++){
uint8_t ok;
do{
ok=1; secret[i]=0;
for(size_t j=0;j<64;j+=8) secret[i]|=((uint64_t)c[wyrand(&seed)%sizeof(c)])<<j;
if(secret[i]%2==0){ ok=0; continue; }
for(size_t j=0;j<i;j++)
#if defined(__GNUC__) || defined(__INTEL_COMPILER) || defined(__clang__)
if(__builtin_popcountll(secret[j]^secret[i])!=32){ ok=0; break; }
#elif defined(_MSC_VER) && defined(_M_X64)
if(_mm_popcnt_u64(secret[j]^secret[i])!=32){ ok=0; break; }
#endif
if(!ok)continue;
for(uint64_t j=3;j<0x100000000ull;j+=2) if(secret[i]%j==0){ ok=0; break; }
}while(!ok);
}
}
#endif

@ -52,7 +52,7 @@ class Amalgamator:
include = parse_include(line)
if not include:
return False
if not (include.startswith("upb") or include.startswith("google")):
if not (include.startswith("upb") or include.startswith("google") or include.startswith("third_party")):
return False
if include.endswith("hpp"):
# Skip, we don't support the amalgamation from C++.

@ -1309,7 +1309,7 @@ static bool create_oneofdef(
CHK_OOM(upb_strtable_insert3(&m->ntof, name.data, name.size, v, ctx->alloc));
CHK_OOM(upb_inttable_init2(&o->itof, UPB_CTYPE_CONSTPTR, ctx->alloc));
CHK_OOM(upb_strtable_init2(&o->ntof, UPB_CTYPE_CONSTPTR, ctx->alloc));
CHK_OOM(upb_strtable_init2(&o->ntof, UPB_CTYPE_CONSTPTR, 4, ctx->alloc));
return true;
}
@ -1633,14 +1633,13 @@ static bool create_enumdef(
e->full_name = makefullname(ctx, prefix, name);
CHK_OOM(symtab_add(ctx, e->full_name, pack_def(e, UPB_DEFTYPE_ENUM)));
CHK_OOM(upb_strtable_init2(&e->ntoi, UPB_CTYPE_INT32, ctx->alloc));
values = google_protobuf_EnumDescriptorProto_value(enum_proto, &n);
CHK_OOM(upb_strtable_init2(&e->ntoi, UPB_CTYPE_INT32, n, ctx->alloc));
CHK_OOM(upb_inttable_init2(&e->iton, UPB_CTYPE_CSTR, ctx->alloc));
e->file = ctx->file;
e->defaultval = 0;
values = google_protobuf_EnumDescriptorProto_value(enum_proto, &n);
if (n == 0) {
upb_status_seterrf(ctx->status,
"enums must contain at least one value (%s)",
@ -1690,7 +1689,7 @@ static bool create_msgdef(symtab_addctx *ctx, const char *prefix,
const google_protobuf_FieldDescriptorProto *const *fields;
const google_protobuf_EnumDescriptorProto *const *enums;
const google_protobuf_DescriptorProto *const *msgs;
size_t i, n;
size_t i, n_oneof, n_field, n;
upb_strview name;
name = google_protobuf_DescriptorProto_name(msg_proto);
@ -1700,8 +1699,12 @@ static bool create_msgdef(symtab_addctx *ctx, const char *prefix,
m->full_name = makefullname(ctx, prefix, name);
CHK_OOM(symtab_add(ctx, m->full_name, pack_def(m, UPB_DEFTYPE_MSG)));
oneofs = google_protobuf_DescriptorProto_oneof_decl(msg_proto, &n_oneof);
fields = google_protobuf_DescriptorProto_field(msg_proto, &n_field);
CHK_OOM(upb_inttable_init2(&m->itof, UPB_CTYPE_CONSTPTR, ctx->alloc));
CHK_OOM(upb_strtable_init2(&m->ntof, UPB_CTYPE_CONSTPTR, ctx->alloc));
CHK_OOM(upb_strtable_init2(&m->ntof, UPB_CTYPE_CONSTPTR, n_oneof + n_field,
ctx->alloc));
m->file = ctx->file;
m->map_entry = false;
@ -1720,17 +1723,15 @@ static bool create_msgdef(symtab_addctx *ctx, const char *prefix,
m->layout = upb_malloc(ctx->alloc, sizeof(*m->layout));
}
oneofs = google_protobuf_DescriptorProto_oneof_decl(msg_proto, &n);
m->oneof_count = 0;
m->oneofs = upb_malloc(ctx->alloc, sizeof(*m->oneofs) * n);
for (i = 0; i < n; i++) {
m->oneofs = upb_malloc(ctx->alloc, sizeof(*m->oneofs) * n_oneof);
for (i = 0; i < n_oneof; i++) {
CHK(create_oneofdef(ctx, m, oneofs[i]));
}
fields = google_protobuf_DescriptorProto_field(msg_proto, &n);
m->field_count = 0;
m->fields = upb_malloc(ctx->alloc, sizeof(*m->fields) * n);
for (i = 0; i < n; i++) {
m->fields = upb_malloc(ctx->alloc, sizeof(*m->fields) * n_field);
for (i = 0; i < n_field; i++) {
CHK(create_fielddef(ctx, m->full_name, m, fields[i]));
}
@ -2085,8 +2086,8 @@ upb_symtab *upb_symtab_new(void) {
s->arena = upb_arena_new();
alloc = upb_arena_alloc(s->arena);
if (!upb_strtable_init2(&s->syms, UPB_CTYPE_CONSTPTR, alloc) ||
!upb_strtable_init2(&s->files, UPB_CTYPE_CONSTPTR, alloc)) {
if (!upb_strtable_init2(&s->syms, UPB_CTYPE_CONSTPTR, 32, alloc) ||
!upb_strtable_init2(&s->files, UPB_CTYPE_CONSTPTR, 4, alloc)) {
upb_arena_free(s->arena);
upb_gfree(s);
s = NULL;
@ -2148,7 +2149,7 @@ static const upb_filedef *_upb_symtab_addfile(
ctx.layouts = layouts;
ctx.status = status;
ok = file && upb_strtable_init2(&addtab, UPB_CTYPE_CONSTPTR, ctx.tmp) &&
ok = file && upb_strtable_init2(&addtab, UPB_CTYPE_CONSTPTR, 8, ctx.tmp) &&
build_filedef(&ctx, file, file_proto) && upb_symtab_addtotabs(s, &ctx);
upb_arena_free(tmparena);

@ -2869,7 +2869,7 @@ static upb_json_parsermethod *parsermethod_new(upb_json_codecache *c,
upb_byteshandler_setstring(&m->input_handler_, parse, m);
upb_byteshandler_setendstr(&m->input_handler_, end, m);
upb_strtable_init2(&m->name_table, UPB_CTYPE_CONSTPTR, alloc);
upb_strtable_init2(&m->name_table, UPB_CTYPE_CONSTPTR, 4, alloc);
/* Build name_table */

@ -130,7 +130,7 @@ upb_map *_upb_map_new(upb_arena *a, size_t key_size, size_t value_size) {
return NULL;
}
upb_strtable_init2(&map->table, UPB_CTYPE_INT32, upb_arena_alloc(a));
upb_strtable_init2(&map->table, UPB_CTYPE_INT32, 4, upb_arena_alloc(a));
map->key_size = key_size;
map->val_size = value_size;

@ -4,10 +4,12 @@
** Implementation is heavily inspired by Lua's ltable.c.
*/
#include "upb/table.int.h"
#include <string.h>
#include "third_party/wyhash/wyhash.h"
#include "upb/table.int.h"
/* Must be last. */
#include "upb/port_def.inc"
#define UPB_MAXARRSIZE 16 /* 64k. */
@ -87,11 +89,7 @@ static upb_tabent *mutable_entries(upb_table *t) {
}
static bool isfull(upb_table *t) {
if (upb_table_size(t) == 0) {
return true;
} else {
return ((double)(t->count + 1) / upb_table_size(t)) > MAX_LOAD;
}
return t->count == t->max_count;
}
static bool init(upb_table *t, uint8_t size_lg2, upb_alloc *a) {
@ -100,6 +98,7 @@ static bool init(upb_table *t, uint8_t size_lg2, upb_alloc *a) {
t->count = 0;
t->size_lg2 = size_lg2;
t->mask = upb_table_size(t) ? upb_table_size(t) - 1 : 0;
t->max_count = upb_table_size(t) * MAX_LOAD;
bytes = upb_table_size(t) * sizeof(upb_tabent);
if (bytes > 0) {
t->entries = upb_malloc(a, bytes);
@ -115,9 +114,17 @@ static void uninit(upb_table *t, upb_alloc *a) {
upb_free(a, mutable_entries(t));
}
static upb_tabent *emptyent(upb_table *t) {
upb_tabent *e = mutable_entries(t) + upb_table_size(t);
while (1) { if (upb_tabent_isempty(--e)) return e; UPB_ASSERT(e > t->entries); }
static upb_tabent *emptyent(upb_table *t, upb_tabent *e) {
upb_tabent *begin = mutable_entries(t);
upb_tabent *end = begin + upb_table_size(t);
for (e = e + 1; e < end; e++) {
if (upb_tabent_isempty(e)) return e;
}
for (e = begin; e < end; e++) {
if (upb_tabent_isempty(e)) return e;
}
UPB_ASSERT(false);
return NULL;
}
static upb_tabent *getentry_mutable(upb_table *t, uint32_t hash) {
@ -173,7 +180,7 @@ static void insert(upb_table *t, lookupkey_t key, upb_tabkey tabkey,
our_e->next = NULL;
} else {
/* Collision. */
upb_tabent *new_e = emptyent(t);
upb_tabent *new_e = emptyent(t, mainpos_e);
/* Head of collider's chain. */
upb_tabent *chain = getentry_mutable(t, hashfunc(mainpos_e->key));
if (chain == mainpos_e) {
@ -268,10 +275,14 @@ static upb_tabkey strcopy(lookupkey_t k2, upb_alloc *a) {
return (uintptr_t)str;
}
static uint32_t table_hash(const char *p, size_t n) {
return wyhash(p, n, 0, _wyp);
}
static uint32_t strhash(upb_tabkey key) {
uint32_t len;
char *str = upb_tabstr(key, &len);
return upb_murmur_hash2(str, len, 0);
return table_hash(str, len);
}
static bool streql(upb_tabkey k1, lookupkey_t k2) {
@ -280,9 +291,14 @@ static bool streql(upb_tabkey k1, lookupkey_t k2) {
return len == k2.str.len && (len == 0 || memcmp(str, k2.str.str, len) == 0);
}
bool upb_strtable_init2(upb_strtable *t, upb_ctype_t ctype, upb_alloc *a) {
bool upb_strtable_init2(upb_strtable *t, upb_ctype_t ctype,
size_t expected_size, upb_alloc *a) {
UPB_UNUSED(ctype); /* TODO(haberman): rm */
return init(&t->t, 2, a);
// Multiply by approximate reciprocal of MAX_LOAD (0.85), with pow2 denominator.
size_t need_entries = (expected_size + 1) * 1204 / 1024;
UPB_ASSERT(need_entries >= expected_size * 0.85);
int size_lg2 = _upb_lg2ceil(need_entries);
return init(&t->t, size_lg2, a);
}
void upb_strtable_clear(upb_strtable *t) {
@ -333,20 +349,20 @@ bool upb_strtable_insert3(upb_strtable *t, const char *k, size_t len,
tabkey = strcopy(key, a);
if (tabkey == 0) return false;
hash = upb_murmur_hash2(key.str.str, key.str.len, 0);
hash = table_hash(key.str.str, key.str.len);
insert(&t->t, key, tabkey, v, hash, &strhash, &streql);
return true;
}
bool upb_strtable_lookup2(const upb_strtable *t, const char *key, size_t len,
upb_value *v) {
uint32_t hash = upb_murmur_hash2(key, len, 0);
uint32_t hash = table_hash(key, len);
return lookup(&t->t, strkey2(key, len), v, hash, &streql);
}
bool upb_strtable_remove3(upb_strtable *t, const char *key, size_t len,
upb_value *val, upb_alloc *alloc) {
uint32_t hash = upb_murmur_hash2(key, len, 0);
uint32_t hash = table_hash(key, len);
upb_tabkey tabkey;
if (rm(&t->t, strkey2(key, len), val, &tabkey, hash, &streql)) {
if (alloc) {
@ -699,182 +715,3 @@ bool upb_inttable_iter_isequal(const upb_inttable_iter *i1,
return i1->t == i2->t && i1->index == i2->index &&
i1->array_part == i2->array_part;
}
#if defined(UPB_UNALIGNED_READS_OK) || defined(__s390x__)
/* -----------------------------------------------------------------------------
* MurmurHash2, by Austin Appleby (released as public domain).
* Reformatted and C99-ified by Joshua Haberman.
* Note - This code makes a few assumptions about how your machine behaves -
* 1. We can read a 4-byte value from any address without crashing
* 2. sizeof(int) == 4 (in upb this limitation is removed by using uint32_t
* And it has a few limitations -
* 1. It will not work incrementally.
* 2. It will not produce the same results on little-endian and big-endian
* machines. */
uint32_t upb_murmur_hash2(const void *key, size_t len, uint32_t seed) {
/* 'm' and 'r' are mixing constants generated offline.
* They're not really 'magic', they just happen to work well. */
const uint32_t m = 0x5bd1e995;
const int32_t r = 24;
/* Initialize the hash to a 'random' value */
uint32_t h = seed ^ len;
/* Mix 4 bytes at a time into the hash */
const uint8_t * data = (const uint8_t *)key;
while(len >= 4) {
uint32_t k;
memcpy(&k, data, sizeof(k));
k *= m;
k ^= k >> r;
k *= m;
h *= m;
h ^= k;
data += 4;
len -= 4;
}
/* Handle the last few bytes of the input array */
switch(len) {
case 3: h ^= data[2] << 16;
case 2: h ^= data[1] << 8;
case 1: h ^= data[0]; h *= m;
};
/* Do a few final mixes of the hash to ensure the last few
* bytes are well-incorporated. */
h ^= h >> 13;
h *= m;
h ^= h >> 15;
return h;
}
#else /* !UPB_UNALIGNED_READS_OK */
/* -----------------------------------------------------------------------------
* MurmurHashAligned2, by Austin Appleby
* Same algorithm as MurmurHash2, but only does aligned reads - should be safer
* on certain platforms.
* Performance will be lower than MurmurHash2 */
#define MIX(h,k,m) { k *= m; k ^= k >> r; k *= m; h *= m; h ^= k; }
uint32_t upb_murmur_hash2(const void * key, size_t len, uint32_t seed) {
const uint32_t m = 0x5bd1e995;
const int32_t r = 24;
const uint8_t * data = (const uint8_t *)key;
uint32_t h = (uint32_t)(seed ^ len);
uint8_t align = (uintptr_t)data & 3;
if(align && (len >= 4)) {
/* Pre-load the temp registers */
uint32_t t = 0, d = 0;
int32_t sl;
int32_t sr;
switch(align) {
case 1: t |= data[2] << 16; /* fallthrough */
case 2: t |= data[1] << 8; /* fallthrough */
case 3: t |= data[0];
}
t <<= (8 * align);
data += 4-align;
len -= 4-align;
sl = 8 * (4-align);
sr = 8 * align;
/* Mix */
while(len >= 4) {
uint32_t k;
d = *(uint32_t *)data;
t = (t >> sr) | (d << sl);
k = t;
MIX(h,k,m);
t = d;
data += 4;
len -= 4;
}
/* Handle leftover data in temp registers */
d = 0;
if(len >= align) {
uint32_t k;
switch(align) {
case 3: d |= data[2] << 16; /* fallthrough */
case 2: d |= data[1] << 8; /* fallthrough */
case 1: d |= data[0]; /* fallthrough */
}
k = (t >> sr) | (d << sl);
MIX(h,k,m);
data += align;
len -= align;
/* ----------
* Handle tail bytes */
switch(len) {
case 3: h ^= data[2] << 16; /* fallthrough */
case 2: h ^= data[1] << 8; /* fallthrough */
case 1: h ^= data[0]; h *= m; /* fallthrough */
};
} else {
switch(len) {
case 3: d |= data[2] << 16; /* fallthrough */
case 2: d |= data[1] << 8; /* fallthrough */
case 1: d |= data[0]; /* fallthrough */
case 0: h ^= (t >> sr) | (d << sl); h *= m;
}
}
h ^= h >> 13;
h *= m;
h ^= h >> 15;
return h;
} else {
while(len >= 4) {
uint32_t k = *(uint32_t *)data;
MIX(h,k,m);
data += 4;
len -= 4;
}
/* ----------
* Handle tail bytes */
switch(len) {
case 3: h ^= data[2] << 16; /* fallthrough */
case 2: h ^= data[1] << 8; /* fallthrough */
case 1: h ^= data[0]; h *= m;
};
h ^= h >> 13;
h *= m;
h ^= h >> 15;
return h;
}
}
#undef MIX
#endif /* UPB_UNALIGNED_READS_OK */

@ -171,7 +171,8 @@ typedef struct _upb_tabent {
typedef struct {
size_t count; /* Number of entries in the hash part. */
size_t mask; /* Mask to turn hash value -> bucket. */
uint32_t mask; /* Mask to turn hash value -> bucket. */
uint32_t max_count; /* Max count before we hit our load limit. */
uint8_t size_lg2; /* Size of the hashtable part is 2^size_lg2 entries. */
/* Hash table entries.
@ -230,7 +231,8 @@ UPB_INLINE bool upb_arrhas(upb_tabval key) {
/* Initialize and uninitialize a table, respectively. If memory allocation
* failed, false is returned that the table is uninitialized. */
bool upb_inttable_init2(upb_inttable *table, upb_ctype_t ctype, upb_alloc *a);
bool upb_strtable_init2(upb_strtable *table, upb_ctype_t ctype, upb_alloc *a);
bool upb_strtable_init2(upb_strtable *table, upb_ctype_t ctype,
size_t expected_size, upb_alloc *a);
void upb_inttable_uninit2(upb_inttable *table, upb_alloc *a);
void upb_strtable_uninit2(upb_strtable *table, upb_alloc *a);
@ -239,7 +241,7 @@ UPB_INLINE bool upb_inttable_init(upb_inttable *table, upb_ctype_t ctype) {
}
UPB_INLINE bool upb_strtable_init(upb_strtable *table, upb_ctype_t ctype) {
return upb_strtable_init2(table, ctype, &upb_alloc_global);
return upb_strtable_init2(table, ctype, 4, &upb_alloc_global);
}
UPB_INLINE void upb_inttable_uninit(upb_inttable *table) {

@ -313,6 +313,17 @@ UPB_INLINE uint64_t _upb_be_swap64(uint64_t val) {
}
}
UPB_INLINE int _upb_lg2ceil(int x) {
if (x == 0) return 0;
#ifdef __GNUC__
return 32 - __builtin_clz(x - 1);
#else
int lg2 = 0;
while (1 << lg2 < x) lg2++;
return lg2;
#endif
}
#include "upb/port_undef.inc"
#ifdef __cplusplus

Loading…
Cancel
Save