Merge pull request #403 from haberman/google-wyhash

Switch to ABSL's wyhash
pull/13171/head
Joshua Haberman 3 years ago committed by GitHub
commit 0010bd88ff
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
  1. 4
      BUILD
  2. 2
      cmake/BUILD
  3. 4
      cmake/CMakeLists.txt
  4. 1
      cmake/make_cmakelists.py
  5. 42
      third_party/wyhash/BUILD
  6. 25
      third_party/wyhash/LICENSE
  7. 145
      third_party/wyhash/wyhash.h
  8. 138
      upb/table.c

@ -102,7 +102,6 @@ cc_library(
deps = [
":fastdecode",
":port",
"//third_party/wyhash",
],
)
@ -260,7 +259,6 @@ cc_library(
srcs = ["upb.c"],
hdrs = ["upb.h"],
copts = UPB_DEFAULT_COPTS,
deps = ["//third_party/wyhash"],
)
upb_amalgamation(
@ -287,7 +285,6 @@ cc_library(
srcs = ["php-upb.c"],
hdrs = ["php-upb.h"],
copts = UPB_DEFAULT_COPTS,
deps = ["//third_party/wyhash"],
)
upb_amalgamation(
@ -313,7 +310,6 @@ cc_library(
srcs = ["ruby-upb.c"],
hdrs = ["ruby-upb.h"],
copts = UPB_DEFAULT_COPTS,
deps = ["//third_party/wyhash"],
)
exports_files(

@ -53,7 +53,6 @@ genrule(
"//:BUILD",
"//:WORKSPACE",
"//:cmake_files",
"//third_party/wyhash:cmake_files",
":cmake_files",
],
outs = ["generated-in/CMakeLists.txt"],
@ -102,7 +101,6 @@ sh_test(
data = [
":cmake_files",
"//:cmake_files",
"//third_party/wyhash:cmake_files",
],
deps = ["@bazel_tools//tools/bash/runfiles"],
)

@ -79,8 +79,7 @@ add_library(upb
../upb/upb.hpp)
target_link_libraries(upb
fastdecode
port
/third_party/wyhash)
port)
add_library(fastdecode
../upb/decode_internal.h
../upb/decode_fast.c
@ -126,6 +125,5 @@ target_link_libraries(json
add_library(table INTERFACE)
target_link_libraries(table INTERFACE
port)
add_library(wyhash INTERFACE)

@ -308,7 +308,6 @@ globs = GetDict(converter)
exec(open("WORKSPACE").read(), GetDict(WorkspaceFileFunctions(converter)))
exec(open("BUILD").read(), GetDict(BuildFileFunctions(converter)))
exec(open("third_party/wyhash/BUILD").read(), GetDict(BuildFileFunctions(converter)))
with open(sys.argv[1], "w") as f:
f.write(converter.convert())

@ -1,42 +0,0 @@
# Copyright (c) 2009-2021, Google LLC
# All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions are met:
# * Redistributions of source code must retain the above copyright
# notice, this list of conditions and the following disclaimer.
# * Redistributions in binary form must reproduce the above copyright
# notice, this list of conditions and the following disclaimer in the
# documentation and/or other materials provided with the distribution.
# * Neither the name of Google LLC nor the
# names of its contributors may be used to endorse or promote products
# derived from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
# ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
# WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
# DISCLAIMED. IN NO EVENT SHALL Google LLC BE LIABLE FOR ANY
# DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
# (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
# LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
# ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
# SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
licenses(["unencumbered"])
exports_files(["LICENSE"])
cc_library(
name = "wyhash",
hdrs = ["wyhash.h"],
visibility = ["//:__pkg__"],
)
filegroup(
name = "cmake_files",
srcs = glob([
"**/*",
]),
visibility = ["//cmake:__pkg__"],
)

@ -1,25 +0,0 @@
This is free and unencumbered software released into the public domain.
Anyone is free to copy, modify, publish, use, compile, sell, or
distribute this software, either in source code form or as a compiled
binary, for any purpose, commercial or non-commercial, and by any
means.
In jurisdictions that recognize copyright laws, the author or authors
of this software dedicate any and all copyright interest in the
software to the public domain. We make this dedication for the benefit
of the public at large and to the detriment of our heirs and
successors. We intend this dedication to be an overt act of
relinquishment in perpetuity of all present and future rights to this
software under copyright law.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR
OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
OTHER DEALINGS IN THE SOFTWARE.
For more information, please refer to <http://unlicense.org/>

@ -1,145 +0,0 @@
/* Copyright 2020 王一 Wang Yi <godspeed_china@yeah.net>
This is free and unencumbered software released into the public domain. http://unlicense.org/
See github.com/wangyi-fudan/wyhash/ LICENSE
*/
#ifndef wyhash_final_version
#define wyhash_final_version
//defines that change behavior
#ifndef WYHASH_CONDOM
#define WYHASH_CONDOM 1 //0: read 8 bytes before and after boundaries, dangerous but fastest. 1: normal valid behavior 2: extra protection against entropy loss (probability=2^-63), aka. "blind multiplication"
#endif
#define WYHASH_32BIT_MUM 0 //faster on 32 bit system
//includes
#include <stdint.h>
#include <string.h>
#if defined(_MSC_VER) && defined(_M_X64)
#include <intrin.h>
#pragma intrinsic(_umul128)
#endif
#if defined(__GNUC__) || defined(__INTEL_COMPILER) || defined(__clang__)
#define _likely_(x) __builtin_expect(x,1)
#define _unlikely_(x) __builtin_expect(x,0)
#else
#define _likely_(x) (x)
#define _unlikely_(x) (x)
#endif
//mum function
static inline uint64_t _wyrot(uint64_t x) { return (x>>32)|(x<<32); }
static inline void _wymum(uint64_t *A, uint64_t *B){
#if(WYHASH_32BIT_MUM)
uint64_t hh=(*A>>32)*(*B>>32), hl=(*A>>32)*(unsigned)*B, lh=(unsigned)*A*(*B>>32), ll=(uint64_t)(unsigned)*A*(unsigned)*B;
#if(WYHASH_CONDOM>1)
*A^=_wyrot(hl)^hh; *B^=_wyrot(lh)^ll;
#else
*A=_wyrot(hl)^hh; *B=_wyrot(lh)^ll;
#endif
#elif defined(__SIZEOF_INT128__)
__uint128_t r=*A; r*=*B;
#if(WYHASH_CONDOM>1)
*A^=(uint64_t)r; *B^=(uint64_t)(r>>64);
#else
*A=(uint64_t)r; *B=(uint64_t)(r>>64);
#endif
#elif defined(_MSC_VER) && defined(_M_X64)
#if(WYHASH_CONDOM>1)
uint64_t a, b;
a=_umul128(*A,*B,&b);
*A^=a; *B^=b;
#else
*A=_umul128(*A,*B,B);
#endif
#else
uint64_t ha=*A>>32, hb=*B>>32, la=(uint32_t)*A, lb=(uint32_t)*B, hi, lo;
uint64_t rh=ha*hb, rm0=ha*lb, rm1=hb*la, rl=la*lb, t=rl+(rm0<<32), c=t<rl;
lo=t+(rm1<<32); c+=lo<t; hi=rh+(rm0>>32)+(rm1>>32)+c;
#if(WYHASH_CONDOM>1)
*A^=lo; *B^=hi;
#else
*A=lo; *B=hi;
#endif
#endif
}
static inline uint64_t _wymix(uint64_t A, uint64_t B){ _wymum(&A,&B); return A^B; }
//read functions
#ifndef WYHASH_LITTLE_ENDIAN
#if defined(_WIN32) || defined(__LITTLE_ENDIAN__) || (defined(__BYTE_ORDER__) && __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__)
#define WYHASH_LITTLE_ENDIAN 1
#elif defined(__BIG_ENDIAN__) || (defined(__BYTE_ORDER__) && __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__)
#define WYHASH_LITTLE_ENDIAN 0
#endif
#endif
#if (WYHASH_LITTLE_ENDIAN)
static inline uint64_t _wyr8(const uint8_t *p) { uint64_t v; memcpy(&v, p, 8); return v;}
static inline uint64_t _wyr4(const uint8_t *p) { unsigned v; memcpy(&v, p, 4); return v;}
#elif defined(__GNUC__) || defined(__INTEL_COMPILER) || defined(__clang__)
static inline uint64_t _wyr8(const uint8_t *p) { uint64_t v; memcpy(&v, p, 8); return __builtin_bswap64(v);}
static inline uint64_t _wyr4(const uint8_t *p) { unsigned v; memcpy(&v, p, 4); return __builtin_bswap32(v);}
#elif defined(_MSC_VER)
static inline uint64_t _wyr8(const uint8_t *p) { uint64_t v; memcpy(&v, p, 8); return _byteswap_uint64(v);}
static inline uint64_t _wyr4(const uint8_t *p) { unsigned v; memcpy(&v, p, 4); return _byteswap_ulong(v);}
#endif
static inline uint64_t _wyr3(const uint8_t *p, unsigned k) { return (((uint64_t)p[0])<<16)|(((uint64_t)p[k>>1])<<8)|p[k-1];}
//wyhash function
static inline uint64_t _wyfinish16(const uint8_t *p, uint64_t len, uint64_t seed, const uint64_t *secret, uint64_t i){
#if(WYHASH_CONDOM>0)
uint64_t a, b;
if(_likely_(i<=8)){
if(_likely_(i>=4)){ a=_wyr4(p); b=_wyr4(p+i-4); }
else if (_likely_(i)){ a=_wyr3(p,i); b=0; }
else a=b=0;
}
else{ a=_wyr8(p); b=_wyr8(p+i-8); }
return _wymix(secret[1]^len,_wymix(a^secret[1], b^seed));
#else
#define oneshot_shift ((i<8)*((8-i)<<3))
return _wymix(secret[1]^len,_wymix((_wyr8(p)<<oneshot_shift)^secret[1],(_wyr8(p+i-8)>>oneshot_shift)^seed));
#endif
}
static inline uint64_t _wyfinish(const uint8_t *p, uint64_t len, uint64_t seed, const uint64_t *secret, uint64_t i){
if(_likely_(i<=16)) return _wyfinish16(p,len,seed,secret,i);
return _wyfinish(p+16,len,_wymix(_wyr8(p)^secret[1],_wyr8(p+8)^seed),secret,i-16);
}
static inline uint64_t wyhash(const void *key, uint64_t len, uint64_t seed, const uint64_t *secret){
const uint8_t *p=(const uint8_t *)key;
uint64_t i=len; seed^=*secret;
if(_unlikely_(i>64)){
uint64_t see1=seed;
do{
seed=_wymix(_wyr8(p)^secret[1],_wyr8(p+8)^seed)^_wymix(_wyr8(p+16)^secret[2],_wyr8(p+24)^seed);
see1=_wymix(_wyr8(p+32)^secret[3],_wyr8(p+40)^see1)^_wymix(_wyr8(p+48)^secret[4],_wyr8(p+56)^see1);
p+=64; i-=64;
}while(i>64);
seed^=see1;
}
return _wyfinish(p,len,seed,secret,i);
}
//utility functions
const uint64_t _wyp[5] = {0xa0761d6478bd642full, 0xe7037ed1a0b428dbull, 0x8ebc6af09c88c6e3ull, 0x589965cc75374cc3ull, 0x1d8e4e27c47d124full};
static inline uint64_t wyhash64(uint64_t A, uint64_t B){ A^=_wyp[0]; B^=_wyp[1]; _wymum(&A,&B); return _wymix(A^_wyp[0],B^_wyp[1]);}
static inline uint64_t wyrand(uint64_t *seed){ *seed+=_wyp[0]; return _wymix(*seed,*seed^_wyp[1]);}
static inline double wy2u01(uint64_t r){ const double _wynorm=1.0/(1ull<<52); return (r>>12)*_wynorm;}
static inline double wy2gau(uint64_t r){ const double _wynorm=1.0/(1ull<<20); return ((r&0x1fffff)+((r>>21)&0x1fffff)+((r>>42)&0x1fffff))*_wynorm-3.0;}
static inline uint64_t wy2u0k(uint64_t r, uint64_t k){ _wymum(&r,&k); return k; }
static inline void make_secret(uint64_t seed, uint64_t *secret){
uint8_t c[] = {15, 23, 27, 29, 30, 39, 43, 45, 46, 51, 53, 54, 57, 58, 60, 71, 75, 77, 78, 83, 85, 86, 89, 90, 92, 99, 101, 102, 105, 106, 108, 113, 114, 116, 120, 135, 139, 141, 142, 147, 149, 150, 153, 154, 156, 163, 165, 166, 169, 170, 172, 177, 178, 180, 184, 195, 197, 198, 201, 202, 204, 209, 210, 212, 216, 225, 226, 228, 232, 240 };
for(size_t i=0;i<5;i++){
uint8_t ok;
do{
ok=1; secret[i]=0;
for(size_t j=0;j<64;j+=8) secret[i]|=((uint64_t)c[wyrand(&seed)%sizeof(c)])<<j;
if(secret[i]%2==0){ ok=0; continue; }
for(size_t j=0;j<i;j++)
#if defined(__GNUC__) || defined(__INTEL_COMPILER) || defined(__clang__)
if(__builtin_popcountll(secret[j]^secret[i])!=32){ ok=0; break; }
#elif defined(_MSC_VER) && defined(_M_X64)
if(_mm_popcnt_u64(secret[j]^secret[i])!=32){ ok=0; break; }
#endif
if(!ok)continue;
for(uint64_t j=3;j<0x100000000ull;j+=2) if(secret[i]%j==0){ ok=0; break; }
}while(!ok);
}
}
#endif

@ -33,7 +33,6 @@
#include <string.h>
#include "third_party/wyhash/wyhash.h"
#include "upb/table_internal.h"
/* Must be last. */
@ -308,8 +307,143 @@ static upb_tabkey strcopy(lookupkey_t k2, upb_arena *a) {
return (uintptr_t)str;
}
/* Adapted from ABSL's wyhash. */
static uint64_t UnalignedLoad64(const void *p) {
uint64_t val;
memcpy(&val, p, 8);
return val;
}
static uint32_t UnalignedLoad32(const void *p) {
uint32_t val;
memcpy(&val, p, 4);
return val;
}
#if defined(_MSC_VER) && defined(_M_X64)
#include <intrin.h>
#endif
/* Computes a * b, returning the low 64 bits of the result and storing the high
* 64 bits in |*high|. */
static uint64_t upb_umul128(uint64_t v0, uint64_t v1, uint64_t* out_high) {
#ifdef __SIZEOF_INT128__
__uint128_t p = v0;
p *= v1;
*out_high = (uint64_t)(p >> 64);
return (uint64_t)p;
#elif defined(_MSC_VER) && defined(_M_X64)
return _umul128(v0, v1, out_high);
#else
uint64_t a32 = v0 >> 32;
uint64_t a00 = v0 & 0xffffffff;
uint64_t b32 = v1 >> 32;
uint64_t b00 = v1 & 0xffffffff;
uint64_t high = a32 * b32;
uint64_t low = a00 * b00;
uint64_t mid1 = a32 * b00;
uint64_t mid2 = a00 * b32;
low += (mid1 << 32) + (mid2 << 32);
// Omit carry bit, for mixing we do not care about exact numerical precision.
high += (mid1 >> 32) + (mid2 >> 32);
*out_high = high;
return low;
#endif
}
static uint64_t WyhashMix(uint64_t v0, uint64_t v1) {
uint64_t high;
uint64_t low = upb_umul128(v0, v1, &high);
return low ^ high;
}
static uint64_t Wyhash(const void *data, size_t len, uint64_t seed,
const uint64_t salt[]) {
const uint8_t* ptr = (const uint8_t*)data;
uint64_t starting_length = (uint64_t)len;
uint64_t current_state = seed ^ salt[0];
if (len > 64) {
// If we have more than 64 bytes, we're going to handle chunks of 64
// bytes at a time. We're going to build up two separate hash states
// which we will then hash together.
uint64_t duplicated_state = current_state;
do {
uint64_t a = UnalignedLoad64(ptr);
uint64_t b = UnalignedLoad64(ptr + 8);
uint64_t c = UnalignedLoad64(ptr + 16);
uint64_t d = UnalignedLoad64(ptr + 24);
uint64_t e = UnalignedLoad64(ptr + 32);
uint64_t f = UnalignedLoad64(ptr + 40);
uint64_t g = UnalignedLoad64(ptr + 48);
uint64_t h = UnalignedLoad64(ptr + 56);
uint64_t cs0 = WyhashMix(a ^ salt[1], b ^ current_state);
uint64_t cs1 = WyhashMix(c ^ salt[2], d ^ current_state);
current_state = (cs0 ^ cs1);
uint64_t ds0 = WyhashMix(e ^ salt[3], f ^ duplicated_state);
uint64_t ds1 = WyhashMix(g ^ salt[4], h ^ duplicated_state);
duplicated_state = (ds0 ^ ds1);
ptr += 64;
len -= 64;
} while (len > 64);
current_state = current_state ^ duplicated_state;
}
// We now have a data `ptr` with at most 64 bytes and the current state
// of the hashing state machine stored in current_state.
while (len > 16) {
uint64_t a = UnalignedLoad64(ptr);
uint64_t b = UnalignedLoad64(ptr + 8);
current_state = WyhashMix(a ^ salt[1], b ^ current_state);
ptr += 16;
len -= 16;
}
// We now have a data `ptr` with at most 16 bytes.
uint64_t a = 0;
uint64_t b = 0;
if (len > 8) {
// When we have at least 9 and at most 16 bytes, set A to the first 64
// bits of the input and B to the last 64 bits of the input. Yes, they will
// overlap in the middle if we are working with less than the full 16
// bytes.
a = UnalignedLoad64(ptr);
b = UnalignedLoad64(ptr + len - 8);
} else if (len > 3) {
// If we have at least 4 and at most 8 bytes, set A to the first 32
// bits and B to the last 32 bits.
a = UnalignedLoad32(ptr);
b = UnalignedLoad32(ptr + len - 4);
} else if (len > 0) {
// If we have at least 1 and at most 3 bytes, read all of the provided
// bits into A, with some adjustments.
a = ((ptr[0] << 16) | (ptr[len >> 1] << 8) | ptr[len - 1]);
b = 0;
} else {
a = 0;
b = 0;
}
uint64_t w = WyhashMix(a ^ salt[1], b ^ current_state);
uint64_t z = salt[1] ^ starting_length;
return WyhashMix(w, z);
}
const uint64_t kWyhashSalt[5] = {
0x243F6A8885A308D3ULL, 0x13198A2E03707344ULL, 0xA4093822299F31D0ULL,
0x082EFA98EC4E6C89ULL, 0x452821E638D01377ULL,
};
static uint32_t table_hash(const char *p, size_t n) {
return wyhash(p, n, 0, _wyp);
return Wyhash(p, n, 0, kWyhashSalt);
}
static uint32_t strhash(upb_tabkey key) {

Loading…
Cancel
Save