Merge branch 'master' into fast-table

pull/13171/head
Joshua Haberman 4 years ago
commit a7993615bf
  1. 20
      .bazelrc
  2. 8
      BUILD
  3. 9
      WORKSPACE
  4. 5
      bazel/build_defs.bzl
  5. 7
      bazel/upb_proto_library.bzl
  6. 7
      benchmarks/BUILD
  7. 29
      benchmarks/BUILD.googleapis
  8. 72
      benchmarks/benchmark.cc
  9. 2
      cmake/BUILD
  10. 7
      cmake/CMakeLists.txt
  11. 4
      cmake/make_cmakelists.py
  12. 2
      cmake/upb/json/parser.c
  13. 31
      kokoro/ubuntu/build.sh
  14. 16
      tests/bindings/lua/test_upb.lua
  15. 55
      tests/pb/test_decoder.cc
  16. 4
      tests/test_generated_code.c
  17. 10
      tests/test_table.cc
  18. 18
      third_party/wyhash/BUILD
  19. 25
      third_party/wyhash/LICENSE
  20. 145
      third_party/wyhash/wyhash.h
  21. 3
      upb/decode.c
  22. 924
      upb/def.c
  23. 1
      upb/def.h
  24. 2
      upb/json/parser.rl
  25. 2
      upb/json_decode.c
  26. 2
      upb/json_encode.h
  27. 17
      upb/msg.c
  28. 24
      upb/msg.h
  29. 26
      upb/port.c
  30. 25
      upb/reflection.c
  31. 229
      upb/table.c
  32. 10
      upb/table.int.h
  33. 11
      upb/upb.h

@ -0,0 +1,20 @@
# Use our custom-configured c++ toolchain.
build:m32 --copt=-m32 --linkopt=-m32
build:asan --copt=-fsanitize=address --linkopt=-fsanitize=address
build:valgrind --run_under='valgrind --leak-check=full --error-exitcode=1'
build:ubsan --copt=-fsanitize=undefined --linkopt=-fsanitize=undefined --action_env=UBSAN_OPTIONS=halt_on_error=1:print_stacktrace=1
# Workaround for the fact that Bazel links with $CC, not $CXX
# https://github.com/bazelbuild/bazel/issues/11122#issuecomment-613746748
build:ubsan --copt=-fno-sanitize=function --copt=-fno-sanitize=vptr
build:Werror --copt=-Werror
build:Werror --per_file_copt=json/parser@-Wno-error
build:Werror --per_file_copt=com_google_protobuf@-Wno-error
# GCC's -fanalyzer, a deeper static analysis than normal warnings.
build:analyzer --copt=-fanalyzer --copt=-Werror
build:analyzer --per_file_copt=json/parser@-fno-analyzer
build:analyzer --per_file_copt=com_google_protobuf@-fno-analyzer
build:analyzer --per_file_copt=com_github_google_benchmark@-fno-analyzer

@ -52,9 +52,6 @@ upb_proto_library_copts(
cc_library(
name = "port",
srcs = [
"upb/port.c",
],
copts = UPB_DEFAULT_COPTS,
textual_hdrs = [
"upb/port_def.inc",
@ -87,6 +84,7 @@ cc_library(
deps = [
":fastdecode",
":port",
"//third_party/wyhash",
],
)
@ -321,6 +319,7 @@ cc_library(
srcs = ["upb.c"],
hdrs = ["upb.h"],
copts = UPB_DEFAULT_COPTS,
deps = ["//third_party/wyhash"],
)
upb_amalgamation(
@ -347,6 +346,7 @@ cc_library(
srcs = ["php-upb.c"],
hdrs = ["php-upb.h"],
copts = UPB_DEFAULT_COPTS,
deps = ["//third_party/wyhash"],
)
upb_amalgamation(
@ -372,6 +372,7 @@ cc_library(
srcs = ["ruby-upb.c"],
hdrs = ["ruby-upb.h"],
copts = UPB_DEFAULT_COPTS,
deps = ["//third_party/wyhash"],
)
exports_files(
@ -401,6 +402,7 @@ filegroup(
"upbc/**/*",
"upb/**/*",
"tests/**/*",
"third_party/**/*",
]),
visibility = ["//cmake:__pkg__"],
)

@ -1,6 +1,7 @@
workspace(name = "upb")
load("@bazel_tools//tools/build_defs/repo:http.bzl", "http_archive")
load("@bazel_tools//tools/build_defs/repo:git.bzl", "new_git_repository")
load("//bazel:workspace_deps.bzl", "upb_deps")
upb_deps()
@ -37,3 +38,11 @@ http_archive(
strip_prefix = "benchmark-16703ff83c1ae6d53e5155df3bb3ab0bc96083be",
sha256 = "59f918c8ccd4d74b6ac43484467b500f1d64b40cc1010daa055375b322a43ba3",
)
new_git_repository(
name = "com_google_googleapis",
remote = "https://github.com/googleapis/googleapis.git",
branch = "master",
build_file = "//benchmarks:BUILD.googleapis",
patch_cmds = ["find google -type f -name BUILD.bazel -delete"],
)

@ -22,7 +22,12 @@ UPB_DEFAULT_COPTS = select({
"-std=c99",
"-pedantic",
"-Werror=pedantic",
"-Wall",
"-Wstrict-prototypes",
# GCC (at least) emits spurious warnings for this that cannot be fixed
# without introducing redundant initialization (with runtime cost):
# https://gcc.gnu.org/bugzilla/show_bug.cgi?id=80635
#"-Wno-maybe-uninitialized",
# copybara:strip_end
],
})

@ -207,10 +207,7 @@ def _upb_proto_rule_impl(ctx):
fail("proto_library rule must generate _UpbWrappedCcInfo or " +
"_UpbDefsWrappedCcInfo (aspect should have handled this).")
if type(cc_info.linking_context.libraries_to_link) == "list":
lib = cc_info.linking_context.libraries_to_link[0]
else:
lib = cc_info.linking_context.libraries_to_link.to_list()[0]
lib = cc_info.linking_context.linker_inputs.to_list()[0].libraries[0]
files = _filter_none([
lib.static_library,
lib.pic_static_library,
@ -295,6 +292,7 @@ _upb_proto_library_aspect = aspect(
attr_aspects = ["deps"],
fragments = ["cpp"],
toolchains = ["@bazel_tools//tools/cpp:toolchain_type"],
incompatible_use_toolchain_transition = True,
)
upb_proto_library = rule(
@ -358,6 +356,7 @@ _upb_proto_reflection_library_aspect = aspect(
attr_aspects = ["deps"],
fragments = ["cpp"],
toolchains = ["@bazel_tools//tools/cpp:toolchain_type"],
incompatible_use_toolchain_transition = True,
)
upb_proto_reflection_library = rule(

@ -21,6 +21,11 @@ upb_proto_reflection_library(
deps = [":benchmark_descriptor_proto"],
)
upb_proto_reflection_library(
name = "ads_upb_proto_reflection",
deps = ["@com_google_googleapis//:ads_proto"],
)
cc_proto_library(
name = "benchmark_descriptor_cc_proto",
deps = [":benchmark_descriptor_proto"],
@ -45,8 +50,10 @@ cc_binary(
":benchmark_descriptor_sv_cc_proto",
":benchmark_descriptor_upb_proto",
":benchmark_descriptor_upb_proto_reflection",
":ads_upb_proto_reflection",
"//:descriptor_upb_proto",
"//:reflection",
"@com_google_absl//absl/container:flat_hash_set",
"@com_github_google_benchmark//:benchmark_main",
"@com_google_protobuf//:protobuf",
],

@ -0,0 +1,29 @@
load(
"@rules_proto//proto:defs.bzl",
"proto_library",
)
proto_library(
name = "ads_proto",
srcs = glob([
"google/ads/googleads/v5/**/*.proto",
"google/api/**/*.proto",
"google/rpc/**/*.proto",
"google/longrunning/**/*.proto",
"google/logging/**/*.proto",
]),
#srcs = ["google/ads/googleads/v5/services/google_ads_service.proto"],
visibility = ["//visibility:public"],
deps = [
"@com_google_protobuf//:any_proto",
"@com_google_protobuf//:empty_proto",
"@com_google_protobuf//:descriptor_proto",
"@com_google_protobuf//:field_mask_proto",
"@com_google_protobuf//:duration_proto",
"@com_google_protobuf//:timestamp_proto",
"@com_google_protobuf//:struct_proto",
"@com_google_protobuf//:api_proto",
"@com_google_protobuf//:type_proto",
"@com_google_protobuf//:wrappers_proto",
],
)

@ -2,16 +2,13 @@
#include <benchmark/benchmark.h>
#include <string.h>
// For benchmarks of parsing speed.
#include "absl/container/flat_hash_set.h"
#include "benchmarks/descriptor.pb.h"
#include "benchmarks/descriptor.upb.h"
#include "benchmarks/descriptor.upbdefs.h"
#include "benchmarks/descriptor_sv.pb.h"
// For for benchmarks of building descriptors.
#include "google/protobuf/descriptor.upb.h"
#include "google/ads/googleads/v5/services/google_ads_service.upbdefs.h"
#include "google/protobuf/descriptor.pb.h"
#include "upb/def.hpp"
upb_strview descriptor = benchmarks_descriptor_proto_upbdefinit.descriptor;
@ -20,6 +17,16 @@ namespace protobuf = ::google::protobuf;
/* A buffer big enough to parse descriptor.proto without going to heap. */
char buf[65535];
void CollectFileDescriptors(const upb_def_init* file,
std::vector<upb_strview>& serialized_files,
absl::flat_hash_set<const upb_def_init*>& seen) {
if (!seen.insert(file).second) return;
for (upb_def_init **deps = file->deps; *deps; deps++) {
CollectFileDescriptors(*deps, serialized_files, seen);
}
serialized_files.push_back(file->descriptor);
}
static void BM_ArenaOneAlloc(benchmark::State& state) {
for (auto _ : state) {
upb_arena* arena = upb_arena_new();
@ -39,22 +46,28 @@ static void BM_ArenaInitialBlockOneAlloc(benchmark::State& state) {
BENCHMARK(BM_ArenaInitialBlockOneAlloc);
static void BM_LoadDescriptor_Upb(benchmark::State& state) {
size_t bytes_per_iter = 0;
for (auto _ : state) {
upb::SymbolTable symtab;
upb::Arena arena;
google_protobuf_FileDescriptorProto* file_proto =
google_protobuf_FileDescriptorProto_parse(descriptor.data,
descriptor.size, arena.ptr());
upb::FileDefPtr file_def = symtab.AddFile(file_proto, NULL);
if (!file_def) {
printf("Failed to add file.\n");
exit(1);
}
upb_benchmark_DescriptorProto_getmsgdef(symtab.ptr());
bytes_per_iter = _upb_symtab_bytesloaded(symtab.ptr());
}
state.SetBytesProcessed(state.iterations() * descriptor.size);
state.SetBytesProcessed(state.iterations() * bytes_per_iter);
}
BENCHMARK(BM_LoadDescriptor_Upb);
static void BM_LoadAdsDescriptor_Upb(benchmark::State& state) {
size_t bytes_per_iter = 0;
for (auto _ : state) {
upb::SymbolTable symtab;
google_ads_googleads_v5_services_SearchGoogleAdsRequest_getmsgdef(
symtab.ptr());
bytes_per_iter = _upb_symtab_bytesloaded(symtab.ptr());
}
state.SetBytesProcessed(state.iterations() * bytes_per_iter);
}
BENCHMARK(BM_LoadAdsDescriptor_Upb);
static void BM_LoadDescriptor_Proto2(benchmark::State& state) {
for (auto _ : state) {
protobuf::Arena arena;
@ -73,6 +86,35 @@ static void BM_LoadDescriptor_Proto2(benchmark::State& state) {
}
BENCHMARK(BM_LoadDescriptor_Proto2);
static void BM_LoadAdsDescriptor_Proto2(benchmark::State& state) {
extern upb_def_init google_ads_googleads_v5_services_google_ads_service_proto_upbdefinit;
std::vector<upb_strview> serialized_files;
absl::flat_hash_set<const upb_def_init*> seen_files;
CollectFileDescriptors(
&google_ads_googleads_v5_services_google_ads_service_proto_upbdefinit,
serialized_files, seen_files);
size_t bytes_per_iter = 0;
for (auto _ : state) {
bytes_per_iter = 0;
protobuf::Arena arena;
protobuf::DescriptorPool pool;
for (auto file : serialized_files) {
protobuf::StringPiece input(file.data, file.size);
auto proto = protobuf::Arena::CreateMessage<protobuf::FileDescriptorProto>(
&arena);
bool ok = proto->ParseFrom<protobuf::MessageLite::kMergePartial>(input) &&
pool.BuildFile(*proto) != nullptr;
if (!ok) {
printf("Failed to add file.\n");
exit(1);
}
bytes_per_iter += input.size();
}
}
state.SetBytesProcessed(state.iterations() * bytes_per_iter);
}
BENCHMARK(BM_LoadAdsDescriptor_Proto2);
static void BM_Parse_Upb_FileDesc_WithArena(benchmark::State& state) {
size_t bytes = 0;
for (auto _ : state) {

@ -28,6 +28,7 @@ genrule(
"//:BUILD",
"//:WORKSPACE",
"//:cmake_files",
"//third_party/wyhash:cmake_files",
":cmake_files",
],
outs = ["generated-in/CMakeLists.txt"],
@ -84,6 +85,7 @@ sh_test(
data = [
":cmake_files",
"//:cmake_files",
"//third_party/wyhash:cmake_files",
],
deps = ["@bazel_tools//tools/bash/runfiles"],
)

@ -61,8 +61,7 @@ endif()
enable_testing()
add_library(port
../upb/port.c)
add_library(port INTERFACE)
add_library(upb
../upb/decode.c
../upb/decode.int.h
@ -79,7 +78,8 @@ add_library(upb
../upb/upb.hpp)
target_link_libraries(upb
fastdecode
port)
port
/third_party/wyhash)
add_library(fastdecode
../upb/decode.int.h
../upb/decode_fast.c
@ -160,5 +160,6 @@ add_library(upb_json
target_link_libraries(upb_json
upb
upb_pb)
add_library(wyhash INTERFACE)

@ -180,6 +180,9 @@ class WorkspaceFileFunctions(object):
def git_repository(self, **kwargs):
pass
def new_git_repository(self, **kwargs):
pass
def bazel_version_repository(self, **kwargs):
pass
@ -280,6 +283,7 @@ globs = GetDict(converter)
exec(open("WORKSPACE").read(), GetDict(WorkspaceFileFunctions(converter)))
exec(open("BUILD").read(), GetDict(BuildFileFunctions(converter)))
exec(open("third_party/wyhash/BUILD").read(), GetDict(BuildFileFunctions(converter)))
with open(sys.argv[1], "w") as f:
f.write(converter.convert())

@ -3306,7 +3306,7 @@ static upb_json_parsermethod *parsermethod_new(upb_json_codecache *c,
upb_byteshandler_setstring(&m->input_handler_, parse, m);
upb_byteshandler_setendstr(&m->input_handler_, end, m);
upb_strtable_init2(&m->name_table, UPB_CTYPE_CONSTPTR, alloc);
upb_strtable_init2(&m->name_table, UPB_CTYPE_CONSTPTR, 4, alloc);
/* Build name_table */

@ -17,6 +17,7 @@ which bazel
bazel version
cd $(dirname $0)/../..
<<<<<<< HEAD
if which gcc; then
gcc --version
@ -30,20 +31,32 @@ if which clang; then
# The checked-in code is with fasttable not enabled.
CC=clang bazel test --test_output=errors ... --//:fasttable_enabled=true -- -cmake:test_generated_files
fi
=======
>>>>>>> master
if [[ $(uname) = "Linux" ]]; then
# Verify the ASAN build. Have to exclude test_conformance_upb as protobuf
# currently leaks memory in the conformance test runner.
bazel test --copt=-fsanitize=address --linkopt=-fsanitize=address --test_output=errors ...
if which gcc; then
gcc --version
CC=gcc bazel test --test_output=errors ...
CC=gcc bazel test -c opt --test_output=errors ...
# TODO: work through these errors and enable this.
# if gcc -fanalyzer -x c /dev/null -c -o /dev/null; then
# CC=gcc bazel test --copt=-fanalyzer --test_output=errors ...
# fi
fi
# Verify the UBSan build. Have to exclude Lua as the version we are using
# fails some UBSan tests.
if which clang; then
CC=clang bazel test --test_output=errors ...
CC=clang bazel test --test_output=errors -c opt ...
# For some reason kokoro doesn't have Clang available right now.
#CC=clang CXX=clang++ bazel test -c dbg --copt=-fsanitize=undefined --copt=-fno-sanitize=function,vptr --linkopt=-fsanitize=undefined --action_env=UBSAN_OPTIONS=halt_on_error=1:print_stacktrace=1 -- :all -:test_lua
if [[ $(uname) = "Linux" ]]; then
CC=clang bazel test --test_output=errors --config=m32 ...
CC=clang bazel test --test_output=errors --config=asan ...
# TODO: update to a newer Lua that hopefully does not trigger UBSAN.
CC=clang bazel test --test_output=errors --config=ubsan ... -- -tests/bindings/lua:test_lua
fi
fi
if which valgrind; then
bazel test --run_under='valgrind --leak-check=full --error-exitcode=1' ... -- -tests:test_conformance_upb -cmake:cmake_build
bazel test --config=valgrind ... -- -tests:test_conformance_upb -cmake:cmake_build
fi

@ -104,7 +104,7 @@ function test_utf8()
upb.decode(test_messages_proto3.TestAllTypesProto3, serialized)
end)
-- TOOD(haberman): should proto3 accessors also check UTF-8 at set time?
-- TODO(haberman): should proto3 accessors also check UTF-8 at set time?
end
function test_string_double_map()
@ -587,6 +587,20 @@ function test_foo()
assert_equal(set.file[1].name, "google/protobuf/descriptor.proto")
end
function test_descriptor_error()
local symtab = upb.SymbolTable()
local file = descriptor.FileDescriptorProto()
file.name = "test.proto"
file.message_type[1] = descriptor.DescriptorProto{
name = "ABC"
}
file.message_type[2] = descriptor.DescriptorProto{
name = "BC."
}
assert_error(function () symtab:add_file(upb.encode(file)) end)
assert_nil(symtab:lookup_msg("ABC"))
end
function test_gc()
local top = test_messages_proto3.TestAllTypesProto3()
local n = 100

@ -52,17 +52,6 @@
#define PRINT_FAILURE(expr) \
fprintf(stderr, "Assertion failed: %s:%d\n", __FILE__, __LINE__); \
fprintf(stderr, "expr: %s\n", #expr); \
if (testhash) { \
fprintf(stderr, "assertion failed running test %x.\n", testhash); \
if (!filter_hash) { \
fprintf(stderr, \
"Run with the arg %x to run only this test. " \
"(This will also turn on extra debugging output)\n", \
testhash); \
} \
fprintf(stderr, "Failed at %02.2f%% through tests.\n", \
(float)completed * 100 / total); \
}
#define MAX_NESTING 64
@ -467,17 +456,6 @@ upb::pb::DecoderPtr CreateDecoder(upb::Arena* arena,
return ret;
}
uint32_t Hash(const string& proto, const string* expected_output, size_t seam1,
size_t seam2, bool may_skip) {
uint32_t hash = upb_murmur_hash2(proto.c_str(), proto.size(), 0);
if (expected_output)
hash = upb_murmur_hash2(expected_output->c_str(), expected_output->size(), hash);
hash = upb_murmur_hash2(&seam1, sizeof(seam1), hash);
hash = upb_murmur_hash2(&seam2, sizeof(seam2), hash);
hash = upb_murmur_hash2(&may_skip, sizeof(may_skip), hash);
return hash;
}
void CheckBytesParsed(upb::pb::DecoderPtr decoder, size_t ofs) {
// We can't have parsed more data than the decoder callback is telling us it
// parsed.
@ -506,13 +484,11 @@ void do_run_decoder(VerboseParserEnvironment* env, upb::pb::DecoderPtr decoder,
env->Reset(proto.c_str(), proto.size(), may_skip, expected_output == NULL);
decoder.Reset();
testhash = Hash(proto, expected_output, i, j, may_skip);
if (filter_hash && testhash != filter_hash) return;
if (test_mode != COUNT_ONLY) {
output.clear();
if (filter_hash) {
fprintf(stderr, "RUNNING TEST CASE, hash=%x\n", testhash);
fprintf(stderr, "RUNNING TEST CASE\n");
fprintf(stderr, "Input (len=%u): ", (unsigned)proto.size());
PrintBinary(proto);
fprintf(stderr, "\n");
@ -571,7 +547,6 @@ void run_decoder(const string& proto, const string* expected_output) {
}
}
}
testhash = 0;
}
const static string thirty_byte_nop = cat(
@ -871,23 +846,17 @@ void test_valid() {
// Empty protobuf where we never call PutString between
// StartString/EndString.
// Randomly generated hash for this test, hope it doesn't conflict with others
// by chance.
const uint32_t emptyhash = 0x5709be8e;
if (!filter_hash || filter_hash == testhash) {
testhash = emptyhash;
upb::Status status;
upb::Arena arena;
upb::Sink sink(global_handlers, &closures[0]);
upb::pb::DecoderPtr decoder =
CreateDecoder(&arena, global_method, sink, &status);
output.clear();
bool ok = upb::PutBuffer(std::string(), decoder.input());
ASSERT(ok);
ASSERT(status.ok());
if (test_mode == ALL_HANDLERS) {
ASSERT(output == string("<\n>\n"));
}
upb::Status status;
upb::Arena arena;
upb::Sink sink(global_handlers, &closures[0]);
upb::pb::DecoderPtr decoder =
CreateDecoder(&arena, global_method, sink, &status);
output.clear();
bool ok = upb::PutBuffer(std::string(), decoder.input());
ASSERT(ok);
ASSERT(status.ok());
if (test_mode == ALL_HANDLERS) {
ASSERT(output == string("<\n>\n"));
}
test_valid_data_for_signed_type(UPB_DESCRIPTOR_TYPE_DOUBLE,

@ -57,9 +57,9 @@ static void test_scalars(void) {
ASSERT(protobuf_test_messages_proto3_TestAllTypesProto3_optional_uint64(
msg2) == 40);
ASSERT(protobuf_test_messages_proto3_TestAllTypesProto3_optional_float(
msg2) == 50.5);
msg2) - 50.5 < 0.01);
ASSERT(protobuf_test_messages_proto3_TestAllTypesProto3_optional_double(
msg2) == 60.6);
msg2) - 60.6 < 0.01);
ASSERT(protobuf_test_messages_proto3_TestAllTypesProto3_optional_bool(
msg2) == 1);
val = protobuf_test_messages_proto3_TestAllTypesProto3_optional_string(msg2);

@ -618,6 +618,16 @@ void test_delete() {
upb_inttable_uninit(&t);
}
void test_init() {
for (int i = 0; i < 2048; i++) {
/* Tests that the size calculations in init() (lg2 size for target load)
* work for all expected sizes. */
upb_strtable t;
upb_strtable_init2(&t, UPB_CTYPE_BOOL, i, &upb_alloc_global);
upb_strtable_uninit(&t);
}
}
extern "C" {
int run_tests(int argc, char *argv[]) {

@ -0,0 +1,18 @@
licenses(["unencumbered"])
exports_files(["LICENSE"])
cc_library(
name = "wyhash",
hdrs = ["wyhash.h"],
visibility = ["//:__pkg__"],
)
filegroup(
name = "cmake_files",
srcs = glob([
"**/*",
]),
visibility = ["//cmake:__pkg__"],
)

@ -0,0 +1,25 @@
This is free and unencumbered software released into the public domain.
Anyone is free to copy, modify, publish, use, compile, sell, or
distribute this software, either in source code form or as a compiled
binary, for any purpose, commercial or non-commercial, and by any
means.
In jurisdictions that recognize copyright laws, the author or authors
of this software dedicate any and all copyright interest in the
software to the public domain. We make this dedication for the benefit
of the public at large and to the detriment of our heirs and
successors. We intend this dedication to be an overt act of
relinquishment in perpetuity of all present and future rights to this
software under copyright law.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR
OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
OTHER DEALINGS IN THE SOFTWARE.
For more information, please refer to <http://unlicense.org/>

@ -0,0 +1,145 @@
/* Copyright 2020 王一 Wang Yi <godspeed_china@yeah.net>
This is free and unencumbered software released into the public domain. http://unlicense.org/
See github.com/wangyi-fudan/wyhash/ LICENSE
*/
#ifndef wyhash_final_version
#define wyhash_final_version
//defines that change behavior
#ifndef WYHASH_CONDOM
#define WYHASH_CONDOM 1 //0: read 8 bytes before and after boundaries, dangerous but fastest. 1: normal valid behavior 2: extra protection against entropy loss (probability=2^-63), aka. "blind multiplication"
#endif
#define WYHASH_32BIT_MUM 0 //faster on 32 bit system
//includes
#include <stdint.h>
#include <string.h>
#if defined(_MSC_VER) && defined(_M_X64)
#include <intrin.h>
#pragma intrinsic(_umul128)
#endif
#if defined(__GNUC__) || defined(__INTEL_COMPILER) || defined(__clang__)
#define _likely_(x) __builtin_expect(x,1)
#define _unlikely_(x) __builtin_expect(x,0)
#else
#define _likely_(x) (x)
#define _unlikely_(x) (x)
#endif
//mum function
static inline uint64_t _wyrot(uint64_t x) { return (x>>32)|(x<<32); }
static inline void _wymum(uint64_t *A, uint64_t *B){
#if(WYHASH_32BIT_MUM)
uint64_t hh=(*A>>32)*(*B>>32), hl=(*A>>32)*(unsigned)*B, lh=(unsigned)*A*(*B>>32), ll=(uint64_t)(unsigned)*A*(unsigned)*B;
#if(WYHASH_CONDOM>1)
*A^=_wyrot(hl)^hh; *B^=_wyrot(lh)^ll;
#else
*A=_wyrot(hl)^hh; *B=_wyrot(lh)^ll;
#endif
#elif defined(__SIZEOF_INT128__)
__uint128_t r=*A; r*=*B;
#if(WYHASH_CONDOM>1)
*A^=(uint64_t)r; *B^=(uint64_t)(r>>64);
#else
*A=(uint64_t)r; *B=(uint64_t)(r>>64);
#endif
#elif defined(_MSC_VER) && defined(_M_X64)
#if(WYHASH_CONDOM>1)
uint64_t a, b;
a=_umul128(*A,*B,&b);
*A^=a; *B^=b;
#else
*A=_umul128(*A,*B,B);
#endif
#else
uint64_t ha=*A>>32, hb=*B>>32, la=(uint32_t)*A, lb=(uint32_t)*B, hi, lo;
uint64_t rh=ha*hb, rm0=ha*lb, rm1=hb*la, rl=la*lb, t=rl+(rm0<<32), c=t<rl;
lo=t+(rm1<<32); c+=lo<t; hi=rh+(rm0>>32)+(rm1>>32)+c;
#if(WYHASH_CONDOM>1)
*A^=lo; *B^=hi;
#else
*A=lo; *B=hi;
#endif
#endif
}
static inline uint64_t _wymix(uint64_t A, uint64_t B){ _wymum(&A,&B); return A^B; }
//read functions
#ifndef WYHASH_LITTLE_ENDIAN
#if defined(_WIN32) || defined(__LITTLE_ENDIAN__) || (defined(__BYTE_ORDER__) && __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__)
#define WYHASH_LITTLE_ENDIAN 1
#elif defined(__BIG_ENDIAN__) || (defined(__BYTE_ORDER__) && __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__)
#define WYHASH_LITTLE_ENDIAN 0
#endif
#endif
#if (WYHASH_LITTLE_ENDIAN)
static inline uint64_t _wyr8(const uint8_t *p) { uint64_t v; memcpy(&v, p, 8); return v;}
static inline uint64_t _wyr4(const uint8_t *p) { unsigned v; memcpy(&v, p, 4); return v;}
#elif defined(__GNUC__) || defined(__INTEL_COMPILER) || defined(__clang__)
static inline uint64_t _wyr8(const uint8_t *p) { uint64_t v; memcpy(&v, p, 8); return __builtin_bswap64(v);}
static inline uint64_t _wyr4(const uint8_t *p) { unsigned v; memcpy(&v, p, 4); return __builtin_bswap32(v);}
#elif defined(_MSC_VER)
static inline uint64_t _wyr8(const uint8_t *p) { uint64_t v; memcpy(&v, p, 8); return _byteswap_uint64(v);}
static inline uint64_t _wyr4(const uint8_t *p) { unsigned v; memcpy(&v, p, 4); return _byteswap_ulong(v);}
#endif
static inline uint64_t _wyr3(const uint8_t *p, unsigned k) { return (((uint64_t)p[0])<<16)|(((uint64_t)p[k>>1])<<8)|p[k-1];}
//wyhash function
static inline uint64_t _wyfinish16(const uint8_t *p, uint64_t len, uint64_t seed, const uint64_t *secret, uint64_t i){
#if(WYHASH_CONDOM>0)
uint64_t a, b;
if(_likely_(i<=8)){
if(_likely_(i>=4)){ a=_wyr4(p); b=_wyr4(p+i-4); }
else if (_likely_(i)){ a=_wyr3(p,i); b=0; }
else a=b=0;
}
else{ a=_wyr8(p); b=_wyr8(p+i-8); }
return _wymix(secret[1]^len,_wymix(a^secret[1], b^seed));
#else
#define oneshot_shift ((i<8)*((8-i)<<3))
return _wymix(secret[1]^len,_wymix((_wyr8(p)<<oneshot_shift)^secret[1],(_wyr8(p+i-8)>>oneshot_shift)^seed));
#endif
}
static inline uint64_t _wyfinish(const uint8_t *p, uint64_t len, uint64_t seed, const uint64_t *secret, uint64_t i){
if(_likely_(i<=16)) return _wyfinish16(p,len,seed,secret,i);
return _wyfinish(p+16,len,_wymix(_wyr8(p)^secret[1],_wyr8(p+8)^seed),secret,i-16);
}
static inline uint64_t wyhash(const void *key, uint64_t len, uint64_t seed, const uint64_t *secret){
const uint8_t *p=(const uint8_t *)key;
uint64_t i=len; seed^=*secret;
if(_unlikely_(i>64)){
uint64_t see1=seed;
do{
seed=_wymix(_wyr8(p)^secret[1],_wyr8(p+8)^seed)^_wymix(_wyr8(p+16)^secret[2],_wyr8(p+24)^seed);
see1=_wymix(_wyr8(p+32)^secret[3],_wyr8(p+40)^see1)^_wymix(_wyr8(p+48)^secret[4],_wyr8(p+56)^see1);
p+=64; i-=64;
}while(i>64);
seed^=see1;
}
return _wyfinish(p,len,seed,secret,i);
}
//utility functions
const uint64_t _wyp[5] = {0xa0761d6478bd642full, 0xe7037ed1a0b428dbull, 0x8ebc6af09c88c6e3ull, 0x589965cc75374cc3ull, 0x1d8e4e27c47d124full};
static inline uint64_t wyhash64(uint64_t A, uint64_t B){ A^=_wyp[0]; B^=_wyp[1]; _wymum(&A,&B); return _wymix(A^_wyp[0],B^_wyp[1]);}
static inline uint64_t wyrand(uint64_t *seed){ *seed+=_wyp[0]; return _wymix(*seed,*seed^_wyp[1]);}
static inline double wy2u01(uint64_t r){ const double _wynorm=1.0/(1ull<<52); return (r>>12)*_wynorm;}
static inline double wy2gau(uint64_t r){ const double _wynorm=1.0/(1ull<<20); return ((r&0x1fffff)+((r>>21)&0x1fffff)+((r>>42)&0x1fffff))*_wynorm-3.0;}
static inline uint64_t wy2u0k(uint64_t r, uint64_t k){ _wymum(&r,&k); return k; }
static inline void make_secret(uint64_t seed, uint64_t *secret){
uint8_t c[] = {15, 23, 27, 29, 30, 39, 43, 45, 46, 51, 53, 54, 57, 58, 60, 71, 75, 77, 78, 83, 85, 86, 89, 90, 92, 99, 101, 102, 105, 106, 108, 113, 114, 116, 120, 135, 139, 141, 142, 147, 149, 150, 153, 154, 156, 163, 165, 166, 169, 170, 172, 177, 178, 180, 184, 195, 197, 198, 201, 202, 204, 209, 210, 212, 216, 225, 226, 228, 232, 240 };
for(size_t i=0;i<5;i++){
uint8_t ok;
do{
ok=1; secret[i]=0;
for(size_t j=0;j<64;j+=8) secret[i]|=((uint64_t)c[wyrand(&seed)%sizeof(c)])<<j;
if(secret[i]%2==0){ ok=0; continue; }
for(size_t j=0;j<i;j++)
#if defined(__GNUC__) || defined(__INTEL_COMPILER) || defined(__clang__)
if(__builtin_popcountll(secret[j]^secret[i])!=32){ ok=0; break; }
#elif defined(_MSC_VER) && defined(_M_X64)
if(_mm_popcnt_u64(secret[j]^secret[i])!=32){ ok=0; break; }
#endif
if(!ok)continue;
for(uint64_t j=3;j<0x100000000ull;j+=2) if(secret[i]%j==0){ ok=0; break; }
}while(!ok);
}
}
#endif

@ -575,7 +575,8 @@ static const char *decode_msg(upb_decstate *d, const char *ptr, upb_msg *msg,
int ndx = field->descriptortype;
if (_upb_isrepeated(field)) ndx += 18;
ptr = decode_varint32(d, ptr, &val.size);
if (val.size >= INT32_MAX || ptr - d->end + val.size > d->limit) {
if (val.size >= INT32_MAX ||
ptr - d->end + (int32_t)val.size > d->limit) {
decode_err(d); /* Length overflow. */
}
op = delim_ops[ndx];

File diff suppressed because it is too large Load Diff

@ -293,6 +293,7 @@ int upb_symtab_filecount(const upb_symtab *s);
const upb_filedef *upb_symtab_addfile(
upb_symtab *s, const google_protobuf_FileDescriptorProto *file,
upb_status *status);
size_t _upb_symtab_bytesloaded(const upb_symtab *s);
/* For generated code only: loads a generated descriptor. */
typedef struct upb_def_init {

@ -2869,7 +2869,7 @@ static upb_json_parsermethod *parsermethod_new(upb_json_codecache *c,
upb_byteshandler_setstring(&m->input_handler_, parse, m);
upb_byteshandler_setendstr(&m->input_handler_, end, m);
upb_strtable_init2(&m->name_table, UPB_CTYPE_CONSTPTR, alloc);
upb_strtable_init2(&m->name_table, UPB_CTYPE_CONSTPTR, 4, alloc);
/* Build name_table */

@ -396,6 +396,8 @@ static void jsondec_resize(jsondec *d, char **buf, char **end, char **buf_end) {
size_t size = UPB_MAX(8, 2 * oldsize);
*buf = upb_arena_realloc(d->arena, *buf, len, size);
if (!*buf) jsondec_err(d, "Out of memory");
*end = *buf + len;
*buf_end = *buf + size;
}

@ -9,7 +9,7 @@ extern "C" {
#endif
enum {
/* When set, emits 0/default values. TOOD(haberman): proto3 only? */
/* When set, emits 0/default values. TODO(haberman): proto3 only? */
UPB_JSONENC_EMITDEFAULTS = 1,
/* When set, use normal (snake_caes) field names instead of JSON (camelCase)

@ -11,7 +11,7 @@ static const size_t overhead = sizeof(upb_msg_internal);
static const upb_msg_internal *upb_msg_getinternal_const(const upb_msg *msg) {
ptrdiff_t size = sizeof(upb_msg_internal);
return UPB_PTR_AT(msg, -size, upb_msg_internal);
return (upb_msg_internal*)((char*)msg - size);
}
upb_msg *_upb_msg_new(const upb_msglayout *l, upb_arena *a) {
@ -111,13 +111,16 @@ void *_upb_array_resize_fallback(upb_array **arr_ptr, size_t size,
bool _upb_array_append_fallback(upb_array **arr_ptr, const void *value,
int elem_size_lg2, upb_arena *arena) {
upb_array *arr = getorcreate_array(arr_ptr, elem_size_lg2, arena);
size_t elem = arr->len;
char *data;
if (!arr) return false;
if (!arr || !_upb_array_resize(arr, elem + 1, arena)) return false;
size_t elems = arr->len;
data = _upb_array_ptr(arr);
memcpy(data + (elem << elem_size_lg2), value, 1 << elem_size_lg2);
if (!_upb_array_resize(arr, elems + 1, arena)) {
return false;
}
char *data = _upb_array_ptr(arr);
memcpy(data + (elems << elem_size_lg2), value, 1 << elem_size_lg2);
return true;
}
@ -130,7 +133,7 @@ upb_map *_upb_map_new(upb_arena *a, size_t key_size, size_t value_size) {
return NULL;
}
upb_strtable_init2(&map->table, UPB_CTYPE_INT32, upb_arena_alloc(a));
upb_strtable_init2(&map->table, UPB_CTYPE_INT32, 4, upb_arena_alloc(a));
map->key_size = key_size;
map->val_size = value_size;

@ -110,7 +110,8 @@ UPB_INLINE upb_msg *_upb_msg_new_inl(const upb_msglayout *l, upb_arena *a) {
upb_msg *_upb_msg_new(const upb_msglayout *l, upb_arena *a);
UPB_INLINE upb_msg_internal *upb_msg_getinternal(upb_msg *msg) {
return UPB_PTR_AT(msg, -sizeof(upb_msg_internal), upb_msg_internal);
ptrdiff_t size = sizeof(upb_msg_internal);
return (upb_msg_internal*)((char*)msg - size);
}
/* Clears the given message. */
@ -205,9 +206,11 @@ typedef struct {
uintptr_t data; /* Tagged ptr: low 3 bits of ptr are lg2(elem size). */
size_t len; /* Measured in elements. */
size_t size; /* Measured in elements. */
uint64_t junk;
} upb_array;
UPB_INLINE const void *_upb_array_constptr(const upb_array *arr) {
UPB_ASSERT((arr->data & 7) <= 4);
return (void*)(arr->data & ~(uintptr_t)7);
}
@ -222,15 +225,17 @@ UPB_INLINE void *_upb_array_ptr(upb_array *arr) {
UPB_INLINE uintptr_t _upb_tag_arrptr(void* ptr, int elem_size_lg2) {
UPB_ASSERT(elem_size_lg2 <= 4);
UPB_ASSERT(((uintptr_t)ptr & 7) == 0);
return (uintptr_t)ptr | (unsigned)elem_size_lg2;
}
UPB_INLINE upb_array *_upb_array_new(upb_arena *a, size_t init_size,
int elem_size_lg2) {
const size_t arr_size = UPB_ALIGN_UP(sizeof(upb_array), 8);
const size_t bytes = sizeof(upb_array) + (init_size << elem_size_lg2);
upb_array *arr = (upb_array*)upb_arena_malloc(a, bytes);
if (!arr) return NULL;
arr->data = _upb_tag_arrptr(arr + 1, elem_size_lg2);
arr->data = _upb_tag_arrptr(UPB_PTR_AT(arr, arr_size, void), elem_size_lg2);
arr->len = 0;
arr->size = init_size;
return arr;
@ -403,17 +408,17 @@ UPB_INLINE void _upb_map_fromkey(upb_strview key, void* out, size_t size) {
}
}
UPB_INLINE upb_value _upb_map_tovalue(const void *val, size_t size,
upb_arena *a) {
upb_value ret = {0};
UPB_INLINE bool _upb_map_tovalue(const void *val, size_t size, upb_value *msgval,
upb_arena *a) {
if (size == UPB_MAPTYPE_STRING) {
upb_strview *strp = (upb_strview*)upb_arena_malloc(a, sizeof(*strp));
if (!strp) return false;
*strp = *(upb_strview*)val;
ret = upb_value_ptr(strp);
*msgval = upb_value_ptr(strp);
} else {
memcpy(&ret, val, size);
memcpy(msgval, val, size);
}
return ret;
return true;
}
UPB_INLINE void _upb_map_fromvalue(upb_value val, void* out, size_t size) {
@ -455,7 +460,8 @@ UPB_INLINE void* _upb_map_next(const upb_map *map, size_t *iter) {
UPB_INLINE bool _upb_map_set(upb_map *map, const void *key, size_t key_size,
void *val, size_t val_size, upb_arena *arena) {
upb_strview strkey = _upb_map_tokey(key, key_size);
upb_value tabval = _upb_map_tovalue(val, val_size, arena);
upb_value tabval = {0};
if (!_upb_map_tovalue(val, val_size, &tabval, arena)) return false;
upb_alloc *a = upb_arena_alloc(arena);
/* TODO(haberman): add overwrite operation to minimize number of lookups. */

@ -1,26 +0,0 @@
#include "upb/port_def.inc"
#ifdef UPB_MSVC_VSNPRINTF
/* Visual C++ earlier than 2015 doesn't have standard C99 snprintf and
* vsnprintf. To support them, missing functions are manually implemented
* using the existing secure functions. */
int msvc_vsnprintf(char* s, size_t n, const char* format, va_list arg) {
if (!s) {
return _vscprintf(format, arg);
}
int ret = _vsnprintf_s(s, n, _TRUNCATE, format, arg);
if (ret < 0) {
ret = _vscprintf(format, arg);
}
return ret;
}
int msvc_snprintf(char* s, size_t n, const char* format, ...) {
va_list arg;
va_start(arg, format);
int ret = msvc_vsnprintf(s, n, format, arg);
va_end(arg);
return ret;
}
#endif

@ -96,20 +96,17 @@ bool upb_msg_has(const upb_msg *msg, const upb_fielddef *f) {
const upb_fielddef *upb_msg_whichoneof(const upb_msg *msg,
const upb_oneofdef *o) {
upb_oneof_iter i;
const upb_fielddef *f;
const upb_msglayout_field *field;
const upb_msgdef *m = upb_oneofdef_containingtype(o);
uint32_t oneof_case;
/* This is far from optimal. */
upb_oneof_begin(&i, o);
if (upb_oneof_done(&i)) return false;
f = upb_oneof_iter_field(&i);
field = upb_fielddef_layout(f);
oneof_case = _upb_getoneofcase_field(msg, field);
return oneof_case ? upb_msgdef_itof(m, oneof_case) : NULL;
const upb_fielddef *f = upb_oneofdef_field(o, 0);
if (upb_oneofdef_issynthetic(o)) {
UPB_ASSERT(upb_oneofdef_fieldcount(o) == 1);
return upb_msg_has(msg, f) ? f : NULL;
} else {
const upb_msglayout_field *field = upb_fielddef_layout(f);
uint32_t oneof_case = _upb_getoneofcase_field(msg, field);
f = oneof_case ? upb_oneofdef_itof(o, oneof_case) : NULL;
UPB_ASSERT((f != NULL) == (oneof_case != 0));
return f;
}
}
upb_msgval upb_msg_get(const upb_msg *msg, const upb_fielddef *f) {

@ -4,10 +4,12 @@
** Implementation is heavily inspired by Lua's ltable.c.
*/
#include "upb/table.int.h"
#include <string.h>
#include "third_party/wyhash/wyhash.h"
#include "upb/table.int.h"
/* Must be last. */
#include "upb/port_def.inc"
#define UPB_MAXARRSIZE 16 /* 64k. */
@ -87,11 +89,7 @@ static upb_tabent *mutable_entries(upb_table *t) {
}
static bool isfull(upb_table *t) {
if (upb_table_size(t) == 0) {
return true;
} else {
return ((double)(t->count + 1) / upb_table_size(t)) > MAX_LOAD;
}
return t->count == t->max_count;
}
static bool init(upb_table *t, uint8_t size_lg2, upb_alloc *a) {
@ -100,6 +98,7 @@ static bool init(upb_table *t, uint8_t size_lg2, upb_alloc *a) {
t->count = 0;
t->size_lg2 = size_lg2;
t->mask = upb_table_size(t) ? upb_table_size(t) - 1 : 0;
t->max_count = upb_table_size(t) * MAX_LOAD;
bytes = upb_table_size(t) * sizeof(upb_tabent);
if (bytes > 0) {
t->entries = upb_malloc(a, bytes);
@ -115,9 +114,17 @@ static void uninit(upb_table *t, upb_alloc *a) {
upb_free(a, mutable_entries(t));
}
static upb_tabent *emptyent(upb_table *t) {
upb_tabent *e = mutable_entries(t) + upb_table_size(t);
while (1) { if (upb_tabent_isempty(--e)) return e; UPB_ASSERT(e > t->entries); }
static upb_tabent *emptyent(upb_table *t, upb_tabent *e) {
upb_tabent *begin = mutable_entries(t);
upb_tabent *end = begin + upb_table_size(t);
for (e = e + 1; e < end; e++) {
if (upb_tabent_isempty(e)) return e;
}
for (e = begin; e < end; e++) {
if (upb_tabent_isempty(e)) return e;
}
UPB_ASSERT(false);
return NULL;
}
static upb_tabent *getentry_mutable(upb_table *t, uint32_t hash) {
@ -173,7 +180,7 @@ static void insert(upb_table *t, lookupkey_t key, upb_tabkey tabkey,
our_e->next = NULL;
} else {
/* Collision. */
upb_tabent *new_e = emptyent(t);
upb_tabent *new_e = emptyent(t, mainpos_e);
/* Head of collider's chain. */
upb_tabent *chain = getentry_mutable(t, hashfunc(mainpos_e->key));
if (chain == mainpos_e) {
@ -268,10 +275,14 @@ static upb_tabkey strcopy(lookupkey_t k2, upb_alloc *a) {
return (uintptr_t)str;
}
static uint32_t table_hash(const char *p, size_t n) {
return wyhash(p, n, 0, _wyp);
}
static uint32_t strhash(upb_tabkey key) {
uint32_t len;
char *str = upb_tabstr(key, &len);
return upb_murmur_hash2(str, len, 0);
return table_hash(str, len);
}
static bool streql(upb_tabkey k1, lookupkey_t k2) {
@ -280,9 +291,14 @@ static bool streql(upb_tabkey k1, lookupkey_t k2) {
return len == k2.str.len && (len == 0 || memcmp(str, k2.str.str, len) == 0);
}
bool upb_strtable_init2(upb_strtable *t, upb_ctype_t ctype, upb_alloc *a) {
bool upb_strtable_init2(upb_strtable *t, upb_ctype_t ctype,
size_t expected_size, upb_alloc *a) {
UPB_UNUSED(ctype); /* TODO(haberman): rm */
return init(&t->t, 2, a);
// Multiply by approximate reciprocal of MAX_LOAD (0.85), with pow2 denominator.
size_t need_entries = (expected_size + 1) * 1204 / 1024;
UPB_ASSERT(need_entries >= expected_size * 0.85);
int size_lg2 = _upb_lg2ceil(need_entries);
return init(&t->t, size_lg2, a);
}
void upb_strtable_clear(upb_strtable *t) {
@ -333,20 +349,20 @@ bool upb_strtable_insert3(upb_strtable *t, const char *k, size_t len,
tabkey = strcopy(key, a);
if (tabkey == 0) return false;
hash = upb_murmur_hash2(key.str.str, key.str.len, 0);
hash = table_hash(key.str.str, key.str.len);
insert(&t->t, key, tabkey, v, hash, &strhash, &streql);
return true;
}
bool upb_strtable_lookup2(const upb_strtable *t, const char *key, size_t len,
upb_value *v) {
uint32_t hash = upb_murmur_hash2(key, len, 0);
uint32_t hash = table_hash(key, len);
return lookup(&t->t, strkey2(key, len), v, hash, &streql);
}
bool upb_strtable_remove3(upb_strtable *t, const char *key, size_t len,
upb_value *val, upb_alloc *alloc) {
uint32_t hash = upb_murmur_hash2(key, len, 0);
uint32_t hash = table_hash(key, len);
upb_tabkey tabkey;
if (rm(&t->t, strkey2(key, len), val, &tabkey, hash, &streql)) {
if (alloc) {
@ -699,182 +715,3 @@ bool upb_inttable_iter_isequal(const upb_inttable_iter *i1,
return i1->t == i2->t && i1->index == i2->index &&
i1->array_part == i2->array_part;
}
#if defined(UPB_UNALIGNED_READS_OK) || defined(__s390x__)
/* -----------------------------------------------------------------------------
* MurmurHash2, by Austin Appleby (released as public domain).
* Reformatted and C99-ified by Joshua Haberman.
* Note - This code makes a few assumptions about how your machine behaves -
* 1. We can read a 4-byte value from any address without crashing
* 2. sizeof(int) == 4 (in upb this limitation is removed by using uint32_t
* And it has a few limitations -
* 1. It will not work incrementally.
* 2. It will not produce the same results on little-endian and big-endian
* machines. */
uint32_t upb_murmur_hash2(const void *key, size_t len, uint32_t seed) {
/* 'm' and 'r' are mixing constants generated offline.
* They're not really 'magic', they just happen to work well. */
const uint32_t m = 0x5bd1e995;
const int32_t r = 24;
/* Initialize the hash to a 'random' value */
uint32_t h = seed ^ len;
/* Mix 4 bytes at a time into the hash */
const uint8_t * data = (const uint8_t *)key;
while(len >= 4) {
uint32_t k;
memcpy(&k, data, sizeof(k));
k *= m;
k ^= k >> r;
k *= m;
h *= m;
h ^= k;
data += 4;
len -= 4;
}
/* Handle the last few bytes of the input array */
switch(len) {
case 3: h ^= data[2] << 16;
case 2: h ^= data[1] << 8;
case 1: h ^= data[0]; h *= m;
};
/* Do a few final mixes of the hash to ensure the last few
* bytes are well-incorporated. */
h ^= h >> 13;
h *= m;
h ^= h >> 15;
return h;
}
#else /* !UPB_UNALIGNED_READS_OK */
/* -----------------------------------------------------------------------------
* MurmurHashAligned2, by Austin Appleby
* Same algorithm as MurmurHash2, but only does aligned reads - should be safer
* on certain platforms.
* Performance will be lower than MurmurHash2 */
#define MIX(h,k,m) { k *= m; k ^= k >> r; k *= m; h *= m; h ^= k; }
uint32_t upb_murmur_hash2(const void * key, size_t len, uint32_t seed) {
const uint32_t m = 0x5bd1e995;
const int32_t r = 24;
const uint8_t * data = (const uint8_t *)key;
uint32_t h = (uint32_t)(seed ^ len);
uint8_t align = (uintptr_t)data & 3;
if(align && (len >= 4)) {
/* Pre-load the temp registers */
uint32_t t = 0, d = 0;
int32_t sl;
int32_t sr;
switch(align) {
case 1: t |= data[2] << 16; /* fallthrough */
case 2: t |= data[1] << 8; /* fallthrough */
case 3: t |= data[0];
}
t <<= (8 * align);
data += 4-align;
len -= 4-align;
sl = 8 * (4-align);
sr = 8 * align;
/* Mix */
while(len >= 4) {
uint32_t k;
d = *(uint32_t *)data;
t = (t >> sr) | (d << sl);
k = t;
MIX(h,k,m);
t = d;
data += 4;
len -= 4;
}
/* Handle leftover data in temp registers */
d = 0;
if(len >= align) {
uint32_t k;
switch(align) {
case 3: d |= data[2] << 16; /* fallthrough */
case 2: d |= data[1] << 8; /* fallthrough */
case 1: d |= data[0]; /* fallthrough */
}
k = (t >> sr) | (d << sl);
MIX(h,k,m);
data += align;
len -= align;
/* ----------
* Handle tail bytes */
switch(len) {
case 3: h ^= data[2] << 16; /* fallthrough */
case 2: h ^= data[1] << 8; /* fallthrough */
case 1: h ^= data[0]; h *= m; /* fallthrough */
};
} else {
switch(len) {
case 3: d |= data[2] << 16; /* fallthrough */
case 2: d |= data[1] << 8; /* fallthrough */
case 1: d |= data[0]; /* fallthrough */
case 0: h ^= (t >> sr) | (d << sl); h *= m;
}
}
h ^= h >> 13;
h *= m;
h ^= h >> 15;
return h;
} else {
while(len >= 4) {
uint32_t k = *(uint32_t *)data;
MIX(h,k,m);
data += 4;
len -= 4;
}
/* ----------
* Handle tail bytes */
switch(len) {
case 3: h ^= data[2] << 16; /* fallthrough */
case 2: h ^= data[1] << 8; /* fallthrough */
case 1: h ^= data[0]; h *= m;
};
h ^= h >> 13;
h *= m;
h ^= h >> 15;
return h;
}
}
#undef MIX
#endif /* UPB_UNALIGNED_READS_OK */

@ -13,7 +13,7 @@
** store pointers or integers of at least 32 bits (upb isn't really useful on
** systems where sizeof(void*) < 4).
**
** The table must be homogenous (all values of the same type). In debug
** The table must be homogeneous (all values of the same type). In debug
** mode, we check this on insert and lookup.
*/
@ -171,7 +171,8 @@ typedef struct _upb_tabent {
typedef struct {
size_t count; /* Number of entries in the hash part. */
size_t mask; /* Mask to turn hash value -> bucket. */
uint32_t mask; /* Mask to turn hash value -> bucket. */
uint32_t max_count; /* Max count before we hit our load limit. */
uint8_t size_lg2; /* Size of the hashtable part is 2^size_lg2 entries. */
/* Hash table entries.
@ -230,7 +231,8 @@ UPB_INLINE bool upb_arrhas(upb_tabval key) {
/* Initialize and uninitialize a table, respectively. If memory allocation
* failed, false is returned that the table is uninitialized. */
bool upb_inttable_init2(upb_inttable *table, upb_ctype_t ctype, upb_alloc *a);
bool upb_strtable_init2(upb_strtable *table, upb_ctype_t ctype, upb_alloc *a);
bool upb_strtable_init2(upb_strtable *table, upb_ctype_t ctype,
size_t expected_size, upb_alloc *a);
void upb_inttable_uninit2(upb_inttable *table, upb_alloc *a);
void upb_strtable_uninit2(upb_strtable *table, upb_alloc *a);
@ -239,7 +241,7 @@ UPB_INLINE bool upb_inttable_init(upb_inttable *table, upb_ctype_t ctype) {
}
UPB_INLINE bool upb_strtable_init(upb_strtable *table, upb_ctype_t ctype) {
return upb_strtable_init2(table, ctype, &upb_alloc_global);
return upb_strtable_init2(table, ctype, 4, &upb_alloc_global);
}
UPB_INLINE void upb_inttable_uninit(upb_inttable *table) {

@ -313,6 +313,17 @@ UPB_INLINE uint64_t _upb_be_swap64(uint64_t val) {
}
}
UPB_INLINE int _upb_lg2ceil(int x) {
if (x <= 1) return 0;
#ifdef __GNUC__
return 32 - __builtin_clz(x - 1);
#else
int lg2 = 0;
while (1 << lg2 < x) lg2++;
return lg2;
#endif
}
#include "upb/port_undef.inc"
#ifdef __cplusplus

Loading…
Cancel
Save