diff --git a/.bazelrc b/.bazelrc new file mode 100644 index 0000000000..df04df138f --- /dev/null +++ b/.bazelrc @@ -0,0 +1,20 @@ +# Use our custom-configured c++ toolchain. + +build:m32 --copt=-m32 --linkopt=-m32 +build:asan --copt=-fsanitize=address --linkopt=-fsanitize=address +build:valgrind --run_under='valgrind --leak-check=full --error-exitcode=1' + +build:ubsan --copt=-fsanitize=undefined --linkopt=-fsanitize=undefined --action_env=UBSAN_OPTIONS=halt_on_error=1:print_stacktrace=1 +# Workaround for the fact that Bazel links with $CC, not $CXX +# https://github.com/bazelbuild/bazel/issues/11122#issuecomment-613746748 +build:ubsan --copt=-fno-sanitize=function --copt=-fno-sanitize=vptr + +build:Werror --copt=-Werror +build:Werror --per_file_copt=json/parser@-Wno-error +build:Werror --per_file_copt=com_google_protobuf@-Wno-error + +# GCC's -fanalyzer, a deeper static analysis than normal warnings. +build:analyzer --copt=-fanalyzer --copt=-Werror +build:analyzer --per_file_copt=json/parser@-fno-analyzer +build:analyzer --per_file_copt=com_google_protobuf@-fno-analyzer +build:analyzer --per_file_copt=com_github_google_benchmark@-fno-analyzer diff --git a/BUILD b/BUILD index 9f2698724d..5ba41c9edd 100644 --- a/BUILD +++ b/BUILD @@ -52,9 +52,6 @@ upb_proto_library_copts( cc_library( name = "port", - srcs = [ - "upb/port.c", - ], copts = UPB_DEFAULT_COPTS, textual_hdrs = [ "upb/port_def.inc", @@ -87,6 +84,7 @@ cc_library( deps = [ ":fastdecode", ":port", + "//third_party/wyhash", ], ) @@ -321,6 +319,7 @@ cc_library( srcs = ["upb.c"], hdrs = ["upb.h"], copts = UPB_DEFAULT_COPTS, + deps = ["//third_party/wyhash"], ) upb_amalgamation( @@ -347,6 +346,7 @@ cc_library( srcs = ["php-upb.c"], hdrs = ["php-upb.h"], copts = UPB_DEFAULT_COPTS, + deps = ["//third_party/wyhash"], ) upb_amalgamation( @@ -372,6 +372,7 @@ cc_library( srcs = ["ruby-upb.c"], hdrs = ["ruby-upb.h"], copts = UPB_DEFAULT_COPTS, + deps = ["//third_party/wyhash"], ) exports_files( @@ -401,6 +402,7 @@ filegroup( "upbc/**/*", "upb/**/*", "tests/**/*", + "third_party/**/*", ]), visibility = ["//cmake:__pkg__"], ) diff --git a/WORKSPACE b/WORKSPACE index c1c8c3be9b..a0b04ad044 100644 --- a/WORKSPACE +++ b/WORKSPACE @@ -1,6 +1,7 @@ workspace(name = "upb") load("@bazel_tools//tools/build_defs/repo:http.bzl", "http_archive") +load("@bazel_tools//tools/build_defs/repo:git.bzl", "new_git_repository") load("//bazel:workspace_deps.bzl", "upb_deps") upb_deps() @@ -37,3 +38,11 @@ http_archive( strip_prefix = "benchmark-16703ff83c1ae6d53e5155df3bb3ab0bc96083be", sha256 = "59f918c8ccd4d74b6ac43484467b500f1d64b40cc1010daa055375b322a43ba3", ) + +new_git_repository( + name = "com_google_googleapis", + remote = "https://github.com/googleapis/googleapis.git", + branch = "master", + build_file = "//benchmarks:BUILD.googleapis", + patch_cmds = ["find google -type f -name BUILD.bazel -delete"], +) diff --git a/bazel/build_defs.bzl b/bazel/build_defs.bzl index 0d31cecfea..ae41577ed9 100644 --- a/bazel/build_defs.bzl +++ b/bazel/build_defs.bzl @@ -22,7 +22,12 @@ UPB_DEFAULT_COPTS = select({ "-std=c99", "-pedantic", "-Werror=pedantic", + "-Wall", "-Wstrict-prototypes", + # GCC (at least) emits spurious warnings for this that cannot be fixed + # without introducing redundant initialization (with runtime cost): + # https://gcc.gnu.org/bugzilla/show_bug.cgi?id=80635 + #"-Wno-maybe-uninitialized", # copybara:strip_end ], }) diff --git a/bazel/upb_proto_library.bzl b/bazel/upb_proto_library.bzl index 392404fa2b..d4d51604f4 100644 --- a/bazel/upb_proto_library.bzl +++ b/bazel/upb_proto_library.bzl @@ -207,10 +207,7 @@ def _upb_proto_rule_impl(ctx): fail("proto_library rule must generate _UpbWrappedCcInfo or " + "_UpbDefsWrappedCcInfo (aspect should have handled this).") - if type(cc_info.linking_context.libraries_to_link) == "list": - lib = cc_info.linking_context.libraries_to_link[0] - else: - lib = cc_info.linking_context.libraries_to_link.to_list()[0] + lib = cc_info.linking_context.linker_inputs.to_list()[0].libraries[0] files = _filter_none([ lib.static_library, lib.pic_static_library, @@ -295,6 +292,7 @@ _upb_proto_library_aspect = aspect( attr_aspects = ["deps"], fragments = ["cpp"], toolchains = ["@bazel_tools//tools/cpp:toolchain_type"], + incompatible_use_toolchain_transition = True, ) upb_proto_library = rule( @@ -358,6 +356,7 @@ _upb_proto_reflection_library_aspect = aspect( attr_aspects = ["deps"], fragments = ["cpp"], toolchains = ["@bazel_tools//tools/cpp:toolchain_type"], + incompatible_use_toolchain_transition = True, ) upb_proto_reflection_library = rule( diff --git a/benchmarks/BUILD b/benchmarks/BUILD index 87315a34e9..4ed4042a6c 100644 --- a/benchmarks/BUILD +++ b/benchmarks/BUILD @@ -21,6 +21,11 @@ upb_proto_reflection_library( deps = [":benchmark_descriptor_proto"], ) +upb_proto_reflection_library( + name = "ads_upb_proto_reflection", + deps = ["@com_google_googleapis//:ads_proto"], +) + cc_proto_library( name = "benchmark_descriptor_cc_proto", deps = [":benchmark_descriptor_proto"], @@ -45,8 +50,10 @@ cc_binary( ":benchmark_descriptor_sv_cc_proto", ":benchmark_descriptor_upb_proto", ":benchmark_descriptor_upb_proto_reflection", + ":ads_upb_proto_reflection", "//:descriptor_upb_proto", "//:reflection", + "@com_google_absl//absl/container:flat_hash_set", "@com_github_google_benchmark//:benchmark_main", "@com_google_protobuf//:protobuf", ], diff --git a/benchmarks/BUILD.googleapis b/benchmarks/BUILD.googleapis new file mode 100644 index 0000000000..c90815b09a --- /dev/null +++ b/benchmarks/BUILD.googleapis @@ -0,0 +1,29 @@ +load( + "@rules_proto//proto:defs.bzl", + "proto_library", +) + +proto_library( + name = "ads_proto", + srcs = glob([ + "google/ads/googleads/v5/**/*.proto", + "google/api/**/*.proto", + "google/rpc/**/*.proto", + "google/longrunning/**/*.proto", + "google/logging/**/*.proto", + ]), + #srcs = ["google/ads/googleads/v5/services/google_ads_service.proto"], + visibility = ["//visibility:public"], + deps = [ + "@com_google_protobuf//:any_proto", + "@com_google_protobuf//:empty_proto", + "@com_google_protobuf//:descriptor_proto", + "@com_google_protobuf//:field_mask_proto", + "@com_google_protobuf//:duration_proto", + "@com_google_protobuf//:timestamp_proto", + "@com_google_protobuf//:struct_proto", + "@com_google_protobuf//:api_proto", + "@com_google_protobuf//:type_proto", + "@com_google_protobuf//:wrappers_proto", + ], +) diff --git a/benchmarks/benchmark.cc b/benchmarks/benchmark.cc index 8d64626ffc..cc510bdd6c 100644 --- a/benchmarks/benchmark.cc +++ b/benchmarks/benchmark.cc @@ -2,16 +2,13 @@ #include #include -// For benchmarks of parsing speed. +#include "absl/container/flat_hash_set.h" #include "benchmarks/descriptor.pb.h" #include "benchmarks/descriptor.upb.h" #include "benchmarks/descriptor.upbdefs.h" #include "benchmarks/descriptor_sv.pb.h" - -// For for benchmarks of building descriptors. -#include "google/protobuf/descriptor.upb.h" +#include "google/ads/googleads/v5/services/google_ads_service.upbdefs.h" #include "google/protobuf/descriptor.pb.h" - #include "upb/def.hpp" upb_strview descriptor = benchmarks_descriptor_proto_upbdefinit.descriptor; @@ -20,6 +17,16 @@ namespace protobuf = ::google::protobuf; /* A buffer big enough to parse descriptor.proto without going to heap. */ char buf[65535]; +void CollectFileDescriptors(const upb_def_init* file, + std::vector& serialized_files, + absl::flat_hash_set& seen) { + if (!seen.insert(file).second) return; + for (upb_def_init **deps = file->deps; *deps; deps++) { + CollectFileDescriptors(*deps, serialized_files, seen); + } + serialized_files.push_back(file->descriptor); +} + static void BM_ArenaOneAlloc(benchmark::State& state) { for (auto _ : state) { upb_arena* arena = upb_arena_new(); @@ -39,22 +46,28 @@ static void BM_ArenaInitialBlockOneAlloc(benchmark::State& state) { BENCHMARK(BM_ArenaInitialBlockOneAlloc); static void BM_LoadDescriptor_Upb(benchmark::State& state) { + size_t bytes_per_iter = 0; for (auto _ : state) { upb::SymbolTable symtab; - upb::Arena arena; - google_protobuf_FileDescriptorProto* file_proto = - google_protobuf_FileDescriptorProto_parse(descriptor.data, - descriptor.size, arena.ptr()); - upb::FileDefPtr file_def = symtab.AddFile(file_proto, NULL); - if (!file_def) { - printf("Failed to add file.\n"); - exit(1); - } + upb_benchmark_DescriptorProto_getmsgdef(symtab.ptr()); + bytes_per_iter = _upb_symtab_bytesloaded(symtab.ptr()); } - state.SetBytesProcessed(state.iterations() * descriptor.size); + state.SetBytesProcessed(state.iterations() * bytes_per_iter); } BENCHMARK(BM_LoadDescriptor_Upb); +static void BM_LoadAdsDescriptor_Upb(benchmark::State& state) { + size_t bytes_per_iter = 0; + for (auto _ : state) { + upb::SymbolTable symtab; + google_ads_googleads_v5_services_SearchGoogleAdsRequest_getmsgdef( + symtab.ptr()); + bytes_per_iter = _upb_symtab_bytesloaded(symtab.ptr()); + } + state.SetBytesProcessed(state.iterations() * bytes_per_iter); +} +BENCHMARK(BM_LoadAdsDescriptor_Upb); + static void BM_LoadDescriptor_Proto2(benchmark::State& state) { for (auto _ : state) { protobuf::Arena arena; @@ -73,6 +86,35 @@ static void BM_LoadDescriptor_Proto2(benchmark::State& state) { } BENCHMARK(BM_LoadDescriptor_Proto2); +static void BM_LoadAdsDescriptor_Proto2(benchmark::State& state) { + extern upb_def_init google_ads_googleads_v5_services_google_ads_service_proto_upbdefinit; + std::vector serialized_files; + absl::flat_hash_set seen_files; + CollectFileDescriptors( + &google_ads_googleads_v5_services_google_ads_service_proto_upbdefinit, + serialized_files, seen_files); + size_t bytes_per_iter = 0; + for (auto _ : state) { + bytes_per_iter = 0; + protobuf::Arena arena; + protobuf::DescriptorPool pool; + for (auto file : serialized_files) { + protobuf::StringPiece input(file.data, file.size); + auto proto = protobuf::Arena::CreateMessage( + &arena); + bool ok = proto->ParseFrom(input) && + pool.BuildFile(*proto) != nullptr; + if (!ok) { + printf("Failed to add file.\n"); + exit(1); + } + bytes_per_iter += input.size(); + } + } + state.SetBytesProcessed(state.iterations() * bytes_per_iter); +} +BENCHMARK(BM_LoadAdsDescriptor_Proto2); + static void BM_Parse_Upb_FileDesc_WithArena(benchmark::State& state) { size_t bytes = 0; for (auto _ : state) { diff --git a/cmake/BUILD b/cmake/BUILD index d59475d909..53fbd07abe 100644 --- a/cmake/BUILD +++ b/cmake/BUILD @@ -28,6 +28,7 @@ genrule( "//:BUILD", "//:WORKSPACE", "//:cmake_files", + "//third_party/wyhash:cmake_files", ":cmake_files", ], outs = ["generated-in/CMakeLists.txt"], @@ -84,6 +85,7 @@ sh_test( data = [ ":cmake_files", "//:cmake_files", + "//third_party/wyhash:cmake_files", ], deps = ["@bazel_tools//tools/bash/runfiles"], ) diff --git a/cmake/CMakeLists.txt b/cmake/CMakeLists.txt index fcdce9936a..d4cbcc91fc 100644 --- a/cmake/CMakeLists.txt +++ b/cmake/CMakeLists.txt @@ -61,8 +61,7 @@ endif() enable_testing() -add_library(port - ../upb/port.c) +add_library(port INTERFACE) add_library(upb ../upb/decode.c ../upb/decode.int.h @@ -79,7 +78,8 @@ add_library(upb ../upb/upb.hpp) target_link_libraries(upb fastdecode - port) + port + /third_party/wyhash) add_library(fastdecode ../upb/decode.int.h ../upb/decode_fast.c @@ -160,5 +160,6 @@ add_library(upb_json target_link_libraries(upb_json upb upb_pb) +add_library(wyhash INTERFACE) diff --git a/cmake/make_cmakelists.py b/cmake/make_cmakelists.py index d63740439b..035debb730 100755 --- a/cmake/make_cmakelists.py +++ b/cmake/make_cmakelists.py @@ -180,6 +180,9 @@ class WorkspaceFileFunctions(object): def git_repository(self, **kwargs): pass + def new_git_repository(self, **kwargs): + pass + def bazel_version_repository(self, **kwargs): pass @@ -280,6 +283,7 @@ globs = GetDict(converter) exec(open("WORKSPACE").read(), GetDict(WorkspaceFileFunctions(converter))) exec(open("BUILD").read(), GetDict(BuildFileFunctions(converter))) +exec(open("third_party/wyhash/BUILD").read(), GetDict(BuildFileFunctions(converter))) with open(sys.argv[1], "w") as f: f.write(converter.convert()) diff --git a/cmake/upb/json/parser.c b/cmake/upb/json/parser.c index 75cacd4c1d..26996d8a59 100644 --- a/cmake/upb/json/parser.c +++ b/cmake/upb/json/parser.c @@ -3306,7 +3306,7 @@ static upb_json_parsermethod *parsermethod_new(upb_json_codecache *c, upb_byteshandler_setstring(&m->input_handler_, parse, m); upb_byteshandler_setendstr(&m->input_handler_, end, m); - upb_strtable_init2(&m->name_table, UPB_CTYPE_CONSTPTR, alloc); + upb_strtable_init2(&m->name_table, UPB_CTYPE_CONSTPTR, 4, alloc); /* Build name_table */ diff --git a/kokoro/ubuntu/build.sh b/kokoro/ubuntu/build.sh index 2dacffec6d..eeb17903f4 100644 --- a/kokoro/ubuntu/build.sh +++ b/kokoro/ubuntu/build.sh @@ -17,6 +17,7 @@ which bazel bazel version cd $(dirname $0)/../.. +<<<<<<< HEAD if which gcc; then gcc --version @@ -30,20 +31,32 @@ if which clang; then # The checked-in code is with fasttable not enabled. CC=clang bazel test --test_output=errors ... --//:fasttable_enabled=true -- -cmake:test_generated_files fi +======= +>>>>>>> master -if [[ $(uname) = "Linux" ]]; then - # Verify the ASAN build. Have to exclude test_conformance_upb as protobuf - # currently leaks memory in the conformance test runner. - bazel test --copt=-fsanitize=address --linkopt=-fsanitize=address --test_output=errors ... +if which gcc; then + gcc --version + CC=gcc bazel test --test_output=errors ... + CC=gcc bazel test -c opt --test_output=errors ... + # TODO: work through these errors and enable this. + # if gcc -fanalyzer -x c /dev/null -c -o /dev/null; then + # CC=gcc bazel test --copt=-fanalyzer --test_output=errors ... + # fi +fi - # Verify the UBSan build. Have to exclude Lua as the version we are using - # fails some UBSan tests. +if which clang; then + CC=clang bazel test --test_output=errors ... + CC=clang bazel test --test_output=errors -c opt ... - # For some reason kokoro doesn't have Clang available right now. - #CC=clang CXX=clang++ bazel test -c dbg --copt=-fsanitize=undefined --copt=-fno-sanitize=function,vptr --linkopt=-fsanitize=undefined --action_env=UBSAN_OPTIONS=halt_on_error=1:print_stacktrace=1 -- :all -:test_lua + if [[ $(uname) = "Linux" ]]; then + CC=clang bazel test --test_output=errors --config=m32 ... + CC=clang bazel test --test_output=errors --config=asan ... + # TODO: update to a newer Lua that hopefully does not trigger UBSAN. + CC=clang bazel test --test_output=errors --config=ubsan ... -- -tests/bindings/lua:test_lua + fi fi if which valgrind; then - bazel test --run_under='valgrind --leak-check=full --error-exitcode=1' ... -- -tests:test_conformance_upb -cmake:cmake_build + bazel test --config=valgrind ... -- -tests:test_conformance_upb -cmake:cmake_build fi diff --git a/tests/bindings/lua/test_upb.lua b/tests/bindings/lua/test_upb.lua index 815f907551..84178aa79c 100644 --- a/tests/bindings/lua/test_upb.lua +++ b/tests/bindings/lua/test_upb.lua @@ -104,7 +104,7 @@ function test_utf8() upb.decode(test_messages_proto3.TestAllTypesProto3, serialized) end) - -- TOOD(haberman): should proto3 accessors also check UTF-8 at set time? + -- TODO(haberman): should proto3 accessors also check UTF-8 at set time? end function test_string_double_map() @@ -587,6 +587,20 @@ function test_foo() assert_equal(set.file[1].name, "google/protobuf/descriptor.proto") end +function test_descriptor_error() + local symtab = upb.SymbolTable() + local file = descriptor.FileDescriptorProto() + file.name = "test.proto" + file.message_type[1] = descriptor.DescriptorProto{ + name = "ABC" + } + file.message_type[2] = descriptor.DescriptorProto{ + name = "BC." + } + assert_error(function () symtab:add_file(upb.encode(file)) end) + assert_nil(symtab:lookup_msg("ABC")) +end + function test_gc() local top = test_messages_proto3.TestAllTypesProto3() local n = 100 diff --git a/tests/pb/test_decoder.cc b/tests/pb/test_decoder.cc index e8b3a63ed5..110dd2f9be 100644 --- a/tests/pb/test_decoder.cc +++ b/tests/pb/test_decoder.cc @@ -52,17 +52,6 @@ #define PRINT_FAILURE(expr) \ fprintf(stderr, "Assertion failed: %s:%d\n", __FILE__, __LINE__); \ fprintf(stderr, "expr: %s\n", #expr); \ - if (testhash) { \ - fprintf(stderr, "assertion failed running test %x.\n", testhash); \ - if (!filter_hash) { \ - fprintf(stderr, \ - "Run with the arg %x to run only this test. " \ - "(This will also turn on extra debugging output)\n", \ - testhash); \ - } \ - fprintf(stderr, "Failed at %02.2f%% through tests.\n", \ - (float)completed * 100 / total); \ - } #define MAX_NESTING 64 @@ -467,17 +456,6 @@ upb::pb::DecoderPtr CreateDecoder(upb::Arena* arena, return ret; } -uint32_t Hash(const string& proto, const string* expected_output, size_t seam1, - size_t seam2, bool may_skip) { - uint32_t hash = upb_murmur_hash2(proto.c_str(), proto.size(), 0); - if (expected_output) - hash = upb_murmur_hash2(expected_output->c_str(), expected_output->size(), hash); - hash = upb_murmur_hash2(&seam1, sizeof(seam1), hash); - hash = upb_murmur_hash2(&seam2, sizeof(seam2), hash); - hash = upb_murmur_hash2(&may_skip, sizeof(may_skip), hash); - return hash; -} - void CheckBytesParsed(upb::pb::DecoderPtr decoder, size_t ofs) { // We can't have parsed more data than the decoder callback is telling us it // parsed. @@ -506,13 +484,11 @@ void do_run_decoder(VerboseParserEnvironment* env, upb::pb::DecoderPtr decoder, env->Reset(proto.c_str(), proto.size(), may_skip, expected_output == NULL); decoder.Reset(); - testhash = Hash(proto, expected_output, i, j, may_skip); - if (filter_hash && testhash != filter_hash) return; if (test_mode != COUNT_ONLY) { output.clear(); if (filter_hash) { - fprintf(stderr, "RUNNING TEST CASE, hash=%x\n", testhash); + fprintf(stderr, "RUNNING TEST CASE\n"); fprintf(stderr, "Input (len=%u): ", (unsigned)proto.size()); PrintBinary(proto); fprintf(stderr, "\n"); @@ -571,7 +547,6 @@ void run_decoder(const string& proto, const string* expected_output) { } } } - testhash = 0; } const static string thirty_byte_nop = cat( @@ -871,23 +846,17 @@ void test_valid() { // Empty protobuf where we never call PutString between // StartString/EndString. - // Randomly generated hash for this test, hope it doesn't conflict with others - // by chance. - const uint32_t emptyhash = 0x5709be8e; - if (!filter_hash || filter_hash == testhash) { - testhash = emptyhash; - upb::Status status; - upb::Arena arena; - upb::Sink sink(global_handlers, &closures[0]); - upb::pb::DecoderPtr decoder = - CreateDecoder(&arena, global_method, sink, &status); - output.clear(); - bool ok = upb::PutBuffer(std::string(), decoder.input()); - ASSERT(ok); - ASSERT(status.ok()); - if (test_mode == ALL_HANDLERS) { - ASSERT(output == string("<\n>\n")); - } + upb::Status status; + upb::Arena arena; + upb::Sink sink(global_handlers, &closures[0]); + upb::pb::DecoderPtr decoder = + CreateDecoder(&arena, global_method, sink, &status); + output.clear(); + bool ok = upb::PutBuffer(std::string(), decoder.input()); + ASSERT(ok); + ASSERT(status.ok()); + if (test_mode == ALL_HANDLERS) { + ASSERT(output == string("<\n>\n")); } test_valid_data_for_signed_type(UPB_DESCRIPTOR_TYPE_DOUBLE, diff --git a/tests/test_generated_code.c b/tests/test_generated_code.c index 66027245dd..bd1066b233 100644 --- a/tests/test_generated_code.c +++ b/tests/test_generated_code.c @@ -57,9 +57,9 @@ static void test_scalars(void) { ASSERT(protobuf_test_messages_proto3_TestAllTypesProto3_optional_uint64( msg2) == 40); ASSERT(protobuf_test_messages_proto3_TestAllTypesProto3_optional_float( - msg2) == 50.5); + msg2) - 50.5 < 0.01); ASSERT(protobuf_test_messages_proto3_TestAllTypesProto3_optional_double( - msg2) == 60.6); + msg2) - 60.6 < 0.01); ASSERT(protobuf_test_messages_proto3_TestAllTypesProto3_optional_bool( msg2) == 1); val = protobuf_test_messages_proto3_TestAllTypesProto3_optional_string(msg2); diff --git a/tests/test_table.cc b/tests/test_table.cc index e19a74a50d..83b49f5c20 100644 --- a/tests/test_table.cc +++ b/tests/test_table.cc @@ -618,6 +618,16 @@ void test_delete() { upb_inttable_uninit(&t); } +void test_init() { + for (int i = 0; i < 2048; i++) { + /* Tests that the size calculations in init() (lg2 size for target load) + * work for all expected sizes. */ + upb_strtable t; + upb_strtable_init2(&t, UPB_CTYPE_BOOL, i, &upb_alloc_global); + upb_strtable_uninit(&t); + } +} + extern "C" { int run_tests(int argc, char *argv[]) { diff --git a/third_party/wyhash/BUILD b/third_party/wyhash/BUILD new file mode 100644 index 0000000000..28fa3be6a5 --- /dev/null +++ b/third_party/wyhash/BUILD @@ -0,0 +1,18 @@ + +licenses(["unencumbered"]) + +exports_files(["LICENSE"]) + +cc_library( + name = "wyhash", + hdrs = ["wyhash.h"], + visibility = ["//:__pkg__"], +) + +filegroup( + name = "cmake_files", + srcs = glob([ + "**/*", + ]), + visibility = ["//cmake:__pkg__"], +) diff --git a/third_party/wyhash/LICENSE b/third_party/wyhash/LICENSE new file mode 100644 index 0000000000..471f09f4cf --- /dev/null +++ b/third_party/wyhash/LICENSE @@ -0,0 +1,25 @@ +This is free and unencumbered software released into the public domain. + +Anyone is free to copy, modify, publish, use, compile, sell, or +distribute this software, either in source code form or as a compiled +binary, for any purpose, commercial or non-commercial, and by any +means. + +In jurisdictions that recognize copyright laws, the author or authors +of this software dedicate any and all copyright interest in the +software to the public domain. We make this dedication for the benefit +of the public at large and to the detriment of our heirs and +successors. We intend this dedication to be an overt act of +relinquishment in perpetuity of all present and future rights to this +software under copyright law. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF +MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. +IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR +OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, +ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR +OTHER DEALINGS IN THE SOFTWARE. + +For more information, please refer to + diff --git a/third_party/wyhash/wyhash.h b/third_party/wyhash/wyhash.h new file mode 100644 index 0000000000..5658f02df3 --- /dev/null +++ b/third_party/wyhash/wyhash.h @@ -0,0 +1,145 @@ +/* Copyright 2020 王一 Wang Yi + This is free and unencumbered software released into the public domain. http://unlicense.org/ + See github.com/wangyi-fudan/wyhash/ LICENSE + */ +#ifndef wyhash_final_version +#define wyhash_final_version +//defines that change behavior +#ifndef WYHASH_CONDOM +#define WYHASH_CONDOM 1 //0: read 8 bytes before and after boundaries, dangerous but fastest. 1: normal valid behavior 2: extra protection against entropy loss (probability=2^-63), aka. "blind multiplication" +#endif +#define WYHASH_32BIT_MUM 0 //faster on 32 bit system +//includes +#include +#include +#if defined(_MSC_VER) && defined(_M_X64) + #include + #pragma intrinsic(_umul128) +#endif +#if defined(__GNUC__) || defined(__INTEL_COMPILER) || defined(__clang__) + #define _likely_(x) __builtin_expect(x,1) + #define _unlikely_(x) __builtin_expect(x,0) +#else + #define _likely_(x) (x) + #define _unlikely_(x) (x) +#endif +//mum function +static inline uint64_t _wyrot(uint64_t x) { return (x>>32)|(x<<32); } +static inline void _wymum(uint64_t *A, uint64_t *B){ +#if(WYHASH_32BIT_MUM) + uint64_t hh=(*A>>32)*(*B>>32), hl=(*A>>32)*(unsigned)*B, lh=(unsigned)*A*(*B>>32), ll=(uint64_t)(unsigned)*A*(unsigned)*B; + #if(WYHASH_CONDOM>1) + *A^=_wyrot(hl)^hh; *B^=_wyrot(lh)^ll; + #else + *A=_wyrot(hl)^hh; *B=_wyrot(lh)^ll; + #endif +#elif defined(__SIZEOF_INT128__) + __uint128_t r=*A; r*=*B; + #if(WYHASH_CONDOM>1) + *A^=(uint64_t)r; *B^=(uint64_t)(r>>64); + #else + *A=(uint64_t)r; *B=(uint64_t)(r>>64); + #endif +#elif defined(_MSC_VER) && defined(_M_X64) + #if(WYHASH_CONDOM>1) + uint64_t a, b; + a=_umul128(*A,*B,&b); + *A^=a; *B^=b; + #else + *A=_umul128(*A,*B,B); + #endif +#else + uint64_t ha=*A>>32, hb=*B>>32, la=(uint32_t)*A, lb=(uint32_t)*B, hi, lo; + uint64_t rh=ha*hb, rm0=ha*lb, rm1=hb*la, rl=la*lb, t=rl+(rm0<<32), c=t>32)+(rm1>>32)+c; + #if(WYHASH_CONDOM>1) + *A^=lo; *B^=hi; + #else + *A=lo; *B=hi; + #endif +#endif +} +static inline uint64_t _wymix(uint64_t A, uint64_t B){ _wymum(&A,&B); return A^B; } +//read functions +#ifndef WYHASH_LITTLE_ENDIAN + #if defined(_WIN32) || defined(__LITTLE_ENDIAN__) || (defined(__BYTE_ORDER__) && __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__) + #define WYHASH_LITTLE_ENDIAN 1 + #elif defined(__BIG_ENDIAN__) || (defined(__BYTE_ORDER__) && __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__) + #define WYHASH_LITTLE_ENDIAN 0 + #endif +#endif +#if (WYHASH_LITTLE_ENDIAN) +static inline uint64_t _wyr8(const uint8_t *p) { uint64_t v; memcpy(&v, p, 8); return v;} +static inline uint64_t _wyr4(const uint8_t *p) { unsigned v; memcpy(&v, p, 4); return v;} +#elif defined(__GNUC__) || defined(__INTEL_COMPILER) || defined(__clang__) +static inline uint64_t _wyr8(const uint8_t *p) { uint64_t v; memcpy(&v, p, 8); return __builtin_bswap64(v);} +static inline uint64_t _wyr4(const uint8_t *p) { unsigned v; memcpy(&v, p, 4); return __builtin_bswap32(v);} +#elif defined(_MSC_VER) +static inline uint64_t _wyr8(const uint8_t *p) { uint64_t v; memcpy(&v, p, 8); return _byteswap_uint64(v);} +static inline uint64_t _wyr4(const uint8_t *p) { unsigned v; memcpy(&v, p, 4); return _byteswap_ulong(v);} +#endif +static inline uint64_t _wyr3(const uint8_t *p, unsigned k) { return (((uint64_t)p[0])<<16)|(((uint64_t)p[k>>1])<<8)|p[k-1];} +//wyhash function +static inline uint64_t _wyfinish16(const uint8_t *p, uint64_t len, uint64_t seed, const uint64_t *secret, uint64_t i){ +#if(WYHASH_CONDOM>0) + uint64_t a, b; + if(_likely_(i<=8)){ + if(_likely_(i>=4)){ a=_wyr4(p); b=_wyr4(p+i-4); } + else if (_likely_(i)){ a=_wyr3(p,i); b=0; } + else a=b=0; + } + else{ a=_wyr8(p); b=_wyr8(p+i-8); } + return _wymix(secret[1]^len,_wymix(a^secret[1], b^seed)); +#else + #define oneshot_shift ((i<8)*((8-i)<<3)) + return _wymix(secret[1]^len,_wymix((_wyr8(p)<>oneshot_shift)^seed)); +#endif +} + +static inline uint64_t _wyfinish(const uint8_t *p, uint64_t len, uint64_t seed, const uint64_t *secret, uint64_t i){ + if(_likely_(i<=16)) return _wyfinish16(p,len,seed,secret,i); + return _wyfinish(p+16,len,_wymix(_wyr8(p)^secret[1],_wyr8(p+8)^seed),secret,i-16); +} + +static inline uint64_t wyhash(const void *key, uint64_t len, uint64_t seed, const uint64_t *secret){ + const uint8_t *p=(const uint8_t *)key; + uint64_t i=len; seed^=*secret; + if(_unlikely_(i>64)){ + uint64_t see1=seed; + do{ + seed=_wymix(_wyr8(p)^secret[1],_wyr8(p+8)^seed)^_wymix(_wyr8(p+16)^secret[2],_wyr8(p+24)^seed); + see1=_wymix(_wyr8(p+32)^secret[3],_wyr8(p+40)^see1)^_wymix(_wyr8(p+48)^secret[4],_wyr8(p+56)^see1); + p+=64; i-=64; + }while(i>64); + seed^=see1; + } + return _wyfinish(p,len,seed,secret,i); +} +//utility functions +const uint64_t _wyp[5] = {0xa0761d6478bd642full, 0xe7037ed1a0b428dbull, 0x8ebc6af09c88c6e3ull, 0x589965cc75374cc3ull, 0x1d8e4e27c47d124full}; +static inline uint64_t wyhash64(uint64_t A, uint64_t B){ A^=_wyp[0]; B^=_wyp[1]; _wymum(&A,&B); return _wymix(A^_wyp[0],B^_wyp[1]);} +static inline uint64_t wyrand(uint64_t *seed){ *seed+=_wyp[0]; return _wymix(*seed,*seed^_wyp[1]);} +static inline double wy2u01(uint64_t r){ const double _wynorm=1.0/(1ull<<52); return (r>>12)*_wynorm;} +static inline double wy2gau(uint64_t r){ const double _wynorm=1.0/(1ull<<20); return ((r&0x1fffff)+((r>>21)&0x1fffff)+((r>>42)&0x1fffff))*_wynorm-3.0;} +static inline uint64_t wy2u0k(uint64_t r, uint64_t k){ _wymum(&r,&k); return k; } + +static inline void make_secret(uint64_t seed, uint64_t *secret){ + uint8_t c[] = {15, 23, 27, 29, 30, 39, 43, 45, 46, 51, 53, 54, 57, 58, 60, 71, 75, 77, 78, 83, 85, 86, 89, 90, 92, 99, 101, 102, 105, 106, 108, 113, 114, 116, 120, 135, 139, 141, 142, 147, 149, 150, 153, 154, 156, 163, 165, 166, 169, 170, 172, 177, 178, 180, 184, 195, 197, 198, 201, 202, 204, 209, 210, 212, 216, 225, 226, 228, 232, 240 }; + for(size_t i=0;i<5;i++){ + uint8_t ok; + do{ + ok=1; secret[i]=0; + for(size_t j=0;j<64;j+=8) secret[i]|=((uint64_t)c[wyrand(&seed)%sizeof(c)])<descriptortype; if (_upb_isrepeated(field)) ndx += 18; ptr = decode_varint32(d, ptr, &val.size); - if (val.size >= INT32_MAX || ptr - d->end + val.size > d->limit) { + if (val.size >= INT32_MAX || + ptr - d->end + (int32_t)val.size > d->limit) { decode_err(d); /* Length overflow. */ } op = delim_ops[ndx]; diff --git a/upb/def.c b/upb/def.c index f1c4e3f0d2..19b30fe7ae 100644 --- a/upb/def.c +++ b/upb/def.c @@ -3,11 +3,11 @@ #include #include +#include #include #include -#include "google/protobuf/descriptor.upb.h" -#include "upb/decode.int.h" +#include "google/protobuf/descriptor.upb.h" #include "upb/port_def.inc" typedef struct { @@ -15,15 +15,6 @@ typedef struct { char str[1]; /* Null-terminated string data follows. */ } str_t; -static str_t *newstr(upb_alloc *alloc, const char *data, size_t len) { - str_t *ret = upb_malloc(alloc, sizeof(*ret) + len); - if (!ret) return NULL; - ret->len = len; - if (len) memcpy(ret->str, data, len); - ret->str[len] = '\0'; - return ret; -} - struct upb_fielddef { const upb_filedef *file; const upb_msgdef *msgdef; @@ -119,6 +110,7 @@ struct upb_symtab { upb_arena *arena; upb_strtable syms; /* full_name -> packed def ptr */ upb_strtable files; /* file_name -> upb_filedef* */ + size_t bytes_loaded; }; /* Inside a symtab we store tagged pointers to specific def types. */ @@ -157,38 +149,6 @@ static bool upb_isalphanum(char c) { return upb_isletter(c) || upb_isbetween(c, '0', '9'); } -static bool upb_isident(upb_strview name, bool full, upb_status *s) { - const char *str = name.data; - size_t len = name.size; - bool start = true; - size_t i; - for (i = 0; i < len; i++) { - char c = str[i]; - if (c == '.') { - if (start || !full) { - upb_status_seterrf(s, "invalid name: unexpected '.' (%s)", str); - return false; - } - start = true; - } else if (start) { - if (!upb_isletter(c)) { - upb_status_seterrf( - s, "invalid name: path components must start with a letter (%s)", - str); - return false; - } - start = false; - } else { - if (!upb_isalphanum(c)) { - upb_status_seterrf(s, "invalid name: non-alphanumeric character (%s)", - str); - return false; - } - } - } - return !start; -} - static const char *shortdefname(const char *fullname) { const char *p; @@ -248,54 +208,6 @@ static void upb_status_setoom(upb_status *status) { upb_status_seterrmsg(status, "out of memory"); } -static bool assign_msg_indices(upb_msgdef *m, upb_status *s) { - /* Sort fields. upb internally relies on UPB_TYPE_MESSAGE fields having the - * lowest indexes, but we do not publicly guarantee this. */ - upb_msg_field_iter j; - int i; - uint32_t selector; - int n = upb_msgdef_numfields(m); - upb_fielddef **fields; - - if (n == 0) { - m->selector_count = UPB_STATIC_SELECTOR_COUNT; - m->submsg_field_count = 0; - return true; - } - - fields = upb_gmalloc(n * sizeof(*fields)); - if (!fields) { - upb_status_setoom(s); - return false; - } - - m->submsg_field_count = 0; - for(i = 0, upb_msg_field_begin(&j, m); - !upb_msg_field_done(&j); - upb_msg_field_next(&j), i++) { - upb_fielddef *f = upb_msg_iter_field(&j); - UPB_ASSERT(f->msgdef == m); - if (upb_fielddef_issubmsg(f)) { - m->submsg_field_count++; - } - fields[i] = f; - } - - qsort(fields, n, sizeof(*fields), cmp_fields); - - selector = UPB_STATIC_SELECTOR_COUNT + m->submsg_field_count; - for (i = 0; i < n; i++) { - upb_fielddef *f = fields[i]; - f->index_ = i; - f->selector_base = selector + upb_handlers_selectorbaseoffset(f); - selector += upb_handlers_selectorcount(f); - } - m->selector_count = selector; - - upb_gfree(fields); - return true; -} - static void assign_msg_wellknowntype(upb_msgdef *m) { const char *name = upb_msgdef_fullname(m); if (name == NULL) { @@ -865,7 +777,185 @@ void upb_oneof_iter_setdone(upb_oneof_iter *iter) { upb_inttable_iter_setdone(iter); } -/* Dynamic Layout Generation. *************************************************/ +/* upb_filedef ****************************************************************/ + +const char *upb_filedef_name(const upb_filedef *f) { + return f->name; +} + +const char *upb_filedef_package(const upb_filedef *f) { + return f->package; +} + +const char *upb_filedef_phpprefix(const upb_filedef *f) { + return f->phpprefix; +} + +const char *upb_filedef_phpnamespace(const upb_filedef *f) { + return f->phpnamespace; +} + +upb_syntax_t upb_filedef_syntax(const upb_filedef *f) { + return f->syntax; +} + +int upb_filedef_msgcount(const upb_filedef *f) { + return f->msg_count; +} + +int upb_filedef_depcount(const upb_filedef *f) { + return f->dep_count; +} + +int upb_filedef_enumcount(const upb_filedef *f) { + return f->enum_count; +} + +const upb_filedef *upb_filedef_dep(const upb_filedef *f, int i) { + return i < 0 || i >= f->dep_count ? NULL : f->deps[i]; +} + +const upb_msgdef *upb_filedef_msg(const upb_filedef *f, int i) { + return i < 0 || i >= f->msg_count ? NULL : &f->msgs[i]; +} + +const upb_enumdef *upb_filedef_enum(const upb_filedef *f, int i) { + return i < 0 || i >= f->enum_count ? NULL : &f->enums[i]; +} + +void upb_symtab_free(upb_symtab *s) { + upb_arena_free(s->arena); + upb_gfree(s); +} + +upb_symtab *upb_symtab_new(void) { + upb_symtab *s = upb_gmalloc(sizeof(*s)); + upb_alloc *alloc; + + if (!s) { + return NULL; + } + + s->arena = upb_arena_new(); + s->bytes_loaded = 0; + alloc = upb_arena_alloc(s->arena); + + if (!upb_strtable_init2(&s->syms, UPB_CTYPE_CONSTPTR, 32, alloc) || + !upb_strtable_init2(&s->files, UPB_CTYPE_CONSTPTR, 4, alloc)) { + upb_arena_free(s->arena); + upb_gfree(s); + s = NULL; + } + return s; +} + +const upb_msgdef *upb_symtab_lookupmsg(const upb_symtab *s, const char *sym) { + upb_value v; + return upb_strtable_lookup(&s->syms, sym, &v) ? + unpack_def(v, UPB_DEFTYPE_MSG) : NULL; +} + +const upb_msgdef *upb_symtab_lookupmsg2(const upb_symtab *s, const char *sym, + size_t len) { + upb_value v; + return upb_strtable_lookup2(&s->syms, sym, len, &v) ? + unpack_def(v, UPB_DEFTYPE_MSG) : NULL; +} + +const upb_enumdef *upb_symtab_lookupenum(const upb_symtab *s, const char *sym) { + upb_value v; + return upb_strtable_lookup(&s->syms, sym, &v) ? + unpack_def(v, UPB_DEFTYPE_ENUM) : NULL; +} + +const upb_filedef *upb_symtab_lookupfile(const upb_symtab *s, const char *name) { + upb_value v; + return upb_strtable_lookup(&s->files, name, &v) ? upb_value_getconstptr(v) + : NULL; +} + +const upb_filedef *upb_symtab_lookupfile2( + const upb_symtab *s, const char *name, size_t len) { + upb_value v; + return upb_strtable_lookup2(&s->files, name, len, &v) ? + upb_value_getconstptr(v) : NULL; +} + +int upb_symtab_filecount(const upb_symtab *s) { + return (int)upb_strtable_count(&s->files); +} + +/* Code to build defs from descriptor protos. *********************************/ + +/* There is a question of how much validation to do here. It will be difficult + * to perfectly match the amount of validation performed by proto2. But since + * this code is used to directly build defs from Ruby (for example) we do need + * to validate important constraints like uniqueness of names and numbers. */ + +#define CHK_OOM(x) if (!(x)) { symtab_oomerr(ctx); } + +typedef struct { + upb_symtab *symtab; + upb_filedef *file; /* File we are building. */ + upb_arena *file_arena; /* Allocate defs here. */ + upb_alloc *alloc; /* Alloc of file_arena, for tables. */ + const upb_msglayout **layouts; /* NULL if we should build layouts. */ + upb_status *status; /* Record errors here. */ + jmp_buf err; /* longjmp() on error. */ +} symtab_addctx; + +UPB_NORETURN UPB_NOINLINE +static void symtab_errf(symtab_addctx *ctx, const char *fmt, ...) { + va_list argp; + va_start(argp, fmt); + upb_status_vseterrf(ctx->status, fmt, argp); + va_end(argp); + longjmp(ctx->err, 1); +} + +UPB_NORETURN UPB_NOINLINE +static void symtab_oomerr(symtab_addctx *ctx) { + upb_status_setoom(ctx->status); + longjmp(ctx->err, 1); +} + +void *symtab_alloc(symtab_addctx *ctx, size_t bytes) { + void *ret = upb_arena_malloc(ctx->file_arena, bytes); + if (!ret) symtab_oomerr(ctx); + return ret; +} + +static void check_ident(symtab_addctx *ctx, upb_strview name, bool full) { + const char *str = name.data; + size_t len = name.size; + bool start = true; + size_t i; + for (i = 0; i < len; i++) { + char c = str[i]; + if (c == '.') { + if (start || !full) { + symtab_errf(ctx, "invalid name: unexpected '.' (%.*s)", (int)len, str); + } + start = true; + } else if (start) { + if (!upb_isletter(c)) { + symtab_errf( + ctx, + "invalid name: path components must start with a letter (%.*s)", + (int)len, str); + } + start = false; + } else { + if (!upb_isalphanum(c)) { + symtab_errf(ctx, "invalid name: non-alphanumeric character (%.*s)", + (int)len, str); + } + } + } + if (start) { + symtab_errf(ctx, "invalid name: empty part (%.*s)", (int)len, str); + } +} static size_t div_round_up(size_t n, size_t d) { return (n + d - 1) / d; @@ -932,7 +1022,7 @@ static void assign_layout_indices(const upb_msgdef *m, upb_msglayout_field *fiel /* This function is the dynamic equivalent of message_layout.{cc,h} in upbc. * It computes a dynamic layout for all of the fields in |m|. */ -static bool make_layout(const upb_symtab *symtab, const upb_msgdef *m) { +static void make_layout(symtab_addctx *ctx, const upb_msgdef *m) { upb_msglayout *l = (upb_msglayout*)m->layout; upb_msg_field_iter it; upb_msg_oneof_iter oit; @@ -940,18 +1030,11 @@ static bool make_layout(const upb_symtab *symtab, const upb_msgdef *m) { size_t submsg_count = m->submsg_field_count; const upb_msglayout **submsgs; upb_msglayout_field *fields; - upb_alloc *alloc = upb_arena_alloc(symtab->arena); memset(l, 0, sizeof(*l) + sizeof(_upb_fasttable_entry)); - fields = upb_malloc(alloc, upb_msgdef_numfields(m) * sizeof(*fields)); - submsgs = upb_malloc(alloc, submsg_count * sizeof(*submsgs)); - - if ((!fields && upb_msgdef_numfields(m)) || - (!submsgs && submsg_count)) { - /* OOM. */ - return false; - } + fields = symtab_alloc(ctx, upb_msgdef_numfields(m) * sizeof(*fields)); + submsgs = symtab_alloc(ctx, submsg_count * sizeof(*submsgs)); l->field_count = upb_msgdef_numfields(m); l->fields = fields; @@ -987,7 +1070,7 @@ static bool make_layout(const upb_symtab *symtab, const upb_msgdef *m) { l->field_count = 2; l->size = 2 * sizeof(upb_strview); l->size = UPB_ALIGN_UP(l->size, 8); - return true; + return; } /* Allocate data offsets in three stages: @@ -1099,31 +1182,52 @@ static bool make_layout(const upb_symtab *symtab, const upb_msgdef *m) { /* Sort fields by number. */ qsort(fields, upb_msgdef_numfields(m), sizeof(*fields), field_number_cmp); assign_layout_indices(m, fields); - - return true; } -/* Code to build defs from descriptor protos. *********************************/ +static void assign_msg_indices(symtab_addctx *ctx, upb_msgdef *m) { + /* Sort fields. upb internally relies on UPB_TYPE_MESSAGE fields having the + * lowest indexes, but we do not publicly guarantee this. */ + upb_msg_field_iter j; + int i; + uint32_t selector; + int n = upb_msgdef_numfields(m); + upb_fielddef **fields; -/* There is a question of how much validation to do here. It will be difficult - * to perfectly match the amount of validation performed by proto2. But since - * this code is used to directly build defs from Ruby (for example) we do need - * to validate important constraints like uniqueness of names and numbers. */ + if (n == 0) { + m->selector_count = UPB_STATIC_SELECTOR_COUNT; + m->submsg_field_count = 0; + return; + } -#define CHK(x) if (!(x)) { return false; } -#define CHK_OOM(x) if (!(x)) { upb_status_setoom(ctx->status); return false; } + fields = upb_gmalloc(n * sizeof(*fields)); -typedef struct { - const upb_symtab *symtab; - upb_filedef *file; /* File we are building. */ - upb_alloc *alloc; /* Allocate defs here. */ - upb_alloc *tmp; /* Alloc for addtab and any other tmp data. */ - upb_strtable *addtab; /* full_name -> packed def ptr for new defs */ - const upb_msglayout **layouts; /* NULL if we should build layouts. */ - upb_status *status; /* Record errors here. */ -} symtab_addctx; + m->submsg_field_count = 0; + for(i = 0, upb_msg_field_begin(&j, m); + !upb_msg_field_done(&j); + upb_msg_field_next(&j), i++) { + upb_fielddef *f = upb_msg_iter_field(&j); + UPB_ASSERT(f->msgdef == m); + if (upb_fielddef_issubmsg(f)) { + m->submsg_field_count++; + } + fields[i] = f; + } + + qsort(fields, n, sizeof(*fields), cmp_fields); + + selector = UPB_STATIC_SELECTOR_COUNT + m->submsg_field_count; + for (i = 0; i < n; i++) { + upb_fielddef *f = fields[i]; + f->index_ = i; + f->selector_base = selector + upb_handlers_selectorbaseoffset(f); + selector += upb_handlers_selectorcount(f); + } + m->selector_count = selector; -static char* strviewdup(const symtab_addctx *ctx, upb_strview view) { + upb_gfree(fields); +} + +static char *strviewdup(symtab_addctx *ctx, upb_strview view) { return upb_strdup2(view.data, view.size, ctx->alloc); } @@ -1135,13 +1239,12 @@ static bool streql_view(upb_strview view, const char *b) { return streql2(view.data, view.size, b); } -static const char *makefullname(const symtab_addctx *ctx, const char *prefix, +static const char *makefullname(symtab_addctx *ctx, const char *prefix, upb_strview name) { if (prefix) { /* ret = prefix + '.' + name; */ size_t n = strlen(prefix); - char *ret = upb_malloc(ctx->alloc, n + name.size + 2); - CHK_OOM(ret); + char *ret = symtab_alloc(ctx, n + name.size + 2); strcpy(ret, prefix); ret[n] = '.'; memcpy(&ret[n + 1], name.data, name.size); @@ -1152,7 +1255,7 @@ static const char *makefullname(const symtab_addctx *ctx, const char *prefix, } } -static bool finalize_oneofs(symtab_addctx *ctx, upb_msgdef *m) { +static void finalize_oneofs(symtab_addctx *ctx, upb_msgdef *m) { int i; int synthetic_count = 0; upb_oneofdef *mutable_oneofs = (upb_oneofdef*)m->oneofs; @@ -1161,22 +1264,18 @@ static bool finalize_oneofs(symtab_addctx *ctx, upb_msgdef *m) { upb_oneofdef *o = &mutable_oneofs[i]; if (o->synthetic && o->field_count != 1) { - upb_status_seterrf( - ctx->status, "Synthetic oneofs must have one field, not %d: %s", - o->field_count, upb_oneofdef_name(o)); - return false; + symtab_errf(ctx, "Synthetic oneofs must have one field, not %d: %s", + o->field_count, upb_oneofdef_name(o)); } if (o->synthetic) { synthetic_count++; } else if (synthetic_count != 0) { - upb_status_seterrf( - ctx->status, "Synthetic oneofs must be after all other oneofs: %s", - upb_oneofdef_name(o)); - return false; + symtab_errf(ctx, "Synthetic oneofs must be after all other oneofs: %s", + upb_oneofdef_name(o)); } - o->fields = upb_malloc(ctx->alloc, sizeof(upb_fielddef*) * o->field_count); + o->fields = symtab_alloc(ctx, sizeof(upb_fielddef *) * o->field_count); o->field_count = 0; } @@ -1189,7 +1288,6 @@ static bool finalize_oneofs(symtab_addctx *ctx, upb_msgdef *m) { } m->real_oneof_count = m->oneof_count - synthetic_count; - return true; } size_t getjsonname(const char *name, char *buf, size_t len) { @@ -1230,76 +1328,57 @@ size_t getjsonname(const char *name, char *buf, size_t len) { #undef WRITE } -static char* makejsonname(const char* name, upb_alloc *alloc) { +static char* makejsonname(symtab_addctx *ctx, const char* name) { size_t size = getjsonname(name, NULL, 0); - char* json_name = upb_malloc(alloc, size); + char* json_name = symtab_alloc(ctx, size); getjsonname(name, json_name, size); return json_name; } -static bool symtab_add(const symtab_addctx *ctx, const char *name, - upb_value v) { - upb_value tmp; - if (upb_strtable_lookup(ctx->addtab, name, &tmp) || - upb_strtable_lookup(&ctx->symtab->syms, name, &tmp)) { - upb_status_seterrf(ctx->status, "duplicate symbol '%s'", name); - return false; +static void symtab_add(symtab_addctx *ctx, const char *name, upb_value v) { + if (upb_strtable_lookup(&ctx->symtab->syms, name, NULL)) { + symtab_errf(ctx, "duplicate symbol '%s'", name); } - - CHK_OOM(upb_strtable_insert3(ctx->addtab, name, strlen(name), v, ctx->tmp)); - return true; + upb_alloc *alloc = upb_arena_alloc(ctx->symtab->arena); + size_t len = strlen(name); + CHK_OOM(upb_strtable_insert3(&ctx->symtab->syms, name, len, v, alloc)); } /* Given a symbol and the base symbol inside which it is defined, find the * symbol's definition in t. */ -static bool resolvename(const upb_strtable *t, const upb_fielddef *f, - const char *base, upb_strview sym, - upb_deftype_t type, upb_status *status, - const void **def) { - if(sym.size == 0) return false; +static const void *symtab_resolve(symtab_addctx *ctx, const upb_fielddef *f, + const char *base, upb_strview sym, + upb_deftype_t type) { + const upb_strtable *t = &ctx->symtab->syms; + if(sym.size == 0) goto notfound; if(sym.data[0] == '.') { /* Symbols starting with '.' are absolute, so we do a single lookup. * Slice to omit the leading '.' */ upb_value v; if (!upb_strtable_lookup2(t, sym.data + 1, sym.size - 1, &v)) { - return false; + goto notfound; } - *def = unpack_def(v, type); - - if (!*def) { - upb_status_seterrf(status, - "type mismatch when resolving field %s, name %s", - f->full_name, sym.data); - return false; + const void *ret = unpack_def(v, type); + if (!ret) { + symtab_errf(ctx, "type mismatch when resolving field %s, name %s", + f->full_name, sym.data); } - - return true; + return ret; } else { /* Remove components from base until we find an entry or run out. * TODO: This branch is totally broken, but currently not used. */ (void)base; UPB_ASSERT(false); - return false; + goto notfound; } -} -const void *symtab_resolve(const symtab_addctx *ctx, const upb_fielddef *f, - const char *base, upb_strview sym, - upb_deftype_t type) { - const void *ret; - if (!resolvename(ctx->addtab, f, base, sym, type, ctx->status, &ret) && - !resolvename(&ctx->symtab->syms, f, base, sym, type, ctx->status, &ret)) { - if (upb_ok(ctx->status)) { - upb_status_seterrf(ctx->status, "couldn't resolve name '%s'", sym.data); - } - return false; - } - return ret; +notfound: + symtab_errf(ctx, "couldn't resolve name '%s'", sym.data); } -static bool create_oneofdef( - const symtab_addctx *ctx, upb_msgdef *m, +static void create_oneofdef( + symtab_addctx *ctx, upb_msgdef *m, const google_protobuf_OneofDescriptorProto *oneof_proto) { upb_oneofdef *o; upb_strview name = google_protobuf_OneofDescriptorProto_name(oneof_proto); @@ -1312,16 +1391,23 @@ static bool create_oneofdef( o->synthetic = false; v = pack_def(o, UPB_DEFTYPE_ONEOF); - CHK_OOM(symtab_add(ctx, o->full_name, v)); + symtab_add(ctx, o->full_name, v); CHK_OOM(upb_strtable_insert3(&m->ntof, name.data, name.size, v, ctx->alloc)); CHK_OOM(upb_inttable_init2(&o->itof, UPB_CTYPE_CONSTPTR, ctx->alloc)); - CHK_OOM(upb_strtable_init2(&o->ntof, UPB_CTYPE_CONSTPTR, ctx->alloc)); + CHK_OOM(upb_strtable_init2(&o->ntof, UPB_CTYPE_CONSTPTR, 4, ctx->alloc)); +} - return true; +static str_t *newstr(symtab_addctx *ctx, const char *data, size_t len) { + str_t *ret = symtab_alloc(ctx, sizeof(*ret) + len); + if (!ret) return NULL; + ret->len = len; + if (len) memcpy(ret->str, data, len); + ret->str[len] = '\0'; + return ret; } -static bool parse_default(const symtab_addctx *ctx, const char *str, size_t len, +static void parse_default(symtab_addctx *ctx, const char *str, size_t len, upb_fielddef *f) { char *end; char nullz[64]; @@ -1336,7 +1422,7 @@ static bool parse_default(const symtab_addctx *ctx, const char *str, size_t len, case UPB_TYPE_FLOAT: /* Standard C number parsing functions expect null-terminated strings. */ if (len >= sizeof(nullz) - 1) { - return false; + symtab_errf(ctx, "Default too long: %.*s", (int)len, str); } memcpy(nullz, str, len); nullz[len] = '\0'; @@ -1349,47 +1435,61 @@ static bool parse_default(const symtab_addctx *ctx, const char *str, size_t len, switch (upb_fielddef_type(f)) { case UPB_TYPE_INT32: { long val = strtol(str, &end, 0); - CHK(val <= INT32_MAX && val >= INT32_MIN && errno != ERANGE && !*end); + if (val > INT32_MAX || val < INT32_MIN || errno == ERANGE || *end) { + goto invalid; + } f->defaultval.sint = val; break; } case UPB_TYPE_ENUM: { const upb_enumdef *e = f->sub.enumdef; int32_t val; - CHK(upb_enumdef_ntoi(e, str, len, &val)); + if (!upb_enumdef_ntoi(e, str, len, &val)) { + goto invalid; + } f->defaultval.sint = val; break; } case UPB_TYPE_INT64: { /* XXX: Need to write our own strtoll, since it's not available in c89. */ int64_t val = strtol(str, &end, 0); - CHK(val <= INT64_MAX && val >= INT64_MIN && errno != ERANGE && !*end); + if (val > INT64_MAX || val < INT64_MIN || errno == ERANGE || *end) { + goto invalid; + } f->defaultval.sint = val; break; } case UPB_TYPE_UINT32: { unsigned long val = strtoul(str, &end, 0); - CHK(val <= UINT32_MAX && errno != ERANGE && !*end); + if (val > UINT32_MAX || errno == ERANGE || *end) { + goto invalid; + } f->defaultval.uint = val; break; } case UPB_TYPE_UINT64: { /* XXX: Need to write our own strtoull, since it's not available in c89. */ uint64_t val = strtoul(str, &end, 0); - CHK(val <= UINT64_MAX && errno != ERANGE && !*end); + if (val > UINT64_MAX || errno == ERANGE || *end) { + goto invalid; + } f->defaultval.uint = val; break; } case UPB_TYPE_DOUBLE: { double val = strtod(str, &end); - CHK(errno != ERANGE && !*end); + if (errno == ERANGE || *end) { + goto invalid; + } f->defaultval.dbl = val; break; } case UPB_TYPE_FLOAT: { /* XXX: Need to write our own strtof, since it's not available in c89. */ float val = strtod(str, &end); - CHK(errno != ERANGE && !*end); + if (errno == ERANGE || *end) { + goto invalid; + } f->defaultval.flt = val; break; } @@ -1399,25 +1499,30 @@ static bool parse_default(const symtab_addctx *ctx, const char *str, size_t len, } else if (streql2(str, len, "true")) { f->defaultval.boolean = true; } else { - return false; } break; } case UPB_TYPE_STRING: - f->defaultval.str = newstr(ctx->alloc, str, len); + f->defaultval.str = newstr(ctx, str, len); break; case UPB_TYPE_BYTES: /* XXX: need to interpret the C-escaped value. */ - f->defaultval.str = newstr(ctx->alloc, str, len); + f->defaultval.str = newstr(ctx, str, len); break; case UPB_TYPE_MESSAGE: /* Should not have a default value. */ - return false; + symtab_errf(ctx, "Message should not have a default (%s)", + upb_fielddef_fullname(f)); } - return true; + + return; + +invalid: + symtab_errf(ctx, "Invalid default '%.*s' for field %f", (int)len, str, + upb_fielddef_fullname(f)); } -static void set_default_default(const symtab_addctx *ctx, upb_fielddef *f) { +static void set_default_default(symtab_addctx *ctx, upb_fielddef *f) { switch (upb_fielddef_type(f)) { case UPB_TYPE_INT32: case UPB_TYPE_INT64: @@ -1434,7 +1539,7 @@ static void set_default_default(const symtab_addctx *ctx, upb_fielddef *f) { break; case UPB_TYPE_STRING: case UPB_TYPE_BYTES: - f->defaultval.str = newstr(ctx->alloc, NULL, 0); + f->defaultval.str = newstr(ctx, NULL, 0); break; case UPB_TYPE_BOOL: f->defaultval.boolean = false; @@ -1444,8 +1549,8 @@ static void set_default_default(const symtab_addctx *ctx, upb_fielddef *f) { } } -static bool create_fielddef( - const symtab_addctx *ctx, const char *prefix, upb_msgdef *m, +static void create_fielddef( + symtab_addctx *ctx, const char *prefix, upb_msgdef *m, const google_protobuf_FieldDescriptorProto *field_proto) { upb_alloc *alloc = ctx->alloc; upb_fielddef *f; @@ -1457,12 +1562,11 @@ static bool create_fielddef( uint32_t field_number; if (!google_protobuf_FieldDescriptorProto_has_name(field_proto)) { - upb_status_seterrmsg(ctx->status, "field has no name"); - return false; + symtab_errf(ctx, "field has no name (%s)", upb_msgdef_fullname(m)); } name = google_protobuf_FieldDescriptorProto_name(field_proto); - CHK(upb_isident(name, false, ctx->status)); + check_ident(ctx, name, false); full_name = makefullname(ctx, prefix, name); shortname = shortdefname(full_name); @@ -1470,14 +1574,13 @@ static bool create_fielddef( json_name = strviewdup( ctx, google_protobuf_FieldDescriptorProto_json_name(field_proto)); } else { - json_name = makejsonname(shortname, ctx->alloc); + json_name = makejsonname(ctx, shortname); } field_number = google_protobuf_FieldDescriptorProto_number(field_proto); if (field_number == 0 || field_number > UPB_MAX_FIELDNUMBER) { - upb_status_seterrf(ctx->status, "invalid field number (%u)", field_number); - return false; + symtab_errf(ctx, "invalid field number (%u)", field_number); } if (m) { @@ -1490,19 +1593,15 @@ static bool create_fielddef( f->is_extension_ = false; if (upb_strtable_lookup(&m->ntof, shortname, NULL)) { - upb_status_seterrf(ctx->status, "duplicate field name (%s)", shortname); - return false; + symtab_errf(ctx, "duplicate field name (%s)", shortname); } if (upb_strtable_lookup(&m->ntof, json_name, NULL)) { - upb_status_seterrf(ctx->status, "duplicate json_name (%s)", json_name); - return false; + symtab_errf(ctx, "duplicate json_name (%s)", json_name); } if (upb_inttable_lookup(&m->itof, field_number, NULL)) { - upb_status_seterrf(ctx->status, "duplicate field number (%u)", - field_number); - return false; + symtab_errf(ctx, "duplicate field number (%u)", field_number); } field_v = pack_def(f, UPB_DEFTYPE_FIELD); @@ -1536,7 +1635,7 @@ static bool create_fielddef( /* extension field. */ f = (upb_fielddef*)&ctx->file->exts[ctx->file->ext_count++]; f->is_extension_ = true; - CHK_OOM(symtab_add(ctx, full_name, pack_def(f, UPB_DEFTYPE_FIELD))); + symtab_add(ctx, full_name, pack_def(f, UPB_DEFTYPE_FIELD)); } f->full_name = full_name; @@ -1555,9 +1654,7 @@ static bool create_fielddef( f->sub.unresolved = field_proto; if (f->label_ == UPB_LABEL_REQUIRED && f->file->syntax == UPB_SYNTAX_PROTO3) { - upb_status_seterrf(ctx->status, "proto3 fields cannot be required (%s)", - f->full_name); - return false; + symtab_errf(ctx, "proto3 fields cannot be required (%s)", f->full_name); } if (google_protobuf_FieldDescriptorProto_has_oneof_index(field_proto)) { @@ -1567,23 +1664,17 @@ static bool create_fielddef( upb_value v = upb_value_constptr(f); if (upb_fielddef_label(f) != UPB_LABEL_OPTIONAL) { - upb_status_seterrf(ctx->status, - "fields in oneof must have OPTIONAL label (%s)", - f->full_name); - return false; + symtab_errf(ctx, "fields in oneof must have OPTIONAL label (%s)", + f->full_name); } if (!m) { - upb_status_seterrf(ctx->status, - "oneof_index provided for extension field (%s)", - f->full_name); - return false; + symtab_errf(ctx, "oneof_index provided for extension field (%s)", + f->full_name); } if (oneof_index >= m->oneof_count) { - upb_status_seterrf(ctx->status, "oneof_index out of range (%s)", - f->full_name); - return false; + symtab_errf(ctx, "oneof_index out of range (%s)", f->full_name); } oneof = (upb_oneofdef*)&m->oneofs[oneof_index]; @@ -1593,15 +1684,13 @@ static bool create_fielddef( if (f->proto3_optional_) { oneof->synthetic = true; } - CHK(upb_inttable_insert2(&oneof->itof, f->number_, v, alloc)); - CHK(upb_strtable_insert3(&oneof->ntof, name.data, name.size, v, alloc)); + CHK_OOM(upb_inttable_insert2(&oneof->itof, f->number_, v, alloc)); + CHK_OOM(upb_strtable_insert3(&oneof->ntof, name.data, name.size, v, alloc)); } else { f->oneof = NULL; if (f->proto3_optional_) { - upb_status_seterrf(ctx->status, - "field with proto3_optional was not in a oneof (%s)", - f->full_name); - return false; + symtab_errf(ctx, "field with proto3_optional was not in a oneof (%s)", + f->full_name); } } @@ -1621,12 +1710,10 @@ static bool create_fielddef( } else { f->lazy_ = false; } - - return true; } -static bool create_enumdef( - const symtab_addctx *ctx, const char *prefix, +static void create_enumdef( + symtab_addctx *ctx, const char *prefix, const google_protobuf_EnumDescriptorProto *enum_proto) { upb_enumdef *e; const google_protobuf_EnumValueDescriptorProto *const *values; @@ -1634,25 +1721,22 @@ static bool create_enumdef( size_t i, n; name = google_protobuf_EnumDescriptorProto_name(enum_proto); - CHK(upb_isident(name, false, ctx->status)); + check_ident(ctx, name, false); e = (upb_enumdef*)&ctx->file->enums[ctx->file->enum_count++]; e->full_name = makefullname(ctx, prefix, name); - CHK_OOM(symtab_add(ctx, e->full_name, pack_def(e, UPB_DEFTYPE_ENUM))); + symtab_add(ctx, e->full_name, pack_def(e, UPB_DEFTYPE_ENUM)); - CHK_OOM(upb_strtable_init2(&e->ntoi, UPB_CTYPE_INT32, ctx->alloc)); + values = google_protobuf_EnumDescriptorProto_value(enum_proto, &n); + CHK_OOM(upb_strtable_init2(&e->ntoi, UPB_CTYPE_INT32, n, ctx->alloc)); CHK_OOM(upb_inttable_init2(&e->iton, UPB_CTYPE_CSTR, ctx->alloc)); e->file = ctx->file; e->defaultval = 0; - values = google_protobuf_EnumDescriptorProto_value(enum_proto, &n); - if (n == 0) { - upb_status_seterrf(ctx->status, - "enums must contain at least one value (%s)", - e->full_name); - return false; + symtab_errf(ctx, "enums must contain at least one value (%s)", + e->full_name); } for (i = 0; i < n; i++) { @@ -1663,15 +1747,12 @@ static bool create_enumdef( upb_value v = upb_value_int32(num); if (i == 0 && e->file->syntax == UPB_SYNTAX_PROTO3 && num != 0) { - upb_status_seterrf(ctx->status, - "for proto3, the first enum value must be zero (%s)", - e->full_name); - return false; + symtab_errf(ctx, "for proto3, the first enum value must be zero (%s)", + e->full_name); } if (upb_strtable_lookup(&e->ntoi, name2, NULL)) { - upb_status_seterrf(ctx->status, "duplicate enum label '%s'", name2); - return false; + symtab_errf(ctx, "duplicate enum label '%s'", name2); } CHK_OOM(name2) @@ -1685,11 +1766,9 @@ static bool create_enumdef( } upb_inttable_compact2(&e->iton, ctx->alloc); - - return true; } -static bool create_msgdef(symtab_addctx *ctx, const char *prefix, +static void create_msgdef(symtab_addctx *ctx, const char *prefix, const google_protobuf_DescriptorProto *msg_proto) { upb_msgdef *m; const google_protobuf_MessageOptions *options; @@ -1697,18 +1776,22 @@ static bool create_msgdef(symtab_addctx *ctx, const char *prefix, const google_protobuf_FieldDescriptorProto *const *fields; const google_protobuf_EnumDescriptorProto *const *enums; const google_protobuf_DescriptorProto *const *msgs; - size_t i, n; + size_t i, n_oneof, n_field, n; upb_strview name; name = google_protobuf_DescriptorProto_name(msg_proto); - CHK(upb_isident(name, false, ctx->status)); + check_ident(ctx, name, false); m = (upb_msgdef*)&ctx->file->msgs[ctx->file->msg_count++]; m->full_name = makefullname(ctx, prefix, name); - CHK_OOM(symtab_add(ctx, m->full_name, pack_def(m, UPB_DEFTYPE_MSG))); + symtab_add(ctx, m->full_name, pack_def(m, UPB_DEFTYPE_MSG)); + + oneofs = google_protobuf_DescriptorProto_oneof_decl(msg_proto, &n_oneof); + fields = google_protobuf_DescriptorProto_field(msg_proto, &n_field); CHK_OOM(upb_inttable_init2(&m->itof, UPB_CTYPE_CONSTPTR, ctx->alloc)); - CHK_OOM(upb_strtable_init2(&m->ntof, UPB_CTYPE_CONSTPTR, ctx->alloc)); + CHK_OOM(upb_strtable_init2(&m->ntof, UPB_CTYPE_CONSTPTR, n_oneof + n_field, + ctx->alloc)); m->file = ctx->file; m->map_entry = false; @@ -1724,26 +1807,24 @@ static bool create_msgdef(symtab_addctx *ctx, const char *prefix, ctx->layouts++; } else { /* Allocate now (to allow cross-linking), populate later. */ - m->layout = upb_malloc(ctx->alloc, - sizeof(*m->layout) + sizeof(_upb_fasttable_entry)); + m->layout = symtab_alloc( + ctx, sizeof(*m->layout) + sizeof(_upb_fasttable_entry)); } - oneofs = google_protobuf_DescriptorProto_oneof_decl(msg_proto, &n); m->oneof_count = 0; - m->oneofs = upb_malloc(ctx->alloc, sizeof(*m->oneofs) * n); - for (i = 0; i < n; i++) { - CHK(create_oneofdef(ctx, m, oneofs[i])); + m->oneofs = symtab_alloc(ctx, sizeof(*m->oneofs) * n_oneof); + for (i = 0; i < n_oneof; i++) { + create_oneofdef(ctx, m, oneofs[i]); } - fields = google_protobuf_DescriptorProto_field(msg_proto, &n); m->field_count = 0; - m->fields = upb_malloc(ctx->alloc, sizeof(*m->fields) * n); - for (i = 0; i < n; i++) { - CHK(create_fielddef(ctx, m->full_name, m, fields[i])); + m->fields = symtab_alloc(ctx, sizeof(*m->fields) * n_field); + for (i = 0; i < n_field; i++) { + create_fielddef(ctx, m->full_name, m, fields[i]); } - CHK(assign_msg_indices(m, ctx->status)); - CHK(finalize_oneofs(ctx, m)); + assign_msg_indices(ctx, m); + finalize_oneofs(ctx, m); assign_msg_wellknowntype(m); upb_inttable_compact2(&m->itof, ctx->alloc); @@ -1751,93 +1832,78 @@ static bool create_msgdef(symtab_addctx *ctx, const char *prefix, enums = google_protobuf_DescriptorProto_enum_type(msg_proto, &n); for (i = 0; i < n; i++) { - CHK(create_enumdef(ctx, m->full_name, enums[i])); + create_enumdef(ctx, m->full_name, enums[i]); } msgs = google_protobuf_DescriptorProto_nested_type(msg_proto, &n); for (i = 0; i < n; i++) { - CHK(create_msgdef(ctx, m->full_name, msgs[i])); + create_msgdef(ctx, m->full_name, msgs[i]); } - - return true; } -typedef struct { - int msg_count; - int enum_count; - int ext_count; -} decl_counts; - static void count_types_in_msg(const google_protobuf_DescriptorProto *msg_proto, - decl_counts *counts) { + upb_filedef *file) { const google_protobuf_DescriptorProto *const *msgs; size_t i, n; - counts->msg_count++; + file->msg_count++; msgs = google_protobuf_DescriptorProto_nested_type(msg_proto, &n); for (i = 0; i < n; i++) { - count_types_in_msg(msgs[i], counts); + count_types_in_msg(msgs[i], file); } google_protobuf_DescriptorProto_enum_type(msg_proto, &n); - counts->enum_count += n; + file->enum_count += n; google_protobuf_DescriptorProto_extension(msg_proto, &n); - counts->ext_count += n; + file->ext_count += n; } static void count_types_in_file( const google_protobuf_FileDescriptorProto *file_proto, - decl_counts *counts) { + upb_filedef *file) { const google_protobuf_DescriptorProto *const *msgs; size_t i, n; msgs = google_protobuf_FileDescriptorProto_message_type(file_proto, &n); for (i = 0; i < n; i++) { - count_types_in_msg(msgs[i], counts); + count_types_in_msg(msgs[i], file); } google_protobuf_FileDescriptorProto_enum_type(file_proto, &n); - counts->enum_count += n; + file->enum_count += n; google_protobuf_FileDescriptorProto_extension(file_proto, &n); - counts->ext_count += n; + file->ext_count += n; } -static bool resolve_fielddef(const symtab_addctx *ctx, const char *prefix, +static void resolve_fielddef(symtab_addctx *ctx, const char *prefix, upb_fielddef *f) { upb_strview name; const google_protobuf_FieldDescriptorProto *field_proto = f->sub.unresolved; if (f->is_extension_) { if (!google_protobuf_FieldDescriptorProto_has_extendee(field_proto)) { - upb_status_seterrf(ctx->status, - "extension for field '%s' had no extendee", - f->full_name); - return false; + symtab_errf(ctx, "extension for field '%s' had no extendee", + f->full_name); } name = google_protobuf_FieldDescriptorProto_extendee(field_proto); f->msgdef = symtab_resolve(ctx, f, prefix, name, UPB_DEFTYPE_MSG); - CHK(f->msgdef); } if ((upb_fielddef_issubmsg(f) || f->type_ == UPB_DESCRIPTOR_TYPE_ENUM) && !google_protobuf_FieldDescriptorProto_has_type_name(field_proto)) { - upb_status_seterrf(ctx->status, "field '%s' is missing type name", - f->full_name); - return false; + symtab_errf(ctx, "field '%s' is missing type name", f->full_name); } name = google_protobuf_FieldDescriptorProto_type_name(field_proto); if (upb_fielddef_issubmsg(f)) { f->sub.msgdef = symtab_resolve(ctx, f, prefix, name, UPB_DEFTYPE_MSG); - CHK(f->sub.msgdef); } else if (f->type_ == UPB_DESCRIPTOR_TYPE_ENUM) { f->sub.enumdef = symtab_resolve(ctx, f, prefix, name, UPB_DEFTYPE_ENUM); - CHK(f->sub.enumdef); } /* Have to delay resolving of the default value until now because of the enum @@ -1847,54 +1913,36 @@ static bool resolve_fielddef(const symtab_addctx *ctx, const char *prefix, google_protobuf_FieldDescriptorProto_default_value(field_proto); if (f->file->syntax == UPB_SYNTAX_PROTO3) { - upb_status_seterrf(ctx->status, - "proto3 fields cannot have explicit defaults (%s)", - f->full_name); - return false; + symtab_errf(ctx, "proto3 fields cannot have explicit defaults (%s)", + f->full_name); } if (upb_fielddef_issubmsg(f)) { - upb_status_seterrf(ctx->status, - "message fields cannot have explicit defaults (%s)", - f->full_name); - return false; + symtab_errf(ctx, "message fields cannot have explicit defaults (%s)", + f->full_name); } - if (!parse_default(ctx, defaultval.data, defaultval.size, f)) { - upb_status_seterrf(ctx->status, - "couldn't parse default '" UPB_STRVIEW_FORMAT - "' for field (%s)", - UPB_STRVIEW_ARGS(defaultval), f->full_name); - return false; - } + parse_default(ctx, defaultval.data, defaultval.size, f); } else { set_default_default(ctx, f); } - - return true; } -static bool build_filedef( +static void build_filedef( symtab_addctx *ctx, upb_filedef *file, const google_protobuf_FileDescriptorProto *file_proto) { - upb_alloc *alloc = ctx->alloc; const google_protobuf_FileOptions *file_options_proto; const google_protobuf_DescriptorProto *const *msgs; const google_protobuf_EnumDescriptorProto *const *enums; const google_protobuf_FieldDescriptorProto *const *exts; const upb_strview* strs; size_t i, n; - decl_counts counts = {0, 0, 0}; - count_types_in_file(file_proto, &counts); + count_types_in_file(file_proto, file); - file->msgs = upb_malloc(alloc, sizeof(*file->msgs) * counts.msg_count); - file->enums = upb_malloc(alloc, sizeof(*file->enums) * counts.enum_count); - file->exts = upb_malloc(alloc, sizeof(*file->exts) * counts.ext_count); - - CHK_OOM(counts.msg_count == 0 || file->msgs); - CHK_OOM(counts.enum_count == 0 || file->enums); - CHK_OOM(counts.ext_count == 0 || file->exts); + file->msgs = symtab_alloc(ctx, sizeof(*file->msgs) * file->msg_count); + file->enums = symtab_alloc(ctx, sizeof(*file->enums) * file->enum_count); + file->exts = symtab_alloc(ctx, sizeof(*file->exts) * file->ext_count); /* We increment these as defs are added. */ file->msg_count = 0; @@ -1902,8 +1950,7 @@ static bool build_filedef( file->ext_count = 0; if (!google_protobuf_FileDescriptorProto_has_name(file_proto)) { - upb_status_seterrmsg(ctx->status, "File has no name"); - return false; + symtab_errf(ctx, "File has no name"); } file->name = @@ -1914,7 +1961,7 @@ static bool build_filedef( if (google_protobuf_FileDescriptorProto_has_package(file_proto)) { upb_strview package = google_protobuf_FileDescriptorProto_package(file_proto); - CHK(upb_isident(package, true, ctx->status)); + check_ident(ctx, package, true); file->package = strviewdup(ctx, package); } else { file->package = NULL; @@ -1929,9 +1976,8 @@ static bool build_filedef( } else if (streql_view(syntax, "proto3")) { file->syntax = UPB_SYNTAX_PROTO3; } else { - upb_status_seterrf(ctx->status, "Invalid syntax '" UPB_STRVIEW_FORMAT "'", - UPB_STRVIEW_ARGS(syntax)); - return false; + symtab_errf(ctx, "Invalid syntax '" UPB_STRVIEW_FORMAT "'", + UPB_STRVIEW_ARGS(syntax)); } } else { file->syntax = UPB_SYNTAX_PROTO2; @@ -1953,19 +1999,17 @@ static bool build_filedef( /* Verify dependencies. */ strs = google_protobuf_FileDescriptorProto_dependency(file_proto, &n); - file->deps = upb_malloc(alloc, sizeof(*file->deps) * n) ; - CHK_OOM(n == 0 || file->deps); + file->deps = symtab_alloc(ctx, sizeof(*file->deps) * n); for (i = 0; i < n; i++) { upb_strview dep_name = strs[i]; upb_value v; if (!upb_strtable_lookup2(&ctx->symtab->files, dep_name.data, dep_name.size, &v)) { - upb_status_seterrf(ctx->status, - "Depends on file '" UPB_STRVIEW_FORMAT - "', but it has not been loaded", - UPB_STRVIEW_ARGS(dep_name)); - return false; + symtab_errf(ctx, + "Depends on file '" UPB_STRVIEW_FORMAT + "', but it has not been loaded", + UPB_STRVIEW_ARGS(dep_name)); } file->deps[i] = upb_value_getconstptr(v); } @@ -1973,194 +2017,98 @@ static bool build_filedef( /* Create messages. */ msgs = google_protobuf_FileDescriptorProto_message_type(file_proto, &n); for (i = 0; i < n; i++) { - CHK(create_msgdef(ctx, file->package, msgs[i])); + create_msgdef(ctx, file->package, msgs[i]); } /* Create enums. */ enums = google_protobuf_FileDescriptorProto_enum_type(file_proto, &n); for (i = 0; i < n; i++) { - CHK(create_enumdef(ctx, file->package, enums[i])); + create_enumdef(ctx, file->package, enums[i]); } /* Create extensions. */ exts = google_protobuf_FileDescriptorProto_extension(file_proto, &n); - file->exts = upb_malloc(alloc, sizeof(*file->exts) * n); - CHK_OOM(n == 0 || file->exts); + file->exts = symtab_alloc(ctx, sizeof(*file->exts) * n); for (i = 0; i < n; i++) { - CHK(create_fielddef(ctx, file->package, NULL, exts[i])); + create_fielddef(ctx, file->package, NULL, exts[i]); } /* Now that all names are in the table, build layouts and resolve refs. */ for (i = 0; i < (size_t)file->ext_count; i++) { - CHK(resolve_fielddef(ctx, file->package, (upb_fielddef*)&file->exts[i])); + resolve_fielddef(ctx, file->package, (upb_fielddef*)&file->exts[i]); } for (i = 0; i < (size_t)file->msg_count; i++) { const upb_msgdef *m = &file->msgs[i]; int j; for (j = 0; j < m->field_count; j++) { - CHK(resolve_fielddef(ctx, m->full_name, (upb_fielddef*)&m->fields[j])); + resolve_fielddef(ctx, m->full_name, (upb_fielddef*)&m->fields[j]); } } if (!ctx->layouts) { for (i = 0; i < (size_t)file->msg_count; i++) { const upb_msgdef *m = &file->msgs[i]; - make_layout(ctx->symtab, m); + make_layout(ctx, m); } } +} - return true; - } - -static bool upb_symtab_addtotabs(upb_symtab *s, symtab_addctx *ctx) { - const upb_filedef *file = ctx->file; +static void remove_filedef(upb_symtab *s, upb_filedef *file) { upb_alloc *alloc = upb_arena_alloc(s->arena); - upb_strtable_iter iter; - - CHK_OOM(upb_strtable_insert3(&s->files, file->name, strlen(file->name), - upb_value_constptr(file), alloc)); - - upb_strtable_begin(&iter, ctx->addtab); - for (; !upb_strtable_done(&iter); upb_strtable_next(&iter)) { - upb_strview key = upb_strtable_iter_key(&iter); - upb_value value = upb_strtable_iter_value(&iter); - CHK_OOM(upb_strtable_insert3(&s->syms, key.data, key.size, value, alloc)); + int i; + for (i = 0; i < file->msg_count; i++) { + const char *name = file->msgs[i].full_name; + upb_strtable_remove3(&s->syms, name, strlen(name), NULL, alloc); } - - return true; -} - -/* upb_filedef ****************************************************************/ - -const char *upb_filedef_name(const upb_filedef *f) { - return f->name; -} - -const char *upb_filedef_package(const upb_filedef *f) { - return f->package; -} - -const char *upb_filedef_phpprefix(const upb_filedef *f) { - return f->phpprefix; -} - -const char *upb_filedef_phpnamespace(const upb_filedef *f) { - return f->phpnamespace; -} - -upb_syntax_t upb_filedef_syntax(const upb_filedef *f) { - return f->syntax; -} - -int upb_filedef_msgcount(const upb_filedef *f) { - return f->msg_count; -} - -int upb_filedef_depcount(const upb_filedef *f) { - return f->dep_count; -} - -int upb_filedef_enumcount(const upb_filedef *f) { - return f->enum_count; -} - -const upb_filedef *upb_filedef_dep(const upb_filedef *f, int i) { - return i < 0 || i >= f->dep_count ? NULL : f->deps[i]; -} - -const upb_msgdef *upb_filedef_msg(const upb_filedef *f, int i) { - return i < 0 || i >= f->msg_count ? NULL : &f->msgs[i]; -} - -const upb_enumdef *upb_filedef_enum(const upb_filedef *f, int i) { - return i < 0 || i >= f->enum_count ? NULL : &f->enums[i]; -} - -void upb_symtab_free(upb_symtab *s) { - upb_arena_free(s->arena); - upb_gfree(s); -} - -upb_symtab *upb_symtab_new(void) { - upb_symtab *s = upb_gmalloc(sizeof(*s)); - upb_alloc *alloc; - - if (!s) { - return NULL; + for (i = 0; i < file->enum_count; i++) { + const char *name = file->enums[i].full_name; + upb_strtable_remove3(&s->syms, name, strlen(name), NULL, alloc); } - - s->arena = upb_arena_new(); - alloc = upb_arena_alloc(s->arena); - - if (!upb_strtable_init2(&s->syms, UPB_CTYPE_CONSTPTR, alloc) || - !upb_strtable_init2(&s->files, UPB_CTYPE_CONSTPTR, alloc)) { - upb_arena_free(s->arena); - upb_gfree(s); - s = NULL; + for (i = 0; i < file->ext_count; i++) { + const char *name = file->exts[i].full_name; + upb_strtable_remove3(&s->syms, name, strlen(name), NULL, alloc); } - return s; -} - -const upb_msgdef *upb_symtab_lookupmsg(const upb_symtab *s, const char *sym) { - upb_value v; - return upb_strtable_lookup(&s->syms, sym, &v) ? - unpack_def(v, UPB_DEFTYPE_MSG) : NULL; -} - -const upb_msgdef *upb_symtab_lookupmsg2(const upb_symtab *s, const char *sym, - size_t len) { - upb_value v; - return upb_strtable_lookup2(&s->syms, sym, len, &v) ? - unpack_def(v, UPB_DEFTYPE_MSG) : NULL; -} - -const upb_enumdef *upb_symtab_lookupenum(const upb_symtab *s, const char *sym) { - upb_value v; - return upb_strtable_lookup(&s->syms, sym, &v) ? - unpack_def(v, UPB_DEFTYPE_ENUM) : NULL; -} - -const upb_filedef *upb_symtab_lookupfile(const upb_symtab *s, const char *name) { - upb_value v; - return upb_strtable_lookup(&s->files, name, &v) ? upb_value_getconstptr(v) - : NULL; -} - -const upb_filedef *upb_symtab_lookupfile2( - const upb_symtab *s, const char *name, size_t len) { - upb_value v; - return upb_strtable_lookup2(&s->files, name, len, &v) ? - upb_value_getconstptr(v) : NULL; -} - -int upb_symtab_filecount(const upb_symtab *s) { - return (int)upb_strtable_count(&s->files); } static const upb_filedef *_upb_symtab_addfile( upb_symtab *s, const google_protobuf_FileDescriptorProto *file_proto, const upb_msglayout **layouts, upb_status *status) { - upb_arena *tmparena = upb_arena_new(); - upb_strtable addtab; - upb_alloc *alloc = upb_arena_alloc(s->arena); - upb_filedef *file = upb_malloc(alloc, sizeof(*file)); - bool ok; + upb_arena *file_arena = upb_arena_new(); + upb_filedef *file; symtab_addctx ctx; + if (!file_arena) return NULL; + + file = upb_arena_malloc(file_arena, sizeof(*file)); + if (!file) goto done; + ctx.file = file; ctx.symtab = s; - ctx.alloc = alloc; - ctx.tmp = upb_arena_alloc(tmparena); - ctx.addtab = &addtab; + ctx.file_arena = file_arena; + ctx.alloc = upb_arena_alloc(file_arena); ctx.layouts = layouts; ctx.status = status; - ok = file && upb_strtable_init2(&addtab, UPB_CTYPE_CONSTPTR, ctx.tmp) && - build_filedef(&ctx, file, file_proto) && upb_symtab_addtotabs(s, &ctx); + file->msg_count = 0; + file->enum_count = 0; + file->ext_count = 0; + + if (UPB_UNLIKELY(setjmp(ctx.err))) { + UPB_ASSERT(!upb_ok(status)); + remove_filedef(s, file); + file = NULL; + } else { + build_filedef(&ctx, file, file_proto); + upb_strtable_insert3(&s->files, file->name, strlen(file->name), + upb_value_constptr(file), ctx.alloc); + UPB_ASSERT(upb_ok(status)); + upb_arena_fuse(s->arena, file_arena); + } - upb_arena_free(tmparena); - return ok ? file : NULL; +done: + upb_arena_free(file_arena); + return file; } const upb_filedef *upb_symtab_addfile( @@ -2194,6 +2142,7 @@ bool _upb_symtab_loaddefinit(upb_symtab *s, const upb_def_init *init) { file = google_protobuf_FileDescriptorProto_parse( init->descriptor.data, init->descriptor.size, arena); + s->bytes_loaded += init->descriptor.size; if (!file) { upb_status_seterrf( @@ -2216,5 +2165,8 @@ err: return false; } -#undef CHK +size_t _upb_symtab_bytesloaded(const upb_symtab *s) { + return s->bytes_loaded; +} + #undef CHK_OOM diff --git a/upb/def.h b/upb/def.h index d88c2872ea..6b73ef5be3 100644 --- a/upb/def.h +++ b/upb/def.h @@ -293,6 +293,7 @@ int upb_symtab_filecount(const upb_symtab *s); const upb_filedef *upb_symtab_addfile( upb_symtab *s, const google_protobuf_FileDescriptorProto *file, upb_status *status); +size_t _upb_symtab_bytesloaded(const upb_symtab *s); /* For generated code only: loads a generated descriptor. */ typedef struct upb_def_init { diff --git a/upb/json/parser.rl b/upb/json/parser.rl index ed7eead408..a7d75ff158 100644 --- a/upb/json/parser.rl +++ b/upb/json/parser.rl @@ -2869,7 +2869,7 @@ static upb_json_parsermethod *parsermethod_new(upb_json_codecache *c, upb_byteshandler_setstring(&m->input_handler_, parse, m); upb_byteshandler_setendstr(&m->input_handler_, end, m); - upb_strtable_init2(&m->name_table, UPB_CTYPE_CONSTPTR, alloc); + upb_strtable_init2(&m->name_table, UPB_CTYPE_CONSTPTR, 4, alloc); /* Build name_table */ diff --git a/upb/json_decode.c b/upb/json_decode.c index fb53c3de35..44ee56a857 100644 --- a/upb/json_decode.c +++ b/upb/json_decode.c @@ -396,6 +396,8 @@ static void jsondec_resize(jsondec *d, char **buf, char **end, char **buf_end) { size_t size = UPB_MAX(8, 2 * oldsize); *buf = upb_arena_realloc(d->arena, *buf, len, size); + if (!*buf) jsondec_err(d, "Out of memory"); + *end = *buf + len; *buf_end = *buf + size; } diff --git a/upb/json_encode.h b/upb/json_encode.h index 8396fbf80f..0ad4163cdf 100644 --- a/upb/json_encode.h +++ b/upb/json_encode.h @@ -9,7 +9,7 @@ extern "C" { #endif enum { - /* When set, emits 0/default values. TOOD(haberman): proto3 only? */ + /* When set, emits 0/default values. TODO(haberman): proto3 only? */ UPB_JSONENC_EMITDEFAULTS = 1, /* When set, use normal (snake_caes) field names instead of JSON (camelCase) diff --git a/upb/msg.c b/upb/msg.c index 5f6d1ce170..1dbb8b0fab 100644 --- a/upb/msg.c +++ b/upb/msg.c @@ -11,7 +11,7 @@ static const size_t overhead = sizeof(upb_msg_internal); static const upb_msg_internal *upb_msg_getinternal_const(const upb_msg *msg) { ptrdiff_t size = sizeof(upb_msg_internal); - return UPB_PTR_AT(msg, -size, upb_msg_internal); + return (upb_msg_internal*)((char*)msg - size); } upb_msg *_upb_msg_new(const upb_msglayout *l, upb_arena *a) { @@ -111,13 +111,16 @@ void *_upb_array_resize_fallback(upb_array **arr_ptr, size_t size, bool _upb_array_append_fallback(upb_array **arr_ptr, const void *value, int elem_size_lg2, upb_arena *arena) { upb_array *arr = getorcreate_array(arr_ptr, elem_size_lg2, arena); - size_t elem = arr->len; - char *data; + if (!arr) return false; - if (!arr || !_upb_array_resize(arr, elem + 1, arena)) return false; + size_t elems = arr->len; - data = _upb_array_ptr(arr); - memcpy(data + (elem << elem_size_lg2), value, 1 << elem_size_lg2); + if (!_upb_array_resize(arr, elems + 1, arena)) { + return false; + } + + char *data = _upb_array_ptr(arr); + memcpy(data + (elems << elem_size_lg2), value, 1 << elem_size_lg2); return true; } @@ -130,7 +133,7 @@ upb_map *_upb_map_new(upb_arena *a, size_t key_size, size_t value_size) { return NULL; } - upb_strtable_init2(&map->table, UPB_CTYPE_INT32, upb_arena_alloc(a)); + upb_strtable_init2(&map->table, UPB_CTYPE_INT32, 4, upb_arena_alloc(a)); map->key_size = key_size; map->val_size = value_size; diff --git a/upb/msg.h b/upb/msg.h index 587e895db9..36c57b32cd 100644 --- a/upb/msg.h +++ b/upb/msg.h @@ -110,7 +110,8 @@ UPB_INLINE upb_msg *_upb_msg_new_inl(const upb_msglayout *l, upb_arena *a) { upb_msg *_upb_msg_new(const upb_msglayout *l, upb_arena *a); UPB_INLINE upb_msg_internal *upb_msg_getinternal(upb_msg *msg) { - return UPB_PTR_AT(msg, -sizeof(upb_msg_internal), upb_msg_internal); + ptrdiff_t size = sizeof(upb_msg_internal); + return (upb_msg_internal*)((char*)msg - size); } /* Clears the given message. */ @@ -205,9 +206,11 @@ typedef struct { uintptr_t data; /* Tagged ptr: low 3 bits of ptr are lg2(elem size). */ size_t len; /* Measured in elements. */ size_t size; /* Measured in elements. */ + uint64_t junk; } upb_array; UPB_INLINE const void *_upb_array_constptr(const upb_array *arr) { + UPB_ASSERT((arr->data & 7) <= 4); return (void*)(arr->data & ~(uintptr_t)7); } @@ -222,15 +225,17 @@ UPB_INLINE void *_upb_array_ptr(upb_array *arr) { UPB_INLINE uintptr_t _upb_tag_arrptr(void* ptr, int elem_size_lg2) { UPB_ASSERT(elem_size_lg2 <= 4); + UPB_ASSERT(((uintptr_t)ptr & 7) == 0); return (uintptr_t)ptr | (unsigned)elem_size_lg2; } UPB_INLINE upb_array *_upb_array_new(upb_arena *a, size_t init_size, int elem_size_lg2) { + const size_t arr_size = UPB_ALIGN_UP(sizeof(upb_array), 8); const size_t bytes = sizeof(upb_array) + (init_size << elem_size_lg2); upb_array *arr = (upb_array*)upb_arena_malloc(a, bytes); if (!arr) return NULL; - arr->data = _upb_tag_arrptr(arr + 1, elem_size_lg2); + arr->data = _upb_tag_arrptr(UPB_PTR_AT(arr, arr_size, void), elem_size_lg2); arr->len = 0; arr->size = init_size; return arr; @@ -403,17 +408,17 @@ UPB_INLINE void _upb_map_fromkey(upb_strview key, void* out, size_t size) { } } -UPB_INLINE upb_value _upb_map_tovalue(const void *val, size_t size, - upb_arena *a) { - upb_value ret = {0}; +UPB_INLINE bool _upb_map_tovalue(const void *val, size_t size, upb_value *msgval, + upb_arena *a) { if (size == UPB_MAPTYPE_STRING) { upb_strview *strp = (upb_strview*)upb_arena_malloc(a, sizeof(*strp)); + if (!strp) return false; *strp = *(upb_strview*)val; - ret = upb_value_ptr(strp); + *msgval = upb_value_ptr(strp); } else { - memcpy(&ret, val, size); + memcpy(msgval, val, size); } - return ret; + return true; } UPB_INLINE void _upb_map_fromvalue(upb_value val, void* out, size_t size) { @@ -455,7 +460,8 @@ UPB_INLINE void* _upb_map_next(const upb_map *map, size_t *iter) { UPB_INLINE bool _upb_map_set(upb_map *map, const void *key, size_t key_size, void *val, size_t val_size, upb_arena *arena) { upb_strview strkey = _upb_map_tokey(key, key_size); - upb_value tabval = _upb_map_tovalue(val, val_size, arena); + upb_value tabval = {0}; + if (!_upb_map_tovalue(val, val_size, &tabval, arena)) return false; upb_alloc *a = upb_arena_alloc(arena); /* TODO(haberman): add overwrite operation to minimize number of lookups. */ diff --git a/upb/port.c b/upb/port.c deleted file mode 100644 index 9ecf135167..0000000000 --- a/upb/port.c +++ /dev/null @@ -1,26 +0,0 @@ - -#include "upb/port_def.inc" - -#ifdef UPB_MSVC_VSNPRINTF -/* Visual C++ earlier than 2015 doesn't have standard C99 snprintf and - * vsnprintf. To support them, missing functions are manually implemented - * using the existing secure functions. */ -int msvc_vsnprintf(char* s, size_t n, const char* format, va_list arg) { - if (!s) { - return _vscprintf(format, arg); - } - int ret = _vsnprintf_s(s, n, _TRUNCATE, format, arg); - if (ret < 0) { - ret = _vscprintf(format, arg); - } - return ret; -} - -int msvc_snprintf(char* s, size_t n, const char* format, ...) { - va_list arg; - va_start(arg, format); - int ret = msvc_vsnprintf(s, n, format, arg); - va_end(arg); - return ret; -} -#endif diff --git a/upb/reflection.c b/upb/reflection.c index 738bb186ec..fd8623253a 100644 --- a/upb/reflection.c +++ b/upb/reflection.c @@ -96,20 +96,17 @@ bool upb_msg_has(const upb_msg *msg, const upb_fielddef *f) { const upb_fielddef *upb_msg_whichoneof(const upb_msg *msg, const upb_oneofdef *o) { - upb_oneof_iter i; - const upb_fielddef *f; - const upb_msglayout_field *field; - const upb_msgdef *m = upb_oneofdef_containingtype(o); - uint32_t oneof_case; - - /* This is far from optimal. */ - upb_oneof_begin(&i, o); - if (upb_oneof_done(&i)) return false; - f = upb_oneof_iter_field(&i); - field = upb_fielddef_layout(f); - oneof_case = _upb_getoneofcase_field(msg, field); - - return oneof_case ? upb_msgdef_itof(m, oneof_case) : NULL; + const upb_fielddef *f = upb_oneofdef_field(o, 0); + if (upb_oneofdef_issynthetic(o)) { + UPB_ASSERT(upb_oneofdef_fieldcount(o) == 1); + return upb_msg_has(msg, f) ? f : NULL; + } else { + const upb_msglayout_field *field = upb_fielddef_layout(f); + uint32_t oneof_case = _upb_getoneofcase_field(msg, field); + f = oneof_case ? upb_oneofdef_itof(o, oneof_case) : NULL; + UPB_ASSERT((f != NULL) == (oneof_case != 0)); + return f; + } } upb_msgval upb_msg_get(const upb_msg *msg, const upb_fielddef *f) { diff --git a/upb/table.c b/upb/table.c index 34a20530d8..4b1cf9b466 100644 --- a/upb/table.c +++ b/upb/table.c @@ -4,10 +4,12 @@ ** Implementation is heavily inspired by Lua's ltable.c. */ -#include "upb/table.int.h" - #include +#include "third_party/wyhash/wyhash.h" +#include "upb/table.int.h" + +/* Must be last. */ #include "upb/port_def.inc" #define UPB_MAXARRSIZE 16 /* 64k. */ @@ -87,11 +89,7 @@ static upb_tabent *mutable_entries(upb_table *t) { } static bool isfull(upb_table *t) { - if (upb_table_size(t) == 0) { - return true; - } else { - return ((double)(t->count + 1) / upb_table_size(t)) > MAX_LOAD; - } + return t->count == t->max_count; } static bool init(upb_table *t, uint8_t size_lg2, upb_alloc *a) { @@ -100,6 +98,7 @@ static bool init(upb_table *t, uint8_t size_lg2, upb_alloc *a) { t->count = 0; t->size_lg2 = size_lg2; t->mask = upb_table_size(t) ? upb_table_size(t) - 1 : 0; + t->max_count = upb_table_size(t) * MAX_LOAD; bytes = upb_table_size(t) * sizeof(upb_tabent); if (bytes > 0) { t->entries = upb_malloc(a, bytes); @@ -115,9 +114,17 @@ static void uninit(upb_table *t, upb_alloc *a) { upb_free(a, mutable_entries(t)); } -static upb_tabent *emptyent(upb_table *t) { - upb_tabent *e = mutable_entries(t) + upb_table_size(t); - while (1) { if (upb_tabent_isempty(--e)) return e; UPB_ASSERT(e > t->entries); } +static upb_tabent *emptyent(upb_table *t, upb_tabent *e) { + upb_tabent *begin = mutable_entries(t); + upb_tabent *end = begin + upb_table_size(t); + for (e = e + 1; e < end; e++) { + if (upb_tabent_isempty(e)) return e; + } + for (e = begin; e < end; e++) { + if (upb_tabent_isempty(e)) return e; + } + UPB_ASSERT(false); + return NULL; } static upb_tabent *getentry_mutable(upb_table *t, uint32_t hash) { @@ -173,7 +180,7 @@ static void insert(upb_table *t, lookupkey_t key, upb_tabkey tabkey, our_e->next = NULL; } else { /* Collision. */ - upb_tabent *new_e = emptyent(t); + upb_tabent *new_e = emptyent(t, mainpos_e); /* Head of collider's chain. */ upb_tabent *chain = getentry_mutable(t, hashfunc(mainpos_e->key)); if (chain == mainpos_e) { @@ -268,10 +275,14 @@ static upb_tabkey strcopy(lookupkey_t k2, upb_alloc *a) { return (uintptr_t)str; } +static uint32_t table_hash(const char *p, size_t n) { + return wyhash(p, n, 0, _wyp); +} + static uint32_t strhash(upb_tabkey key) { uint32_t len; char *str = upb_tabstr(key, &len); - return upb_murmur_hash2(str, len, 0); + return table_hash(str, len); } static bool streql(upb_tabkey k1, lookupkey_t k2) { @@ -280,9 +291,14 @@ static bool streql(upb_tabkey k1, lookupkey_t k2) { return len == k2.str.len && (len == 0 || memcmp(str, k2.str.str, len) == 0); } -bool upb_strtable_init2(upb_strtable *t, upb_ctype_t ctype, upb_alloc *a) { +bool upb_strtable_init2(upb_strtable *t, upb_ctype_t ctype, + size_t expected_size, upb_alloc *a) { UPB_UNUSED(ctype); /* TODO(haberman): rm */ - return init(&t->t, 2, a); + // Multiply by approximate reciprocal of MAX_LOAD (0.85), with pow2 denominator. + size_t need_entries = (expected_size + 1) * 1204 / 1024; + UPB_ASSERT(need_entries >= expected_size * 0.85); + int size_lg2 = _upb_lg2ceil(need_entries); + return init(&t->t, size_lg2, a); } void upb_strtable_clear(upb_strtable *t) { @@ -333,20 +349,20 @@ bool upb_strtable_insert3(upb_strtable *t, const char *k, size_t len, tabkey = strcopy(key, a); if (tabkey == 0) return false; - hash = upb_murmur_hash2(key.str.str, key.str.len, 0); + hash = table_hash(key.str.str, key.str.len); insert(&t->t, key, tabkey, v, hash, &strhash, &streql); return true; } bool upb_strtable_lookup2(const upb_strtable *t, const char *key, size_t len, upb_value *v) { - uint32_t hash = upb_murmur_hash2(key, len, 0); + uint32_t hash = table_hash(key, len); return lookup(&t->t, strkey2(key, len), v, hash, &streql); } bool upb_strtable_remove3(upb_strtable *t, const char *key, size_t len, upb_value *val, upb_alloc *alloc) { - uint32_t hash = upb_murmur_hash2(key, len, 0); + uint32_t hash = table_hash(key, len); upb_tabkey tabkey; if (rm(&t->t, strkey2(key, len), val, &tabkey, hash, &streql)) { if (alloc) { @@ -699,182 +715,3 @@ bool upb_inttable_iter_isequal(const upb_inttable_iter *i1, return i1->t == i2->t && i1->index == i2->index && i1->array_part == i2->array_part; } - -#if defined(UPB_UNALIGNED_READS_OK) || defined(__s390x__) -/* ----------------------------------------------------------------------------- - * MurmurHash2, by Austin Appleby (released as public domain). - * Reformatted and C99-ified by Joshua Haberman. - * Note - This code makes a few assumptions about how your machine behaves - - * 1. We can read a 4-byte value from any address without crashing - * 2. sizeof(int) == 4 (in upb this limitation is removed by using uint32_t - * And it has a few limitations - - * 1. It will not work incrementally. - * 2. It will not produce the same results on little-endian and big-endian - * machines. */ -uint32_t upb_murmur_hash2(const void *key, size_t len, uint32_t seed) { - /* 'm' and 'r' are mixing constants generated offline. - * They're not really 'magic', they just happen to work well. */ - const uint32_t m = 0x5bd1e995; - const int32_t r = 24; - - /* Initialize the hash to a 'random' value */ - uint32_t h = seed ^ len; - - /* Mix 4 bytes at a time into the hash */ - const uint8_t * data = (const uint8_t *)key; - while(len >= 4) { - uint32_t k; - memcpy(&k, data, sizeof(k)); - - k *= m; - k ^= k >> r; - k *= m; - - h *= m; - h ^= k; - - data += 4; - len -= 4; - } - - /* Handle the last few bytes of the input array */ - switch(len) { - case 3: h ^= data[2] << 16; - case 2: h ^= data[1] << 8; - case 1: h ^= data[0]; h *= m; - }; - - /* Do a few final mixes of the hash to ensure the last few - * bytes are well-incorporated. */ - h ^= h >> 13; - h *= m; - h ^= h >> 15; - - return h; -} - -#else /* !UPB_UNALIGNED_READS_OK */ - -/* ----------------------------------------------------------------------------- - * MurmurHashAligned2, by Austin Appleby - * Same algorithm as MurmurHash2, but only does aligned reads - should be safer - * on certain platforms. - * Performance will be lower than MurmurHash2 */ - -#define MIX(h,k,m) { k *= m; k ^= k >> r; k *= m; h *= m; h ^= k; } - -uint32_t upb_murmur_hash2(const void * key, size_t len, uint32_t seed) { - const uint32_t m = 0x5bd1e995; - const int32_t r = 24; - const uint8_t * data = (const uint8_t *)key; - uint32_t h = (uint32_t)(seed ^ len); - uint8_t align = (uintptr_t)data & 3; - - if(align && (len >= 4)) { - /* Pre-load the temp registers */ - uint32_t t = 0, d = 0; - int32_t sl; - int32_t sr; - - switch(align) { - case 1: t |= data[2] << 16; /* fallthrough */ - case 2: t |= data[1] << 8; /* fallthrough */ - case 3: t |= data[0]; - } - - t <<= (8 * align); - - data += 4-align; - len -= 4-align; - - sl = 8 * (4-align); - sr = 8 * align; - - /* Mix */ - - while(len >= 4) { - uint32_t k; - - d = *(uint32_t *)data; - t = (t >> sr) | (d << sl); - - k = t; - - MIX(h,k,m); - - t = d; - - data += 4; - len -= 4; - } - - /* Handle leftover data in temp registers */ - - d = 0; - - if(len >= align) { - uint32_t k; - - switch(align) { - case 3: d |= data[2] << 16; /* fallthrough */ - case 2: d |= data[1] << 8; /* fallthrough */ - case 1: d |= data[0]; /* fallthrough */ - } - - k = (t >> sr) | (d << sl); - MIX(h,k,m); - - data += align; - len -= align; - - /* ---------- - * Handle tail bytes */ - - switch(len) { - case 3: h ^= data[2] << 16; /* fallthrough */ - case 2: h ^= data[1] << 8; /* fallthrough */ - case 1: h ^= data[0]; h *= m; /* fallthrough */ - }; - } else { - switch(len) { - case 3: d |= data[2] << 16; /* fallthrough */ - case 2: d |= data[1] << 8; /* fallthrough */ - case 1: d |= data[0]; /* fallthrough */ - case 0: h ^= (t >> sr) | (d << sl); h *= m; - } - } - - h ^= h >> 13; - h *= m; - h ^= h >> 15; - - return h; - } else { - while(len >= 4) { - uint32_t k = *(uint32_t *)data; - - MIX(h,k,m); - - data += 4; - len -= 4; - } - - /* ---------- - * Handle tail bytes */ - - switch(len) { - case 3: h ^= data[2] << 16; /* fallthrough */ - case 2: h ^= data[1] << 8; /* fallthrough */ - case 1: h ^= data[0]; h *= m; - }; - - h ^= h >> 13; - h *= m; - h ^= h >> 15; - - return h; - } -} -#undef MIX - -#endif /* UPB_UNALIGNED_READS_OK */ diff --git a/upb/table.int.h b/upb/table.int.h index 600637eef2..ab67cc34d6 100644 --- a/upb/table.int.h +++ b/upb/table.int.h @@ -13,7 +13,7 @@ ** store pointers or integers of at least 32 bits (upb isn't really useful on ** systems where sizeof(void*) < 4). ** -** The table must be homogenous (all values of the same type). In debug +** The table must be homogeneous (all values of the same type). In debug ** mode, we check this on insert and lookup. */ @@ -171,7 +171,8 @@ typedef struct _upb_tabent { typedef struct { size_t count; /* Number of entries in the hash part. */ - size_t mask; /* Mask to turn hash value -> bucket. */ + uint32_t mask; /* Mask to turn hash value -> bucket. */ + uint32_t max_count; /* Max count before we hit our load limit. */ uint8_t size_lg2; /* Size of the hashtable part is 2^size_lg2 entries. */ /* Hash table entries. @@ -230,7 +231,8 @@ UPB_INLINE bool upb_arrhas(upb_tabval key) { /* Initialize and uninitialize a table, respectively. If memory allocation * failed, false is returned that the table is uninitialized. */ bool upb_inttable_init2(upb_inttable *table, upb_ctype_t ctype, upb_alloc *a); -bool upb_strtable_init2(upb_strtable *table, upb_ctype_t ctype, upb_alloc *a); +bool upb_strtable_init2(upb_strtable *table, upb_ctype_t ctype, + size_t expected_size, upb_alloc *a); void upb_inttable_uninit2(upb_inttable *table, upb_alloc *a); void upb_strtable_uninit2(upb_strtable *table, upb_alloc *a); @@ -239,7 +241,7 @@ UPB_INLINE bool upb_inttable_init(upb_inttable *table, upb_ctype_t ctype) { } UPB_INLINE bool upb_strtable_init(upb_strtable *table, upb_ctype_t ctype) { - return upb_strtable_init2(table, ctype, &upb_alloc_global); + return upb_strtable_init2(table, ctype, 4, &upb_alloc_global); } UPB_INLINE void upb_inttable_uninit(upb_inttable *table) { diff --git a/upb/upb.h b/upb/upb.h index 54ca705b8b..7e7309dc45 100644 --- a/upb/upb.h +++ b/upb/upb.h @@ -313,6 +313,17 @@ UPB_INLINE uint64_t _upb_be_swap64(uint64_t val) { } } +UPB_INLINE int _upb_lg2ceil(int x) { + if (x <= 1) return 0; +#ifdef __GNUC__ + return 32 - __builtin_clz(x - 1); +#else + int lg2 = 0; + while (1 << lg2 < x) lg2++; + return lg2; +#endif +} + #include "upb/port_undef.inc" #ifdef __cplusplus