Merge branch 'master' into size-benchmarks

pull/13171/head
Joshua Haberman 4 years ago
commit 7b8ae7ec4e
  1. 20
      .bazelrc
  2. 30
      BUILD
  3. 9
      WORKSPACE
  4. 6
      bazel/build_defs.bzl
  5. 31
      bazel/upb_proto_library.bzl
  6. 6
      benchmarks/BUILD
  7. 29
      benchmarks/BUILD.googleapis
  8. 70
      benchmarks/benchmark.cc
  9. 2
      cmake/BUILD
  10. 5
      cmake/CMakeLists.txt
  11. 7
      cmake/make_cmakelists.py
  12. 4
      cmake/upb/json/parser.c
  13. 1
      examples/bazel/BUILD
  14. 31
      kokoro/ubuntu/build.sh
  15. 1
      tests/BUILD
  16. 5
      tests/bindings/lua/BUILD
  17. 2
      tests/bindings/lua/main.c
  18. 16
      tests/bindings/lua/test_upb.lua
  19. 57
      tests/pb/test_decoder.cc
  20. 25
      tests/test_cpp.cc
  21. 18
      tests/test_generated_code.c
  22. 10
      tests/test_table.cc
  23. 25
      third_party/wyhash/LICENSE
  24. 145
      third_party/wyhash/wyhash.h
  25. 229
      upb/decode.c
  26. 921
      upb/def.c
  27. 1
      upb/def.h
  28. 4
      upb/json/parser.rl
  29. 30
      upb/json/printer.c
  30. 13
      upb/json_decode.c
  31. 10
      upb/json_encode.c
  32. 2
      upb/json_encode.h
  33. 17
      upb/msg.c
  34. 24
      upb/msg.h
  35. 4
      upb/pb/textprinter.c
  36. 4
      upb/pb/varint.int.h
  37. 26
      upb/port.c
  38. 75
      upb/port_def.inc
  39. 7
      upb/port_undef.inc
  40. 25
      upb/reflection.c
  41. 229
      upb/table.c
  42. 10
      upb/table.int.h
  43. 2
      upb/text_encode.c
  44. 4
      upb/upb.c
  45. 11
      upb/upb.h
  46. 8
      upb/upb.hpp

@ -0,0 +1,20 @@
# Use our custom-configured c++ toolchain.
build:m32 --copt=-m32 --linkopt=-m32
build:asan --copt=-fsanitize=address --linkopt=-fsanitize=address
build:valgrind --run_under='valgrind --leak-check=full --error-exitcode=1'
build:ubsan --copt=-fsanitize=undefined --linkopt=-fsanitize=undefined --action_env=UBSAN_OPTIONS=halt_on_error=1:print_stacktrace=1
# Workaround for the fact that Bazel links with $CC, not $CXX
# https://github.com/bazelbuild/bazel/issues/11122#issuecomment-613746748
build:ubsan --copt=-fno-sanitize=function --copt=-fno-sanitize=vptr
build:Werror --copt=-Werror
build:Werror --per_file_copt=json/parser@-Wno-error
build:Werror --per_file_copt=com_google_protobuf@-Wno-error
# GCC's -fanalyzer, a deeper static analysis than normal warnings.
build:analyzer --copt=-fanalyzer --copt=-Werror
build:analyzer --per_file_copt=json/parser@-fno-analyzer
build:analyzer --per_file_copt=com_google_protobuf@-fno-analyzer
build:analyzer --per_file_copt=com_github_google_benchmark@-fno-analyzer

30
BUILD

@ -6,6 +6,7 @@ load(
load(
"//bazel:upb_proto_library.bzl",
"upb_proto_library",
"upb_proto_library_copts",
"upb_proto_reflection_library",
)
@ -35,13 +36,17 @@ config_setting(
constraint_values = ["@bazel_tools//platforms:windows"],
)
upb_proto_library_copts(
name = "upb_proto_library_copts__for_generated_code_only_do_not_use",
copts = UPB_DEFAULT_COPTS,
visibility = ["//visibility:public"],
)
# Public C/C++ libraries #######################################################
cc_library(
name = "port",
srcs = [
"upb/port.c",
],
copts = UPB_DEFAULT_COPTS,
textual_hdrs = [
"upb/port_def.inc",
"upb/port_undef.inc",
@ -60,6 +65,7 @@ cc_library(
"upb/table.int.h",
"upb/upb.c",
"upb/upb.int.h",
"third_party/wyhash/wyhash.h",
],
hdrs = [
"upb/decode.h",
@ -250,6 +256,7 @@ genrule(
outs = ["upb/json/parser.c"],
cmd = "$(location @ragel//:ragelc) -C -o upb/json/parser.c $< && mv upb/json/parser.c $@",
tools = ["@ragel//:ragelc"],
visibility = ["//cmake:__pkg__"],
)
# Amalgamation #################################################################
@ -279,7 +286,10 @@ upb_amalgamation(
cc_library(
name = "amalgamation",
srcs = ["upb.c"],
srcs = [
"upb.c",
"third_party/wyhash/wyhash.h",
],
hdrs = ["upb.h"],
copts = UPB_DEFAULT_COPTS,
)
@ -304,9 +314,13 @@ upb_amalgamation(
cc_library(
name = "php_amalgamation",
srcs = ["php-upb.c"],
srcs = [
"php-upb.c",
"third_party/wyhash/wyhash.h",
],
hdrs = ["php-upb.h"],
copts = UPB_DEFAULT_COPTS,
)
upb_amalgamation(
@ -328,7 +342,10 @@ upb_amalgamation(
cc_library(
name = "ruby_amalgamation",
srcs = ["ruby-upb.c"],
srcs = [
"ruby-upb.c",
"third_party/wyhash/wyhash.h",
],
hdrs = ["ruby-upb.h"],
copts = UPB_DEFAULT_COPTS,
)
@ -360,6 +377,7 @@ filegroup(
"upbc/**/*",
"upb/**/*",
"tests/**/*",
"third_party/**/*",
]),
visibility = ["//cmake:__pkg__"],
)

@ -1,6 +1,7 @@
workspace(name = "upb")
load("@bazel_tools//tools/build_defs/repo:http.bzl", "http_archive")
load("@bazel_tools//tools/build_defs/repo:git.bzl", "new_git_repository")
load("//bazel:workspace_deps.bzl", "upb_deps")
upb_deps()
@ -37,3 +38,11 @@ http_archive(
strip_prefix = "benchmark-16703ff83c1ae6d53e5155df3bb3ab0bc96083be",
sha256 = "59f918c8ccd4d74b6ac43484467b500f1d64b40cc1010daa055375b322a43ba3",
)
new_git_repository(
name = "com_google_googleapis",
remote = "https://github.com/googleapis/googleapis.git",
branch = "master",
build_file = "//benchmarks:BUILD.googleapis",
patch_cmds = ["find google -type f -name BUILD.bazel -delete"],
)

@ -18,9 +18,15 @@ UPB_DEFAULT_COPTS = select({
"//:windows": [],
"//conditions:default": [
# copybara:strip_for_google3_begin
"-std=c99",
"-pedantic",
"-Werror=pedantic",
"-Wall",
"-Wstrict-prototypes",
# GCC (at least) emits spurious warnings for this that cannot be fixed
# without introducing redundant initialization (with runtime cost):
# https://gcc.gnu.org/bugzilla/show_bug.cgi?id=80635
#"-Wno-maybe-uninitialized",
# copybara:strip_end
],
})

@ -52,7 +52,7 @@ def _filter_none(elems):
out.append(elem)
return out
def _cc_library_func(ctx, name, hdrs, srcs, dep_ccinfos):
def _cc_library_func(ctx, name, hdrs, srcs, copts, dep_ccinfos):
"""Like cc_library(), but callable from rules.
Args:
@ -88,6 +88,7 @@ def _cc_library_func(ctx, name, hdrs, srcs, dep_ccinfos):
name = name,
srcs = srcs,
public_hdrs = hdrs,
user_compile_flags = copts,
compilation_contexts = compilation_contexts,
**blaze_only_args
)
@ -106,6 +107,22 @@ def _cc_library_func(ctx, name, hdrs, srcs, dep_ccinfos):
linking_context = linking_context,
)
# Dummy rule to expose select() copts to aspects ##############################
_UpbProtoLibraryCopts = provider(
fields = {
"copts": "copts for upb_proto_library()",
},
)
def upb_proto_library_copts_impl(ctx):
    """Implementation of the upb_proto_library_copts rule.

    Args:
      ctx: the rule context; only `ctx.attr.copts` is read.

    Returns:
      A _UpbProtoLibraryCopts provider carrying the rule's `copts` attribute.
    """
    copts = ctx.attr.copts
    return _UpbProtoLibraryCopts(copts = copts)
upb_proto_library_copts = rule(
implementation = upb_proto_library_copts_impl,
attrs = {"copts": attr.string_list(default = [])},
)
# upb_proto_library / upb_proto_reflection_library shared code #################
GeneratedSrcsInfo = provider(
@ -166,10 +183,7 @@ def _upb_proto_rule_impl(ctx):
fail("proto_library rule must generate _UpbWrappedCcInfo or " +
"_UpbDefsWrappedCcInfo (aspect should have handled this).")
if type(cc_info.linking_context.libraries_to_link) == "list":
lib = cc_info.linking_context.libraries_to_link[0]
else:
lib = cc_info.linking_context.libraries_to_link.to_list()[0]
lib = cc_info.linking_context.linker_inputs.to_list()[0].libraries[0]
files = _filter_none([
lib.static_library,
lib.pic_static_library,
@ -199,6 +213,7 @@ def _upb_proto_aspect_impl(target, ctx, cc_provider, file_provider):
name = ctx.rule.attr.name + ctx.attr._ext,
hdrs = files.hdrs,
srcs = files.srcs,
copts = ctx.attr._copts[_UpbProtoLibraryCopts].copts,
dep_ccinfos = dep_ccinfos,
)
return [cc_provider(cc_info = cc_info), file_provider(srcs = files)]
@ -222,6 +237,9 @@ def _maybe_add(d):
_upb_proto_library_aspect = aspect(
attrs = _maybe_add({
"_copts": attr.label(
default = "//:upb_proto_library_copts__for_generated_code_only_do_not_use",
),
"_upbc": attr.label(
executable = True,
cfg = "host",
@ -267,6 +285,9 @@ upb_proto_library = rule(
_upb_proto_reflection_library_aspect = aspect(
attrs = _maybe_add({
"_copts": attr.label(
default = "//:upb_proto_library_copts__for_generated_code_only_do_not_use",
),
"_upbc": attr.label(
executable = True,
cfg = "host",

@ -21,6 +21,11 @@ upb_proto_reflection_library(
deps = [":benchmark_descriptor_proto"],
)
upb_proto_reflection_library(
name = "ads_upb_proto_reflection",
deps = ["@com_google_googleapis//:ads_proto"],
)
cc_proto_library(
name = "benchmark_descriptor_cc_proto",
deps = [":benchmark_descriptor_proto"],
@ -45,6 +50,7 @@ cc_binary(
":benchmark_descriptor_sv_cc_proto",
":benchmark_descriptor_upb_proto",
":benchmark_descriptor_upb_proto_reflection",
":ads_upb_proto_reflection",
"//:descriptor_upb_proto",
"//:reflection",
"@com_github_google_benchmark//:benchmark_main",

@ -0,0 +1,29 @@
load(
"@rules_proto//proto:defs.bzl",
"proto_library",
)
proto_library(
name = "ads_proto",
srcs = glob([
"google/ads/googleads/v5/**/*.proto",
"google/api/**/*.proto",
"google/rpc/**/*.proto",
"google/longrunning/**/*.proto",
"google/logging/**/*.proto",
]),
#srcs = ["google/ads/googleads/v5/services/google_ads_service.proto"],
visibility = ["//visibility:public"],
deps = [
"@com_google_protobuf//:any_proto",
"@com_google_protobuf//:empty_proto",
"@com_google_protobuf//:descriptor_proto",
"@com_google_protobuf//:field_mask_proto",
"@com_google_protobuf//:duration_proto",
"@com_google_protobuf//:timestamp_proto",
"@com_google_protobuf//:struct_proto",
"@com_google_protobuf//:api_proto",
"@com_google_protobuf//:type_proto",
"@com_google_protobuf//:wrappers_proto",
],
)

@ -9,9 +9,10 @@
#include "benchmarks/descriptor_sv.pb.h"
// For benchmarks of building descriptors.
#include "google/protobuf/descriptor.upb.h"
#include "google/ads/googleads/v5/services/google_ads_service.upbdefs.h"
#include "google/protobuf/descriptor.pb.h"
#include "google/protobuf/descriptor.upb.h"
#include "google/protobuf/descriptor.upbdefs.h"
#include "upb/def.hpp"
upb_strview descriptor = benchmarks_descriptor_proto_upbdefinit.descriptor;
@ -20,6 +21,16 @@ namespace protobuf = ::google::protobuf;
/* A buffer big enough to parse descriptor.proto without going to heap. */
char buf[65535];
void CollectFileDescriptors(const upb_def_init* file,
std::vector<upb_strview>& serialized_files,
std::unordered_set<const upb_def_init*>& seen) {
if (!seen.insert(file).second) return;
for (upb_def_init **deps = file->deps; *deps; deps++) {
CollectFileDescriptors(*deps, serialized_files, seen);
}
serialized_files.push_back(file->descriptor);
}
static void BM_ArenaOneAlloc(benchmark::State& state) {
for (auto _ : state) {
upb_arena* arena = upb_arena_new();
@ -39,22 +50,28 @@ static void BM_ArenaInitialBlockOneAlloc(benchmark::State& state) {
BENCHMARK(BM_ArenaInitialBlockOneAlloc);
static void BM_LoadDescriptor_Upb(benchmark::State& state) {
size_t bytes_per_iter = 0;
for (auto _ : state) {
upb::SymbolTable symtab;
upb::Arena arena;
google_protobuf_FileDescriptorProto* file_proto =
google_protobuf_FileDescriptorProto_parse(descriptor.data,
descriptor.size, arena.ptr());
upb::FileDefPtr file_def = symtab.AddFile(file_proto, NULL);
if (!file_def) {
printf("Failed to add file.\n");
exit(1);
}
google_protobuf_DescriptorProto_getmsgdef(symtab.ptr());
bytes_per_iter = _upb_symtab_bytesloaded(symtab.ptr());
}
state.SetBytesProcessed(state.iterations() * descriptor.size);
state.SetBytesProcessed(state.iterations() * bytes_per_iter);
}
BENCHMARK(BM_LoadDescriptor_Upb);
// Benchmark: on every iteration, create an empty upb symbol table and request
// the SearchGoogleAdsRequest message definition from it. Throughput is
// reported using the byte count that _upb_symtab_bytesloaded() returns for
// the last iteration's symbol table.
static void BM_LoadAdsDescriptor_Upb(benchmark::State& state) {
  size_t loaded_bytes = 0;

  for (auto _ : state) {
    upb::SymbolTable symtab;
    google_ads_googleads_v5_services_SearchGoogleAdsRequest_getmsgdef(
        symtab.ptr());
    // Sampled before `symtab` is destroyed at the end of the iteration.
    loaded_bytes = _upb_symtab_bytesloaded(symtab.ptr());
  }

  state.SetBytesProcessed(state.iterations() * loaded_bytes);
}
BENCHMARK(BM_LoadAdsDescriptor_Upb);
static void BM_LoadDescriptor_Proto2(benchmark::State& state) {
for (auto _ : state) {
protobuf::Arena arena;
@ -73,6 +90,35 @@ static void BM_LoadDescriptor_Proto2(benchmark::State& state) {
}
BENCHMARK(BM_LoadDescriptor_Proto2);
// Benchmark: build a fresh protobuf DescriptorPool from the serialized
// descriptors of google_ads_service.proto and its transitive dependencies.
// The list of serialized files is gathered once, outside the timed loop;
// each iteration parses every file and adds it to a new pool. Throughput is
// reported as the total serialized size processed per iteration.
static void BM_LoadAdsDescriptor_Proto2(benchmark::State& state) {
  extern upb_def_init
      google_ads_googleads_v5_services_google_ads_service_proto_upbdefinit;

  // Dependency-ordered list of serialized FileDescriptorProtos.
  std::vector<upb_strview> serialized_files;
  std::unordered_set<const upb_def_init*> seen_files;
  CollectFileDescriptors(
      &google_ads_googleads_v5_services_google_ads_service_proto_upbdefinit,
      serialized_files, seen_files);

  size_t total_bytes = 0;
  for (auto _ : state) {
    total_bytes = 0;
    protobuf::Arena arena;
    protobuf::DescriptorPool pool;

    for (auto file : serialized_files) {
      protobuf::StringPiece input(file.data, file.size);
      auto proto =
          protobuf::Arena::CreateMessage<protobuf::FileDescriptorProto>(
              &arena);

      // Parsing is attempted first; the file is only added to the pool when
      // parsing succeeded.
      if (!proto->ParseFrom<protobuf::MessageLite::kMergePartial>(input) ||
          pool.BuildFile(*proto) == nullptr) {
        printf("Failed to add file.\n");
        exit(1);
      }
      total_bytes += input.size();
    }
  }

  state.SetBytesProcessed(state.iterations() * total_bytes);
}
BENCHMARK(BM_LoadAdsDescriptor_Proto2);
static void BM_Parse_Upb_FileDesc_WithArena(benchmark::State& state) {
size_t bytes = 0;
for (auto _ : state) {

@ -37,7 +37,7 @@ genrule(
genrule(
name = "copy_json_ragel",
srcs = ["upb/json/parser.c"],
srcs = ["//:upb/json/parser.c"],
outs = ["generated-in/upb/json/parser.c"],
cmd = "cp $< $@",
)

@ -12,6 +12,7 @@ cmake_minimum_required (VERSION 3.0)
cmake_policy(SET CMP0048 NEW)
project(upb)
set(CMAKE_C_STANDARD 99)
# Prevent CMake from setting -rdynamic on Linux (!!).
@ -60,8 +61,7 @@ endif()
enable_testing()
add_library(port
../upb/port.c)
add_library(port INTERFACE)
add_library(upb
../upb/decode.c
../upb/encode.c
@ -71,6 +71,7 @@ add_library(upb
../upb/table.int.h
../upb/upb.c
../upb/upb.int.h
../third_party/wyhash/wyhash.h
../upb/decode.h
../upb/encode.h
../upb/upb.h

@ -129,6 +129,9 @@ class BuildFileFunctions(object):
def upb_proto_library(self, **kwargs):
pass
def upb_proto_library_copts(self, **kwargs):
pass
def upb_proto_reflection_library(self, **kwargs):
pass
@ -166,6 +169,7 @@ class WorkspaceFileFunctions(object):
def workspace(self, **kwargs):
self.converter.prelude += "project(%s)\n" % (kwargs["name"])
self.converter.prelude += "set(CMAKE_C_STANDARD 99)\n"
def http_archive(self, **kwargs):
pass
@ -173,6 +177,9 @@ class WorkspaceFileFunctions(object):
def git_repository(self, **kwargs):
pass
def new_git_repository(self, **kwargs):
pass
def bazel_version_repository(self, **kwargs):
pass

@ -953,7 +953,7 @@ static bool parse_number_from_buffer(upb_json_parser *p, const char *buf,
upb_fieldtype_t type = upb_fielddef_type(p->top->f);
double val;
double dummy;
double inf = UPB_INFINITY;
double inf = INFINITY;
errno = 0;
@ -3306,7 +3306,7 @@ static upb_json_parsermethod *parsermethod_new(upb_json_codecache *c,
upb_byteshandler_setstring(&m->input_handler_, parse, m);
upb_byteshandler_setendstr(&m->input_handler_, end, m);
upb_strtable_init2(&m->name_table, UPB_CTYPE_CONSTPTR, alloc);
upb_strtable_init2(&m->name_table, UPB_CTYPE_CONSTPTR, 4, alloc);
/* Build name_table */

@ -17,4 +17,5 @@ cc_binary(
name = "test_binary",
srcs = ["test_binary.c"],
deps = [":foo_upbproto"],
copts = ["-std=c99"],
)

@ -11,29 +11,36 @@ fi
echo PATH=$PATH
ls -l `which cmake`
cmake --version
echo CC=${CC:-cc}
${CC:-cc} --version
# Log the bazel path and version.
which bazel
bazel version
cd $(dirname $0)/../..
bazel test --test_output=errors ...
if [[ $(uname) = "Linux" ]]; then
# Verify the ASAN build. Have to exclude test_conformance_upb as protobuf
# currently leaks memory in the conformance test runner.
bazel test --copt=-fsanitize=address --linkopt=-fsanitize=address --test_output=errors ...
if which gcc; then
gcc --version
CC=gcc bazel test --test_output=errors ...
CC=gcc bazel test -c opt --test_output=errors ...
# TODO: work through these errors and enable this.
# if gcc -fanalyzer -x c /dev/null -c -o /dev/null; then
# CC=gcc bazel test --copt=-fanalyzer --test_output=errors ...
# fi
fi
# Verify the UBSan build. Have to exclude Lua as the version we are using
# fails some UBSan tests.
if which clang; then
CC=clang bazel test --test_output=errors ...
CC=clang bazel test --test_output=errors -c opt ...
# For some reason kokoro doesn't have Clang available right now.
#CC=clang CXX=clang++ bazel test -c dbg --copt=-fsanitize=undefined --copt=-fno-sanitize=function,vptr --linkopt=-fsanitize=undefined --action_env=UBSAN_OPTIONS=halt_on_error=1:print_stacktrace=1 -- :all -:test_lua
if [[ $(uname) = "Linux" ]]; then
CC=clang bazel test --test_output=errors --config=m32 ...
CC=clang bazel test --test_output=errors --config=asan ...
# TODO: update to a newer Lua that hopefully does not trigger UBSAN.
CC=clang bazel test --test_output=errors --config=ubsan ... -- -tests/bindings/lua:test_lua
fi
fi
if which valgrind; then
bazel test --run_under='valgrind --leak-check=full --error-exitcode=1' ... -- -tests:test_conformance_upb -cmake:cmake_build
bazel test --config=valgrind ... -- -tests:test_conformance_upb -cmake:cmake_build
fi

@ -50,6 +50,7 @@ upb_proto_library(
cc_test(
name = "test_generated_code",
srcs = ["test_generated_code.c"],
copts = UPB_DEFAULT_COPTS,
deps = [
":empty_upbdefs_proto",
":test_messages_proto3_proto_upb",

@ -2,6 +2,10 @@ load(
"//upb/bindings/lua:lua_proto_library.bzl",
"lua_proto_library",
)
load(
"//bazel:build_defs.bzl",
"UPB_DEFAULT_COPTS",
)
licenses(["notice"])
@ -20,6 +24,7 @@ cc_test(
"@com_google_protobuf//:conformance_proto",
"@com_google_protobuf//:descriptor_proto",
],
copts = UPB_DEFAULT_COPTS,
linkstatic = 1,
deps = [
"//upb/bindings/lua:lupb",

@ -34,7 +34,7 @@ const char *init =
"upb/bindings/lua/?.lua"
"'";
int main() {
int main(int argc, char **argv) {
int ret = 0;
L = luaL_newstate();
luaL_openlibs(L);

@ -104,7 +104,7 @@ function test_utf8()
upb.decode(test_messages_proto3.TestAllTypesProto3, serialized)
end)
-- TOOD(haberman): should proto3 accessors also check UTF-8 at set time?
-- TODO(haberman): should proto3 accessors also check UTF-8 at set time?
end
function test_string_double_map()
@ -512,6 +512,20 @@ function test_foo()
assert_equal(set.file[1].name, "google/protobuf/descriptor.proto")
end
-- Adding a file must raise an error when one of its messages cannot be
-- accepted (here the second message is named "BC."), and afterwards the
-- first message of that same file ("ABC") must not be resolvable through the
-- symbol table.
function test_descriptor_error()
  local symtab = upb.SymbolTable()

  local file = descriptor.FileDescriptorProto()
  file.name = "test.proto"
  file.message_type[1] = descriptor.DescriptorProto{name = "ABC"}
  file.message_type[2] = descriptor.DescriptorProto{name = "BC."}

  local function add_bad_file()
    symtab:add_file(upb.encode(file))
  end

  assert_error(add_bad_file)
  assert_nil(symtab:lookup_msg("ABC"))
end
function test_gc()
local top = test_messages_proto3.TestAllTypesProto3()
local n = 100

@ -52,17 +52,6 @@
#define PRINT_FAILURE(expr) \
fprintf(stderr, "Assertion failed: %s:%d\n", __FILE__, __LINE__); \
fprintf(stderr, "expr: %s\n", #expr); \
if (testhash) { \
fprintf(stderr, "assertion failed running test %x.\n", testhash); \
if (!filter_hash) { \
fprintf(stderr, \
"Run with the arg %x to run only this test. " \
"(This will also turn on extra debugging output)\n", \
testhash); \
} \
fprintf(stderr, "Failed at %02.2f%% through tests.\n", \
(float)completed * 100 / total); \
}
#define MAX_NESTING 64
@ -113,7 +102,7 @@ using std::string;
void vappendf(string* str, const char *format, va_list args) {
va_list copy;
_upb_va_copy(copy, args);
va_copy(copy, args);
int count = vsnprintf(NULL, 0, format, args);
if (count >= 0)
@ -467,17 +456,6 @@ upb::pb::DecoderPtr CreateDecoder(upb::Arena* arena,
return ret;
}
uint32_t Hash(const string& proto, const string* expected_output, size_t seam1,
size_t seam2, bool may_skip) {
uint32_t hash = upb_murmur_hash2(proto.c_str(), proto.size(), 0);
if (expected_output)
hash = upb_murmur_hash2(expected_output->c_str(), expected_output->size(), hash);
hash = upb_murmur_hash2(&seam1, sizeof(seam1), hash);
hash = upb_murmur_hash2(&seam2, sizeof(seam2), hash);
hash = upb_murmur_hash2(&may_skip, sizeof(may_skip), hash);
return hash;
}
void CheckBytesParsed(upb::pb::DecoderPtr decoder, size_t ofs) {
// We can't have parsed more data than the decoder callback is telling us it
// parsed.
@ -506,13 +484,11 @@ void do_run_decoder(VerboseParserEnvironment* env, upb::pb::DecoderPtr decoder,
env->Reset(proto.c_str(), proto.size(), may_skip, expected_output == NULL);
decoder.Reset();
testhash = Hash(proto, expected_output, i, j, may_skip);
if (filter_hash && testhash != filter_hash) return;
if (test_mode != COUNT_ONLY) {
output.clear();
if (filter_hash) {
fprintf(stderr, "RUNNING TEST CASE, hash=%x\n", testhash);
fprintf(stderr, "RUNNING TEST CASE\n");
fprintf(stderr, "Input (len=%u): ", (unsigned)proto.size());
PrintBinary(proto);
fprintf(stderr, "\n");
@ -571,7 +547,6 @@ void run_decoder(const string& proto, const string* expected_output) {
}
}
}
testhash = 0;
}
const static string thirty_byte_nop = cat(
@ -871,23 +846,17 @@ void test_valid() {
// Empty protobuf where we never call PutString between
// StartString/EndString.
// Randomly generated hash for this test, hope it doesn't conflict with others
// by chance.
const uint32_t emptyhash = 0x5709be8e;
if (!filter_hash || filter_hash == testhash) {
testhash = emptyhash;
upb::Status status;
upb::Arena arena;
upb::Sink sink(global_handlers, &closures[0]);
upb::pb::DecoderPtr decoder =
CreateDecoder(&arena, global_method, sink, &status);
output.clear();
bool ok = upb::PutBuffer(std::string(), decoder.input());
ASSERT(ok);
ASSERT(status.ok());
if (test_mode == ALL_HANDLERS) {
ASSERT(output == string("<\n>\n"));
}
upb::Status status;
upb::Arena arena;
upb::Sink sink(global_handlers, &closures[0]);
upb::pb::DecoderPtr decoder =
CreateDecoder(&arena, global_method, sink, &status);
output.clear();
bool ok = upb::PutBuffer(std::string(), decoder.input());
ASSERT(ok);
ASSERT(status.ok());
if (test_mode == ALL_HANDLERS) {
ASSERT(output == string("<\n>\n"));
}
test_valid_data_for_signed_type(UPB_DESCRIPTOR_TYPE_DOUBLE,

@ -952,6 +952,31 @@ void TestArena() {
}
}
// Exercises upb::InlinedArena<1024>: hands the arena ownership of many heap
// objects while interleaving small raw allocations, performs one large
// allocation, and then checks that every owned object's destructor has run
// once the arena has gone out of scope.
void TestInlinedArena() {
  int pending = 100000;

  // Decrements the counter it points at when destroyed, so the number of
  // destructor calls can be observed through `pending`.
  struct Decrementer {
    Decrementer(int* counter) : p(counter) {}
    ~Decrementer() { (*p)--; }
    int* p;
  };

  {
    upb::InlinedArena<1024> arena;

    int i = 0;
    while (i < pending) {
      arena.Own(new Decrementer(&pending));

      // Intersperse allocation and ensure we can write to it.
      void* raw = upb_arena_malloc(arena.ptr(), sizeof(int));
      int* slot = static_cast<int*>(raw);
      *slot = i;

      i++;
    }

    // Test a large allocation.
    upb_arena_malloc(arena.ptr(), 1000000);
  }

  // One decrement per owned object brings the counter back to zero.
  ASSERT(pending == 0);
}
extern "C" {
int run_tests() {

@ -24,7 +24,7 @@ const int32_t test_int32_2 = -20;
const int32_t test_int32_3 = 30;
const int32_t test_int32_4 = -40;
static void test_scalars() {
static void test_scalars(void) {
upb_arena *arena = upb_arena_new();
protobuf_test_messages_proto3_TestAllTypesProto3 *msg =
protobuf_test_messages_proto3_TestAllTypesProto3_new(arena);
@ -56,9 +56,9 @@ static void test_scalars() {
ASSERT(protobuf_test_messages_proto3_TestAllTypesProto3_optional_uint64(
msg2) == 40);
ASSERT(protobuf_test_messages_proto3_TestAllTypesProto3_optional_float(
msg2) == 50.5);
msg2) - 50.5 < 0.01);
ASSERT(protobuf_test_messages_proto3_TestAllTypesProto3_optional_double(
msg2) == 60.6);
msg2) - 60.6 < 0.01);
ASSERT(protobuf_test_messages_proto3_TestAllTypesProto3_optional_bool(
msg2) == 1);
ASSERT(upb_strview_eql(
@ -117,7 +117,7 @@ static void check_string_map_one_entry(
ASSERT(!const_ent);
}
static void test_string_double_map() {
static void test_string_double_map(void) {
upb_arena *arena = upb_arena_new();
upb_strview serialized;
upb_test_MapTest *msg = upb_test_MapTest_new(arena);
@ -141,7 +141,7 @@ static void test_string_double_map() {
upb_arena_free(arena);
}
static void test_string_map() {
static void test_string_map(void) {
upb_arena *arena = upb_arena_new();
protobuf_test_messages_proto3_TestAllTypesProto3 *msg =
protobuf_test_messages_proto3_TestAllTypesProto3_new(arena);
@ -259,7 +259,7 @@ static void check_int32_map_one_entry(
ASSERT(!const_ent);
}
static void test_int32_map() {
static void test_int32_map(void) {
upb_arena *arena = upb_arena_new();
protobuf_test_messages_proto3_TestAllTypesProto3 *msg =
protobuf_test_messages_proto3_TestAllTypesProto3_new(arena);
@ -328,7 +328,7 @@ static void test_int32_map() {
upb_arena_free(arena);
}
void test_repeated() {
void test_repeated(void) {
upb_arena *arena = upb_arena_new();
protobuf_test_messages_proto3_TestAllTypesProto3 *msg =
protobuf_test_messages_proto3_TestAllTypesProto3_new(arena);
@ -347,7 +347,7 @@ void test_repeated() {
upb_arena_free(arena);
}
void test_null_decode_buf() {
void test_null_decode_buf(void) {
upb_arena *arena = upb_arena_new();
protobuf_test_messages_proto3_TestAllTypesProto3 *msg =
protobuf_test_messages_proto3_TestAllTypesProto3_parse(NULL, 0, arena);
@ -359,7 +359,7 @@ void test_null_decode_buf() {
upb_arena_free(arena);
}
void test_status_truncation() {
void test_status_truncation(void) {
int i, j;
upb_status status;
upb_status status2;

@ -618,6 +618,16 @@ void test_delete() {
upb_inttable_uninit(&t);
}
void test_init() {
for (int i = 0; i < 2048; i++) {
/* Tests that the size calculations in init() (lg2 size for target load)
* work for all expected sizes. */
upb_strtable t;
upb_strtable_init2(&t, UPB_CTYPE_BOOL, i, &upb_alloc_global);
upb_strtable_uninit(&t);
}
}
extern "C" {
int run_tests(int argc, char *argv[]) {

@ -0,0 +1,25 @@
This is free and unencumbered software released into the public domain.
Anyone is free to copy, modify, publish, use, compile, sell, or
distribute this software, either in source code form or as a compiled
binary, for any purpose, commercial or non-commercial, and by any
means.
In jurisdictions that recognize copyright laws, the author or authors
of this software dedicate any and all copyright interest in the
software to the public domain. We make this dedication for the benefit
of the public at large and to the detriment of our heirs and
successors. We intend this dedication to be an overt act of
relinquishment in perpetuity of all present and future rights to this
software under copyright law.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR
OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
OTHER DEALINGS IN THE SOFTWARE.
For more information, please refer to <http://unlicense.org/>

@ -0,0 +1,145 @@
/* Copyright 2020 王一 Wang Yi <godspeed_china@yeah.net>
This is free and unencumbered software released into the public domain. http://unlicense.org/
See github.com/wangyi-fudan/wyhash/ LICENSE
*/
#ifndef wyhash_final_version
#define wyhash_final_version
//defines that change behavior
#ifndef WYHASH_CONDOM
#define WYHASH_CONDOM 1 //0: read 8 bytes before and after boundaries, dangerous but fastest. 1: normal valid behavior 2: extra protection against entropy loss (probability=2^-63), aka. "blind multiplication"
#endif
#define WYHASH_32BIT_MUM 0 //faster on 32 bit system
//includes
#include <stdint.h>
#include <string.h>
#if defined(_MSC_VER) && defined(_M_X64)
#include <intrin.h>
#pragma intrinsic(_umul128)
#endif
#if defined(__GNUC__) || defined(__INTEL_COMPILER) || defined(__clang__)
#define _likely_(x) __builtin_expect(x,1)
#define _unlikely_(x) __builtin_expect(x,0)
#else
#define _likely_(x) (x)
#define _unlikely_(x) (x)
#endif
//mum function
// Swaps the upper and lower 32-bit halves of x (a rotation by 32 bits).
static inline uint64_t _wyrot(uint64_t x) {
  const uint64_t upper_moved_down = x >> 32;
  const uint64_t lower_moved_up = x << 32;
  return upper_moved_down | lower_moved_up;
}
// Multiplies *A by *B and writes the result back through both pointers.
// In every branch except WYHASH_32BIT_MUM the full 128-bit product is
// computed: its low 64 bits go to *A and its high 64 bits to *B. When
// WYHASH_CONDOM > 1 the halves are XORed into the existing *A / *B instead
// of overwriting them. The implementation is selected at compile time.
static inline void _wymum(uint64_t *A, uint64_t *B){
#if(WYHASH_32BIT_MUM)
// Four 32x32->64 partial products, combined with rotations and XORs rather
// than with carries, so this result is not the exact 128-bit product.
uint64_t hh=(*A>>32)*(*B>>32), hl=(*A>>32)*(unsigned)*B, lh=(unsigned)*A*(*B>>32), ll=(uint64_t)(unsigned)*A*(unsigned)*B;
#if(WYHASH_CONDOM>1)
*A^=_wyrot(hl)^hh; *B^=_wyrot(lh)^ll;
#else
*A=_wyrot(hl)^hh; *B=_wyrot(lh)^ll;
#endif
#elif defined(__SIZEOF_INT128__)
// Compiler provides a native 128-bit integer type.
__uint128_t r=*A; r*=*B;
#if(WYHASH_CONDOM>1)
*A^=(uint64_t)r; *B^=(uint64_t)(r>>64);
#else
*A=(uint64_t)r; *B=(uint64_t)(r>>64);
#endif
#elif defined(_MSC_VER) && defined(_M_X64)
// MSVC x64: _umul128 returns the low half and stores the high half through
// its third argument.
#if(WYHASH_CONDOM>1)
uint64_t a, b;
a=_umul128(*A,*B,&b);
*A^=a; *B^=b;
#else
*A=_umul128(*A,*B,B);
#endif
#else
// Portable fallback: schoolbook multiplication on 32-bit halves. `c`
// accumulates the carries out of the low 64 bits.
uint64_t ha=*A>>32, hb=*B>>32, la=(uint32_t)*A, lb=(uint32_t)*B, hi, lo;
uint64_t rh=ha*hb, rm0=ha*lb, rm1=hb*la, rl=la*lb, t=rl+(rm0<<32), c=t<rl;
lo=t+(rm1<<32); c+=lo<t; hi=rh+(rm0>>32)+(rm1>>32)+c;
#if(WYHASH_CONDOM>1)
*A^=lo; *B^=hi;
#else
*A=lo; *B=hi;
#endif
#endif
}
// Runs _wymum on the two operands and returns the XOR of the two values it
// leaves behind.
static inline uint64_t _wymix(uint64_t A, uint64_t B) {
  _wymum(&A, &B);
  const uint64_t folded = A ^ B;
  return folded;
}
//read functions
#ifndef WYHASH_LITTLE_ENDIAN
#if defined(_WIN32) || defined(__LITTLE_ENDIAN__) || (defined(__BYTE_ORDER__) && __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__)
#define WYHASH_LITTLE_ENDIAN 1
#elif defined(__BIG_ENDIAN__) || (defined(__BYTE_ORDER__) && __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__)
#define WYHASH_LITTLE_ENDIAN 0
#endif
#endif
// _wyr8 / _wyr4: load 8 / 4 bytes from p via memcpy (so p need not be
// aligned) and return them as an integer. On little-endian targets the bytes
// are used as-is; the other variants byte-swap the loaded value.
// _wyr4 copies 4 bytes into an `unsigned`, i.e. it relies on `unsigned`
// being at least 4 bytes wide.
// NOTE(review): if the detection above leaves WYHASH_LITTLE_ENDIAN undefined,
// `#if (WYHASH_LITTLE_ENDIAN)` evaluates to false and one of the byte-swapping
// variants is selected; if neither compiler check matches either, no
// _wyr8/_wyr4 is defined at all.
#if (WYHASH_LITTLE_ENDIAN)
static inline uint64_t _wyr8(const uint8_t *p) { uint64_t v; memcpy(&v, p, 8); return v;}
static inline uint64_t _wyr4(const uint8_t *p) { unsigned v; memcpy(&v, p, 4); return v;}
#elif defined(__GNUC__) || defined(__INTEL_COMPILER) || defined(__clang__)
static inline uint64_t _wyr8(const uint8_t *p) { uint64_t v; memcpy(&v, p, 8); return __builtin_bswap64(v);}
static inline uint64_t _wyr4(const uint8_t *p) { unsigned v; memcpy(&v, p, 4); return __builtin_bswap32(v);}
#elif defined(_MSC_VER)
static inline uint64_t _wyr8(const uint8_t *p) { uint64_t v; memcpy(&v, p, 8); return _byteswap_uint64(v);}
static inline uint64_t _wyr4(const uint8_t *p) { unsigned v; memcpy(&v, p, 4); return _byteswap_ulong(v);}
#endif
// Packs a 1-3 byte input into one value: the first byte, the byte at index
// k/2 and the last byte occupy bits 16-23, 8-15 and 0-7 respectively.
// k must be at least 1 (index k-1 is read).
static inline uint64_t _wyr3(const uint8_t *p, unsigned k) {
  const uint64_t first = p[0];
  const uint64_t middle = p[k >> 1];
  const uint64_t last = p[k - 1];
  return (first << 16) | (middle << 8) | last;
}
//wyhash function
// Mixes the final i bytes at p into the hash and returns the result.
// The caller in this file (_wyfinish) only passes i <= 16.
//   p      - start of the remaining input
//   len    - value XORed with secret[1] in the final mix (wyhash passes the
//            total key length)
//   seed   - running hash state
//   secret - secret array; only secret[1] is read here
//   i      - number of remaining bytes
static inline uint64_t _wyfinish16(const uint8_t *p, uint64_t len, uint64_t seed, const uint64_t *secret, uint64_t i){
#if(WYHASH_CONDOM>0)
// Safe path: never reads outside [p, p+i).
uint64_t a, b;
if(_likely_(i<=8)){
// 4..8 bytes: first four and last four bytes (the two reads may overlap).
if(_likely_(i>=4)){ a=_wyr4(p); b=_wyr4(p+i-4); }
// 1..3 bytes: packed by _wyr3.
else if (_likely_(i)){ a=_wyr3(p,i); b=0; }
// 0 bytes: nothing to read.
else a=b=0;
}
// 9..16 bytes: first eight and last eight bytes (the two reads may overlap).
else{ a=_wyr8(p); b=_wyr8(p+i-8); }
return _wymix(secret[1]^len,_wymix(a^secret[1], b^seed));
#else
// WYHASH_CONDOM == 0: always performs two 8-byte reads, at p and at p+i-8,
// and shifts away the excess bits when i < 8. These reads can touch memory
// outside [p, p+i).
#define oneshot_shift ((i<8)*((8-i)<<3))
return _wymix(secret[1]^len,_wymix((_wyr8(p)<<oneshot_shift)^secret[1],(_wyr8(p+i-8)>>oneshot_shift)^seed));
#endif
}
// Consumes the remaining i bytes at p: while more than 16 bytes are left,
// folds the next 16 bytes into the seed; the final (at most 16) bytes are
// handed to _wyfinish16 together with the unchanged `len`.
static inline uint64_t _wyfinish(const uint8_t *p, uint64_t len, uint64_t seed, const uint64_t *secret, uint64_t i){
  while(_unlikely_(i>16)){
    seed=_wymix(_wyr8(p)^secret[1],_wyr8(p+8)^seed);
    p+=16;
    i-=16;
  }
  return _wyfinish16(p,len,seed,secret,i);
}
// Hashes `len` bytes starting at `key`.
//   seed   - caller-chosen seed; XORed with secret[0] before use
//   secret - array of five 64-bit values (indices 0..4 are read)
// Inputs longer than 64 bytes are consumed in 64-byte blocks using two
// independent running states that are XORed together afterwards; whatever
// remains (at most 64 bytes) is finished by _wyfinish.
static inline uint64_t wyhash(const void *key, uint64_t len, uint64_t seed, const uint64_t *secret){
  const uint8_t *cursor=(const uint8_t *)key;
  uint64_t remaining=len;
  seed^=secret[0];
  if(_unlikely_(remaining>64)){
    uint64_t lane=seed;
    while(remaining>64){
      // Both halves of each state update read the state's previous value.
      const uint64_t s0=_wymix(_wyr8(cursor)^secret[1],_wyr8(cursor+8)^seed);
      const uint64_t s1=_wymix(_wyr8(cursor+16)^secret[2],_wyr8(cursor+24)^seed);
      const uint64_t l0=_wymix(_wyr8(cursor+32)^secret[3],_wyr8(cursor+40)^lane);
      const uint64_t l1=_wymix(_wyr8(cursor+48)^secret[4],_wyr8(cursor+56)^lane);
      seed=s0^s1;
      lane=l0^l1;
      cursor+=64;
      remaining-=64;
    }
    seed^=lane;
  }
  return _wyfinish(cursor,len,seed,secret,remaining);
}
//utility functions
// Default secret used by the utility functions below (wyhash64, wyrand).
// Declared `static` because this header defines the object: without internal
// linkage, every C translation unit that includes the header would emit its
// own external definition of `_wyp`, which collides at link time.
static const uint64_t _wyp[5] = {0xa0761d6478bd642full, 0xe7037ed1a0b428dbull, 0x8ebc6af09c88c6e3ull, 0x589965cc75374cc3ull, 0x1d8e4e27c47d124full};
// Hashes a pair of 64-bit values using the default secret: both inputs are
// XORed with _wyp[0] / _wyp[1], multiplied via _wymum, XORed with the same
// constants again and finally mixed.
static inline uint64_t wyhash64(uint64_t A, uint64_t B){
  A^=_wyp[0];
  B^=_wyp[1];
  _wymum(&A,&B);
  const uint64_t first=A^_wyp[0];
  const uint64_t second=B^_wyp[1];
  return _wymix(first,second);
}
// Pseudo-random generator step: advances *seed by _wyp[0] and returns a mix
// of the new state with (new state XOR _wyp[1]).
static inline uint64_t wyrand(uint64_t *seed){
  const uint64_t state=*seed+_wyp[0];
  *seed=state;
  return _wymix(state,state^_wyp[1]);
}
// Maps a 64-bit value to a double in [0, 1): the top 52 bits of r are scaled
// by 2^-52.
static inline double wy2u01(uint64_t r){
  const uint64_t top_bits=r>>12;
  const double scale=1.0/(1ull<<52);
  return top_bits*scale;
}
// Sums three 21-bit fields of r (bits 0-20, 21-41 and 42-62), scales the sum
// by 2^-20 and subtracts 3, giving a value in [-3, 3) centred on 0.
// Presumably intended as a cheap approximation of a standard normal sample.
static inline double wy2gau(uint64_t r){
  const uint64_t field_mask=0x1fffff;
  const uint64_t total=(r&field_mask)+((r>>21)&field_mask)+((r>>42)&field_mask);
  const double scale=1.0/(1ull<<20);
  return total*scale-3.0;
}
// Returns the value _wymum leaves in its second operand after multiplying
// r by k. With the default configuration that is the high 64 bits of the
// 128-bit product r*k, i.e. a value in [0, k).
static inline uint64_t wy2u0k(uint64_t r, uint64_t k){
  uint64_t low=r;
  uint64_t high=k;
  _wymum(&low,&high);
  return high;
}
// Derives five 64-bit secret values from `seed` and stores them in
// secret[0..4]. Each value is assembled from eight bytes drawn (via wyrand)
// from the table `c` and is retried until it passes all of these checks:
//   * it is odd,
//   * it differs from every previously accepted secret in exactly 32 bits,
//   * it has no odd divisor in [3, 2^32).
// NOTE: the divisor scan runs up to 2^31 iterations per accepted candidate,
// so this function is very slow; it is not meant for hot paths.
static inline void make_secret(uint64_t seed, uint64_t *secret){
  // Candidate bytes; the entries appear to be the 8-bit values with exactly
  // four bits set.
  uint8_t c[] = {15, 23, 27, 29, 30, 39, 43, 45, 46, 51, 53, 54, 57, 58, 60, 71, 75, 77, 78, 83, 85, 86, 89, 90, 92, 99, 101, 102, 105, 106, 108, 113, 114, 116, 120, 135, 139, 141, 142, 147, 149, 150, 153, 154, 156, 163, 165, 166, 169, 170, 172, 177, 178, 180, 184, 195, 197, 198, 201, 202, 204, 209, 210, 212, 216, 225, 226, 228, 232, 240 };
  for(size_t i=0;i<5;i++){
    uint8_t ok;
    do{
      ok=1; secret[i]=0;
      for(size_t j=0;j<64;j+=8) secret[i]|=((uint64_t)c[wyrand(&seed)%sizeof(c)])<<j;
      if(secret[i]%2==0){ ok=0; continue; }
      // The loop body is braced and has a portable fallback so that the
      // bit-difference check exists on every compiler. Previously, when
      // neither preprocessor branch matched, the loop had no body of its own
      // and silently absorbed the following `if(!ok)continue;` statement.
      for(size_t j=0;j<i;j++){
        uint64_t diff=secret[j]^secret[i];
#if defined(__GNUC__) || defined(__INTEL_COMPILER) || defined(__clang__)
        if(__builtin_popcountll(diff)!=32){ ok=0; break; }
#elif defined(_MSC_VER) && defined(_M_X64)
        if(_mm_popcnt_u64(diff)!=32){ ok=0; break; }
#else
        // Portable 64-bit population count (parallel bit summation).
        diff-=(diff>>1)&0x5555555555555555ull;
        diff=(diff&0x3333333333333333ull)+((diff>>2)&0x3333333333333333ull);
        diff=(diff+(diff>>4))&0x0f0f0f0f0f0f0f0full;
        diff=(diff*0x0101010101010101ull)>>56;
        if(diff!=32){ ok=0; break; }
#endif
      }
      if(!ok)continue;
      // Trial division by every odd number below 2^32.
      for(uint64_t j=3;j<0x100000000ull;j+=2) if(secret[i]%j==0){ ok=0; break; }
    }while(!ok);
  }
}
#endif

@ -139,10 +139,14 @@ static const int8_t delim_ops[37] = {
/* Data pertaining to the parse. */
typedef struct {
const char *limit; /* End of delimited region or end of buffer. */
upb_arena arena;
const char *end; /* Can read up to 16 bytes slop beyond this. */
const char *limit_ptr; /* = end + UPB_MIN(limit, 0) */
int limit; /* Submessage limit relative to end. */
int depth;
uint32_t end_group; /* Set to field number of END_GROUP tag, if any. */
bool alias;
char patch[32];
upb_arena arena;
jmp_buf err;
} upb_decstate;
@ -150,7 +154,7 @@ typedef union {
bool bool_val;
uint32_t uint32_val;
uint64_t uint64_val;
upb_strview str_val;
uint32_t size;
} wireval;
static const char *decode_msg(upb_decstate *d, const char *ptr, upb_msg *msg,
@ -200,41 +204,48 @@ static bool decode_reserve(upb_decstate *d, upb_array *arr, size_t elem) {
return need_realloc;
}
/* Result pair for decode_longvarint64(): the position just past the decoded
 * varint and its value. decode_longvarint64() leaves ptr NULL when it finds no
 * terminating byte, which decode_varint64() reports via decode_err(). */
typedef struct {
const char *ptr;
uint64_t val;
} decode_vret;
UPB_NOINLINE
static const char *decode_longvarint64(upb_decstate *d, const char *ptr,
const char *limit, uint64_t *val) {
uint8_t byte;
int bitpos = 0;
uint64_t out = 0;
do {
if (bitpos >= 70 || ptr == limit) decode_err(d);
byte = *ptr;
out |= (uint64_t)(byte & 0x7F) << bitpos;
ptr++;
bitpos += 7;
} while (byte & 0x80);
*val = out;
return ptr;
static decode_vret decode_longvarint64(const char *ptr, uint64_t val) {
decode_vret ret = {NULL, 0};
uint64_t byte;
int i;
for (i = 1; i < 10; i++) {
byte = (uint8_t)ptr[i];
val += (byte - 1) << (i * 7);
if (!(byte & 0x80)) {
ret.ptr = ptr + i + 1;
ret.val = val;
return ret;
}
}
return ret;
}
UPB_FORCEINLINE
static const char *decode_varint64(upb_decstate *d, const char *ptr,
const char *limit, uint64_t *val) {
if (UPB_LIKELY(ptr < limit && (*ptr & 0x80) == 0)) {
*val = (uint8_t)*ptr;
uint64_t *val) {
uint64_t byte = (uint8_t)*ptr;
if (UPB_LIKELY((byte & 0x80) == 0)) {
*val = byte;
return ptr + 1;
} else {
return decode_longvarint64(d, ptr, limit, val);
decode_vret res = decode_longvarint64(ptr, byte);
if (!res.ptr) decode_err(d);
*val = res.val;
return res.ptr;
}
}
UPB_FORCEINLINE
static const char *decode_varint32(upb_decstate *d, const char *ptr,
const char *limit, uint32_t *val) {
uint32_t *val) {
uint64_t u64;
ptr = decode_varint64(d, ptr, limit, &u64);
ptr = decode_varint64(d, ptr, &u64);
if (u64 > UINT32_MAX) decode_err(d);
*val = (uint32_t)u64;
return ptr;
@ -287,17 +298,82 @@ static upb_msg *decode_newsubmsg(upb_decstate *d, const upb_msglayout *layout,
return _upb_msg_new_inl(subl, &d->arena);
}
static void decode_tosubmsg(upb_decstate *d, upb_msg *submsg,
const upb_msglayout *layout,
const upb_msglayout_field *field, upb_strview val) {
/* Installs a new limit for a region of `size` bytes starting at `ptr`.
 * The limit is stored as an offset relative to d->end, and d->limit_ptr is
 * set to d->end plus that offset, capped at d->end. Returns the difference
 * between the previous limit and the new one; hand it to decode_poplimit()
 * to restore the previous limit. */
static int decode_pushlimit(upb_decstate *d, const char *ptr, int size) {
  const int new_limit = (int)(ptr - d->end) + size;
  const int saved_delta = d->limit - new_limit;
  d->limit = new_limit;
  d->limit_ptr = (new_limit < 0) ? d->end + new_limit : d->end;
  return saved_delta;
}
/* Undoes decode_pushlimit(): adds the delta it returned back onto d->limit
 * and recomputes d->limit_ptr (d->end plus the limit, capped at d->end). */
static void decode_poplimit(upb_decstate *d, int saved_delta) {
  const int restored = d->limit + saved_delta;
  d->limit = restored;
  d->limit_ptr = d->end + (restored < 0 ? restored : 0);
}
/* Pairs a success flag with a parse position.
 * NOTE(review): nothing in the visible part of this file uses this type;
 * confirm whether it is still needed. */
typedef struct {
bool ok;
const char *ptr;
} decode_doneret;
/* Slow path of decode_isdone(), reached when ptr is at or past d->limit_ptr
 * and `overrun` (ptr - d->end, computed by the caller) differs from d->limit.
 *
 * If overrun is below d->limit, the 16 bytes starting at d->end are copied
 * into d->patch, followed by 16 zero bytes, and the decoder state is re-based
 * onto that buffer. Returns the position inside d->patch that corresponds to
 * ptr. Otherwise calls decode_err(d); there is no return statement on that
 * path, so decode_err() presumably does not return. */
UPB_NOINLINE
static const char *decode_isdonefallback(upb_decstate *d, const char *ptr,
int overrun) {
if (overrun < d->limit) {
/* Need to copy remaining data into patch buffer. */
UPB_ASSERT(overrun < 16);
/* Zero the second half of the patch buffer, then fill the first half. */
memset(d->patch + 16, 0, 16);
memcpy(d->patch, d->end, 16);
/* patch[0] corresponds to the old d->end, so ptr maps to patch + overrun. */
ptr = &d->patch[0] + overrun;
/* The new end sits 16 bytes past the old one, so the end-relative limit
 * shrinks by the same amount. */
d->end = &d->patch[16];
d->limit -= 16;
d->limit_ptr = d->end + d->limit;
/* With alias cleared, decode_readstr() copies string data instead of
 * pointing at the buffer being parsed. */
d->alias = false;
UPB_ASSERT(ptr < d->limit_ptr);
return ptr;
} else {
decode_err(d);
}
}
/* Reports whether parsing of the current region is finished.
 * Returns false while *ptr is below d->limit_ptr, and true when *ptr sits
 * exactly at the current limit (its offset from d->end equals d->limit).
 * In every other case decode_isdonefallback() is consulted; the pointer it
 * returns replaces *ptr and false is returned. */
UPB_FORCEINLINE
static bool decode_isdone(upb_decstate *d, const char **ptr) {
  const char *pos = *ptr;
  int overrun;

  /* Fast path: still inside the region. */
  if (UPB_LIKELY(pos < d->limit_ptr)) return false;

  overrun = pos - d->end;
  if (UPB_LIKELY(overrun == d->limit)) return true;

  *ptr = decode_isdonefallback(d, pos, overrun);
  return false;
}
/* Stores the `size` bytes at `ptr` into *str and returns ptr + size.
 * When d->alias is set, str->data points straight at ptr; otherwise the bytes
 * are copied into memory obtained from d->arena, and decode_err() is called
 * if that allocation fails. */
static const char *decode_readstr(upb_decstate *d, const char *ptr, int size,
                                  upb_strview *str) {
  const char *src = ptr;
  if (!d->alias) {
    char *copy = upb_arena_malloc(&d->arena, size);
    if (!copy) decode_err(d);
    memcpy(copy, ptr, size);
    src = copy;
  }
  str->data = src;
  str->size = size;
  return ptr + size;
}
static const char *decode_tosubmsg(upb_decstate *d, const char *ptr,
upb_msg *submsg, const upb_msglayout *layout,
const upb_msglayout_field *field, int size) {
const upb_msglayout *subl = layout->submsgs[field->submsg_index];
const char *saved_limit = d->limit;
int saved_delta = decode_pushlimit(d, ptr, size);
if (--d->depth < 0) decode_err(d);
d->limit = val.data + val.size;
decode_msg(d, val.data, submsg, subl);
d->limit = saved_limit;
ptr = decode_msg(d, ptr, submsg, subl);
decode_poplimit(d, saved_delta);
if (d->end_group != 0) decode_err(d);
d->depth++;
return ptr;
}
static const char *decode_group(upb_decstate *d, const char *ptr,
@ -345,15 +421,14 @@ static const char *decode_toarray(upb_decstate *d, const char *ptr,
memcpy(mem, &val, 1 << op);
return ptr;
case OP_STRING:
decode_verifyutf8(d, val.str_val.data, val.str_val.size);
decode_verifyutf8(d, ptr, val.size);
/* Fallthrough. */
case OP_BYTES:
case OP_BYTES: {
/* Append bytes. */
mem =
UPB_PTR_AT(_upb_array_ptr(arr), arr->len * sizeof(upb_strview), void);
upb_strview *str = (upb_strview*)_upb_array_ptr(arr) + arr->len;
arr->len++;
memcpy(mem, &val, sizeof(upb_strview));
return ptr;
return decode_readstr(d, ptr, val.size, str);
}
case OP_SUBMSG: {
/* Append submessage / group. */
upb_msg *submsg = decode_newsubmsg(d, layout, field);
@ -361,26 +436,25 @@ static const char *decode_toarray(upb_decstate *d, const char *ptr,
submsg;
arr->len++;
if (UPB_UNLIKELY(field->descriptortype == UPB_DTYPE_GROUP)) {
ptr = decode_togroup(d, ptr, submsg, layout, field);
return decode_togroup(d, ptr, submsg, layout, field);
} else {
decode_tosubmsg(d, submsg, layout, field, val.str_val);
return decode_tosubmsg(d, ptr, submsg, layout, field, val.size);
}
return ptr;
}
case OP_FIXPCK_LG2(2):
case OP_FIXPCK_LG2(3): {
/* Fixed packed. */
int lg2 = op - OP_FIXPCK_LG2(0);
int mask = (1 << lg2) - 1;
size_t count = val.str_val.size >> lg2;
if ((val.str_val.size & mask) != 0) {
size_t count = val.size >> lg2;
if ((val.size & mask) != 0) {
decode_err(d); /* Length isn't a round multiple of elem size. */
}
decode_reserve(d, arr, count);
mem = UPB_PTR_AT(_upb_array_ptr(arr), arr->len << lg2, void);
arr->len += count;
memcpy(mem, val.str_val.data, val.str_val.size);
return ptr;
memcpy(mem, ptr, val.size); /* XXX: ptr boundary. */
return ptr + val.size;
}
case OP_VARPCK_LG2(0):
case OP_VARPCK_LG2(2):
@ -388,12 +462,11 @@ static const char *decode_toarray(upb_decstate *d, const char *ptr,
/* Varint packed. */
int lg2 = op - OP_VARPCK_LG2(0);
int scale = 1 << lg2;
const char *ptr = val.str_val.data;
const char *end = ptr + val.str_val.size;
int saved_limit = decode_pushlimit(d, ptr, val.size);
char *out = UPB_PTR_AT(_upb_array_ptr(arr), arr->len << lg2, void);
while (ptr < end) {
while (!decode_isdone(d, &ptr)) {
wireval elem;
ptr = decode_varint64(d, ptr, end, &elem.uint64_val);
ptr = decode_varint64(d, ptr, &elem.uint64_val);
decode_munge(field->descriptortype, &elem);
if (decode_reserve(d, arr, 1)) {
out = UPB_PTR_AT(_upb_array_ptr(arr), arr->len << lg2, void);
@ -402,7 +475,7 @@ static const char *decode_toarray(upb_decstate *d, const char *ptr,
memcpy(out, &elem, scale);
out += scale;
}
if (ptr != end) decode_err(d);
decode_poplimit(d, saved_limit);
return ptr;
}
default:
@ -410,9 +483,9 @@ static const char *decode_toarray(upb_decstate *d, const char *ptr,
}
}
static void decode_tomap(upb_decstate *d, upb_msg *msg,
const upb_msglayout *layout,
const upb_msglayout_field *field, wireval val) {
static const char *decode_tomap(upb_decstate *d, const char *ptr, upb_msg *msg,
const upb_msglayout *layout,
const upb_msglayout_field *field, wireval val) {
upb_map **map_p = UPB_PTR_AT(msg, field->offset, upb_map *);
upb_map *map = *map_p;
upb_map_entry ent;
@ -440,10 +513,9 @@ static void decode_tomap(upb_decstate *d, upb_msg *msg,
ent.v.val = upb_value_ptr(_upb_msg_new(entry->submsgs[0], &d->arena));
}
decode_tosubmsg(d, &ent.k, layout, field, val.str_val);
/* Insert into map. */
ptr = decode_tosubmsg(d, ptr, &ent.k, layout, field, val.size);
_upb_map_set(map, &ent.k, map->key_size, &ent.v, map->val_size, &d->arena);
return ptr;
}
static const char *decode_tomsg(upb_decstate *d, const char *ptr, upb_msg *msg,
@ -477,16 +549,15 @@ static const char *decode_tomsg(upb_decstate *d, const char *ptr, upb_msg *msg,
if (UPB_UNLIKELY(type == UPB_DTYPE_GROUP)) {
ptr = decode_togroup(d, ptr, submsg, layout, field);
} else {
decode_tosubmsg(d, submsg, layout, field, val.str_val);
ptr = decode_tosubmsg(d, ptr, submsg, layout, field, val.size);
}
break;
}
case OP_STRING:
decode_verifyutf8(d, val.str_val.data, val.str_val.size);
decode_verifyutf8(d, ptr, val.size);
/* Fallthrough. */
case OP_BYTES:
memcpy(mem, &val, sizeof(upb_strview));
break;
return decode_readstr(d, ptr, val.size, mem);
case OP_SCALAR_LG2(3):
memcpy(mem, &val, 8);
break;
@ -505,7 +576,7 @@ static const char *decode_tomsg(upb_decstate *d, const char *ptr, upb_msg *msg,
static const char *decode_msg(upb_decstate *d, const char *ptr, upb_msg *msg,
const upb_msglayout *layout) {
while (ptr < d->limit) {
while (!decode_isdone(d, &ptr)) {
uint32_t tag;
const upb_msglayout_field *field;
int field_number;
@ -514,7 +585,7 @@ static const char *decode_msg(upb_decstate *d, const char *ptr, upb_msg *msg,
wireval val;
int op;
ptr = decode_varint32(d, ptr, d->limit, &tag);
ptr = decode_varint32(d, ptr, &tag);
field_number = tag >> 3;
wire_type = tag & 7;
@ -522,12 +593,11 @@ static const char *decode_msg(upb_decstate *d, const char *ptr, upb_msg *msg,
switch (wire_type) {
case UPB_WIRE_TYPE_VARINT:
ptr = decode_varint64(d, ptr, d->limit, &val.uint64_val);
ptr = decode_varint64(d, ptr, &val.uint64_val);
op = varint_ops[field->descriptortype];
decode_munge(field->descriptortype, &val);
break;
case UPB_WIRE_TYPE_32BIT:
if (d->limit - ptr < 4) decode_err(d);
memcpy(&val.uint32_val, ptr, 4);
val.uint32_val = _upb_be_swap32(val.uint32_val);
ptr += 4;
@ -535,7 +605,6 @@ static const char *decode_msg(upb_decstate *d, const char *ptr, upb_msg *msg,
if (((1 << field->descriptortype) & fixed32_ok) == 0) goto unknown;
break;
case UPB_WIRE_TYPE_64BIT:
if (d->limit - ptr < 8) decode_err(d);
memcpy(&val.uint64_val, ptr, 8);
val.uint64_val = _upb_be_swap64(val.uint64_val);
ptr += 8;
@ -543,16 +612,13 @@ static const char *decode_msg(upb_decstate *d, const char *ptr, upb_msg *msg,
if (((1 << field->descriptortype) & fixed64_ok) == 0) goto unknown;
break;
case UPB_WIRE_TYPE_DELIMITED: {
uint32_t size;
int ndx = field->descriptortype;
if (_upb_isrepeated(field)) ndx += 18;
ptr = decode_varint32(d, ptr, d->limit, &size);
if (size >= INT32_MAX || (size_t)(d->limit - ptr) < size) {
ptr = decode_varint32(d, ptr, &val.size);
if (val.size >= INT32_MAX ||
ptr - d->end + (int32_t)val.size > d->limit) {
decode_err(d); /* Length overflow. */
}
val.str_val.data = ptr;
val.str_val.size = size;
ptr += size;
op = delim_ops[ndx];
break;
}
@ -576,7 +642,7 @@ static const char *decode_msg(upb_decstate *d, const char *ptr, upb_msg *msg,
ptr = decode_toarray(d, ptr, msg, layout, field, val, op);
break;
case _UPB_LABEL_MAP:
decode_tomap(d, msg, layout, field, val);
ptr = decode_tomap(d, ptr, msg, layout, field, val);
break;
default:
ptr = decode_tomsg(d, ptr, msg, layout, field, val, op);
@ -590,6 +656,7 @@ static const char *decode_msg(upb_decstate *d, const char *ptr, upb_msg *msg,
ptr = decode_group(d, ptr, NULL, NULL, field_number);
}
if (msg) {
if (wire_type == UPB_WIRE_TYPE_DELIMITED) ptr += val.size;
if (!_upb_msg_addunknown(msg, field_start, ptr - field_start,
&d->arena)) {
decode_err(d);
@ -598,7 +665,6 @@ static const char *decode_msg(upb_decstate *d, const char *ptr, upb_msg *msg,
}
}
if (ptr != d->limit) decode_err(d);
return ptr;
}
@ -607,9 +673,22 @@ bool upb_decode(const char *buf, size_t size, void *msg, const upb_msglayout *l,
bool ok;
upb_decstate state;
if (size == 0) return true;
if (size == 0) {
return true;
} else if (size < 16) {
memset(&state.patch, 0, 32);
memcpy(&state.patch, buf, size);
buf = state.patch;
state.end = buf + size;
state.limit = 0;
state.alias = false;
} else {
state.end = buf + size - 16;
state.limit = 16;
state.alias = true;
}
state.limit = buf + size;
state.limit_ptr = state.end;
state.depth = 64;
state.end_group = 0;
state.arena.head = arena->head;

File diff suppressed because it is too large Load Diff

@ -293,6 +293,7 @@ int upb_symtab_filecount(const upb_symtab *s);
const upb_filedef *upb_symtab_addfile(
upb_symtab *s, const google_protobuf_FileDescriptorProto *file,
upb_status *status);
size_t _upb_symtab_bytesloaded(const upb_symtab *s);
/* For generated code only: loads a generated descriptor. */
typedef struct upb_def_init {

@ -951,7 +951,7 @@ static bool parse_number_from_buffer(upb_json_parser *p, const char *buf,
upb_fieldtype_t type = upb_fielddef_type(p->top->f);
double val;
double dummy;
double inf = UPB_INFINITY;
double inf = INFINITY;
errno = 0;
@ -2869,7 +2869,7 @@ static upb_json_parsermethod *parsermethod_new(upb_json_codecache *c,
upb_byteshandler_setstring(&m->input_handler_, parse, m);
upb_byteshandler_setendstr(&m->input_handler_, end, m);
upb_strtable_init2(&m->name_table, UPB_CTYPE_CONSTPTR, alloc);
upb_strtable_init2(&m->name_table, UPB_CTYPE_CONSTPTR, 4, alloc);
/* Build name_table */

@ -7,7 +7,9 @@
#include <ctype.h>
#include <inttypes.h>
#include <math.h>
#include <stdint.h>
#include <stdio.h>
#include <string.h>
#include <time.h>
@ -139,7 +141,7 @@ static void putstring(upb_json_printer *p, const char *buf, size_t len) {
char escape_buf[8];
if (!escape) {
unsigned char byte = (unsigned char)c;
_upb_snprintf(escape_buf, sizeof(escape_buf), "\\u%04x", (int)byte);
snprintf(escape_buf, sizeof(escape_buf), "\\u%04x", (int)byte);
escape = escape_buf;
}
@ -178,53 +180,53 @@ const char neginf[] = "\"-Infinity\"";
const char inf[] = "\"Infinity\"";
static size_t fmt_double(double val, char* buf, size_t length) {
if (val == UPB_INFINITY) {
if (val == INFINITY) {
CHKLENGTH(length >= strlen(inf));
strcpy(buf, inf);
return strlen(inf);
} else if (val == -UPB_INFINITY) {
} else if (val == -INFINITY) {
CHKLENGTH(length >= strlen(neginf));
strcpy(buf, neginf);
return strlen(neginf);
} else {
size_t n = _upb_snprintf(buf, length, "%.17g", val);
size_t n = snprintf(buf, length, "%.17g", val);
CHKLENGTH(n > 0 && n < length);
return n;
}
}
static size_t fmt_float(float val, char* buf, size_t length) {
size_t n = _upb_snprintf(buf, length, "%.8g", val);
size_t n = snprintf(buf, length, "%.8g", val);
CHKLENGTH(n > 0 && n < length);
return n;
}
static size_t fmt_bool(bool val, char* buf, size_t length) {
size_t n = _upb_snprintf(buf, length, "%s", (val ? "true" : "false"));
size_t n = snprintf(buf, length, "%s", (val ? "true" : "false"));
CHKLENGTH(n > 0 && n < length);
return n;
}
static size_t fmt_int64_as_number(int64_t val, char* buf, size_t length) {
size_t n = _upb_snprintf(buf, length, "%" PRId64, val);
size_t n = snprintf(buf, length, "%" PRId64, val);
CHKLENGTH(n > 0 && n < length);
return n;
}
static size_t fmt_uint64_as_number(uint64_t val, char* buf, size_t length) {
size_t n = _upb_snprintf(buf, length, "%" PRIu64, val);
size_t n = snprintf(buf, length, "%" PRIu64, val);
CHKLENGTH(n > 0 && n < length);
return n;
}
static size_t fmt_int64_as_string(int64_t val, char* buf, size_t length) {
size_t n = _upb_snprintf(buf, length, "\"%" PRId64 "\"", val);
size_t n = snprintf(buf, length, "\"%" PRId64 "\"", val);
CHKLENGTH(n > 0 && n < length);
return n;
}
static size_t fmt_uint64_as_string(uint64_t val, char* buf, size_t length) {
size_t n = _upb_snprintf(buf, length, "\"%" PRIu64 "\"", val);
size_t n = snprintf(buf, length, "\"%" PRIu64 "\"", val);
CHKLENGTH(n > 0 && n < length);
return n;
}
@ -870,12 +872,12 @@ static bool printer_enddurationmsg(void *closure, const void *handler_data,
return false;
}
_upb_snprintf(buffer, sizeof(buffer), "%ld", (long)p->seconds);
snprintf(buffer, sizeof(buffer), "%ld", (long)p->seconds);
base_len = strlen(buffer);
if (p->nanos != 0) {
char nanos_buffer[UPB_DURATION_MAX_NANO_LEN + 3];
_upb_snprintf(nanos_buffer, sizeof(nanos_buffer), "%.9f",
snprintf(nanos_buffer, sizeof(nanos_buffer), "%.9f",
p->nanos / 1000000000.0);
/* Remove trailing 0. */
for (i = UPB_DURATION_MAX_NANO_LEN + 2;
@ -949,8 +951,8 @@ static bool printer_endtimestampmsg(void *closure, const void *handler_data,
"%Y-%m-%dT%H:%M:%S", gmtime(&time));
if (p->nanos != 0) {
char nanos_buffer[UPB_TIMESTAMP_MAX_NANO_LEN + 3];
_upb_snprintf(nanos_buffer, sizeof(nanos_buffer), "%.9f",
p->nanos / 1000000000.0);
snprintf(nanos_buffer, sizeof(nanos_buffer), "%.9f",
p->nanos / 1000000000.0);
/* Remove trailing 0. */
for (i = UPB_TIMESTAMP_MAX_NANO_LEN + 2;
nanos_buffer[i] == '0'; i--) {

@ -5,6 +5,7 @@
#include <float.h>
#include <inttypes.h>
#include <limits.h>
#include <math.h>
#include <setjmp.h>
#include <stdlib.h>
#include <string.h>
@ -395,6 +396,8 @@ static void jsondec_resize(jsondec *d, char **buf, char **end, char **buf_end) {
size_t size = UPB_MAX(8, 2 * oldsize);
*buf = upb_arena_realloc(d->arena, *buf, len, size);
if (!*buf) jsondec_err(d, "Out of memory");
*end = *buf + len;
*buf_end = *buf + size;
}
@ -747,11 +750,11 @@ static upb_msgval jsondec_double(jsondec *d, const upb_fielddef *f) {
case JD_STRING:
str = jsondec_string(d);
if (jsondec_streql(str, "NaN")) {
val.double_val = UPB_NAN;
val.double_val = NAN;
} else if (jsondec_streql(str, "Infinity")) {
val.double_val = UPB_INFINITY;
val.double_val = INFINITY;
} else if (jsondec_streql(str, "-Infinity")) {
val.double_val = -UPB_INFINITY;
val.double_val = -INFINITY;
} else {
val.double_val = strtod(str.data, NULL);
}
@ -761,7 +764,7 @@ static upb_msgval jsondec_double(jsondec *d, const upb_fielddef *f) {
}
if (upb_fielddef_type(f) == UPB_TYPE_FLOAT) {
if (val.double_val != UPB_INFINITY && val.double_val != -UPB_INFINITY &&
if (val.double_val != INFINITY && val.double_val != -INFINITY &&
(val.double_val > FLT_MAX || val.double_val < -FLT_MAX)) {
jsondec_err(d, "Float out of range");
}
@ -909,7 +912,7 @@ static void jsondec_field(jsondec *d, upb_msg *msg, const upb_msgdef *m) {
return;
}
if (upb_fielddef_containingoneof(f) &&
if (upb_fielddef_realcontainingoneof(f) &&
upb_msg_whichoneof(msg, upb_fielddef_containingoneof(f))) {
jsondec_err(d, "More than one field for this oneof.");
}

@ -4,14 +4,16 @@
#include <ctype.h>
#include <float.h>
#include <inttypes.h>
#include <math.h>
#include <setjmp.h>
#include <stdarg.h>
#include <stdio.h>
#include <string.h>
#include <setjmp.h>
#include "upb/decode.h"
#include "upb/reflection.h"
/* Must be last. */
#include "upb/port_def.inc"
typedef struct {
@ -76,7 +78,7 @@ static void jsonenc_printf(jsonenc *e, const char *fmt, ...) {
va_list args;
va_start(args, fmt);
n = _upb_vsnprintf(e->ptr, have, fmt, args);
n = vsnprintf(e->ptr, have, fmt, args);
va_end(args);
if (UPB_LIKELY(have > n)) {
@ -268,9 +270,9 @@ static void jsonenc_string(jsonenc *e, upb_strview str) {
}
static void jsonenc_double(jsonenc *e, const char *fmt, double val) {
if (val == UPB_INFINITY) {
if (val == INFINITY) {
jsonenc_putstr(e, "\"Infinity\"");
} else if (val == -UPB_INFINITY) {
} else if (val == -INFINITY) {
jsonenc_putstr(e, "\"-Infinity\"");
} else if (val != val) {
jsonenc_putstr(e, "\"NaN\"");

@ -9,7 +9,7 @@ extern "C" {
#endif
enum {
/* When set, emits 0/default values. TOOD(haberman): proto3 only? */
/* When set, emits 0/default values. TODO(haberman): proto3 only? */
UPB_JSONENC_EMITDEFAULTS = 1,
/* When set, use normal (snake_case) field names instead of JSON (camelCase)

@ -11,7 +11,7 @@ static const size_t overhead = sizeof(upb_msg_internal);
static const upb_msg_internal *upb_msg_getinternal_const(const upb_msg *msg) {
ptrdiff_t size = sizeof(upb_msg_internal);
return UPB_PTR_AT(msg, -size, upb_msg_internal);
return (upb_msg_internal*)((char*)msg - size);
}
upb_msg *_upb_msg_new(const upb_msglayout *l, upb_arena *a) {
@ -111,13 +111,16 @@ void *_upb_array_resize_fallback(upb_array **arr_ptr, size_t size,
bool _upb_array_append_fallback(upb_array **arr_ptr, const void *value,
int elem_size_lg2, upb_arena *arena) {
upb_array *arr = getorcreate_array(arr_ptr, elem_size_lg2, arena);
size_t elem = arr->len;
char *data;
if (!arr) return false;
if (!arr || !_upb_array_resize(arr, elem + 1, arena)) return false;
size_t elems = arr->len;
data = _upb_array_ptr(arr);
memcpy(data + (elem << elem_size_lg2), value, 1 << elem_size_lg2);
if (!_upb_array_resize(arr, elems + 1, arena)) {
return false;
}
char *data = _upb_array_ptr(arr);
memcpy(data + (elems << elem_size_lg2), value, 1 << elem_size_lg2);
return true;
}
@ -130,7 +133,7 @@ upb_map *_upb_map_new(upb_arena *a, size_t key_size, size_t value_size) {
return NULL;
}
upb_strtable_init2(&map->table, UPB_CTYPE_INT32, upb_arena_alloc(a));
upb_strtable_init2(&map->table, UPB_CTYPE_INT32, 4, upb_arena_alloc(a));
map->key_size = key_size;
map->val_size = value_size;

@ -94,7 +94,8 @@ UPB_INLINE upb_msg *_upb_msg_new_inl(const upb_msglayout *l, upb_arena *a) {
upb_msg *_upb_msg_new(const upb_msglayout *l, upb_arena *a);
UPB_INLINE upb_msg_internal *upb_msg_getinternal(upb_msg *msg) {
return UPB_PTR_AT(msg, -sizeof(upb_msg_internal), upb_msg_internal);
ptrdiff_t size = sizeof(upb_msg_internal);
return (upb_msg_internal*)((char*)msg - size);
}
/* Clears the given message. */
@ -189,9 +190,11 @@ typedef struct {
uintptr_t data; /* Tagged ptr: low 3 bits of ptr are lg2(elem size). */
size_t len; /* Measured in elements. */
size_t size; /* Measured in elements. */
uint64_t junk;
} upb_array;
/* Returns the element storage of arr: the tagged `data` word with its low
 * three bits (which hold lg2 of the element size) cleared. Asserts that the
 * stored tag is at most 4. */
UPB_INLINE const void *_upb_array_constptr(const upb_array *arr) {
  const uintptr_t tag_mask = 7;
  UPB_ASSERT((arr->data & tag_mask) <= 4);
  return (void*)(arr->data & ~tag_mask);
}
@ -201,15 +204,17 @@ UPB_INLINE void *_upb_array_ptr(upb_array *arr) {
/* Builds the tagged word stored in upb_array.data: the address of ptr with
 * elem_size_lg2 or-ed into its low bits. Asserts that elem_size_lg2 is at
 * most 4 and that ptr has its low three bits clear (8-byte aligned). */
UPB_INLINE uintptr_t _upb_tag_arrptr(void* ptr, int elem_size_lg2) {
  const uintptr_t addr = (uintptr_t)ptr;
  UPB_ASSERT(elem_size_lg2 <= 4);
  UPB_ASSERT((addr & 7) == 0);
  return addr | (unsigned)elem_size_lg2;
}
UPB_INLINE upb_array *_upb_array_new(upb_arena *a, size_t init_size,
int elem_size_lg2) {
const size_t arr_size = UPB_ALIGN_UP(sizeof(upb_array), 8);
const size_t bytes = sizeof(upb_array) + (init_size << elem_size_lg2);
upb_array *arr = (upb_array*)upb_arena_malloc(a, bytes);
if (!arr) return NULL;
arr->data = _upb_tag_arrptr(arr + 1, elem_size_lg2);
arr->data = _upb_tag_arrptr(UPB_PTR_AT(arr, arr_size, void), elem_size_lg2);
arr->len = 0;
arr->size = init_size;
return arr;
@ -382,17 +387,17 @@ UPB_INLINE void _upb_map_fromkey(upb_strview key, void* out, size_t size) {
}
}
UPB_INLINE upb_value _upb_map_tovalue(const void *val, size_t size,
upb_arena *a) {
upb_value ret = {0};
UPB_INLINE bool _upb_map_tovalue(const void *val, size_t size, upb_value *msgval,
upb_arena *a) {
if (size == UPB_MAPTYPE_STRING) {
upb_strview *strp = (upb_strview*)upb_arena_malloc(a, sizeof(*strp));
if (!strp) return false;
*strp = *(upb_strview*)val;
ret = upb_value_ptr(strp);
*msgval = upb_value_ptr(strp);
} else {
memcpy(&ret, val, size);
memcpy(msgval, val, size);
}
return ret;
return true;
}
UPB_INLINE void _upb_map_fromvalue(upb_value val, void* out, size_t size) {
@ -434,7 +439,8 @@ UPB_INLINE void* _upb_map_next(const upb_map *map, size_t *iter) {
UPB_INLINE bool _upb_map_set(upb_map *map, const void *key, size_t key_size,
void *val, size_t val_size, upb_arena *arena) {
upb_strview strkey = _upb_map_tokey(key, key_size);
upb_value tabval = _upb_map_tovalue(val, val_size, arena);
upb_value tabval = {0};
if (!_upb_map_tovalue(val, val_size, &tabval, arena)) return false;
upb_alloc *a = upb_arena_alloc(arena);
/* TODO(haberman): add overwrite operation to minimize number of lookups. */

@ -105,8 +105,8 @@ bool putf(upb_textprinter *p, const char *fmt, ...) {
va_start(args, fmt);
/* Run once to get the length of the string. */
_upb_va_copy(args_copy, args);
len = _upb_vsnprintf(NULL, 0, fmt, args_copy);
va_copy(args_copy, args);
len = vsnprintf(NULL, 0, fmt, args_copy);
va_end(args_copy);
/* + 1 for NULL terminator (vsprintf() requires it even if we don't). */

@ -150,9 +150,7 @@ UPB_INLINE uint64_t upb_vencode32(uint32_t val) {
uint64_t ret = 0;
UPB_ASSERT(bytes <= 5);
memcpy(&ret, buf, bytes);
#ifdef UPB_BIG_ENDIAN
ret = byteswap64(ret);
#endif
ret = _upb_be_swap64(ret);
UPB_ASSERT(ret <= 0xffffffffffU);
return ret;
}

@ -1,26 +0,0 @@
#include "upb/port_def.inc"
#ifdef UPB_MSVC_VSNPRINTF
/* Visual C++ earlier than 2015 doesn't have standard C99 snprintf and
* vsnprintf. To support them, missing functions are manually implemented
* using the existing secure functions. */
int msvc_vsnprintf(char* s, size_t n, const char* format, va_list arg) {
if (!s) {
return _vscprintf(format, arg);
}
int ret = _vsnprintf_s(s, n, _TRUNCATE, format, arg);
if (ret < 0) {
ret = _vscprintf(format, arg);
}
return ret;
}
int msvc_snprintf(char* s, size_t n, const char* format, ...) {
va_list arg;
va_start(arg, format);
int ret = msvc_vsnprintf(s, n, format, arg);
va_end(arg);
return ret;
}
#endif

@ -20,6 +20,15 @@
*
* This file is private and must not be included by users!
*/
#if !(__STDC_VERSION__ >= 199901L || __cplusplus >= 201103L)
#error upb requires C99 or C++11
#endif
#if (defined(_MSC_VER) && _MSC_VER < 1900)
#error upb requires MSVC >= 2015.
#endif
#include <stdint.h>
#include <stddef.h>
@ -68,12 +77,6 @@
#define UPB_UNLIKELY(x) (x)
#endif
/* Define UPB_BIG_ENDIAN manually if you're on big endian and your compiler
* doesn't provide these preprocessor symbols. */
#if defined(__BYTE_ORDER__) && (__BYTE_ORDER__ == __ORDER_BIG_ENDIAN__)
#define UPB_BIG_ENDIAN
#endif
/* Macros for function attributes on compilers that support them. */
#ifdef __GNUC__
#define UPB_FORCEINLINE __inline__ __attribute__((always_inline))
@ -89,49 +92,6 @@
#define UPB_NORETURN
#endif
#if __STDC_VERSION__ >= 199901L || __cplusplus >= 201103L
/* C99/C++11 versions. */
#include <stdio.h>
#define _upb_snprintf snprintf
#define _upb_vsnprintf vsnprintf
#define _upb_va_copy(a, b) va_copy(a, b)
#elif defined(_MSC_VER)
/* Microsoft C/C++ versions. */
#include <stdarg.h>
#include <stdio.h>
#if _MSC_VER < 1900
int msvc_snprintf(char* s, size_t n, const char* format, ...);
int msvc_vsnprintf(char* s, size_t n, const char* format, va_list arg);
#define UPB_MSVC_VSNPRINTF
#define _upb_snprintf msvc_snprintf
#define _upb_vsnprintf msvc_vsnprintf
#else
#define _upb_snprintf snprintf
#define _upb_vsnprintf vsnprintf
#endif
#define _upb_va_copy(a, b) va_copy(a, b)
#elif defined __GNUC__
/* A few hacky workarounds for functions not in C89.
* For internal use only!
* TODO(haberman): fix these by including our own implementations, or finding
* another workaround.
*/
#define _upb_snprintf __builtin_snprintf
#define _upb_vsnprintf __builtin_vsnprintf
#define _upb_va_copy(a, b) __va_copy(a, b)
#else
#error Need implementations of [v]snprintf and va_copy
#endif
#ifdef __cplusplus
#if __cplusplus >= 201103L || defined(__GXX_EXPERIMENTAL_CXX0X__) || \
(defined(_MSC_VER) && _MSC_VER >= 1900)
/* C++11 is present */
#else
#error upb requires C++11 for C++ support
#endif
#endif
#define UPB_MAX(x, y) ((x) > (y) ? (x) : (y))
#define UPB_MIN(x, y) ((x) < (y) ? (x) : (y))
@ -159,29 +119,12 @@ int msvc_vsnprintf(char* s, size_t n, const char* format, va_list arg);
#define UPB_ASSERT(expr) assert(expr)
#endif
/* UPB_ASSERT_DEBUGVAR(): assert that uses functions or variables that only
* exist in debug mode. This turns into regular assert. */
#define UPB_ASSERT_DEBUGVAR(expr) assert(expr)
#if defined(__GNUC__) || defined(__clang__)
#define UPB_UNREACHABLE() do { assert(0); __builtin_unreachable(); } while(0)
#else
#define UPB_UNREACHABLE() do { assert(0); } while(0)
#endif
/* UPB_INFINITY representing floating-point positive infinity. */
#include <math.h>
#ifdef INFINITY
#define UPB_INFINITY INFINITY
#else
#define UPB_INFINITY (1.0 / 0.0)
#endif
#ifdef NAN
#define UPB_NAN NAN
#else
#define UPB_NAN (0.0 / 0.0)
#endif
#if defined(__SANITIZE_ADDRESS__)
#define UPB_ASAN 1
#ifdef __cplusplus

@ -18,14 +18,7 @@
#undef UPB_UNUSED
#undef UPB_ASSUME
#undef UPB_ASSERT
#undef UPB_ASSERT_DEBUGVAR
#undef UPB_UNREACHABLE
#undef UPB_INFINITY
#undef UPB_NAN
#undef UPB_MSVC_VSNPRINTF
#undef _upb_snprintf
#undef _upb_vsnprintf
#undef _upb_va_copy
#undef UPB_POISON_MEMORY_REGION
#undef UPB_UNPOISON_MEMORY_REGION
#undef UPB_ASAN

@ -96,20 +96,17 @@ bool upb_msg_has(const upb_msg *msg, const upb_fielddef *f) {
const upb_fielddef *upb_msg_whichoneof(const upb_msg *msg,
const upb_oneofdef *o) {
upb_oneof_iter i;
const upb_fielddef *f;
const upb_msglayout_field *field;
const upb_msgdef *m = upb_oneofdef_containingtype(o);
uint32_t oneof_case;
/* This is far from optimal. */
upb_oneof_begin(&i, o);
if (upb_oneof_done(&i)) return false;
f = upb_oneof_iter_field(&i);
field = upb_fielddef_layout(f);
oneof_case = _upb_getoneofcase_field(msg, field);
return oneof_case ? upb_msgdef_itof(m, oneof_case) : NULL;
const upb_fielddef *f = upb_oneofdef_field(o, 0);
if (upb_oneofdef_issynthetic(o)) {
UPB_ASSERT(upb_oneofdef_fieldcount(o) == 1);
return upb_msg_has(msg, f) ? f : NULL;
} else {
const upb_msglayout_field *field = upb_fielddef_layout(f);
uint32_t oneof_case = _upb_getoneofcase_field(msg, field);
f = oneof_case ? upb_oneofdef_itof(o, oneof_case) : NULL;
UPB_ASSERT((f != NULL) == (oneof_case != 0));
return f;
}
}
upb_msgval upb_msg_get(const upb_msg *msg, const upb_fielddef *f) {

@ -4,10 +4,12 @@
** Implementation is heavily inspired by Lua's ltable.c.
*/
#include "upb/table.int.h"
#include <string.h>
#include "third_party/wyhash/wyhash.h"
#include "upb/table.int.h"
/* Must be last. */
#include "upb/port_def.inc"
#define UPB_MAXARRSIZE 16 /* 64k. */
@ -87,11 +89,7 @@ static upb_tabent *mutable_entries(upb_table *t) {
}
static bool isfull(upb_table *t) {
if (upb_table_size(t) == 0) {
return true;
} else {
return ((double)(t->count + 1) / upb_table_size(t)) > MAX_LOAD;
}
return t->count == t->max_count;
}
static bool init(upb_table *t, uint8_t size_lg2, upb_alloc *a) {
@ -100,6 +98,7 @@ static bool init(upb_table *t, uint8_t size_lg2, upb_alloc *a) {
t->count = 0;
t->size_lg2 = size_lg2;
t->mask = upb_table_size(t) ? upb_table_size(t) - 1 : 0;
t->max_count = upb_table_size(t) * MAX_LOAD;
bytes = upb_table_size(t) * sizeof(upb_tabent);
if (bytes > 0) {
t->entries = upb_malloc(a, bytes);
@ -115,9 +114,17 @@ static void uninit(upb_table *t, upb_alloc *a) {
upb_free(a, mutable_entries(t));
}
static upb_tabent *emptyent(upb_table *t) {
upb_tabent *e = mutable_entries(t) + upb_table_size(t);
while (1) { if (upb_tabent_isempty(--e)) return e; UPB_ASSERT(e > t->entries); }
static upb_tabent *emptyent(upb_table *t, upb_tabent *e) {
upb_tabent *begin = mutable_entries(t);
upb_tabent *end = begin + upb_table_size(t);
for (e = e + 1; e < end; e++) {
if (upb_tabent_isempty(e)) return e;
}
for (e = begin; e < end; e++) {
if (upb_tabent_isempty(e)) return e;
}
UPB_ASSERT(false);
return NULL;
}
static upb_tabent *getentry_mutable(upb_table *t, uint32_t hash) {
@ -173,7 +180,7 @@ static void insert(upb_table *t, lookupkey_t key, upb_tabkey tabkey,
our_e->next = NULL;
} else {
/* Collision. */
upb_tabent *new_e = emptyent(t);
upb_tabent *new_e = emptyent(t, mainpos_e);
/* Head of collider's chain. */
upb_tabent *chain = getentry_mutable(t, hashfunc(mainpos_e->key));
if (chain == mainpos_e) {
@ -268,10 +275,14 @@ static upb_tabkey strcopy(lookupkey_t k2, upb_alloc *a) {
return (uintptr_t)str;
}
/* Hashes the n bytes at p with wyhash (seed 0, default secret _wyp) and
 * returns the low 32 bits of the 64-bit result. */
static uint32_t table_hash(const char *p, size_t n) {
  const uint64_t full = wyhash(p, n, 0, _wyp);
  return (uint32_t)full;
}
/* Hash callback for string keys: recovers the string and its length from the
 * stored |key| via upb_tabstr() and hashes those bytes.
 *
 * NOTE(review): two return statements follow each other here, which looks
 * like the removed and added lines of a diff.  As written only the
 * upb_murmur_hash2() result is returned and the table_hash() call is
 * unreachable -- confirm which hash is intended. */
static uint32_t strhash(upb_tabkey key) {
uint32_t len;
char *str = upb_tabstr(key, &len);
return upb_murmur_hash2(str, len, 0);
return table_hash(str, len);
}
static bool streql(upb_tabkey k1, lookupkey_t k2) {
@ -280,9 +291,14 @@ static bool streql(upb_tabkey k1, lookupkey_t k2) {
return len == k2.str.len && (len == 0 || memcmp(str, k2.str.str, len) == 0);
}
/* Initializes the string table |t| using allocator |a|; the result of init()
 * is returned unchanged.  |ctype| is ignored.
 *
 * NOTE(review): this span carries two function headers and two bodies, which
 * looks like the removed and added lines of a diff (a fixed size_lg2 of 2
 * versus a size derived from |expected_size|).  Confirm which one is meant to
 * survive. */
bool upb_strtable_init2(upb_strtable *t, upb_ctype_t ctype, upb_alloc *a) {
bool upb_strtable_init2(upb_strtable *t, upb_ctype_t ctype,
size_t expected_size, upb_alloc *a) {
UPB_UNUSED(ctype); /* TODO(haberman): rm */
return init(&t->t, 2, a);
// Multiply by approximate reciprocal of MAX_LOAD (0.85), with pow2 denominator.
/* 1204 / 1024 is roughly 1.176; the +1 leaves room for one extra entry. */
size_t need_entries = (expected_size + 1) * 1204 / 1024;
/* NOTE(review): need_entries already exceeds expected_size, so this check
 * holds trivially.  Presumably the intent was that need_entries * 0.85 covers
 * expected_size -- confirm. */
UPB_ASSERT(need_entries >= expected_size * 0.85);
/* NOTE(review): _upb_lg2ceil() takes an int, so a size_t need_entries is
 * narrowed at this call. */
int size_lg2 = _upb_lg2ceil(need_entries);
return init(&t->t, size_lg2, a);
}
void upb_strtable_clear(upb_strtable *t) {
@ -333,20 +349,20 @@ bool upb_strtable_insert3(upb_strtable *t, const char *k, size_t len,
tabkey = strcopy(key, a);
if (tabkey == 0) return false;
hash = upb_murmur_hash2(key.str.str, key.str.len, 0);
hash = table_hash(key.str.str, key.str.len);
insert(&t->t, key, tabkey, v, hash, &strhash, &streql);
return true;
}
/* Looks up the |len|-byte string |key| in |t|.  The key bytes are hashed and
 * the hash, a lookup key built by strkey2(), and |v| are passed to lookup()
 * with streql as the comparison callback; lookup()'s result is returned.
 *
 * NOTE(review): `hash` is declared twice in a row, which looks like the
 * removed and added lines of a diff (murmur hash versus table_hash).  Confirm
 * which initializer is intended. */
bool upb_strtable_lookup2(const upb_strtable *t, const char *key, size_t len,
upb_value *v) {
uint32_t hash = upb_murmur_hash2(key, len, 0);
uint32_t hash = table_hash(key, len);
return lookup(&t->t, strkey2(key, len), v, hash, &streql);
}
bool upb_strtable_remove3(upb_strtable *t, const char *key, size_t len,
upb_value *val, upb_alloc *alloc) {
uint32_t hash = upb_murmur_hash2(key, len, 0);
uint32_t hash = table_hash(key, len);
upb_tabkey tabkey;
if (rm(&t->t, strkey2(key, len), val, &tabkey, hash, &streql)) {
if (alloc) {
@ -699,182 +715,3 @@ bool upb_inttable_iter_isequal(const upb_inttable_iter *i1,
return i1->t == i2->t && i1->index == i2->index &&
i1->array_part == i2->array_part;
}
#if defined(UPB_UNALIGNED_READS_OK) || defined(__s390x__)
/* -----------------------------------------------------------------------------
 * MurmurHash2, by Austin Appleby (released as public domain).
 * Reformatted and C99-ified by Joshua Haberman.
 *
 * Hashes the |len| bytes at |key|, starting from |seed|, into 32 bits.
 *
 * Limitations:
 *   1. The hash is one-shot; it cannot be computed incrementally.
 *   2. Whole words are loaded in native byte order, so little-endian and
 *      big-endian machines produce different results. */
uint32_t upb_murmur_hash2(const void *key, size_t len, uint32_t seed) {
  /* Mixing constants; they were generated offline and simply work well. */
  const uint32_t m = 0x5bd1e995;
  const int32_t r = 24;

  const uint8_t *bytes = (const uint8_t *)key;
  size_t left;

  /* Start from a value that depends on both the seed and the length. */
  uint32_t h = (uint32_t)(seed ^ len);

  /* Body: fold in one 4-byte word per iteration. */
  for (left = len; left >= 4; left -= 4, bytes += 4) {
    uint32_t word;
    memcpy(&word, bytes, sizeof(word));

    word *= m;
    word ^= word >> r;
    word *= m;

    h *= m;
    h ^= word;
  }

  /* Tail: the 1-3 bytes that did not fill a whole word. */
  if (left != 0) {
    if (left == 3) h ^= (uint32_t)bytes[2] << 16;
    if (left >= 2) h ^= (uint32_t)bytes[1] << 8;
    h ^= bytes[0];
    h *= m;
  }

  /* Final avalanche so the last bytes are well mixed into the result. */
  h ^= h >> 13;
  h *= m;
  h ^= h >> 15;

  return h;
}
#else /* !UPB_UNALIGNED_READS_OK */
/* -----------------------------------------------------------------------------
 * MurmurHashAligned2, by Austin Appleby
 * Same algorithm as MurmurHash2, but only does aligned reads - should be safer
 * on certain platforms.
 * Performance will be lower than MurmurHash2
 *
 * Hashes the |len| bytes at |key|, starting from |seed|, into 32 bits.  When
 * |key| is not 4-byte aligned (and at least 4 bytes long), the leading bytes
 * are read one at a time and every later word is read from a 4-byte-aligned
 * address, then re-assembled with shifts. */
/* One MurmurHash2 mixing round: scrambles word k and folds it into h.  Uses
 * the local constant r from the enclosing function. */
#define MIX(h,k,m) { k *= m; k ^= k >> r; k *= m; h *= m; h ^= k; }
uint32_t upb_murmur_hash2(const void * key, size_t len, uint32_t seed) {
const uint32_t m = 0x5bd1e995;
const int32_t r = 24;
const uint8_t * data = (const uint8_t *)key;
uint32_t h = (uint32_t)(seed ^ len);
/* Offset of data within its 4-byte word (the low two address bits). */
uint8_t align = (uintptr_t)data & 3;
if(align && (len >= 4)) {
/* Pre-load the temp registers */
/* t holds bytes already read but not yet mixed; d is the newest word. */
uint32_t t = 0, d = 0;
int32_t sl;
int32_t sr;
/* Read the 4-align bytes that precede the next aligned address. */
switch(align) {
case 1: t |= data[2] << 16; /* fallthrough */
case 2: t |= data[1] << 8; /* fallthrough */
case 3: t |= data[0];
}
t <<= (8 * align);
/* After this adjustment data is a multiple of 4. */
data += 4-align;
len -= 4-align;
/* Shift amounts, in bits, used to splice t and d into one word. */
sl = 8 * (4-align);
sr = 8 * align;
/* Mix */
while(len >= 4) {
uint32_t k;
/* NOTE(review): this cast drops const and reads the buffer through a
 * uint32_t pointer -- confirm that is acceptable under the aliasing rules of
 * the targeted compilers. */
d = *(uint32_t *)data;
t = (t >> sr) | (d << sl);
k = t;
MIX(h,k,m);
t = d;
data += 4;
len -= 4;
}
/* Handle leftover data in temp registers */
d = 0;
if(len >= align) {
/* Enough bytes remain to complete the pending word in t. */
uint32_t k;
switch(align) {
case 3: d |= data[2] << 16; /* fallthrough */
case 2: d |= data[1] << 8; /* fallthrough */
case 1: d |= data[0]; /* fallthrough */
}
k = (t >> sr) | (d << sl);
MIX(h,k,m);
data += align;
len -= align;
/* ----------
* Handle tail bytes */
switch(len) {
case 3: h ^= data[2] << 16; /* fallthrough */
case 2: h ^= data[1] << 8; /* fallthrough */
case 1: h ^= data[0]; h *= m; /* fallthrough */
};
} else {
/* Fewer than align bytes remain: the pending bytes in t plus these last
 * bytes form the (partial) tail word. */
switch(len) {
case 3: d |= data[2] << 16; /* fallthrough */
case 2: d |= data[1] << 8; /* fallthrough */
case 1: d |= data[0]; /* fallthrough */
case 0: h ^= (t >> sr) | (d << sl); h *= m;
}
}
/* Final mixing of the hash. */
h ^= h >> 13;
h *= m;
h ^= h >> 15;
return h;
} else {
/* Reached when data is already 4-byte aligned, or when len < 4 (in which
 * case the loop below does not run and only single bytes are read). */
while(len >= 4) {
uint32_t k = *(uint32_t *)data;
MIX(h,k,m);
data += 4;
len -= 4;
}
/* ----------
* Handle tail bytes */
switch(len) {
case 3: h ^= data[2] << 16; /* fallthrough */
case 2: h ^= data[1] << 8; /* fallthrough */
case 1: h ^= data[0]; h *= m;
};
/* Final mixing of the hash. */
h ^= h >> 13;
h *= m;
h ^= h >> 15;
return h;
}
}
#undef MIX
#endif /* UPB_UNALIGNED_READS_OK */

@ -13,7 +13,7 @@
** store pointers or integers of at least 32 bits (upb isn't really useful on
** systems where sizeof(void*) < 4).
**
** The table must be homogenous (all values of the same type). In debug
** The table must be homogeneous (all values of the same type). In debug
** mode, we check this on insert and lookup.
*/
@ -171,7 +171,8 @@ typedef struct _upb_tabent {
typedef struct {
size_t count; /* Number of entries in the hash part. */
size_t mask; /* Mask to turn hash value -> bucket. */
uint32_t mask; /* Mask to turn hash value -> bucket. */
uint32_t max_count; /* Max count before we hit our load limit. */
uint8_t size_lg2; /* Size of the hashtable part is 2^size_lg2 entries. */
/* Hash table entries.
@ -230,7 +231,8 @@ UPB_INLINE bool upb_arrhas(upb_tabval key) {
/* Initialize and uninitialize a table, respectively. If memory allocation
* failed, false is returned that the table is uninitialized. */
bool upb_inttable_init2(upb_inttable *table, upb_ctype_t ctype, upb_alloc *a);
bool upb_strtable_init2(upb_strtable *table, upb_ctype_t ctype, upb_alloc *a);
bool upb_strtable_init2(upb_strtable *table, upb_ctype_t ctype,
size_t expected_size, upb_alloc *a);
void upb_inttable_uninit2(upb_inttable *table, upb_alloc *a);
void upb_strtable_uninit2(upb_strtable *table, upb_alloc *a);
@ -239,7 +241,7 @@ UPB_INLINE bool upb_inttable_init(upb_inttable *table, upb_ctype_t ctype) {
}
/* Convenience wrapper: initializes |table| through upb_strtable_init2() with
 * the global allocator.
 *
 * NOTE(review): two return statements follow each other, which looks like the
 * removed and added lines of a diff (the second passes an expected size of
 * 4).  As written the second call is unreachable -- confirm which is
 * intended. */
UPB_INLINE bool upb_strtable_init(upb_strtable *table, upb_ctype_t ctype) {
return upb_strtable_init2(table, ctype, &upb_alloc_global);
return upb_strtable_init2(table, ctype, 4, &upb_alloc_global);
}
UPB_INLINE void upb_inttable_uninit(upb_inttable *table) {

@ -43,7 +43,7 @@ static void txtenc_printf(txtenc *e, const char *fmt, ...) {
va_list args;
va_start(args, fmt);
n = _upb_vsnprintf(e->ptr, have, fmt, args);
n = vsnprintf(e->ptr, have, fmt, args);
va_end(args);
if (UPB_LIKELY(have > n)) {

@ -40,7 +40,7 @@ void upb_status_seterrf(upb_status *status, const char *fmt, ...) {
/* Marks |status| as failed and formats the message described by |fmt| and
 * |args| into status->msg.  Does nothing when |status| is NULL.
 *
 * NOTE(review): two formatting calls follow each other, which looks like the
 * removed and added lines of a diff.  As written |args| is handed to a second
 * v*printf-style call after the first has already used it -- confirm which
 * call is intended. */
void upb_status_vseterrf(upb_status *status, const char *fmt, va_list args) {
if (!status) return;
status->ok = false;
_upb_vsnprintf(status->msg, sizeof(status->msg), fmt, args);
vsnprintf(status->msg, sizeof(status->msg), fmt, args);
/* Force termination of the message buffer at its last byte. */
status->msg[UPB_STATUS_MAX_MESSAGE - 1] = '\0';
}
@ -49,7 +49,7 @@ void upb_status_vappenderrf(upb_status *status, const char *fmt, va_list args) {
if (!status) return;
status->ok = false;
len = strlen(status->msg);
_upb_vsnprintf(status->msg + len, sizeof(status->msg) - len, fmt, args);
vsnprintf(status->msg + len, sizeof(status->msg) - len, fmt, args);
status->msg[UPB_STATUS_MAX_MESSAGE - 1] = '\0';
}

@ -313,6 +313,17 @@ UPB_INLINE uint64_t _upb_be_swap64(uint64_t val) {
}
}
/* Returns ceil(log2(x)): the smallest n such that 2^n >= x.
 * Any x <= 1 (including zero and negative values) yields 0. */
UPB_INLINE int _upb_lg2ceil(int x) {
  if (x <= 1) return 0;
#ifdef __GNUC__
  /* x - 1 >= 1 here, so __builtin_clz() is never called with 0 (which would
   * be undefined).  The constant 32 assumes a 32-bit unsigned int. */
  return 32 - __builtin_clz(x - 1);
#else
  /* Compare in unsigned arithmetic: with a signed `1 << lg2` the shift
   * overflows once lg2 reaches 31, which happens for any x above 2^30. */
  int lg2 = 0;
  while ((1u << lg2) < (unsigned)x) lg2++;
  return lg2;
#endif
}
#include "upb/port_undef.inc"
#ifdef __cplusplus

@ -41,6 +41,9 @@ class Arena {
public:
// A simple arena with no initial memory block and the default allocator.
Arena() : ptr_(upb_arena_new(), upb_arena_free) {}
Arena(char *initial_block, size_t size)
: ptr_(upb_arena_init(initial_block, size, &upb_alloc_global),
upb_arena_free) {}
upb_arena* ptr() { return ptr_.get(); }
@ -71,15 +74,12 @@ class Arena {
// An Arena whose initial block is an N-byte buffer embedded in the object
// itself.  The buffer is handed to the Arena(char*, size_t) constructor, which
// passes it to upb_arena_init() as the initial block.
template <int N>
class InlinedArena : public Arena {
 public:
  // Only the address of initial_block_ is passed to the base class here; the
  // buffer is not read or written during base construction by this class.
  InlinedArena() : Arena(initial_block_, N) {}

  // The arena was initialized with the address of initial_block_, so an
  // InlinedArena must never be copied or moved.  These take references so
  // that they really are the copy operations; declaring them also suppresses
  // the implicitly generated move operations.
  InlinedArena(const InlinedArena&) = delete;
  InlinedArena& operator=(const InlinedArena&) = delete;

 private:
  char initial_block_[N];
};

Loading…
Cancel
Save