Merge commit '21df81dfc2ddfb77c6fe48afea2801a152a03769' as 'third_party/upb'

pull/20725/head
Esun Kim 5 years ago
commit 8ea02a3971
  1. 10
      third_party/upb/.bazelci/presubmit.yml
  2. 4
      third_party/upb/.gitignore
  3. 3
      third_party/upb/.gitmodules
  4. 727
      third_party/upb/BUILD
  5. 147
      third_party/upb/CMakeLists.txt
  6. 7
      third_party/upb/CONTRIBUTING.md
  7. 72
      third_party/upb/DESIGN.md
  8. 26
      third_party/upb/LICENSE
  9. 134
      third_party/upb/README.md
  10. 39
      third_party/upb/WORKSPACE
  11. 0
      third_party/upb/bazel/BUILD
  12. 221
      third_party/upb/bazel/build_defs.bzl
  13. 102
      third_party/upb/bazel/lua.BUILD
  14. 193
      third_party/upb/bazel/ragel.BUILD
  15. 15
      third_party/upb/bazel/repository_defs.bzl
  16. 299
      third_party/upb/bazel/upb_proto_library.bzl
  17. 36
      third_party/upb/bazel/workspace_deps.bzl
  18. 18
      third_party/upb/examples/bazel/BUILD
  19. 14
      third_party/upb/examples/bazel/WORKSPACE
  20. 7
      third_party/upb/examples/bazel/foo.proto
  21. 17
      third_party/upb/examples/bazel/test_binary.c
  22. 485
      third_party/upb/generated_for_cmake/google/protobuf/descriptor.upb.c
  23. 1690
      third_party/upb/generated_for_cmake/google/protobuf/descriptor.upb.h
  24. 3454
      third_party/upb/generated_for_cmake/upb/json/parser.c
  25. 16
      third_party/upb/kokoro/ubuntu/build.sh
  26. 2
      third_party/upb/kokoro/ubuntu/continuous.cfg
  27. 2
      third_party/upb/kokoro/ubuntu/presubmit.cfg
  28. 36
      third_party/upb/tests/benchmark.cc
  29. 165
      third_party/upb/tests/bindings/googlepb/test_vs_proto2.cc
  30. 750
      third_party/upb/tests/bindings/lua/test_upb.lua
  31. 80
      third_party/upb/tests/bindings/lua/test_upb.pb.lua
  32. 62
      third_party/upb/tests/bindings/ruby/upb.rb
  33. 179
      third_party/upb/tests/conformance_upb.c
  34. 1
      third_party/upb/tests/conformance_upb_failures.txt
  35. 1
      third_party/upb/tests/corpus/README
  36. 1
      third_party/upb/tests/corpus/temp.cc
  37. 15
      third_party/upb/tests/file_descriptor_parsenew_fuzzer.cc
  38. BIN
      third_party/upb/tests/google_message1.dat
  39. BIN
      third_party/upb/tests/google_message2.dat
  40. 149
      third_party/upb/tests/google_messages.proto
  41. 9
      third_party/upb/tests/json/enum_from_separate_file.proto
  42. 47
      third_party/upb/tests/json/test.proto
  43. BIN
      third_party/upb/tests/json/test.proto.pb
  44. 256
      third_party/upb/tests/json/test_json.cc
  45. 1203
      third_party/upb/tests/pb/test_decoder.cc
  46. 128
      third_party/upb/tests/pb/test_decoder.proto
  47. 48
      third_party/upb/tests/pb/test_encoder.cc
  48. 117
      third_party/upb/tests/pb/test_varint.c
  49. 68
      third_party/upb/tests/test.proto
  50. BIN
      third_party/upb/tests/test.proto.pb
  51. 957
      third_party/upb/tests/test_cpp.cc
  52. 12
      third_party/upb/tests/test_cpp.proto
  53. 679
      third_party/upb/tests/test_table.cc
  54. 230
      third_party/upb/tests/test_util.h
  55. 16
      third_party/upb/tests/testmain.cc
  56. 53
      third_party/upb/tests/upb_test.h
  57. 32
      third_party/upb/third_party/lunit/LICENSE
  58. 9
      third_party/upb/third_party/lunit/README.google
  59. 156
      third_party/upb/third_party/lunit/console.lua
  60. 725
      third_party/upb/third_party/lunit/lunit.lua
  61. 81
      third_party/upb/tools/amalgamate.py
  62. 279
      third_party/upb/tools/make_cmakelists.py
  63. 30
      third_party/upb/tools/staleness_test.py
  64. 158
      third_party/upb/tools/staleness_test_lib.py
  65. 5
      third_party/upb/upb/bindings/README
  66. 766
      third_party/upb/upb/bindings/lua/def.c
  67. 1060
      third_party/upb/upb/bindings/lua/msg.c
  68. 245
      third_party/upb/upb/bindings/lua/upb.c
  69. 127
      third_party/upb/upb/bindings/lua/upb.h
  70. 172
      third_party/upb/upb/bindings/lua/upb.lua
  71. 56
      third_party/upb/upb/bindings/lua/upb/pb.c
  72. 3
      third_party/upb/upb/bindings/lua/upb/pb.lua
  73. 69
      third_party/upb/upb/bindings/stdc++/string.h
  74. 604
      third_party/upb/upb/decode.c
  75. 21
      third_party/upb/upb/decode.h
  76. 1756
      third_party/upb/upb/def.c
  77. 909
      third_party/upb/upb/def.h
  78. 378
      third_party/upb/upb/encode.c
  79. 21
      third_party/upb/upb/encode.h
  80. 105
      third_party/upb/upb/generated_util.h
  81. 923
      third_party/upb/upb/handlers-inl.h
  82. 567
      third_party/upb/upb/handlers.c
  83. 732
      third_party/upb/upb/handlers.h
  84. 140
      third_party/upb/upb/json/parser.h
  85. 3017
      third_party/upb/upb/json/parser.rl
  86. 1406
      third_party/upb/upb/json/printer.c
  87. 72
      third_party/upb/upb/json/printer.h
  88. 399
      third_party/upb/upb/legacy_msg_reflection.c
  89. 191
      third_party/upb/upb/legacy_msg_reflection.h
  90. 111
      third_party/upb/upb/msg.c
  91. 69
      third_party/upb/upb/msg.h
  92. 248
      third_party/upb/upb/msgfactory.c
  93. 48
      third_party/upb/upb/msgfactory.h
  94. 919
      third_party/upb/upb/pb/compile_decoder.c
  95. 1050
      third_party/upb/upb/pb/decoder.c
  96. 240
      third_party/upb/upb/pb/decoder.h
  97. 288
      third_party/upb/upb/pb/decoder.int.h
  98. 570
      third_party/upb/upb/pb/encoder.c
  99. 83
      third_party/upb/upb/pb/encoder.h
  100. 36
      third_party/upb/upb/pb/make-gdb-script.rb
  101. Some files were not shown because too many files have changed in this diff Show More

@ -0,0 +1,10 @@
---
tasks:
ubuntu:
platform: ubuntu1604
test_targets:
- //...
macos:
platform: macos
test_targets:
- //...

@ -0,0 +1,4 @@
*.s??
obj/
lib/
bazel-*

@ -0,0 +1,3 @@
[submodule "third_party/protobuf"]
path = third_party/protobuf
url = https://github.com/google/protobuf.git

@ -0,0 +1,727 @@
load(
"//bazel:build_defs.bzl",
"generated_file_staleness_test",
"licenses", # copybara:strip_for_google3
"lua_binary",
"lua_cclibrary",
"lua_library",
"lua_test",
"make_shell_script",
"upb_amalgamation",
)
load(
"//bazel:upb_proto_library.bzl",
"upb_proto_library",
"upb_proto_reflection_library",
)
licenses(["notice"]) # BSD (Google-authored w/ possible external contributions)
exports_files([
"LICENSE",
"build_defs",
])
CPPOPTS = [
# copybara:strip_for_google3_begin
"-Werror",
"-Wno-long-long",
# copybara:strip_end
]
COPTS = CPPOPTS + [
# copybara:strip_for_google3_begin
"-pedantic",
"-Wstrict-prototypes",
# copybara:strip_end
]
config_setting(
name = "darwin",
values = {"cpu": "darwin"},
visibility = ["//visibility:public"],
)
config_setting(
name = "windows",
constraint_values = ["@bazel_tools//platforms:windows"],
)
config_setting(
name = "fuzz",
values = {"define": "fuzz=true"},
)
# Public C/C++ libraries #######################################################
cc_library(
name = "upb",
srcs = [
"upb/decode.c",
"upb/encode.c",
"upb/generated_util.h",
"upb/msg.c",
"upb/msg.h",
"upb/port.c",
"upb/port_def.inc",
"upb/port_undef.inc",
"upb/table.c",
"upb/table.int.h",
"upb/upb.c",
],
hdrs = [
"upb/decode.h",
"upb/encode.h",
"upb/upb.h",
],
copts = select({
":windows": [],
"//conditions:default": COPTS
}),
visibility = ["//visibility:public"],
)
# Common support routines used by generated code. This library has no
# implementation, but depends on :upb and exposes a few more hdrs.
#
# This is public only because we have no way of visibility-limiting it to
# upb_proto_library() only. This interface is not stable and by using it you
# give up any backward compatibility guarantees.
cc_library(
name = "generated_code_support__only_for_generated_code_do_not_use__i_give_permission_to_break_me",
hdrs = [
"upb/generated_util.h",
"upb/msg.h",
],
copts = select({
":windows": [],
"//conditions:default": COPTS
}),
textual_hdrs = [
"upb/port_def.inc",
"upb/port_undef.inc",
],
visibility = ["//visibility:public"],
deps = [":upb"],
)
upb_proto_library(
name = "descriptor_upbproto",
visibility = ["//visibility:public"],
deps = ["@com_google_protobuf//:descriptor_proto"],
)
cc_library(
name = "reflection",
srcs = [
"upb/def.c",
"upb/msgfactory.c",
],
hdrs = [
"upb/def.h",
"upb/msgfactory.h",
],
copts = select({
":windows": [],
"//conditions:default": COPTS
}),
visibility = ["//visibility:public"],
deps = [
":descriptor_upbproto",
":table",
":upb",
],
)
# Internal C/C++ libraries #####################################################
cc_library(
name = "table",
hdrs = ["upb/table.int.h"],
deps = [":upb"],
)
# Legacy C/C++ Libraries (not recommended for new code) ########################
cc_library(
name = "legacy_msg_reflection",
srcs = [
"upb/legacy_msg_reflection.c",
],
hdrs = ["upb/legacy_msg_reflection.h"],
copts = select({
":windows": [],
"//conditions:default": COPTS
}),
deps = [
":table",
":upb",
],
)
cc_library(
name = "handlers",
srcs = [
"upb/handlers.c",
"upb/handlers-inl.h",
"upb/sink.c",
],
hdrs = [
"upb/handlers.h",
"upb/sink.h",
],
copts = select({
":windows": [],
"//conditions:default": COPTS
}),
deps = [
":reflection",
":table",
":upb",
],
)
cc_library(
name = "upb_pb",
srcs = [
"upb/pb/compile_decoder.c",
"upb/pb/decoder.c",
"upb/pb/decoder.int.h",
"upb/pb/encoder.c",
"upb/pb/textprinter.c",
"upb/pb/varint.c",
"upb/pb/varint.int.h",
],
hdrs = [
"upb/pb/decoder.h",
"upb/pb/encoder.h",
"upb/pb/textprinter.h",
],
copts = select({
":windows": [],
"//conditions:default": COPTS
}),
deps = [
":descriptor_upbproto",
":handlers",
":reflection",
":table",
":upb",
],
)
# copybara:strip_for_google3_begin
cc_library(
name = "upb_json",
srcs = [
"upb/json/parser.c",
"upb/json/printer.c",
],
hdrs = [
"upb/json/parser.h",
"upb/json/printer.h",
],
copts = select({
":windows": [],
"//conditions:default": COPTS
}),
deps = [
":upb",
":upb_pb",
],
)
# copybara:strip_end
cc_library(
name = "upb_cc_bindings",
hdrs = [
"upb/bindings/stdc++/string.h",
],
deps = [
":descriptor_upbproto",
":handlers",
":upb",
],
)
# upb compiler #################################################################
cc_library(
name = "upbc_generator",
srcs = [
"upbc/generator.cc",
"upbc/message_layout.cc",
"upbc/message_layout.h",
],
hdrs = ["upbc/generator.h"],
copts = select({
":windows": [],
"//conditions:default": CPPOPTS
}),
deps = [
"@absl//absl/base:core_headers",
"@absl//absl/container:flat_hash_map",
"@absl//absl/strings",
"@com_google_protobuf//:protobuf",
"@com_google_protobuf//:protoc_lib",
],
)
cc_binary(
name = "protoc-gen-upb",
srcs = ["upbc/main.cc"],
copts = select({
":windows": [],
"//conditions:default": CPPOPTS
}),
visibility = ["//visibility:public"],
deps = [
":upbc_generator",
"@com_google_protobuf//:protoc_lib",
],
)
# We strip the tests and remaining rules from google3 until the upb_proto_library()
# and upb_proto_reflection_library() rules are fixed.
# C/C++ tests ##################################################################
cc_binary(
name = "benchmark",
testonly = 1,
srcs = ["tests/benchmark.cc"],
deps = [
":descriptor_upbproto",
":descriptor_upbreflection",
"@com_github_google_benchmark//:benchmark_main",
],
)
cc_library(
name = "upb_test",
testonly = 1,
srcs = [
"tests/testmain.cc",
],
hdrs = [
"tests/test_util.h",
"tests/upb_test.h",
],
copts = select({
":windows": [],
"//conditions:default": CPPOPTS
}),
deps = [
":handlers",
":upb",
],
)
cc_test(
name = "test_varint",
srcs = [
"tests/pb/test_varint.c",
"upb/pb/varint.int.h",
],
copts = select({
":windows": [],
"//conditions:default": COPTS
}),
deps = [
":upb",
":upb_pb",
":upb_test",
],
)
proto_library(
name = "test_decoder_proto",
srcs = [
"tests/pb/test_decoder.proto",
],
)
upb_proto_reflection_library(
name = "test_decoder_upbproto",
deps = [":test_decoder_proto"],
)
cc_test(
name = "test_decoder",
srcs = [
"tests/pb/test_decoder.cc",
"upb/pb/varint.int.h",
],
copts = select({
":windows": [],
"//conditions:default": CPPOPTS
}),
deps = [
":handlers",
":test_decoder_upbproto",
":upb",
":upb_pb",
":upb_test",
],
)
proto_library(
name = "test_cpp_proto",
srcs = [
"tests/test_cpp.proto",
],
)
# upb reflection bindings for tests/test_cpp.proto (consumed by :test_cpp).
upb_proto_reflection_library(
    name = "test_cpp_upbproto",
    # Use the explicit ":target" label form, as the parallel
    # :test_decoder_upbproto rule does.
    deps = [":test_cpp_proto"],
)
cc_test(
name = "test_cpp",
srcs = ["tests/test_cpp.cc"],
copts = select({
":windows": [],
"//conditions:default": CPPOPTS
}),
deps = [
":handlers",
":reflection",
":test_cpp_upbproto",
":upb",
":upb_pb",
":upb_test",
],
)
cc_test(
name = "test_table",
srcs = ["tests/test_table.cc"],
copts = select({
":windows": [],
"//conditions:default": CPPOPTS
}),
deps = [
":table",
":upb",
":upb_test",
],
)
# OSS-Fuzz test
cc_binary(
name = "file_descriptor_parsenew_fuzzer",
testonly = 1,
srcs = ["tests/file_descriptor_parsenew_fuzzer.cc"],
copts = select({
":windows": [],
"//conditions:default": CPPOPTS
}) + select({
"//conditions:default": [],
":fuzz": ["-fsanitize=fuzzer,address"],
}),
defines = select({
"//conditions:default": [],
":fuzz": ["HAVE_FUZZER"],
}),
deps = [
":descriptor_upbproto",
":upb",
],
)
# copybara:strip_for_google3_begin
upb_proto_reflection_library(
name = "descriptor_upbreflection",
deps = ["@com_google_protobuf//:descriptor_proto"],
)
cc_test(
name = "test_encoder",
srcs = ["tests/pb/test_encoder.cc"],
copts = select({
":windows": [],
"//conditions:default": CPPOPTS
}),
deps = [
":descriptor_upbproto",
":descriptor_upbreflection",
":upb",
":upb_cc_bindings",
":upb_pb",
":upb_test",
],
)
proto_library(
name = "test_json_enum_from_separate",
srcs = ["tests/json/enum_from_separate_file.proto"],
deps = [":test_json_proto"],
)
proto_library(
name = "test_json_proto",
srcs = ["tests/json/test.proto"],
)
# upb reflection bindings for tests/json/test.proto (consumed by :test_json).
upb_proto_reflection_library(
    name = "test_json_upbprotoreflection",
    # Use the explicit ":target" label form, as the sibling
    # :test_json_upbproto rule does.
    deps = [":test_json_proto"],
)
upb_proto_library(
name = "test_json_enum_from_separate_upbproto",
deps = [":test_json_enum_from_separate"],
)
upb_proto_library(
name = "test_json_upbproto",
deps = [":test_json_proto"],
)
cc_test(
name = "test_json",
srcs = [
"tests/json/test_json.cc",
],
copts = select({
":windows": [],
"//conditions:default": CPPOPTS
}),
deps = [
":test_json_upbproto",
":test_json_upbprotoreflection",
":upb_json",
":upb_test",
],
)
# copybara:strip_end
upb_proto_library(
name = "conformance_proto_upb",
testonly = 1,
deps = ["@com_google_protobuf//:conformance_proto"],
)
upb_proto_library(
name = "test_messages_proto3_proto_upb",
testonly = 1,
deps = ["@com_google_protobuf//:test_messages_proto3_proto"],
)
# Conformance test program; it is driven by protobuf's conformance_test_runner
# through the :test_conformance_upb sh_test.
cc_binary(
    name = "conformance_upb",
    testonly = 1,
    srcs = [
        "tests/conformance_upb.c",
    ],
    copts = select({
        ":windows": [],
        "//conditions:default": COPTS,
    }) + [
        # Generated headers live under the configuration-specific bin
        # directory. $(BINDIR) expands to that directory for whatever
        # CPU/compilation mode is being built, instead of assuming
        # "bazel-out/k8-fastbuild/bin" (which is only right for a default
        # x86-64 Linux fastbuild).
        "-I$(BINDIR)",
    ],
    deps = [
        ":conformance_proto_upb",
        ":test_messages_proto3_proto_upb",
        ":upb",
    ],
)
make_shell_script(
name = "gen_test_conformance_upb",
out = "test_conformance_upb.sh",
contents = "external/com_google_protobuf/conformance_test_runner ./conformance_upb",
)
sh_test(
name = "test_conformance_upb",
srcs = ["test_conformance_upb.sh"],
data = [
"tests/conformance_upb_failures.txt",
":conformance_upb",
"@com_google_protobuf//:conformance_test_runner",
],
)
# copybara:strip_for_google3_begin
# Amalgamation #################################################################
py_binary(
name = "amalgamate",
srcs = ["tools/amalgamate.py"],
)
upb_amalgamation(
name = "gen_amalgamation",
outs = [
"upb.c",
"upb.h",
],
amalgamator = ":amalgamate",
libs = [
":upb",
":descriptor_upbproto",
":reflection",
":handlers",
":upb_pb",
":upb_json",
],
)
cc_library(
name = "amalgamation",
srcs = ["upb.c"],
hdrs = ["upb.h"],
copts = select({
":windows": [],
"//conditions:default": COPTS
}),
)
# Lua libraries. ###############################################################
lua_cclibrary(
name = "lua/upb_c",
srcs = [
"upb/bindings/lua/def.c",
"upb/bindings/lua/msg.c",
"upb/bindings/lua/upb.c",
],
hdrs = [
"upb/bindings/lua/upb.h",
],
deps = [
"legacy_msg_reflection",
"upb",
"upb_pb",
],
)
lua_library(
name = "lua/upb",
srcs = ["upb/bindings/lua/upb.lua"],
luadeps = ["lua/upb_c"],
strip_prefix = "upb/bindings/lua",
)
lua_cclibrary(
name = "lua/upb/pb_c",
srcs = ["upb/bindings/lua/upb/pb.c"],
luadeps = ["lua/upb_c"],
deps = ["upb_pb"],
)
lua_library(
name = "lua/upb/pb",
srcs = ["upb/bindings/lua/upb/pb.lua"],
luadeps = [
"lua/upb",
"lua/upb/pb_c",
],
strip_prefix = "upb/bindings/lua",
)
# Lua tests. ###################################################################
lua_test(
name = "lua/test_upb",
luadeps = ["lua/upb"],
luamain = "tests/bindings/lua/test_upb.lua",
)
lua_test(
name = "lua/test_upb_pb",
luadeps = ["lua/upb/pb"],
luamain = "tests/bindings/lua/test_upb.pb.lua",
)
# Test the CMake build #########################################################
filegroup(
name = "cmake_files",
srcs = glob([
"CMakeLists.txt",
"generated_for_cmake/**/*",
"google/**/*",
"upbc/**/*",
"upb/**/*",
"tests/**/*",
]),
)
make_shell_script(
name = "gen_run_cmake_build",
out = "run_cmake_build.sh",
contents = "find . && mkdir build && cd build && cmake .. && make -j8 && make test",
)
sh_test(
name = "cmake_build",
srcs = ["run_cmake_build.sh"],
data = [":cmake_files"],
)
# Generated files ##############################################################
exports_files(["tools/staleness_test.py"])
py_library(
name = "staleness_test_lib",
testonly = 1,
srcs = ["tools/staleness_test_lib.py"],
)
py_binary(
name = "make_cmakelists",
srcs = ["tools/make_cmakelists.py"],
)
genrule(
name = "gen_cmakelists",
srcs = [
"BUILD",
"WORKSPACE",
":cmake_files",
],
outs = ["generated-in/CMakeLists.txt"],
cmd = "$(location :make_cmakelists) $@",
tools = [":make_cmakelists"],
)
genrule(
name = "generate_json_ragel",
srcs = ["upb/json/parser.rl"],
outs = ["upb/json/parser.c"],
cmd = "$(location @ragel//:ragelc) -C -o upb/json/parser.c $< && mv upb/json/parser.c $@",
tools = ["@ragel//:ragelc"],
)
genrule(
name = "copy_json_ragel",
srcs = ["upb/json/parser.c"],
outs = ["generated-in/generated_for_cmake/upb/json/parser.c"],
cmd = "cp $< $@",
)
genrule(
name = "copy_protos",
srcs = [":descriptor_upbproto"],
outs = [
"generated-in/generated_for_cmake/google/protobuf/descriptor.upb.c",
"generated-in/generated_for_cmake/google/protobuf/descriptor.upb.h",
],
cmd = "cp $(SRCS) $(@D)/generated-in/generated_for_cmake/google/protobuf",
)
generated_file_staleness_test(
name = "test_generated_files",
outs = [
"CMakeLists.txt",
"generated_for_cmake/google/protobuf/descriptor.upb.c",
"generated_for_cmake/google/protobuf/descriptor.upb.h",
"generated_for_cmake/upb/json/parser.c",
],
generated_pattern = "generated-in/%s",
)
# copybara:strip_end

@ -0,0 +1,147 @@
# This file was generated from BUILD using tools/make_cmakelists.py.
# Require at least CMake 3.1, and opt in to the policy behavior of the running
# CMake version, capped at 3.12 (the newest version this file was written for).
cmake_minimum_required(VERSION 3.1)

if(${CMAKE_VERSION} VERSION_LESS 3.12)
  cmake_policy(VERSION ${CMAKE_MAJOR_VERSION}.${CMAKE_MINOR_VERSION})
else()
  cmake_policy(VERSION 3.12)
endif()

# NOTE: do not call cmake_minimum_required() a second time here.  It implicitly
# invokes cmake_policy(VERSION ...), which would reset the policy version and
# undo the selection made above.
cmake_policy(SET CMP0048 NEW)

project(upb)
# Prevent CMake from setting -rdynamic on Linux (!!).
SET(CMAKE_SHARED_LIBRARY_LINK_C_FLAGS "")
SET(CMAKE_SHARED_LIBRARY_LINK_CXX_FLAGS "")
# Set default build type.
if(NOT CMAKE_BUILD_TYPE)
message(STATUS "Setting build type to 'RelWithDebInfo' as none was specified.")
set(CMAKE_BUILD_TYPE "RelWithDebInfo" CACHE STRING
"Choose the type of build, options are: Debug Release RelWithDebInfo MinSizeRel."
FORCE)
endif()
# When using Ninja, compiler output won't be colorized without this.
include(CheckCXXCompilerFlag)
CHECK_CXX_COMPILER_FLAG(-fdiagnostics-color=always SUPPORTS_COLOR_ALWAYS)
if(SUPPORTS_COLOR_ALWAYS)
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fdiagnostics-color=always")
endif()
# Implement ASAN/UBSAN options
if(UPB_ENABLE_ASAN)
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fsanitize=address")
set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -fsanitize=address")
set(CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} -fsanitize=address")
set(CMAKE_SHARED_LINKER_FLAGS "${CMAKE_SHARED_LINKER_FLAGS} -fsanitize=address")
endif()
if(UPB_ENABLE_UBSAN)
  # Every variable must request the *undefined* sanitizer.  Previously only
  # the C++ flags did; the C and linker flags enabled AddressSanitizer instead,
  # so the C sources were never built with UBSan.
  set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fsanitize=undefined")
  set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -fsanitize=undefined")
  set(CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} -fsanitize=undefined")
  set(CMAKE_SHARED_LINKER_FLAGS "${CMAKE_SHARED_LINKER_FLAGS} -fsanitize=undefined")
endif()
include_directories(.)
include_directories(generated_for_cmake)
include_directories(${CMAKE_CURRENT_BINARY_DIR})
if(APPLE)
set(CMAKE_SHARED_LINKER_FLAGS "${CMAKE_SHARED_LINKER_FLAGS} -undefined dynamic_lookup -flat_namespace")
elseif(UNIX)
set(CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} -Wl,--build-id")
endif()
enable_testing()
add_library(upb
upb/decode.c
upb/encode.c
upb/generated_util.h
upb/msg.c
upb/msg.h
upb/port.c
upb/port_def.inc
upb/port_undef.inc
upb/table.c
upb/table.int.h
upb/upb.c
upb/decode.h
upb/encode.h
upb/upb.h)
add_library(generated_code_support__only_for_generated_code_do_not_use__i_give_permission_to_break_me INTERFACE)
target_link_libraries(generated_code_support__only_for_generated_code_do_not_use__i_give_permission_to_break_me INTERFACE
upb)
add_library(reflection
upb/def.c
upb/msgfactory.c
upb/def.h
upb/msgfactory.h)
target_link_libraries(reflection
descriptor_upbproto
table
upb)
add_library(table INTERFACE)
target_link_libraries(table INTERFACE
upb)
add_library(legacy_msg_reflection
upb/legacy_msg_reflection.c
upb/legacy_msg_reflection.h)
target_link_libraries(legacy_msg_reflection
table
upb)
add_library(handlers
upb/handlers.c
upb/handlers-inl.h
upb/sink.c
upb/handlers.h
upb/sink.h)
target_link_libraries(handlers
reflection
table
upb)
add_library(upb_pb
upb/pb/compile_decoder.c
upb/pb/decoder.c
upb/pb/decoder.int.h
upb/pb/encoder.c
upb/pb/textprinter.c
upb/pb/varint.c
upb/pb/varint.int.h
upb/pb/decoder.h
upb/pb/encoder.h
upb/pb/textprinter.h)
target_link_libraries(upb_pb
descriptor_upbproto
handlers
reflection
table
upb)
add_library(upb_json
generated_for_cmake/upb/json/parser.c
upb/json/printer.c
upb/json/parser.h
upb/json/printer.h)
target_link_libraries(upb_json
upb
upb_pb)
add_library(upb_cc_bindings INTERFACE)
target_link_libraries(upb_cc_bindings INTERFACE
descriptor_upbproto
handlers
upb)
add_library(upb_test
tests/testmain.cc
tests/test_util.h
tests/upb_test.h)
target_link_libraries(upb_test
handlers
upb)

@ -0,0 +1,7 @@
## <a name="cla"></a> Signing the CLA
Please sign the [Google Contributor License Agreement
(CLA)](https://cla.developers.google.com/)
before sending pull requests. For any code changes to be
accepted, the CLA must be signed. It's a quick process, I
promise!

@ -0,0 +1,72 @@
μpb Design
----------
μpb has the following design goals:
- C89 compatible.
- small code size (both for the core library and generated messages).
- fast performance (hundreds of MB/s).
- idiomatic for C programs.
- easy to wrap in high-level languages (Python, Ruby, Lua, etc) with
good performance and all standard protobuf features.
- hands-off about memory management, allowing for easy integration
with existing VMs and/or garbage collectors.
- offers binary ABI compatibility between apps, generated messages, and
the core library (doesn't require re-generating messages or recompiling
your application when the core library changes).
- provides all features that users expect from a protobuf library
(generated messages in C, reflection, text format, etc.).
- layered, so the core is small and doesn't require descriptors.
- tidy about symbol references, so that any messages or features that
aren't used by a C program can have their code GC'd by the linker.
- possible to use protobuf binary format without leaking message/field
names into the binary.
μpb accomplishes these goals by keeping a very small core that does not contain
descriptors. We need some way of knowing what fields are in each message and
where they live, but instead of descriptors, we keep a small/lightweight summary
of the .proto file. We call this a `upb_msglayout`. It contains the bare
minimum of what we need to know to parse and serialize protobuf binary format
into our internal representation for messages, `upb_msg`.
The core then contains functions to parse/serialize a message, given a `upb_msg*`
and a `const upb_msglayout*`.
This approach is similar to [nanopb](https://github.com/nanopb/nanopb) which
also compiles message definitions to a compact, internal representation without
names. However nanopb does not aim to be a fully-featured library, and has no
support for text format, JSON, or descriptors. μpb is unique in that it has a
small core similar to nanopb (though not quite as small), but also offers a
full-featured protobuf library for applications that want reflection, text
format, JSON format, etc.
Without descriptors, the core doesn't have access to field names, so it cannot
parse/serialize to protobuf text format or JSON. Instead this functionality
lives in separate modules that depend on the module implementing descriptors.
With the descriptor module we can parse/serialize binary descriptors and
validate that they follow all the rules of protobuf schemas.
To provide binary compatibility, we version the structs that generated messages
use to create a `upb_msglayout*`. The current initializers are
`upb_msglayout_msginit_v1`, `upb_msglayout_fieldinit_v1`, etc. Then
`upb_msglayout*` uses these as its internal representation. If upb changes its
internal representation for a `upb_msglayout*`, it will also include code to
convert the old representation to the new representation. This will use some
more memory/CPU at runtime to convert between the two, but apps that statically
link μpb will never need to worry about this.
TODO
----
1. revise our generated code until it is in a state where we feel comfortable
committing to API/ABI stability for it. In particular there is an open
question of whether non-ABI-compatible field accesses should have a
fastpath different from the ABI-compatible field access.
1. Add missing features (maps, extensions, unknown fields).
1. Flesh out C++ wrappers.
1. *(lower-priority)*: revise all of the existing encoders/decoders and
handlers. We probably will want to keep handlers, since they let us decouple
encoders/decoders from `upb_msg`, but we need to simplify all of that a LOT.
Likely we will want to make handlers only per-message instead of per-field,
except for variable-length fields.

@ -0,0 +1,26 @@
Copyright (c) 2009-2011, Google Inc.
All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:
* Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
* Neither the name of Google Inc. nor the names of any other
contributors may be used to endorse or promote products
derived from this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY GOOGLE INC. ``AS IS'' AND ANY EXPRESS OR IMPLIED
WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO
EVENT SHALL GOOGLE INC. BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER
IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
POSSIBILITY OF SUCH DAMAGE.

@ -0,0 +1,134 @@
# μpb - a small protobuf implementation in C
|Platform|Build Status|
|--------|------------|
|macOS|[![Build Status](https://storage.googleapis.com/upb-kokoro-results/status-badge/macos.png)](https://fusion.corp.google.com/projectanalysis/summary/KOKORO/prod%3Aupb%2Fmacos%2Fcontinuous)|
|ubuntu|[![Build Status](https://storage.googleapis.com/upb-kokoro-results/status-badge/ubuntu.png)](https://fusion.corp.google.com/projectanalysis/summary/KOKORO/prod%3Aupb%2Fubuntu%2Fcontinuous)|
μpb (often written 'upb') is a small protobuf implementation written in C.
upb generates a C API for creating, parsing, and serializing messages
as declared in `.proto` files. upb is heavily arena-based: all
messages always live in an arena (note: the arena can live in stack or
static memory if desired). Here is a simple example:
```c
#include "conformance/conformance.upb.h"
void foo(const char* data, size_t size) {
upb_arena *arena;
/* Generated message type. */
conformance_ConformanceRequest *request;
conformance_ConformanceResponse *response;
arena = upb_arena_new();
request = conformance_ConformanceRequest_parse(data, size, arena);
response = conformance_ConformanceResponse_new(arena);
switch (conformance_ConformanceRequest_payload_case(request)) {
case conformance_ConformanceRequest_payload_protobuf_payload: {
upb_strview payload = conformance_ConformanceRequest_protobuf_payload(request);
// ...
break;
}
case conformance_ConformanceRequest_payload_NOT_SET:
fprintf(stderr, "conformance_upb: Request didn't have payload.\n");
break;
default: {
static const char msg[] = "Unsupported input format.";
conformance_ConformanceResponse_set_skipped(
response, upb_strview_make(msg, sizeof(msg)));
break;
}
}
/* Frees all messages on the arena. */
upb_arena_free(arena);
}
```
API and ABI are both subject to change! Please do not distribute
as a shared library for this reason (for now at least).
## Using upb in your project
Currently only Bazel is supported (CMake support is partial and incomplete
but full CMake support is an eventual goal).
To use upb in your Bazel project, first add upb to your `WORKSPACE` file,
either as a `git_repository()` or as a `new_local_repository()` with a
Git Submodule. (For an example, see `examples/bazel/` in this repo).
```python
# Add this to your WORKSPACE file.
load("@bazel_tools//tools/build_defs/repo:git.bzl", "git_repository")
git_repository(
name = "upb",
remote = "https://github.com/protocolbuffers/upb.git",
commit = "d16bf99ac4658793748cda3251226059892b3b7b",
)
load("@upb//bazel:workspace_deps.bzl", "upb_deps")
upb_deps()
```
Then in your BUILD file you can add `upb_proto_library()` rules that
generate code for a corresponding `proto_library()` rule. For
example:
```python
# Add this to your BUILD file.
load("@upb//bazel:upb_proto_library.bzl", "upb_proto_library")
proto_library(
name = "foo_proto",
srcs = ["foo.proto"],
)
upb_proto_library(
name = "foo_upbproto",
deps = [":foo_proto"],
)
cc_binary(
name = "test_binary",
srcs = ["test_binary.c"],
deps = [":foo_upbproto"],
)
```
Then in your `.c` file you can #include the generated header:
```c
#include "foo.upb.h"
/* Insert code that uses generated types. */
```
## Old "handlers" interfaces
This library contains several semi-deprecated interfaces (see BUILD
file for more info about which interfaces are deprecated). These
deprecated interfaces are still used in some significant projects,
such as the Ruby and PHP C bindings for protobuf in the [main protobuf
repo](https://github.com/protocolbuffers/protobuf). The goal is to
migrate the Ruby/PHP bindings to use the newer, simpler interfaces
instead. Please do not use the old interfaces in new code.
## Lua bindings
This repo has some Lua bindings for the core library. These are
experimental and very incomplete. These are currently included in
order to validate that the C API is suitable for wrapping. As the
project matures these Lua bindings may become publicly available.
## Contact
Author: Josh Haberman ([jhaberman@gmail.com](mailto:jhaberman@gmail.com),
[haberman@google.com](mailto:haberman@google.com))

@ -0,0 +1,39 @@
# Root workspace of the upb repository.
workspace(name = "upb")

load("@bazel_tools//tools/build_defs/repo:http.bzl", "http_archive")
load("//bazel:workspace_deps.bzl", "upb_deps")

# Declares the repositories listed in //bazel:workspace_deps.bzl.
upb_deps()

# Lua 5.2.4 sources; //bazel:lua.BUILD supplies the build rules.
http_archive(
    name = "lua",
    build_file = "//bazel:lua.BUILD",
    sha256 = "b9e2e4aad6789b3b63a056d442f7b39f0ecfca3ae0f1fc0ae4e9614401b69f4b",
    strip_prefix = "lua-5.2.4",
    urls = [
        "https://mirror.bazel.build/www.lua.org/ftp/lua-5.2.4.tar.gz",
        "https://www.lua.org/ftp/lua-5.2.4.tar.gz",
    ],
)

# Ragel 6.10 sources; //bazel:ragel.BUILD supplies the build rules.
http_archive(
    name = "ragel",
    build_file = "//bazel:ragel.BUILD",
    sha256 = "5f156edb65d20b856d638dd9ee2dfb43285914d9aa2b6ec779dac0270cd56c3f",
    strip_prefix = "ragel-6.10",
    urls = ["http://www.colm.net/files/ragel/ragel-6.10.tar.gz"],
)

# googletest, pinned to a specific commit archive.
http_archive(
    name = "com_google_googletest",
    sha256 = "ff7a82736e158c077e76188232eac77913a15dac0b22508c390ab3f88e6d6d86",
    strip_prefix = "googletest-b6cd405286ed8635ece71c72f118e659f4ade3fb",
    urls = ["https://github.com/google/googletest/archive/b6cd405286ed8635ece71c72f118e659f4ade3fb.zip"],  # 2019-01-07
)

# Google benchmark, pinned to a specific commit archive.
http_archive(
    name = "com_github_google_benchmark",
    sha256 = "59f918c8ccd4d74b6ac43484467b500f1d64b40cc1010daa055375b322a43ba3",
    strip_prefix = "benchmark-16703ff83c1ae6d53e5155df3bb3ab0bc96083be",
    urls = ["https://github.com/google/benchmark/archive/16703ff83c1ae6d53e5155df3bb3ab0bc96083be.zip"],
)

@ -0,0 +1,221 @@
"""Internal rules for building upb."""
load(":upb_proto_library.bzl", "GeneratedSrcsInfo")
def _librule(name):
    """Returns `name` with "_lib" appended (the cc_library target name)."""
    return "{}_lib".format(name)
def _get_real_short_path(file):
    """Returns `file.short_path` without a leading "../<repo>/" component.

    Files from other archives have short paths that look like:
      ../com_google_protobuf/google/protobuf/descriptor.proto
    For those, everything up to and including the slash after the repository
    name is dropped. Other short paths are returned unchanged.
    """
    path = file.short_path
    if not path.startswith("../"):
        return path

    # Search from index 3 so the slash of the "../" prefix itself is skipped.
    repo_end = path.index("/", 3)
    return path[repo_end + 1:]
def _get_real_root(file):
    """Returns the part of `file.path` that precedes its real short path.

    The character directly before the short path (the separator) is removed
    as well.
    """
    suffix_len = len(_get_real_short_path(file)) + 1
    return file.path[:-suffix_len]
def _get_real_roots(files):
    """Returns the distinct, non-empty real roots of `files`."""

    # A dict is used as a set so each root is reported only once.
    seen = {}
    for f in files:
        root = _get_real_root(f)
        if not root:
            continue
        seen[root] = True
    return seen.keys()
def lua_cclibrary(name, srcs, hdrs = [], deps = [], luadeps = []):
    """Builds a Lua C module as a shared object.

    Declares four targets:
      * `<name>_lib`: a cc_library holding the sources.
      * `lib<name>.so`: a shared cc_binary linking that library.
      * `<name>_copy`: a genrule copying the binary to `<name minus "lua/">.so`.
      * `<name>`: a filegroup whose data is the copied file.

    Args:
      name: name of the resulting filegroup; must start with "lua/".
      srcs: sources of the cc_library.
      hdrs: headers of the cc_library.
      deps: ordinary C/C++ dependencies of the cc_library.
      luadeps: names of other lua_cclibrary() targets; their `_lib`
        libraries are added as dependencies.
    """
    lib_rule = _librule(name)
    so_rule = "lib" + name + ".so"
    so_file = _remove_prefix(name, "lua/") + ".so"
    lua_lib_deps = [_librule(dep) for dep in luadeps]

    native.cc_library(
        name = lib_rule,
        srcs = srcs,
        hdrs = hdrs,
        deps = deps + lua_lib_deps + ["@lua//:liblua_headers"],
    )

    native.cc_binary(
        name = so_rule,
        linkopts = select({
            ":darwin": [
                "-undefined dynamic_lookup",
            ],
            "//conditions:default": [],
        }),
        linkshared = True,
        deps = [lib_rule],
    )

    native.genrule(
        name = name + "_copy",
        srcs = [":" + so_rule],
        outs = [so_file],
        cmd = "cp $< $@",
    )

    native.filegroup(
        name = name,
        data = [so_file],
    )
def _remove_prefix(str, prefix):
    """Returns `str` without its leading `prefix`.

    Fails the build if `str` does not start with `prefix`.
    """
    if str.startswith(prefix):
        return str[len(prefix):]
    fail("%s doesn't start with %s" % (str, prefix))
def _remove_suffix(str, suffix):
    """Returns `str` without its trailing `suffix`.

    Fails the build if `str` does not end with `suffix`.

    Args:
      str: the string to shorten.
      suffix: the trailing text to remove; may be empty.
    Returns:
      `str` with `suffix` removed from the end.
    """
    if not str.endswith(suffix):
        fail("%s doesn't end with %s" % (str, suffix))

    # Slice by an explicit end index: `str[:-len(suffix)]` would be `str[:0]`
    # (the empty string) for an empty suffix.
    return str[:len(str) - len(suffix)]
def lua_library(name, srcs, strip_prefix, luadeps = []):
    """Copies Lua sources to prefix-stripped paths and groups them.

    Declares a `<name>_copy` genrule that copies `srcs` into the output
    directory, and a `<name>` filegroup whose data is the copied files plus
    `luadeps`.

    Args:
      name: name of the resulting filegroup.
      srcs: Lua source files; each must start with `strip_prefix` + "/".
      strip_prefix: directory prefix removed from each source path.
      luadeps: further targets added to the filegroup's data.
    """
    prefix = strip_prefix + "/"
    copied = []
    for src in srcs:
        copied.append(_remove_prefix(src, prefix))

    native.genrule(
        name = name + "_copy",
        srcs = srcs,
        outs = copied,
        cmd = "cp $(SRCS) $(@D)",
    )

    native.filegroup(
        name = name,
        data = copied + luadeps,
    )
def make_shell_script(name, contents, out):
    """Declares a genrule `gen_<name>` that writes `contents` to `out`.

    Args:
      name: base name; the genrule is called "gen_" + name.
      contents: text of the script.
      out: output file name.
    """

    # Double every "$" so the genrule's own "$" handling leaves it intact.
    escaped = contents.replace("$", "$$")
    write_cmd = "(cat <<'HEREDOC'\n%s\nHEREDOC\n) > $@" % escaped
    native.genrule(
        name = "gen_" + name,
        outs = [out],
        cmd = write_cmd,
    )
def _lua_binary_or_test(name, luamain, luadeps, rule):
    """Declares a shell launcher script plus a `rule` target that runs it.

    Args:
      name: name of the resulting target; the script is `<name>.sh`.
      luamain: main Lua file; added to the target's data.
      luadeps: further data dependencies.
      rule: the rule function to instantiate (e.g. native.sh_binary).
    """
    script = name + ".sh"

    # NOTE(review): `luamain` is only listed in `data`; the script text below
    # always invokes upb/tools/upbc.lua. Confirm this is intended.
    launcher = """
BASE=$(dirname $(rlocation upb/upb_c.so))
export LUA_CPATH="$BASE/?.so"
export LUA_PATH="$BASE/?.lua"
$(rlocation lua/lua) $(rlocation upb/tools/upbc.lua) "$@"
"""

    make_shell_script(
        name = "gen_" + name,
        contents = launcher,
        out = script,
    )

    rule(
        name = name,
        srcs = [script],
        data = ["@lua//:lua", luamain] + luadeps,
    )
def lua_binary(name, luamain, luadeps = []):
    """Declares an sh_binary launcher for a Lua program."""
    _lua_binary_or_test(
        name = name,
        luamain = luamain,
        luadeps = luadeps,
        rule = native.sh_binary,
    )
def lua_test(name, luamain, luadeps = []):
    """Declares an sh_test launcher for a Lua program."""
    _lua_binary_or_test(
        name = name,
        luamain = luamain,
        luadeps = luadeps,
        rule = native.sh_test,
    )
def generated_file_staleness_test(name, outs, generated_pattern):
    """Tests that checked-in file(s) match the contents of generated file(s).

    The resulting test will verify that all output files exist and have the
    correct contents. If the test fails, it can be invoked with --fix to
    bring the checked-in files up to date.

    Args:
      name: Name of the rule.
      outs: the checked-in files that are copied from generated files.
      generated_pattern: the pattern for transforming each "out" file into a
        generated file. For example, if generated_pattern="generated/%s" then
        a file foo.txt will look for generated file generated/foo.txt.
    """
    script_src = "//:tools/staleness_test.py"
    script_name = name + ".py"

    # Filter out non-existing rules so Blaze doesn't error out before we even
    # run the test.
    existing_outs = native.glob(include = outs)

    # The file list contains a few extra bits of information at the end.
    # These get unpacked by the Config class in staleness_test_lib.py.
    file_list = outs + [generated_pattern, native.package_name() or ".", name]

    # Copy the template script, then substitute the file list into it.
    makescript_cmd = (
        "cat $(location %s) > $@; sed -i.bak -e 's|INSERT_FILE_LIST_HERE|%s|' $@" %
        (script_src, "\\\n ".join(file_list))
    )

    native.genrule(
        name = name + "_makescript",
        testonly = 1,
        srcs = [script_src],
        outs = [script_name],
        cmd = makescript_cmd,
    )

    generated_files = [generated_pattern % file for file in outs]
    native.py_test(
        name = name,
        srcs = [script_name],
        data = existing_outs + generated_files,
        deps = [
            "//:staleness_test_lib",
        ],
    )
# upb_amalgamation() rule, with file_list aspect.
# Provider returned by _file_list_aspect_impl: the files collected for one
# target.
SrcList = provider(
fields = {
"srcs": "list of srcs",
},
)
def _file_list_aspect_impl(target, ctx):
    """Collects the source and header files of `target` into a SrcList.

    Targets that provide GeneratedSrcsInfo contribute their generated srcs
    followed by hdrs. For every other target the files of its `srcs`, `hdrs`
    and `textual_hdrs` attributes are gathered, in that order.
    """
    if GeneratedSrcsInfo in target:
        generated = target[GeneratedSrcsInfo]
        return [SrcList(srcs = generated.srcs + generated.hdrs)]

    attrs = ctx.rule.attr
    collected = []
    for labels in [attrs.srcs, attrs.hdrs, attrs.textual_hdrs]:
        for label in labels:
            collected += label.files.to_list()
    return [SrcList(srcs = collected)]
# Aspect that attaches a SrcList provider to each target it visits; applied
# to the `libs` attribute of upb_amalgamation.
_file_list_aspect = aspect(
implementation = _file_list_aspect_impl,
)
def _upb_amalgamation(ctx):
    """Runs the amalgamator tool over the files collected from `libs`.

    The tool receives, in order: the bin directory path (with a trailing
    slash), the path of every selected source file, and one "-I<root>" flag
    per distinct real root of the input files.
    """
    inputs = []
    for lib in ctx.attr.libs:
        inputs += lib[SrcList].srcs

    # NOTE(review): endswith("c") selects any path ending in the letter "c",
    # so e.g. ".inc" files are selected as well as ".c" files. Confirm whether
    # ".c" was meant.
    srcs = [src for src in inputs if src.path.endswith("c")]

    src_args = [f.path for f in srcs]
    include_args = ["-I" + root for root in _get_real_roots(inputs)]

    ctx.actions.run(
        executable = ctx.executable.amalgamator,
        arguments = [ctx.bin_dir.path + "/"] + src_args + include_args,
        inputs = inputs,
        outputs = ctx.outputs.outs,
        progress_message = "Making amalgamation",
    )
    return []
# Rule that feeds the sources of `libs` to the `amalgamator` executable and
# declares `outs` as the action's outputs.
upb_amalgamation = rule(
    implementation = _upb_amalgamation,
    attrs = {
        # Tool that produces the amalgamation; built for the host.
        "amalgamator": attr.label(
            executable = True,
            cfg = "host",
        ),
        # Libraries whose files are collected via _file_list_aspect.
        "libs": attr.label_list(aspects = [_file_list_aspect]),
        # Files the amalgamator is expected to write.
        "outs": attr.output_list(),
    },
)
def licenses(*args):
    """No-op (for Google-internal usage); all arguments are ignored."""
    return None

@ -0,0 +1,102 @@
# Every target in this BUILD file is publicly visible by default.
package(default_visibility = ["//visibility:public"])

# Header-only view of Lua: exposes the headers under src/ without compiling
# any Lua sources.
cc_library(
    name = "liblua_headers",
    hdrs = [
        "src/lauxlib.h",
        "src/lua.h",
        "src/lua.hpp",
        "src/luaconf.h",
        "src/lualib.h",
    ],
    defines = ["LUA_USE_LINUX"],
    includes = ["src"],
)
# The Lua library itself: compiles the .c files under src/ and exposes the
# same headers as :liblua_headers.
cc_library(
name = "liblua",
# Implementation files and the headers they include.
srcs = [
"src/lapi.c",
"src/lapi.h",
"src/lauxlib.c",
"src/lauxlib.h",
"src/lbaselib.c",
"src/lbitlib.c",
"src/lcode.c",
"src/lcode.h",
"src/lcorolib.c",
"src/lctype.c",
"src/lctype.h",
"src/ldblib.c",
"src/ldebug.c",
"src/ldebug.h",
"src/ldo.c",
"src/ldo.h",
"src/ldump.c",
"src/lfunc.c",
"src/lfunc.h",
"src/lgc.c",
"src/lgc.h",
"src/linit.c",
"src/liolib.c",
"src/llex.c",
"src/llex.h",
"src/llimits.h",
"src/lmathlib.c",
"src/lmem.c",
"src/lmem.h",
"src/loadlib.c",
"src/lobject.c",
"src/lobject.h",
"src/lopcodes.c",
"src/lopcodes.h",
"src/loslib.c",
"src/lparser.c",
"src/lparser.h",
"src/lstate.c",
"src/lstate.h",
"src/lstring.c",
"src/lstring.h",
"src/lstrlib.c",
"src/ltable.c",
"src/ltable.h",
"src/ltablib.c",
"src/ltm.c",
"src/ltm.h",
"src/lundump.c",
"src/lundump.h",
"src/lvm.c",
"src/lvm.h",
"src/lzio.c",
"src/lzio.h",
],
# NOTE(review): LUA_USE_LINUX is defined unconditionally, whatever the
# target platform is.
defines = ["LUA_USE_LINUX"],
# Headers that dependents may #include.
hdrs = [
"src/lauxlib.h",
"src/lua.h",
"src/lua.hpp",
"src/luaconf.h",
"src/lualib.h",
],
includes = ["src"],
# Link against libm and libdl.
linkopts = [
"-lm",
"-ldl",
],
)
# The `lua` executable: src/lua.c linked against :liblua.
cc_binary(
    name = "lua",
    srcs = ["src/lua.c"],
    linkopts = [
        "-lreadline",
        "-rdynamic",
    ],
    deps = [":liblua"],
)

@ -0,0 +1,193 @@
# Every target in this BUILD file is publicly visible by default.
package(
default_visibility = ["//visibility:public"],
)
# The ragel executable, built from the sources under ragel/ together with the
# headers under aapl/.
cc_binary(
name = "ragelc",
srcs = [
# Files under ragel/. "ragel/config.h" in this list is the output of the
# gen_config_h genrule in this file.
"ragel/rubycodegen.cpp",
"ragel/goipgoto.h",
"ragel/cdtable.h",
"ragel/rubycodegen.h",
"ragel/gotable.h",
"ragel/gocodegen.cpp",
"ragel/rubyfflat.cpp",
"ragel/common.cpp",
"ragel/gofflat.cpp",
"ragel/cdtable.cpp",
"ragel/cdsplit.cpp",
"ragel/rlparse.cpp",
"ragel/csfgoto.cpp",
"ragel/javacodegen.cpp",
"ragel/gocodegen.h",
"ragel/mlgoto.cpp",
"ragel/fsmgraph.cpp",
"ragel/version.h",
"ragel/mlfflat.h",
"ragel/fsmgraph.h",
"ragel/fsmbase.cpp",
"ragel/fsmstate.cpp",
"ragel/gotablish.cpp",
"ragel/rubyflat.cpp",
"ragel/cdfgoto.h",
"ragel/cscodegen.h",
"ragel/mlflat.cpp",
"ragel/rubyflat.h",
"ragel/goftable.h",
"ragel/rbxgoto.cpp",
"ragel/csfflat.cpp",
"ragel/gofgoto.cpp",
"ragel/gofgoto.h",
"ragel/ragel.h",
"ragel/goftable.cpp",
"ragel/cdcodegen.cpp",
"ragel/rlparse.h",
"ragel/cdsplit.h",
"ragel/xmlcodegen.cpp",
"ragel/goipgoto.cpp",
"ragel/dotcodegen.h",
"ragel/gogoto.cpp",
"ragel/csflat.h",
"ragel/csfflat.h",
#"ragel/config.h.in",
"ragel/csipgoto.cpp",
"ragel/mltable.cpp",
"ragel/mlflat.h",
"ragel/csftable.cpp",
"ragel/cdgoto.h",
"ragel/goflat.cpp",
"ragel/rubyfflat.h",
"ragel/mlftable.h",
"ragel/rubyftable.h",
"ragel/fsmap.cpp",
"ragel/redfsm.cpp",
"ragel/goflat.h",
"ragel/parsetree.cpp",
"ragel/fsmmin.cpp",
"ragel/dotcodegen.cpp",
"ragel/redfsm.h",
"ragel/mlcodegen.cpp",
"ragel/cdfgoto.cpp",
"ragel/cssplit.cpp",
"ragel/cstable.cpp",
"ragel/javacodegen.h",
"ragel/parsedata.cpp",
"ragel/buffer.h",
"ragel/gogoto.h",
"ragel/csgoto.h",
"ragel/pcheck.h",
"ragel/rubyftable.cpp",
"ragel/csfgoto.h",
"ragel/common.h",
"ragel/cdftable.h",
"ragel/mlgoto.h",
"ragel/csgoto.cpp",
"ragel/cdflat.h",
"ragel/cdipgoto.h",
"ragel/cstable.h",
"ragel/gendata.h",
"ragel/cdfflat.cpp",
"ragel/gotable.cpp",
"ragel/cdcodegen.h",
"ragel/gendata.cpp",
"ragel/rubytable.h",
"ragel/csflat.cpp",
"ragel/inputdata.h",
"ragel/inputdata.cpp",
"ragel/rubytable.cpp",
"ragel/fsmattach.cpp",
"ragel/csipgoto.h",
"ragel/cscodegen.cpp",
"ragel/cdfflat.h",
"ragel/rbxgoto.h",
"ragel/xmlcodegen.h",
"ragel/gofflat.h",
"ragel/parsedata.h",
"ragel/mlfgoto.h",
"ragel/cdflat.cpp",
"ragel/config.h",
"ragel/rlscan.cpp",
"ragel/mlcodegen.h",
"ragel/mlfflat.cpp",
"ragel/mlftable.cpp",
"ragel/mltable.h",
"ragel/cdipgoto.cpp",
"ragel/cdftable.cpp",
"ragel/parsetree.h",
"ragel/rlscan.h",
"ragel/main.cpp",
"ragel/cssplit.h",
"ragel/mlfgoto.cpp",
"ragel/csftable.h",
"ragel/gotablish.h",
"ragel/cdgoto.cpp",
# Headers under aapl/.
"aapl/avlmelkey.h",
"aapl/dlistmel.h",
"aapl/avliset.h",
"aapl/avlkeyless.h",
"aapl/sbstset.h",
"aapl/sbsttable.h",
"aapl/quicksort.h",
"aapl/avlitree.h",
"aapl/avlcommon.h",
"aapl/bstset.h",
"aapl/avlmel.h",
"aapl/insertsort.h",
"aapl/dlist.h",
"aapl/avlmap.h",
"aapl/mergesort.h",
"aapl/resize.h",
"aapl/bstcommon.h",
"aapl/bstmap.h",
"aapl/compare.h",
"aapl/svector.h",
"aapl/avlset.h",
"aapl/bsttable.h",
"aapl/avlikeyless.h",
"aapl/bubblesort.h",
"aapl/table.h",
"aapl/avlbasic.h",
"aapl/vector.h",
"aapl/avlimap.h",
"aapl/dlistval.h",
"aapl/dlcommon.h",
"aapl/avlibasic.h",
"aapl/sbstmap.h",
"aapl/avlimel.h",
"aapl/avlimelkey.h",
"aapl/avltree.h",
],
# Both source directories are put on the include path.
includes = ["ragel", "aapl"],
)
# Text of ragel/config.h: package identity and version macros.
config_h_contents = """
#define PACKAGE "ragel"
/* Define to the address where bug reports for this package should be sent. */
#define PACKAGE_BUGREPORT ""
/* Define to the full name of this package. */
#define PACKAGE_NAME "ragel"
/* Define to the full name and version of this package. */
#define PACKAGE_STRING "ragel 6.10"
/* Define to the one symbol short name of this package. */
#define PACKAGE_TARNAME "ragel"
/* Define to the home page for this package. */
#define PACKAGE_URL ""
/* Define to the version of this package. */
#define PACKAGE_VERSION "6.10"
/* Version number of package */
#define VERSION "6.10"
"""

# Writes config_h_contents to ragel/config.h through a quoted heredoc.
genrule(
    name = "gen_config_h",
    outs = ["ragel/config.h"],
    cmd = "(cat <<'HEREDOC'\n%s\nHEREDOC\n) > $@" % config_h_contents,
)

@ -0,0 +1,15 @@
# A hacky way to work around the fact that native.bazel_version is only
# available from WORKSPACE macros, not BUILD macros or rules.
#
# Hopefully we can remove this if/when this is fixed:
# https://github.com/bazelbuild/bazel/issues/8305
def _impl(repository_ctx):
    """Creates a repository exposing native.bazel_version as `bazel_version`.

    Writes bazel_version.bzl containing a single string assignment, plus an
    empty BUILD file.
    """
    version_bzl = "bazel_version = \"{}\"".format(native.bazel_version)
    repository_ctx.file("bazel_version.bzl", version_bzl)
    repository_ctx.file("BUILD", "")
# Repository rule whose implementation writes bazel_version.bzl; declared
# with local = True.
bazel_version_repository = repository_rule(
implementation = _impl,
local = True,
)

@ -0,0 +1,299 @@
"""Public rules for using upb protos:
- upb_proto_library()
- upb_proto_reflection_library()
"""
load("@bazel_skylib//lib:paths.bzl", "paths")
load("@bazel_tools//tools/cpp:toolchain_utils.bzl", "find_cpp_toolchain")
# copybara:strip_for_google3_begin
load("@bazel_skylib//lib:versions.bzl", "versions")
load("@bazel_version//:bazel_version.bzl", "bazel_version")
# copybara:strip_end
# Generic support code #########################################################
# True when `native` has no "genmpm" attribute; used to decide whether the
# Blaze-only arguments and attributes in this file are needed.
_is_bazel = not hasattr(native, "genmpm")
def _get_real_short_path(file):
    """Returns `file.short_path` without a leading "../<repo>/" component.

    Files from other archives have short paths that look like:
      ../com_google_protobuf/google/protobuf/descriptor.proto
    For those, everything up to and including the slash after the repository
    name is dropped. Other short paths are returned unchanged.
    """
    path = file.short_path
    if not path.startswith("../"):
        return path

    # Search from index 3 so the slash of the "../" prefix itself is skipped.
    repo_end = path.index("/", 3)
    return path[repo_end + 1:]
def _get_real_root(file):
    """Returns the part of `file.path` that precedes its real short path.

    The character directly before the short path (the separator) is removed
    as well.
    """
    suffix_len = len(_get_real_short_path(file)) + 1
    return file.path[:-suffix_len]
def _get_real_roots(files):
    """Returns the distinct, non-empty real roots of `files`."""

    # A dict is used as a set so each root is reported only once.
    seen = {}
    for f in files:
        root = _get_real_root(f)
        if not root:
            continue
        seen[root] = True
    return seen.keys()
def _generate_output_file(ctx, src, extension):
    """Declares the output file generated for `src`.

    The output path is the real short path of `src`, made relative to the
    package of `ctx.label`, with its extension replaced by `extension`.

    Args:
      ctx: rule or aspect context.
      src: the input File.
      extension: new extension for the declared file (e.g. ".upb.c").
    Returns:
      The declared File.
    """
    package_relative = paths.relativize(
        _get_real_short_path(src),
        ctx.label.package,
    )
    return ctx.actions.declare_file(
        paths.replace_extension(package_relative, extension),
    )
def _filter_none(elems):
    """Returns the truthy elements of `elems`, preserving their order."""
    return [elem for elem in elems if elem]
def _cc_library_func(ctx, name, hdrs, srcs, dep_ccinfos):
    """Compiles and links C/C++ sources from within a rule, like cc_library().

    Args:
      ctx: Rule context.
      name: Unique name used to generate output files.
      hdrs: Public headers that can be #included from other rules.
      srcs: C/C++ source files.
      dep_ccinfos: CcInfo providers of dependencies we should build/link against.

    Returns:
      CcInfo provider for this compilation.
    """
    dep_compilation_contexts = []
    dep_linking_contexts = []
    for info in dep_ccinfos:
        dep_compilation_contexts.append(info.compilation_context)
        dep_linking_contexts.append(info.linking_context)

    cc_toolchain = find_cpp_toolchain(ctx)
    features = cc_common.configure_features(
        ctx = ctx,
        cc_toolchain = cc_toolchain,
        requested_features = ctx.features,
        unsupported_features = ctx.disabled_features,
    )

    # copybara:strip_for_google3_begin
    if bazel_version == "0.24.1":
        # Compatibility code until gRPC is on 0.25.2 or later.
        legacy_compile = cc_common.compile(
            ctx = ctx,
            feature_configuration = features,
            cc_toolchain = cc_toolchain,
            srcs = srcs,
            hdrs = hdrs,
            compilation_contexts = dep_compilation_contexts,
        )
        legacy_link = cc_common.link(
            ctx = ctx,
            feature_configuration = features,
            cc_toolchain = cc_toolchain,
            cc_compilation_outputs = legacy_compile.cc_compilation_outputs,
            linking_contexts = dep_linking_contexts,
        )
        return CcInfo(
            compilation_context = legacy_compile.compilation_context,
            linking_context = legacy_link.linking_context,
        )

    if not versions.is_at_least("0.25.2", bazel_version):
        fail("upb requires Bazel >=0.25.2 or 0.24.1")

    # copybara:strip_end

    # Extra keyword arguments that are only passed when not running under
    # Bazel.
    extra_kwargs = {}
    if not _is_bazel:
        extra_kwargs["grep_includes"] = ctx.file._grep_includes

    (compilation_context, compilation_outputs) = cc_common.compile(
        actions = ctx.actions,
        feature_configuration = features,
        cc_toolchain = cc_toolchain,
        name = name,
        srcs = srcs,
        public_hdrs = hdrs,
        compilation_contexts = dep_compilation_contexts,
        **extra_kwargs
    )

    # The second element (the linking outputs) is not used here.
    (linking_context, _linking_outputs) = cc_common.create_linking_context_from_compilation_outputs(
        actions = ctx.actions,
        name = name,
        feature_configuration = features,
        cc_toolchain = cc_toolchain,
        compilation_outputs = compilation_outputs,
        linking_contexts = dep_linking_contexts,
        **extra_kwargs
    )

    return CcInfo(
        compilation_context = compilation_context,
        linking_context = linking_context,
    )
# upb_proto_library / upb_proto_reflection_library shared code #################
# Provider describing the files generated for one proto_library: the
# generated sources and headers, as returned by _compile_upb_protos().
GeneratedSrcsInfo = provider(
fields = {
"srcs": "list of srcs",
"hdrs": "list of hdrs",
},
)
# Private wrapper providers returned by the aspect implementation:
# `cc_info` holds a CcInfo and `srcs` holds a GeneratedSrcsInfo. The rule
# implementation unwraps both from its single dependency.
_WrappedCcInfo = provider(fields = ["cc_info"])
_WrappedGeneratedSrcsInfo = provider(fields = ["srcs"])
def _compile_upb_protos(ctx, proto_info, proto_sources, ext):
    """Registers the protoc action that generates upb code for some protos.

    Args:
      ctx: aspect context; must provide the `_protoc` and `_upbc` executables.
      proto_info: ProtoInfo of the proto_library being processed.
      proto_sources: the .proto Files to generate code for.
      ext: extension infix of the generated files (e.g. ".upb"); ".c" and
        ".h" are appended to it.

    Returns:
      GeneratedSrcsInfo listing the declared .c and .h outputs. Both lists
      are empty, and no action is registered, when `proto_sources` is empty.
    """

    # Without direct sources there is nothing to generate, and `srcs[0]`
    # below would fail with an index error.
    if not proto_sources:
        return GeneratedSrcsInfo(srcs = [], hdrs = [])

    srcs = [_generate_output_file(ctx, name, ext + ".c") for name in proto_sources]
    hdrs = [_generate_output_file(ctx, name, ext + ".h") for name in proto_sources]
    transitive_sets = proto_info.transitive_descriptor_sets.to_list()
    ctx.actions.run(
        inputs = depset(
            direct = [proto_info.direct_descriptor_set],
            transitive = [proto_info.transitive_descriptor_sets],
        ),
        tools = [ctx.executable._upbc],
        outputs = srcs + hdrs,
        executable = ctx.executable._protoc,
        arguments = [
                        # All outputs are written below the root of the first
                        # declared source file.
                        "--upb_out=" + _get_real_root(srcs[0]),
                        "--plugin=protoc-gen-upb=" + ctx.executable._upbc.path,
                        "--descriptor_set_in=" + ctx.configuration.host_path_separator.join([f.path for f in transitive_sets]),
                    ] +
                    [_get_real_short_path(file) for file in proto_sources],
        progress_message = "Generating upb protos for :" + ctx.label.name,
    )
    return GeneratedSrcsInfo(srcs = srcs, hdrs = hdrs)
def _upb_proto_rule_impl(ctx):
    """Implementation shared by the upb proto rules.

    Requires exactly one entry in `deps`, which must carry the providers
    attached by the aspect. Returns a DefaultInfo with the first library's
    files plus the generated headers and sources, followed by the
    GeneratedSrcsInfo and the CcInfo.
    """
    if len(ctx.attr.deps) != 1:
        fail("only one deps dependency allowed.")
    dep = ctx.attr.deps[0]

    if not (_WrappedCcInfo in dep and _WrappedGeneratedSrcsInfo in dep):
        fail("proto_library rule must generate _WrappedCcInfo and " +
             "_WrappedGeneratedSrcsInfo (aspect should have handled this).")

    cc_info = dep[_WrappedCcInfo].cc_info
    srcs = dep[_WrappedGeneratedSrcsInfo].srcs

    # libraries_to_link is either a plain list or something offering
    # to_list(); handle both.
    libraries = cc_info.linking_context.libraries_to_link
    if type(libraries) != "list":
        libraries = libraries.to_list()
    lib = libraries[0]

    library_files = _filter_none([
        lib.static_library,
        lib.pic_static_library,
        lib.dynamic_library,
    ])
    return [
        DefaultInfo(files = depset(library_files + srcs.hdrs + srcs.srcs)),
        srcs,
        cc_info,
    ]
def _upb_proto_aspect_impl(target, ctx):
    """Generates and compiles upb code for one proto_library target.

    Returns the resulting CcInfo and GeneratedSrcsInfo, each wrapped in its
    private provider.
    """
    proto_info = target[ProtoInfo]
    generated = _compile_upb_protos(
        ctx,
        proto_info,
        proto_info.direct_sources,
        ctx.attr._ext,
    )

    # Dependencies contribute either a plain CcInfo or one wrapped by this
    # aspect; plain ones are listed first.
    deps = ctx.rule.attr.deps + ctx.attr._upb
    plain_ccinfos = [dep[CcInfo] for dep in deps if CcInfo in dep]
    wrapped_ccinfos = [dep[_WrappedCcInfo].cc_info for dep in deps if _WrappedCcInfo in dep]

    cc_info = _cc_library_func(
        ctx = ctx,
        name = ctx.rule.attr.name + ctx.attr._ext,
        hdrs = generated.hdrs,
        srcs = generated.srcs,
        dep_ccinfos = plain_ccinfos + wrapped_ccinfos,
    )
    return [
        _WrappedCcInfo(cc_info = cc_info),
        _WrappedGeneratedSrcsInfo(srcs = generated),
    ]
def _maybe_add(d):
    """Adds the `_grep_includes` attribute to `d` when not under Bazel.

    The dict is modified in place and also returned.
    """
    if _is_bazel:
        return d
    d["_grep_includes"] = attr.label(
        allow_single_file = True,
        cfg = "host",
        default = "//tools/cpp:grep-includes",
    )
    return d
# upb_proto_library() ##########################################################
# Aspect behind upb_proto_library(): propagates along `deps` and generates
# files with the ".upb" extension infix.
_upb_proto_library_aspect = aspect(
    implementation = _upb_proto_aspect_impl,
    attr_aspects = ["deps"],
    attrs = _maybe_add({
        # protoc plugin that emits the upb code.
        "_upbc": attr.label(
            executable = True,
            cfg = "host",
            default = "//:protoc-gen-upb",
        ),
        "_protoc": attr.label(
            executable = True,
            cfg = "host",
            default = "@com_google_protobuf//:protoc",
        ),
        "_cc_toolchain": attr.label(
            default = "@bazel_tools//tools/cpp:current_cc_toolchain",
        ),
        # Libraries the generated code is built against.
        "_upb": attr.label_list(default = [
            "//:generated_code_support__only_for_generated_code_do_not_use__i_give_permission_to_break_me",
            "//:upb",
        ]),
        "_ext": attr.string(default = ".upb"),
    }),
    fragments = ["cpp"],
    toolchains = ["@bazel_tools//tools/cpp:toolchain_type"],
)
# Public rule: `deps` takes proto_library targets, to which
# _upb_proto_library_aspect is applied. The implementation accepts exactly
# one entry.
upb_proto_library = rule(
    implementation = _upb_proto_rule_impl,
    attrs = {
        "deps": attr.label_list(
            allow_rules = ["proto_library"],
            aspects = [_upb_proto_library_aspect],
            providers = [ProtoInfo],
        ),
    },
    output_to_genfiles = True,
)
# upb_proto_reflection_library() ###############################################
# Aspect behind upb_proto_reflection_library(): propagates along `deps` and
# generates files with the ".upbdefs" extension infix.
_upb_proto_reflection_library_aspect = aspect(
    implementation = _upb_proto_aspect_impl,
    attr_aspects = ["deps"],
    attrs = _maybe_add({
        # protoc plugin that emits the upb code.
        "_upbc": attr.label(
            executable = True,
            cfg = "host",
            default = "//:protoc-gen-upb",
        ),
        "_protoc": attr.label(
            executable = True,
            cfg = "host",
            default = "@com_google_protobuf//:protoc",
        ),
        "_cc_toolchain": attr.label(
            default = "@bazel_tools//tools/cpp:current_cc_toolchain",
        ),
        # Libraries the generated code is built against.
        "_upb": attr.label_list(
            default = [
                "//:upb",
                "//:reflection",
            ],
        ),
        "_ext": attr.string(default = ".upbdefs"),
    }),
    fragments = ["cpp"],
    toolchains = ["@bazel_tools//tools/cpp:toolchain_type"],
)
# Public rule: `deps` takes proto_library targets, to which
# _upb_proto_reflection_library_aspect is applied. The implementation
# accepts exactly one entry.
upb_proto_reflection_library = rule(
    implementation = _upb_proto_rule_impl,
    attrs = {
        "deps": attr.label_list(
            allow_rules = ["proto_library"],
            aspects = [_upb_proto_reflection_library_aspect],
            providers = [ProtoInfo],
        ),
    },
    output_to_genfiles = True,
)

@ -0,0 +1,36 @@
load("@bazel_tools//tools/build_defs/repo:http.bzl", "http_archive")
load("@bazel_tools//tools/build_defs/repo:git.bzl", "git_repository")
load("//bazel:repository_defs.bzl", "bazel_version_repository")
def upb_deps():
    """Declares the external repositories upb depends on.

    Intended to be called from a WORKSPACE file.
    """

    # Exposes the running Bazel version as @bazel_version//:bazel_version.bzl.
    bazel_version_repository(name = "bazel_version")

    git_repository(
        name = "absl",
        commit = "070f6e47b33a2909d039e620c873204f78809492",
        remote = "https://github.com/abseil/abseil-cpp.git",
        shallow_since = "1541627663 -0500",
    )

    git_repository(
        name = "com_google_protobuf",
        commit = "d41002663fd04325ead28439dfd5ce2822b0d6fb",
        remote = "https://github.com/protocolbuffers/protobuf.git",
    )

    # NOTE(review): fetched from the moving `master` archive with no sha256,
    # so the downloaded contents are not pinned.
    http_archive(
        name = "bazel_skylib",
        strip_prefix = "bazel-skylib-master",
        urls = ["https://github.com/bazelbuild/bazel-skylib/archive/master.tar.gz"],
    )

    # zlib 1.2.11, built with the BUILD file shipped in the protobuf repo.
    http_archive(
        name = "zlib",
        build_file = "@com_google_protobuf//:third_party/zlib.BUILD",
        sha256 = "c3e5e9fdd5004dcb542feda5ee4f0ff0744628baf8ed2dd5d66f8ca1197cb1a1",
        strip_prefix = "zlib-1.2.11",
        urls = ["https://zlib.net/zlib-1.2.11.tar.gz"],
    )

@ -0,0 +1,18 @@
load("@upb//bazel:upb_proto_library.bzl", "upb_proto_library")
# The .proto definitions of the example.
proto_library(
    name = "foo_proto",
    srcs = ["foo.proto"],
)

# upb C code generated for :foo_proto.
upb_proto_library(
    name = "foo_upbproto",
    deps = [":foo_proto"],
)

# Example program built against the generated code.
cc_binary(
    name = "test_binary",
    srcs = ["test_binary.c"],
    deps = [":foo_upbproto"],
)

@ -0,0 +1,14 @@
# Stand-alone example workspace that consumes upb as an external repository.
workspace(name = "upb_example")

load("@bazel_tools//tools/build_defs/repo:git.bzl", "git_repository")

# upb itself, pinned to a specific commit.
git_repository(
    name = "upb",
    commit = "d16bf99ac4658793748cda3251226059892b3b7b",
    remote = "https://github.com/protocolbuffers/upb.git",
)

load("@upb//bazel:workspace_deps.bzl", "upb_deps")

# Declares the repositories that upb depends on.
upb_deps()

@ -0,0 +1,7 @@
syntax = "proto2";
// Example message with one integer field and one string field.
message Foo {
// A 64-bit integer value.
optional int64 time = 1;
// A text value.
optional string greeting = 2;
}

@ -0,0 +1,17 @@
#include <time.h>
#include "foo.upb.h"
/* Creates a Foo message on a fresh arena, sets both of its fields and frees
 * the arena again.
 *
 * Returns 0 on success and 1 if the arena or the message could not be
 * created. */
int main(void) {
  upb_arena *arena = upb_arena_new();
  /* Only try to create the message when the arena itself exists. */
  Foo *foo = arena ? Foo_new(arena) : NULL;
  const char greeting[] = "Hello, World!\n";

  if (foo == NULL) {
    if (arena != NULL) {
      upb_arena_free(arena);
    }
    return 1;
  }

  Foo_set_time(foo, time(NULL));
  /* Warning: the proto will not copy this, the string data must outlive
   * the proto. */
  Foo_set_greeting(foo, upb_strview_makez(greeting));

  upb_arena_free(arena);
  return 0;
}

@ -0,0 +1,485 @@
/* This file was generated by upbc (the upb compiler) from the input
* file:
*
* google/protobuf/descriptor.proto
*
* Do not edit -- your changes will be discarded when the file is
* regenerated. */
#include <stddef.h>
#include "upb/msg.h"
#include "google/protobuf/descriptor.upb.h"
#include "upb/port_def.inc"
static const upb_msglayout *const google_protobuf_FileDescriptorSet_submsgs[1] = {
&google_protobuf_FileDescriptorProto_msginit,
};
static const upb_msglayout_field google_protobuf_FileDescriptorSet__fields[1] = {
{1, UPB_SIZE(0, 0), 0, 0, 11, 3},
};
const upb_msglayout google_protobuf_FileDescriptorSet_msginit = {
&google_protobuf_FileDescriptorSet_submsgs[0],
&google_protobuf_FileDescriptorSet__fields[0],
UPB_SIZE(4, 8), 1, false,
};
static const upb_msglayout *const google_protobuf_FileDescriptorProto_submsgs[6] = {
&google_protobuf_DescriptorProto_msginit,
&google_protobuf_EnumDescriptorProto_msginit,
&google_protobuf_FieldDescriptorProto_msginit,
&google_protobuf_FileOptions_msginit,
&google_protobuf_ServiceDescriptorProto_msginit,
&google_protobuf_SourceCodeInfo_msginit,
};
static const upb_msglayout_field google_protobuf_FileDescriptorProto__fields[12] = {
{1, UPB_SIZE(4, 8), 1, 0, 9, 1},
{2, UPB_SIZE(12, 24), 2, 0, 9, 1},
{3, UPB_SIZE(36, 72), 0, 0, 9, 3},
{4, UPB_SIZE(40, 80), 0, 0, 11, 3},
{5, UPB_SIZE(44, 88), 0, 1, 11, 3},
{6, UPB_SIZE(48, 96), 0, 4, 11, 3},
{7, UPB_SIZE(52, 104), 0, 2, 11, 3},
{8, UPB_SIZE(28, 56), 4, 3, 11, 1},
{9, UPB_SIZE(32, 64), 5, 5, 11, 1},
{10, UPB_SIZE(56, 112), 0, 0, 5, 3},
{11, UPB_SIZE(60, 120), 0, 0, 5, 3},
{12, UPB_SIZE(20, 40), 3, 0, 9, 1},
};
const upb_msglayout google_protobuf_FileDescriptorProto_msginit = {
&google_protobuf_FileDescriptorProto_submsgs[0],
&google_protobuf_FileDescriptorProto__fields[0],
UPB_SIZE(64, 128), 12, false,
};
static const upb_msglayout *const google_protobuf_DescriptorProto_submsgs[8] = {
&google_protobuf_DescriptorProto_msginit,
&google_protobuf_DescriptorProto_ExtensionRange_msginit,
&google_protobuf_DescriptorProto_ReservedRange_msginit,
&google_protobuf_EnumDescriptorProto_msginit,
&google_protobuf_FieldDescriptorProto_msginit,
&google_protobuf_MessageOptions_msginit,
&google_protobuf_OneofDescriptorProto_msginit,
};
static const upb_msglayout_field google_protobuf_DescriptorProto__fields[10] = {
{1, UPB_SIZE(4, 8), 1, 0, 9, 1},
{2, UPB_SIZE(16, 32), 0, 4, 11, 3},
{3, UPB_SIZE(20, 40), 0, 0, 11, 3},
{4, UPB_SIZE(24, 48), 0, 3, 11, 3},
{5, UPB_SIZE(28, 56), 0, 1, 11, 3},
{6, UPB_SIZE(32, 64), 0, 4, 11, 3},
{7, UPB_SIZE(12, 24), 2, 5, 11, 1},
{8, UPB_SIZE(36, 72), 0, 6, 11, 3},
{9, UPB_SIZE(40, 80), 0, 2, 11, 3},
{10, UPB_SIZE(44, 88), 0, 0, 9, 3},
};
const upb_msglayout google_protobuf_DescriptorProto_msginit = {
&google_protobuf_DescriptorProto_submsgs[0],
&google_protobuf_DescriptorProto__fields[0],
UPB_SIZE(48, 96), 10, false,
};
static const upb_msglayout *const google_protobuf_DescriptorProto_ExtensionRange_submsgs[1] = {
&google_protobuf_ExtensionRangeOptions_msginit,
};
static const upb_msglayout_field google_protobuf_DescriptorProto_ExtensionRange__fields[3] = {
{1, UPB_SIZE(4, 4), 1, 0, 5, 1},
{2, UPB_SIZE(8, 8), 2, 0, 5, 1},
{3, UPB_SIZE(12, 16), 3, 0, 11, 1},
};
const upb_msglayout google_protobuf_DescriptorProto_ExtensionRange_msginit = {
&google_protobuf_DescriptorProto_ExtensionRange_submsgs[0],
&google_protobuf_DescriptorProto_ExtensionRange__fields[0],
UPB_SIZE(16, 24), 3, false,
};
static const upb_msglayout_field google_protobuf_DescriptorProto_ReservedRange__fields[2] = {
{1, UPB_SIZE(4, 4), 1, 0, 5, 1},
{2, UPB_SIZE(8, 8), 2, 0, 5, 1},
};
const upb_msglayout google_protobuf_DescriptorProto_ReservedRange_msginit = {
NULL,
&google_protobuf_DescriptorProto_ReservedRange__fields[0],
UPB_SIZE(12, 12), 2, false,
};
static const upb_msglayout *const google_protobuf_ExtensionRangeOptions_submsgs[1] = {
&google_protobuf_UninterpretedOption_msginit,
};
static const upb_msglayout_field google_protobuf_ExtensionRangeOptions__fields[1] = {
{999, UPB_SIZE(0, 0), 0, 0, 11, 3},
};
const upb_msglayout google_protobuf_ExtensionRangeOptions_msginit = {
&google_protobuf_ExtensionRangeOptions_submsgs[0],
&google_protobuf_ExtensionRangeOptions__fields[0],
UPB_SIZE(4, 8), 1, false,
};
static const upb_msglayout *const google_protobuf_FieldDescriptorProto_submsgs[1] = {
&google_protobuf_FieldOptions_msginit,
};
static const upb_msglayout_field google_protobuf_FieldDescriptorProto__fields[10] = {
{1, UPB_SIZE(32, 32), 5, 0, 9, 1},
{2, UPB_SIZE(40, 48), 6, 0, 9, 1},
{3, UPB_SIZE(24, 24), 3, 0, 5, 1},
{4, UPB_SIZE(8, 8), 1, 0, 14, 1},
{5, UPB_SIZE(16, 16), 2, 0, 14, 1},
{6, UPB_SIZE(48, 64), 7, 0, 9, 1},
{7, UPB_SIZE(56, 80), 8, 0, 9, 1},
{8, UPB_SIZE(72, 112), 10, 0, 11, 1},
{9, UPB_SIZE(28, 28), 4, 0, 5, 1},
{10, UPB_SIZE(64, 96), 9, 0, 9, 1},
};
const upb_msglayout google_protobuf_FieldDescriptorProto_msginit = {
&google_protobuf_FieldDescriptorProto_submsgs[0],
&google_protobuf_FieldDescriptorProto__fields[0],
UPB_SIZE(80, 128), 10, false,
};
static const upb_msglayout *const google_protobuf_OneofDescriptorProto_submsgs[1] = {
&google_protobuf_OneofOptions_msginit,
};
static const upb_msglayout_field google_protobuf_OneofDescriptorProto__fields[2] = {
{1, UPB_SIZE(4, 8), 1, 0, 9, 1},
{2, UPB_SIZE(12, 24), 2, 0, 11, 1},
};
const upb_msglayout google_protobuf_OneofDescriptorProto_msginit = {
&google_protobuf_OneofDescriptorProto_submsgs[0],
&google_protobuf_OneofDescriptorProto__fields[0],
UPB_SIZE(16, 32), 2, false,
};
static const upb_msglayout *const google_protobuf_EnumDescriptorProto_submsgs[3] = {
&google_protobuf_EnumDescriptorProto_EnumReservedRange_msginit,
&google_protobuf_EnumOptions_msginit,
&google_protobuf_EnumValueDescriptorProto_msginit,
};
static const upb_msglayout_field google_protobuf_EnumDescriptorProto__fields[5] = {
{1, UPB_SIZE(4, 8), 1, 0, 9, 1},
{2, UPB_SIZE(16, 32), 0, 2, 11, 3},
{3, UPB_SIZE(12, 24), 2, 1, 11, 1},
{4, UPB_SIZE(20, 40), 0, 0, 11, 3},
{5, UPB_SIZE(24, 48), 0, 0, 9, 3},
};
const upb_msglayout google_protobuf_EnumDescriptorProto_msginit = {
&google_protobuf_EnumDescriptorProto_submsgs[0],
&google_protobuf_EnumDescriptorProto__fields[0],
UPB_SIZE(32, 64), 5, false,
};
static const upb_msglayout_field google_protobuf_EnumDescriptorProto_EnumReservedRange__fields[2] = {
{1, UPB_SIZE(4, 4), 1, 0, 5, 1},
{2, UPB_SIZE(8, 8), 2, 0, 5, 1},
};
const upb_msglayout google_protobuf_EnumDescriptorProto_EnumReservedRange_msginit = {
NULL,
&google_protobuf_EnumDescriptorProto_EnumReservedRange__fields[0],
UPB_SIZE(12, 12), 2, false,
};
static const upb_msglayout *const google_protobuf_EnumValueDescriptorProto_submsgs[1] = {
&google_protobuf_EnumValueOptions_msginit,
};
static const upb_msglayout_field google_protobuf_EnumValueDescriptorProto__fields[3] = {
{1, UPB_SIZE(8, 8), 2, 0, 9, 1},
{2, UPB_SIZE(4, 4), 1, 0, 5, 1},
{3, UPB_SIZE(16, 24), 3, 0, 11, 1},
};
const upb_msglayout google_protobuf_EnumValueDescriptorProto_msginit = {
&google_protobuf_EnumValueDescriptorProto_submsgs[0],
&google_protobuf_EnumValueDescriptorProto__fields[0],
UPB_SIZE(24, 32), 3, false,
};
static const upb_msglayout *const google_protobuf_ServiceDescriptorProto_submsgs[2] = {
&google_protobuf_MethodDescriptorProto_msginit,
&google_protobuf_ServiceOptions_msginit,
};
static const upb_msglayout_field google_protobuf_ServiceDescriptorProto__fields[3] = {
{1, UPB_SIZE(4, 8), 1, 0, 9, 1},
{2, UPB_SIZE(16, 32), 0, 0, 11, 3},
{3, UPB_SIZE(12, 24), 2, 1, 11, 1},
};
const upb_msglayout google_protobuf_ServiceDescriptorProto_msginit = {
&google_protobuf_ServiceDescriptorProto_submsgs[0],
&google_protobuf_ServiceDescriptorProto__fields[0],
UPB_SIZE(24, 48), 3, false,
};
static const upb_msglayout *const google_protobuf_MethodDescriptorProto_submsgs[1] = {
&google_protobuf_MethodOptions_msginit,
};
static const upb_msglayout_field google_protobuf_MethodDescriptorProto__fields[6] = {
{1, UPB_SIZE(4, 8), 3, 0, 9, 1},
{2, UPB_SIZE(12, 24), 4, 0, 9, 1},
{3, UPB_SIZE(20, 40), 5, 0, 9, 1},
{4, UPB_SIZE(28, 56), 6, 0, 11, 1},
{5, UPB_SIZE(1, 1), 1, 0, 8, 1},
{6, UPB_SIZE(2, 2), 2, 0, 8, 1},
};
const upb_msglayout google_protobuf_MethodDescriptorProto_msginit = {
&google_protobuf_MethodDescriptorProto_submsgs[0],
&google_protobuf_MethodDescriptorProto__fields[0],
UPB_SIZE(32, 64), 6, false,
};
/* Layout tables for google_protobuf_FileOptions: one sub-message layout
 * (UninterpretedOption), 21 field rows, and the msginit record tying them
 * together. Rows are listed in ascending order of their first column.
 * NOTE(review): row columns are presumably {field number, offset, presence,
 * submsg index, descriptor type, label} -- confirm against
 * upb_msglayout_field. */
static const upb_msglayout *const google_protobuf_FileOptions_submsgs[1] = {
&google_protobuf_UninterpretedOption_msginit,
};
static const upb_msglayout_field google_protobuf_FileOptions__fields[21] = {
{1, UPB_SIZE(28, 32), 11, 0, 9, 1},
{8, UPB_SIZE(36, 48), 12, 0, 9, 1},
{9, UPB_SIZE(8, 8), 1, 0, 14, 1},
{10, UPB_SIZE(16, 16), 2, 0, 8, 1},
{11, UPB_SIZE(44, 64), 13, 0, 9, 1},
{16, UPB_SIZE(17, 17), 3, 0, 8, 1},
{17, UPB_SIZE(18, 18), 4, 0, 8, 1},
{18, UPB_SIZE(19, 19), 5, 0, 8, 1},
{20, UPB_SIZE(20, 20), 6, 0, 8, 1},
{23, UPB_SIZE(21, 21), 7, 0, 8, 1},
{27, UPB_SIZE(22, 22), 8, 0, 8, 1},
{31, UPB_SIZE(23, 23), 9, 0, 8, 1},
{36, UPB_SIZE(52, 80), 14, 0, 9, 1},
{37, UPB_SIZE(60, 96), 15, 0, 9, 1},
{39, UPB_SIZE(68, 112), 16, 0, 9, 1},
{40, UPB_SIZE(76, 128), 17, 0, 9, 1},
{41, UPB_SIZE(84, 144), 18, 0, 9, 1},
{42, UPB_SIZE(24, 24), 10, 0, 8, 1},
{44, UPB_SIZE(92, 160), 19, 0, 9, 1},
{45, UPB_SIZE(100, 176), 20, 0, 9, 1},
{999, UPB_SIZE(108, 192), 0, 0, 11, 3},
};
const upb_msglayout google_protobuf_FileOptions_msginit = {
&google_protobuf_FileOptions_submsgs[0],
&google_protobuf_FileOptions__fields[0],
UPB_SIZE(112, 208), 21, false,
};
/* Layout tables for google_protobuf_MessageOptions: one sub-message layout
 * (UninterpretedOption), five field rows, and the msginit record tying them
 * together.
 * NOTE(review): row columns are presumably {field number, offset, presence,
 * submsg index, descriptor type, label} -- confirm against
 * upb_msglayout_field. */
static const upb_msglayout *const google_protobuf_MessageOptions_submsgs[1] = {
&google_protobuf_UninterpretedOption_msginit,
};
static const upb_msglayout_field google_protobuf_MessageOptions__fields[5] = {
{1, UPB_SIZE(1, 1), 1, 0, 8, 1},
{2, UPB_SIZE(2, 2), 2, 0, 8, 1},
{3, UPB_SIZE(3, 3), 3, 0, 8, 1},
{7, UPB_SIZE(4, 4), 4, 0, 8, 1},
{999, UPB_SIZE(8, 8), 0, 0, 11, 3},
};
const upb_msglayout google_protobuf_MessageOptions_msginit = {
&google_protobuf_MessageOptions_submsgs[0],
&google_protobuf_MessageOptions__fields[0],
UPB_SIZE(12, 16), 5, false,
};
/* Layout tables for google_protobuf_FieldOptions: one sub-message layout
 * (UninterpretedOption), seven field rows, and the msginit record tying them
 * together.
 * NOTE(review): row columns are presumably {field number, offset, presence,
 * submsg index, descriptor type, label} -- confirm against
 * upb_msglayout_field. */
static const upb_msglayout *const google_protobuf_FieldOptions_submsgs[1] = {
&google_protobuf_UninterpretedOption_msginit,
};
static const upb_msglayout_field google_protobuf_FieldOptions__fields[7] = {
{1, UPB_SIZE(8, 8), 1, 0, 14, 1},
{2, UPB_SIZE(24, 24), 3, 0, 8, 1},
{3, UPB_SIZE(25, 25), 4, 0, 8, 1},
{5, UPB_SIZE(26, 26), 5, 0, 8, 1},
{6, UPB_SIZE(16, 16), 2, 0, 14, 1},
{10, UPB_SIZE(27, 27), 6, 0, 8, 1},
{999, UPB_SIZE(28, 32), 0, 0, 11, 3},
};
const upb_msglayout google_protobuf_FieldOptions_msginit = {
&google_protobuf_FieldOptions_submsgs[0],
&google_protobuf_FieldOptions__fields[0],
UPB_SIZE(32, 40), 7, false,
};
/* Layout tables for google_protobuf_OneofOptions: one sub-message layout
 * (UninterpretedOption), a single field row, and the msginit record tying
 * them together.
 * NOTE(review): row columns are presumably {field number, offset, presence,
 * submsg index, descriptor type, label} -- confirm against
 * upb_msglayout_field. */
static const upb_msglayout *const google_protobuf_OneofOptions_submsgs[1] = {
&google_protobuf_UninterpretedOption_msginit,
};
static const upb_msglayout_field google_protobuf_OneofOptions__fields[1] = {
{999, UPB_SIZE(0, 0), 0, 0, 11, 3},
};
const upb_msglayout google_protobuf_OneofOptions_msginit = {
&google_protobuf_OneofOptions_submsgs[0],
&google_protobuf_OneofOptions__fields[0],
UPB_SIZE(4, 8), 1, false,
};
/* Layout tables for google_protobuf_EnumOptions: one sub-message layout
 * (UninterpretedOption), three field rows, and the msginit record tying them
 * together.
 * NOTE(review): row columns are presumably {field number, offset, presence,
 * submsg index, descriptor type, label} -- confirm against
 * upb_msglayout_field. */
static const upb_msglayout *const google_protobuf_EnumOptions_submsgs[1] = {
&google_protobuf_UninterpretedOption_msginit,
};
static const upb_msglayout_field google_protobuf_EnumOptions__fields[3] = {
{2, UPB_SIZE(1, 1), 1, 0, 8, 1},
{3, UPB_SIZE(2, 2), 2, 0, 8, 1},
{999, UPB_SIZE(4, 8), 0, 0, 11, 3},
};
const upb_msglayout google_protobuf_EnumOptions_msginit = {
&google_protobuf_EnumOptions_submsgs[0],
&google_protobuf_EnumOptions__fields[0],
UPB_SIZE(8, 16), 3, false,
};
/* Layout tables for google_protobuf_EnumValueOptions: one sub-message layout
 * (UninterpretedOption), two field rows, and the msginit record tying them
 * together.
 * NOTE(review): row columns are presumably {field number, offset, presence,
 * submsg index, descriptor type, label} -- confirm against
 * upb_msglayout_field. */
static const upb_msglayout *const google_protobuf_EnumValueOptions_submsgs[1] = {
&google_protobuf_UninterpretedOption_msginit,
};
static const upb_msglayout_field google_protobuf_EnumValueOptions__fields[2] = {
{1, UPB_SIZE(1, 1), 1, 0, 8, 1},
{999, UPB_SIZE(4, 8), 0, 0, 11, 3},
};
const upb_msglayout google_protobuf_EnumValueOptions_msginit = {
&google_protobuf_EnumValueOptions_submsgs[0],
&google_protobuf_EnumValueOptions__fields[0],
UPB_SIZE(8, 16), 2, false,
};
/* Layout tables for google_protobuf_ServiceOptions: one sub-message layout
 * (UninterpretedOption), two field rows, and the msginit record tying them
 * together.
 * NOTE(review): row columns are presumably {field number, offset, presence,
 * submsg index, descriptor type, label} -- confirm against
 * upb_msglayout_field. */
static const upb_msglayout *const google_protobuf_ServiceOptions_submsgs[1] = {
&google_protobuf_UninterpretedOption_msginit,
};
static const upb_msglayout_field google_protobuf_ServiceOptions__fields[2] = {
{33, UPB_SIZE(1, 1), 1, 0, 8, 1},
{999, UPB_SIZE(4, 8), 0, 0, 11, 3},
};
const upb_msglayout google_protobuf_ServiceOptions_msginit = {
&google_protobuf_ServiceOptions_submsgs[0],
&google_protobuf_ServiceOptions__fields[0],
UPB_SIZE(8, 16), 2, false,
};
/* Layout tables for google_protobuf_MethodOptions: one sub-message layout
 * (UninterpretedOption), three field rows, and the msginit record tying them
 * together.
 * NOTE(review): row columns are presumably {field number, offset, presence,
 * submsg index, descriptor type, label} -- confirm against
 * upb_msglayout_field. */
static const upb_msglayout *const google_protobuf_MethodOptions_submsgs[1] = {
&google_protobuf_UninterpretedOption_msginit,
};
static const upb_msglayout_field google_protobuf_MethodOptions__fields[3] = {
{33, UPB_SIZE(16, 16), 2, 0, 8, 1},
{34, UPB_SIZE(8, 8), 1, 0, 14, 1},
{999, UPB_SIZE(20, 24), 0, 0, 11, 3},
};
const upb_msglayout google_protobuf_MethodOptions_msginit = {
&google_protobuf_MethodOptions_submsgs[0],
&google_protobuf_MethodOptions__fields[0],
UPB_SIZE(24, 32), 3, false,
};
/* Layout tables for google_protobuf_UninterpretedOption: one sub-message
 * layout (UninterpretedOption_NamePart), seven field rows, and the msginit
 * record tying them together.
 * NOTE(review): row columns are presumably {field number, offset, presence,
 * submsg index, descriptor type, label} -- confirm against
 * upb_msglayout_field. */
static const upb_msglayout *const google_protobuf_UninterpretedOption_submsgs[1] = {
&google_protobuf_UninterpretedOption_NamePart_msginit,
};
static const upb_msglayout_field google_protobuf_UninterpretedOption__fields[7] = {
{2, UPB_SIZE(56, 80), 0, 0, 11, 3},
{3, UPB_SIZE(32, 32), 4, 0, 9, 1},
{4, UPB_SIZE(8, 8), 1, 0, 4, 1},
{5, UPB_SIZE(16, 16), 2, 0, 3, 1},
{6, UPB_SIZE(24, 24), 3, 0, 1, 1},
{7, UPB_SIZE(40, 48), 5, 0, 12, 1},
{8, UPB_SIZE(48, 64), 6, 0, 9, 1},
};
const upb_msglayout google_protobuf_UninterpretedOption_msginit = {
&google_protobuf_UninterpretedOption_submsgs[0],
&google_protobuf_UninterpretedOption__fields[0],
UPB_SIZE(64, 96), 7, false,
};
/* Layout tables for google_protobuf_UninterpretedOption_NamePart: two field
 * rows and a msginit record with no sub-message table (NULL). Unlike most
 * other rows in this file, both rows end in 2 rather than 1 or 3.
 * NOTE(review): row columns are presumably {field number, offset, presence,
 * submsg index, descriptor type, label}, which would make 2 the "required"
 * label -- confirm against upb_msglayout_field. */
static const upb_msglayout_field google_protobuf_UninterpretedOption_NamePart__fields[2] = {
{1, UPB_SIZE(4, 8), 2, 0, 9, 2},
{2, UPB_SIZE(1, 1), 1, 0, 8, 2},
};
const upb_msglayout google_protobuf_UninterpretedOption_NamePart_msginit = {
NULL,
&google_protobuf_UninterpretedOption_NamePart__fields[0],
UPB_SIZE(16, 32), 2, false,
};
/* Layout tables for google_protobuf_SourceCodeInfo: one sub-message layout
 * (SourceCodeInfo_Location), a single field row, and the msginit record
 * tying them together.
 * NOTE(review): row columns are presumably {field number, offset, presence,
 * submsg index, descriptor type, label} -- confirm against
 * upb_msglayout_field. */
static const upb_msglayout *const google_protobuf_SourceCodeInfo_submsgs[1] = {
&google_protobuf_SourceCodeInfo_Location_msginit,
};
static const upb_msglayout_field google_protobuf_SourceCodeInfo__fields[1] = {
{1, UPB_SIZE(0, 0), 0, 0, 11, 3},
};
const upb_msglayout google_protobuf_SourceCodeInfo_msginit = {
&google_protobuf_SourceCodeInfo_submsgs[0],
&google_protobuf_SourceCodeInfo__fields[0],
UPB_SIZE(4, 8), 1, false,
};
/* Layout tables for google_protobuf_SourceCodeInfo_Location: five field rows
 * and a msginit record with no sub-message table (NULL).
 * NOTE(review): row columns are presumably {field number, offset, presence,
 * submsg index, descriptor type, label} -- confirm against
 * upb_msglayout_field. */
static const upb_msglayout_field google_protobuf_SourceCodeInfo_Location__fields[5] = {
{1, UPB_SIZE(20, 40), 0, 0, 5, 3},
{2, UPB_SIZE(24, 48), 0, 0, 5, 3},
{3, UPB_SIZE(4, 8), 1, 0, 9, 1},
{4, UPB_SIZE(12, 24), 2, 0, 9, 1},
{6, UPB_SIZE(28, 56), 0, 0, 9, 3},
};
const upb_msglayout google_protobuf_SourceCodeInfo_Location_msginit = {
NULL,
&google_protobuf_SourceCodeInfo_Location__fields[0],
UPB_SIZE(32, 64), 5, false,
};
/* Layout tables for google_protobuf_GeneratedCodeInfo: one sub-message layout
 * (GeneratedCodeInfo_Annotation), a single field row, and the msginit record
 * tying them together.
 * NOTE(review): row columns are presumably {field number, offset, presence,
 * submsg index, descriptor type, label} -- confirm against
 * upb_msglayout_field. */
static const upb_msglayout *const google_protobuf_GeneratedCodeInfo_submsgs[1] = {
&google_protobuf_GeneratedCodeInfo_Annotation_msginit,
};
static const upb_msglayout_field google_protobuf_GeneratedCodeInfo__fields[1] = {
{1, UPB_SIZE(0, 0), 0, 0, 11, 3},
};
const upb_msglayout google_protobuf_GeneratedCodeInfo_msginit = {
&google_protobuf_GeneratedCodeInfo_submsgs[0],
&google_protobuf_GeneratedCodeInfo__fields[0],
UPB_SIZE(4, 8), 1, false,
};
/* Layout tables for google_protobuf_GeneratedCodeInfo_Annotation: four field
 * rows and a msginit record with no sub-message table (NULL).
 * NOTE(review): row columns are presumably {field number, offset, presence,
 * submsg index, descriptor type, label} -- confirm against
 * upb_msglayout_field. */
static const upb_msglayout_field google_protobuf_GeneratedCodeInfo_Annotation__fields[4] = {
{1, UPB_SIZE(20, 32), 0, 0, 5, 3},
{2, UPB_SIZE(12, 16), 3, 0, 9, 1},
{3, UPB_SIZE(4, 4), 1, 0, 5, 1},
{4, UPB_SIZE(8, 8), 2, 0, 5, 1},
};
const upb_msglayout google_protobuf_GeneratedCodeInfo_Annotation_msginit = {
NULL,
&google_protobuf_GeneratedCodeInfo_Annotation__fields[0],
UPB_SIZE(24, 48), 4, false,
};
#include "upb/port_undef.inc"

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

@ -0,0 +1,16 @@
#!/bin/bash
# CI entry point: installs Bazel, logs which CMake and Bazel are in use, then
# runs every Bazel test target from the directory two levels above this script.

# Install the latest version of Bazel.
use_bazel.sh latest

# Verify/query CMake
echo "PATH=$PATH"
ls -l "$(which cmake)"
cmake --version

# Log the bazel path and version.
which bazel
bazel version

# Quote the path so checkouts under directories containing spaces work, and
# stop if the cd fails instead of running the tests from the wrong directory.
cd "$(dirname "$0")/../.." || exit 1
bazel test --test_output=errors :all

@ -0,0 +1,2 @@
build_file: "upb/kokoro/ubuntu/build.sh"
timeout_mins: 15

@ -0,0 +1,2 @@
build_file: "upb/kokoro/ubuntu/build.sh"
timeout_mins: 15

@ -0,0 +1,36 @@
#include <string.h>
#include <benchmark/benchmark.h>
#include "google/protobuf/descriptor.upb.h"
#include "google/protobuf/descriptor.upbdefs.h"
/* Input for the parse benchmark: the `descriptor` member of the generated
 * upbdefs init record for descriptor.proto (presumably the serialized
 * FileDescriptorProto -- confirm in descriptor.upbdefs.h). */
upb_strview descriptor = google_protobuf_descriptor_proto_upbdefinit.descriptor;
/* A buffer big enough to parse descriptor.proto without going to heap. */
char buf[65535];
/* Benchmarks creating an arena seeded with the static buffer `buf` and
 * freeing it again straight away. */
static void BM_CreateArena(benchmark::State& state) {
  for (auto _ : state) {
    upb_arena_free(upb_arena_init(buf, sizeof(buf), NULL));
  }
}
BENCHMARK(BM_CreateArena);
/* Benchmarks parsing `descriptor` into a FileDescriptorProto. Every iteration
 * uses a fresh arena seeded with `buf`. The process exits with status 1 if a
 * parse fails. */
static void BM_ParseDescriptor(benchmark::State& state) {
  for (auto _ : state) {
    upb_arena* arena = upb_arena_init(buf, sizeof(buf), NULL);
    google_protobuf_FileDescriptorProto* set =
        google_protobuf_FileDescriptorProto_parse(descriptor.data,
                                                  descriptor.size, arena);
    if (!set) {
      printf("Failed to parse.\n");
      exit(1);
    }
    upb_arena_free(arena);
  }
  /* Each iteration consumes the whole descriptor exactly once, so the total
   * is iterations * size; no separate running counter is needed. */
  state.SetBytesProcessed(state.iterations() * descriptor.size);
}
BENCHMARK(BM_ParseDescriptor);

@ -0,0 +1,165 @@
/*
*
* A test that verifies that our results are identical to proto2 for a
* given proto type and input protobuf.
*/
#define __STDC_LIMIT_MACROS // So we get UINT32_MAX
#include <assert.h>
#include <google/protobuf/descriptor.h>
#include <google/protobuf/dynamic_message.h>
#include <google/protobuf/message.h>
#include <google/protobuf/text_format.h>
#include <google/protobuf/wire_format_lite.h>
#include <inttypes.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include "tests/google_messages.pb.h"
#include "tests/upb_test.h"
#include "upb/bindings/googlepb/bridge.h"
#include "upb/def.h"
#include "upb/handlers.h"
#include "upb/pb/decoder.h"
#include "upb/pb/glue.h"
#include "upb/pb/varint.int.h"
// Pull in string data from tests/google_message{1,2}.dat
// (the .h files are generated with xxd).
// Both arrays are always compiled in. run_tests() passes both to UPB_UNUSED
// and reads its input through the MESSAGE_DATA_IDENT macro, which is
// presumably defined by the build to name one of them -- confirm in the
// build rules.
const unsigned char message1_data[] = {
#include "tests/google_message1.h"
};
const unsigned char message2_data[] = {
#include "tests/google_message2.h"
};
// Checks that the upb MessageDef describes the same fields as the proto2
// Descriptor: equal field counts and, for every upb field, a proto2 field
// with the same number, name, descriptor type and repeated-ness.
void compare_metadata(const google::protobuf::Descriptor* d,
                      const upb::MessageDef *upb_md) {
  ASSERT(d->field_count() == upb_md->field_count());
  for (upb::MessageDef::const_field_iterator i = upb_md->field_begin();
       i != upb_md->field_end(); ++i) {
    const upb::FieldDef* upb_f = *i;
    // Check the pointer before it is dereferenced for the lookup below.
    ASSERT(upb_f);
    const google::protobuf::FieldDescriptor *proto2_f =
        d->FindFieldByNumber(upb_f->number());
    ASSERT(proto2_f);
    ASSERT(upb_f->number() == (uint32_t)proto2_f->number());
    ASSERT(std::string(upb_f->name()) == proto2_f->name());
    ASSERT(upb_f->descriptor_type() ==
        static_cast<upb::FieldDef::DescriptorType>(proto2_f->type()));
    ASSERT(upb_f->IsSequence() == proto2_f->is_repeated());
  }
}
// Writes the text-format rendering of both messages to stderr on one line.
void print_diff(const google::protobuf::Message& msg1,
                const google::protobuf::Message& msg2) {
  using google::protobuf::TextFormat;

  std::string lhs_text;
  TextFormat::PrintToString(msg1, &lhs_text);

  std::string rhs_text;
  TextFormat::PrintToString(msg2, &rhs_text);

  fprintf(stderr, "str1: %s, str2: %s\n", lhs_text.c_str(), rhs_text.c_str());
}
// Parses the buffer [str, str+len) twice -- into msg1 with proto2's own parser
// and into msg2 through a upb decoder driving protomsg_handlers -- then
// asserts that both messages serialize to the same bytes and that those bytes
// equal the input.
//
//   msg1, msg2         destination messages; msg2 is cleared before decoding.
//   protomsg_handlers  upb handlers that write into msg2.
//   str, len           the serialized input.
//   allow_jit          forwarded to CodeCache::set_allow_jit().
void parse_and_compare(google::protobuf::Message *msg1,
google::protobuf::Message *msg2,
const upb::Handlers *protomsg_handlers,
const char *str, size_t len, bool allow_jit) {
// Parse to both proto2 and upb.
ASSERT(msg1->ParseFromArray(str, len));
upb::pb::CodeCache cache;
ASSERT(cache.set_allow_jit(allow_jit));
upb::reffed_ptr<const upb::pb::DecoderMethod> decoder_method(
cache.GetDecoderMethod(upb::pb::DecoderMethodOptions(protomsg_handlers)));
// Decoder errors are reported into `status` through the environment.
upb::Status status;
upb::Environment env;
env.ReportErrorsTo(&status);
upb::Sink protomsg_sink(protomsg_handlers, msg2);
upb::pb::Decoder* decoder =
upb::pb::Decoder::Create(&env, decoder_method.get(), &protomsg_sink);
msg2->Clear();
bool ok = upb::BufferSource::PutBuffer(str, len, decoder->input());
// Print diagnostics before the assertions below fail.
if (!ok) {
fprintf(stderr, "error parsing: %s\n", status.error_message());
print_diff(*msg1, *msg2);
}
ASSERT(ok);
ASSERT(status.ok());
// Would like to just compare the message objects themselves, but
// unfortunately MessageDifferencer is not part of the open-source release of
// proto2, so we compare their serialized strings, which we expect will be
// equivalent.
std::string str1;
std::string str2;
msg1->SerializeToString(&str1);
msg2->SerializeToString(&str2);
if (str1 != str2) {
print_diff(*msg1, *msg2);
}
ASSERT(str1 == str2);
// The re-serialized form must also round-trip to the original input bytes.
ASSERT(std::string(str, len) == str2);
}
// Cross-checks upb's zig-zag encoders against proto2's WireFormatLite for a
// geometric series of values starting at 5 and growing by a factor of 1.5.
// The 32-bit encoder is only compared while the value is below UINT32_MAX.
void test_zig_zag() {
  uint64_t num = 5;
  while (num * 1.5 < UINT64_MAX) {
    ASSERT(upb_zzenc_64(num) ==
           google::protobuf::internal::WireFormatLite::ZigZagEncode64(num));
    if (num < UINT32_MAX) {
      ASSERT(upb_zzenc_32(num) ==
             google::protobuf::internal::WireFormatLite::ZigZagEncode32(num));
    }
    num *= 1.5;
  }
}
extern "C" {
// Test entry point. Compares upb against proto2 for the message type and
// input selected by the MESSAGE_CIDENT / MESSAGE_DATA_IDENT macros: first the
// metadata, then parsing into generated messages, then parsing into
// DynamicMessage instances, and finally zig-zag encoding. Returns 0; failed
// checks are reported through ASSERT.
int run_tests(int argc, char *argv[]) {
UPB_UNUSED(argc);
UPB_UNUSED(argv);
UPB_UNUSED(message1_data);
UPB_UNUSED(message2_data);
size_t len = sizeof(MESSAGE_DATA_IDENT);
const char *str = (const char*)MESSAGE_DATA_IDENT;
MESSAGE_CIDENT msg1;
MESSAGE_CIDENT msg2;
upb::reffed_ptr<const upb::Handlers> h(
upb::googlepb::WriteHandlers::New(msg1));
compare_metadata(msg1.GetDescriptor(), h->message_def());
// Run twice to test proper object reuse.
// Each pass runs once with the JIT disallowed and once with it allowed.
parse_and_compare(&msg1, &msg2, h.get(), str, len, false);
parse_and_compare(&msg1, &msg2, h.get(), str, len, true);
parse_and_compare(&msg1, &msg2, h.get(), str, len, false);
parse_and_compare(&msg1, &msg2, h.get(), str, len, true);
// Test with DynamicMessage.
google::protobuf::DynamicMessageFactory* factory =
new google::protobuf::DynamicMessageFactory;
const google::protobuf::Message* prototype =
factory->GetPrototype(msg1.descriptor());
google::protobuf::Message* dyn_msg1 = prototype->New();
google::protobuf::Message* dyn_msg2 = prototype->New();
h = upb::googlepb::WriteHandlers::New(*dyn_msg1);
parse_and_compare(dyn_msg1, dyn_msg2, h.get(), str, len, false);
parse_and_compare(dyn_msg1, dyn_msg2, h.get(), str, len, true);
// The dynamic messages are deleted before the factory that produced their
// prototype.
delete dyn_msg1;
delete dyn_msg2;
delete factory;
test_zig_zag();
printf("All tests passed, %d assertions.\n", num_assertions);
google::protobuf::ShutdownProtobufLibrary();
return 0;
}
}

@ -0,0 +1,750 @@
local upb = require "upb"
local lunit = require "lunit"
if _VERSION >= 'Lua 5.2' then
_ENV = lunit.module("testupb", "seeall")
else
module("testupb", lunit.testcase, package.seeall)
end
-- Drains an iterator function into a new array-style table, keeping the values
-- in the order the iterator produced them.
function iter_to_array(iter)
  local collected = {}
  local count = 0
  for item in iter do
    count = count + 1
    collected[count] = item
  end
  return collected
end
-- A MessageDef built from a oneof plus a plain field resolves all three names
-- (the oneof, the oneof's member, and the plain field) through lookup_name().
function test_msgdef()
  local oneof_member = upb.FieldDef{name = "field2", number = 1, type = upb.TYPE_INT32}
  local oneof = upb.OneofDef{name = "field1", fields = {oneof_member}}
  local plain = upb.FieldDef{name = "field3", number = 2, type = upb.TYPE_INT32}
  local msgdef = upb.MessageDef{fields = {oneof, plain}}

  assert_equal(plain, msgdef:lookup_name("field3"))
  assert_equal(oneof, msgdef:lookup_name("field1"))
  assert_equal(oneof_member, msgdef:lookup_name("field2"))
end
-- Exercises FieldDef: the state of a blank def, the individual setters, and
-- construction from an initializer table.
function test_fielddef()
  -- A blank FieldDef is mutable and has nothing set except its label.
  local blank = upb.FieldDef()
  assert_false(blank:is_frozen())
  assert_nil(blank:number())
  assert_nil(blank:name())
  assert_nil(blank:type())
  assert_equal(upb.LABEL_OPTIONAL, blank:label())

  -- Every setter is reflected by the matching getter.
  blank:set_name("foo_field")
  blank:set_number(3)
  blank:set_label(upb.LABEL_REPEATED)
  blank:set_type(upb.TYPE_FLOAT)
  assert_equal("foo_field", blank:name())
  assert_equal(3, blank:number())
  assert_equal(upb.LABEL_REPEATED, blank:label())
  assert_equal(upb.TYPE_FLOAT, blank:type())

  -- The same properties can be supplied up front as a table.
  local from_table = upb.FieldDef{
    name = "foo",
    number = 5,
    type = upb.TYPE_DOUBLE,
    label = upb.LABEL_REQUIRED,
  }
  assert_equal("foo", from_table:name())
  assert_equal(5, from_table:number())
  assert_equal(upb.TYPE_DOUBLE, from_table:type())
  assert_equal(upb.LABEL_REQUIRED, from_table:label())
end
-- Exercises EnumDef (empty state, add(), iteration, table constructor, lookup
-- by name and number) and how an enum-typed FieldDef resolves its default
-- value with and without an attached EnumDef.
function test_enumdef()
  local e = upb.EnumDef()
  assert_equal(0, #e)
  assert_nil(e:value(5))
  assert_nil(e:value("NONEXISTENT_NAME"))
  -- An empty enum must not yield anything when iterated.
  for name, value in e:values() do
    fail()
  end

  e:add("VAL1", 1)
  e:add("VAL2", 2)

  local values = {}
  for name, value in e:values() do
    values[name] = value
  end
  assert_equal(1, values["VAL1"])
  assert_equal(2, values["VAL2"])

  local e2 = upb.EnumDef{
    values = {
      {"FOO", 1},
      {"BAR", 77},
    }
  }
  -- value() maps names to numbers and numbers to names.
  assert_equal(1, e2:value("FOO"))
  assert_equal(77, e2:value("BAR"))
  assert_equal("FOO", e2:value(1))
  assert_equal("BAR", e2:value(77))

  e2:freeze()

  local f = upb.FieldDef{type = upb.TYPE_ENUM}

  -- No default set and no EnumDef to get a default from.
  assert_equal(f:default(), nil)

  f:set_subdef(upb.EnumDef())
  -- No default to pull in from the EnumDef.
  assert_equal(f:default(), nil)

  f:set_subdef(e2)
  -- First member added to e2.
  assert_equal(f:default(), "FOO")

  f:set_subdef(nil)
  assert_equal(f:default(), nil)

  f:set_default(1)
  assert_equal(f:default(), 1)

  f:set_default("YOYOYO")
  assert_equal(f:default(), "YOYOYO")

  f:set_subdef(e2)
  f:set_default(1)
  -- It prefers to return a string, and could resolve the explicit "1" we set
  -- it to to the string value.
  assert_equal(f:default(), "FOO")

  -- FieldDef can specify default value by name or number, but the value must
  -- exist at freeze time.
  local m1 = upb.build_defs{
    upb.MessageDef{
      full_name = "A",
      fields = {
        upb.FieldDef{
          name = "f1",
          number = 1,
          type = upb.TYPE_ENUM,
          subdef = e2,
          default = "BAR"
        },
        upb.FieldDef{
          name = "f2",
          number = 2,
          type = upb.TYPE_ENUM,
          subdef = e2,
          default = 77
        }
      }
    }
  }

  -- f1 was given its default by name and f2 by number (77 is "BAR" in e2);
  -- check both fields rather than f1 twice.
  assert_equal(m1:field("f1"):default(), "BAR")
  assert_equal(m1:field("f2"):default(), "BAR")

  assert_error_match(
    "enum default for field A.f1 .DOESNT_EXIST. is not in the enum",
    function()
      local m1 = upb.build_defs{
        upb.MessageDef{
          full_name = "A",
          fields = {
            upb.FieldDef{
              name = "f1",
              number = 1,
              type = upb.TYPE_ENUM,
              subdef = e2,
              default = "DOESNT_EXIST"
            }
          }
        }
      }
    end
  )

  assert_error_match(
    "enum default for field A.f1 .142. is not in the enum",
    function()
      local m1 = upb.build_defs{
        upb.MessageDef{
          full_name = "A",
          fields = {
            upb.FieldDef{
              name = "f1",
              number = 1,
              type = upb.TYPE_ENUM,
              subdef = e2,
              default = 142
            }
          }
        }
      }
    end
  )
end
-- A MessageDef without fields reports no fields both before and after it is
-- frozen.
function test_empty_msgdef()
  -- Checks that hold in the mutable and in the frozen state alike.
  local function assert_has_no_fields(def)
    assert_equal(0, #def)
    assert_nil(def:field("nonexistent_field"))
    assert_nil(def:field(3))
    for _ in def:fields() do
      fail()
    end
  end

  local msgdef = upb.MessageDef()
  assert_nil(msgdef:full_name())  -- Def without name is anonymous.
  assert_false(msgdef:is_frozen())
  assert_has_no_fields(msgdef)

  upb.freeze(msgdef)
  assert_true(msgdef:is_frozen())
  assert_has_no_fields(msgdef)
end
-- A MessageDef built from an initializer table exposes its name and fields:
-- lookup works by name and by number, and fields() visits each field once.
function test_msgdef_constructor()
  local first = upb.FieldDef{name = "field1", number = 7, type = upb.TYPE_INT32}
  local second = upb.FieldDef{name = "field2", number = 8, type = upb.TYPE_INT32}
  local msgdef = upb.MessageDef{full_name = "TestMessage", fields = {first, second}}

  assert_equal("TestMessage", msgdef:full_name())
  assert_false(msgdef:is_frozen())
  assert_equal(2, #msgdef)

  -- Lookup by name, then by number.
  assert_equal(first, msgdef:field("field1"))
  assert_equal(second, msgdef:field("field2"))
  assert_equal(first, msgdef:field(7))
  assert_equal(second, msgdef:field(8))

  -- Iteration yields exactly the two fields.
  local seen = {}
  local total = 0
  for fld in msgdef:fields() do
    seen[fld] = true
    total = total + 1
  end
  assert_equal(2, total)
  assert_true(seen[first])
  assert_true(seen[second])

  upb.freeze(msgdef)
end
-- Stress test for iterator safety: iterators over a MessageDef's fields and an
-- EnumDef's values must neither crash when the container is modified during
-- iteration nor when the container becomes unreachable while the iterator is
-- still alive. The test makes no assertions; it passes if nothing errors.
function test_iteration()
-- Test that we cannot crash the process even if we modify the set of fields
-- during iteration.
local md = upb.MessageDef{full_name = "TestMessage"}
for i=1,10 do
md:add(upb.FieldDef{
name = "field" .. tostring(i),
number = 1000 - i,
type = upb.TYPE_INT32
})
end
-- `add` limits how many iteration steps try to insert more fields (one per
-- field initially present).
local add = #md
for f in md:fields() do
if add > 0 then
add = add - 1
for i=10000,11000 do
local field_name = "field" .. tostring(i)
-- We want to add fields to the table to trigger a table resize,
-- but we must skip it if the field name or number already exists
-- otherwise it will raise an error.
if md:field(field_name) == nil and
md:field(i) == nil then
md:add(upb.FieldDef{
name = field_name,
number = i,
type = upb.TYPE_INT32
})
end
end
end
end
-- Test that iterators don't crash the process even if the MessageDef goes
-- out of scope.
--
-- Note: have previously verified that this can indeed crash the process if
-- we do not explicitly add a reference from the iterator to the underlying
-- MessageDef.
local iter = md:fields()
md = nil
collectgarbage()
while iter() do
end
-- Same check for an EnumDef value iterator.
local ed = upb.EnumDef{
values = {
{"FOO", 1},
{"BAR", 77},
}
}
iter = ed:values()
ed = nil
collectgarbage()
while iter() do
end
end
-- set_full_name() and add() on a blank MessageDef are visible through
-- full_name(), the length operator and field().
function test_msgdef_setters()
  local msgdef = upb.MessageDef()

  msgdef:set_full_name("Message1")
  assert_equal("Message1", msgdef:full_name())

  local added = upb.FieldDef{name = "field1", number = 3, type = upb.TYPE_DOUBLE}
  msgdef:add(added)
  assert_equal(1, #msgdef)
  assert_equal(added, msgdef:field("field1"))
end
-- Error paths of MessageDef: bad initializer keys, duplicate field numbers and
-- names, names with embedded NULs, mutation after freezing, and freezing a
-- message whose sub-message def is not frozen.
function test_msgdef_errors()
  assert_error(function() upb.MessageDef{bad_initializer_key = 5} end)

  local md = upb.MessageDef()

  assert_error(function()
    -- Duplicate field number.
    upb.MessageDef{
      fields = {
        upb.FieldDef{name = "field1", number = 1, type = upb.TYPE_INT32},
        upb.FieldDef{name = "field2", number = 1, type = upb.TYPE_INT32}
      }
    }
  end)

  assert_error(function()
    -- Duplicate field name.
    upb.MessageDef{
      fields = {
        upb.FieldDef{name = "field1", number = 1, type = upb.TYPE_INT32},
        upb.FieldDef{name = "field1", number = 2, type = upb.TYPE_INT32}
      }
    }
  end)

  assert_error(function()
    -- Field name duplicates the name of a field inside a oneof.
    upb.MessageDef{
      fields = {
        upb.OneofDef{name = "field1", fields = {
          upb.FieldDef{name = "field2", number = 1, type = upb.TYPE_INT32},
        }},
        upb.FieldDef{name = "field2", number = 2, type = upb.TYPE_INT32}
      }
    }
  end)

  -- attempt to set a name with embedded NULLs.
  assert_error_match("names cannot have embedded NULLs", function()
    md:set_full_name("abc\0def")
  end)

  upb.freeze(md)

  -- Attempt to mutate frozen MessageDef.
  assert_error_match("frozen", function()
    md:add(upb.FieldDef{name = "field1", number = 1, type = upb.TYPE_INT32})
  end)
  assert_error_match("frozen", function()
    md:set_full_name("abc")
  end)

  -- Attempt to freeze a msgdef without freezing its subdef.
  assert_error_match("is not frozen or being frozen", function()
    -- Keep the unfrozen subdef local so it does not leak into the module's
    -- environment as a global.
    local m1 = upb.MessageDef()
    upb.freeze(
      upb.MessageDef{
        fields = {
          upb.FieldDef{name = "f1", number = 1, type = upb.TYPE_MESSAGE,
                       subdef = m1}
        }
      }
    )
  end)
end
-- SymbolTable behaviour: an empty table lists no defs; defs passed to the
-- constructor can be looked up, are frozen, and have by-name sub-message
-- references resolved; defs added later may reference earlier ones.
function test_symtab()
  local empty_table = upb.SymbolTable()
  for _, def_kind in ipairs{upb.DEF_ANY, upb.DEF_MSG, upb.DEF_ENUM} do
    assert_equal(0, #iter_to_array(empty_table:defs(def_kind)))
  end

  local table_with_defs = upb.SymbolTable{
    upb.MessageDef{full_name = "TestMessage"},
    upb.MessageDef{full_name = "ContainingMessage", fields = {
      upb.FieldDef{name = "field1", number = 1, type = upb.TYPE_INT32},
      upb.FieldDef{name = "field2", number = 2, type = upb.TYPE_MESSAGE,
                   subdef_name = ".TestMessage"}
      }
    }
  }

  local inner = table_with_defs:lookup("TestMessage")
  local outer = table_with_defs:lookup("ContainingMessage")
  assert_not_nil(inner)
  assert_not_nil(outer)
  -- The ".TestMessage" name reference resolved to the def in the table.
  assert_equal(inner, outer:field("field2"):subdef())
  assert_true(inner:is_frozen())
  assert_true(outer:is_frozen())

  -- A def added afterwards can point directly at an existing def.
  table_with_defs:add{
    upb.MessageDef{full_name = "ContainingMessage2", fields = {
      upb.FieldDef{name = "field5", number = 5, type = upb.TYPE_MESSAGE,
                   subdef = outer}
      }
    }
  }

  local outermost = table_with_defs:lookup("ContainingMessage2")
  assert_not_nil(outermost)
  assert_equal(outermost:field("field5"):subdef(), outer)
end
-- Exercises upb.Array for every numeric element type: indexing bounds,
-- appending, iteration with upb.ipairs, and rejection of out-of-range or
-- non-numeric values.
function test_numeric_array()
-- Runs the shared checks for one element type.
--   val        an in-range value stored at index 1
--   too_big    optional; must be rejected as out of range
--   too_small  optional; must be rejected as out of range
--   bad3       optional; a third value that must be rejected the same way
local function test_for_numeric_type(upb_type, val, too_big, too_small, bad3)
local array = upb.Array(upb_type)
assert_equal(0, #array)
-- 0 is never a valid index in Lua.
assert_error_match("array index", function() return array[0] end)
-- Past the end of the array.
assert_error_match("array index", function() return array[1] end)
array[1] = val
assert_equal(val, array[1])
assert_equal(1, #array)
assert_equal(val, array[1])
-- Past the end of the array.
assert_error_match("array index", function() return array[2] end)
array[2] = 10
assert_equal(val, array[1])
assert_equal(10, array[2])
assert_equal(2, #array)
-- Past the end of the array.
assert_error_match("array index", function() return array[3] end)
-- Indices come back in order starting at 1; note the loop variable `val`
-- shadows the parameter of the same name inside the loop body.
local n = 1
for i, val in upb.ipairs(array) do
assert_equal(n, i)
n = n + 1
assert_equal(array[i], val)
end
-- Values that are out of range.
local errmsg = "not an integer or out of range"
if too_small then
assert_error_match(errmsg, function() array[3] = too_small end)
end
if too_big then
assert_error_match(errmsg, function() array[3] = too_big end)
end
if bad3 then
assert_error_match(errmsg, function() array[3] = bad3 end)
end
-- Can't assign other Lua types.
errmsg = "bad argument #3"
assert_error_match(errmsg, function() array[3] = "abc" end)
assert_error_match(errmsg, function() array[3] = true end)
assert_error_match(errmsg, function() array[3] = false end)
assert_error_match(errmsg, function() array[3] = nil end)
assert_error_match(errmsg, function() array[3] = {} end)
assert_error_match(errmsg, function() array[3] = print end)
assert_error_match(errmsg, function() array[3] = array end)
end
-- in-range of 64-bit types but not exactly representable as double
local bad64 = 2^68 - 1
test_for_numeric_type(upb.TYPE_UINT32, 2^32 - 1, 2^32, -1, 5.1)
test_for_numeric_type(upb.TYPE_UINT64, 2^63, 2^64, -1, bad64)
test_for_numeric_type(upb.TYPE_INT32, 2^31 - 1, 2^31, -2^31 - 1, 5.1)
-- Enums don't exist at a language level in Lua, so we just represent enum
-- values as int32s.
test_for_numeric_type(upb.TYPE_ENUM, 2^31 - 1, 2^31, -2^31 - 1, 5.1)
test_for_numeric_type(upb.TYPE_INT64, 2^62, 2^63, -2^64, bad64)
-- The floating-point types are called without range arguments, so only the
-- wrong-Lua-type rejections run for them.
test_for_numeric_type(upb.TYPE_FLOAT, 340282306073709652508363335590014353408)
test_for_numeric_type(upb.TYPE_DOUBLE, 10^101)
end
-- Exercises upb.Array for the string and bytes element types: indexing bounds,
-- appending, iteration with upb.ipairs, and rejection of non-string values.
function test_string_array()
-- Runs the shared checks for one string-like element type.
local function test_for_string_type(upb_type)
local array = upb.Array(upb_type)
assert_equal(0, #array)
-- 0 is never a valid index in Lua.
assert_error_match("array index", function() return array[0] end)
-- Past the end of the array.
assert_error_match("array index", function() return array[1] end)
array[1] = "foo"
assert_equal("foo", array[1])
assert_equal(1, #array)
-- Past the end of the array.
assert_error_match("array index", function() return array[2] end)
-- A second array of the same type starts out empty even though the first one
-- already holds an element.
local array2 = upb.Array(upb_type)
assert_equal(0, #array2)
array[2] = "bar"
assert_equal("foo", array[1])
assert_equal("bar", array[2])
assert_equal(2, #array)
-- Past the end of the array.
assert_error_match("array index", function() return array[3] end)
local n = 1
for i, val in upb.ipairs(array) do
assert_equal(n, i)
n = n + 1
assert_equal(array[i], val)
end
-- Two elements were visited, so the counter ends one past them.
assert_equal(3, n)
-- Can't assign other Lua types.
assert_error_match("Expected string", function() array[3] = 123 end)
assert_error_match("Expected string", function() array[3] = true end)
assert_error_match("Expected string", function() array[3] = false end)
assert_error_match("Expected string", function() array[3] = nil end)
assert_error_match("Expected string", function() array[3] = {} end)
assert_error_match("Expected string", function() array[3] = print end)
assert_error_match("Expected string", function() array[3] = array end)
end
test_for_string_type(upb.TYPE_STRING)
test_for_string_type(upb.TYPE_BYTES)
end
-- Exercises primitive (numeric and bool) message fields: assignment and
-- read-back, rejection of unknown field names, and per-type range and
-- Lua-type checks.
function test_msg_primitives()
  -- Runs the range/type checks for a single-field message of type upb_type.
  --   val        an in-range value that must round-trip
  --   too_big    optional; must be rejected as out of range
  --   too_small  optional; must be rejected as out of range
  --   bad3       optional; a third value that must be rejected the same way
  local function test_for_numeric_type(upb_type, val, too_big, too_small, bad3)
    local symtab = upb.SymbolTable{
      upb.MessageDef{full_name = "TestMessage", fields = {
        upb.FieldDef{name = "f", number = 1, type = upb_type},
        }
      }
    }

    -- Locals, so each invocation works on its own message and nothing leaks
    -- into the module environment.
    local factory = upb.MessageFactory(symtab)
    local TestMessage = factory:get_message_class("TestMessage")
    local msg = TestMessage()

    -- Defaults to zero
    assert_equal(0, msg.f)

    msg.f = 0
    assert_equal(0, msg.f)

    msg.f = val
    assert_equal(val, msg.f)

    local errmsg = "not an integer or out of range"
    if too_small then
      assert_error_match(errmsg, function() msg.f = too_small end)
    end
    if too_big then
      assert_error_match(errmsg, function() msg.f = too_big end)
    end
    if bad3 then
      assert_error_match(errmsg, function() msg.f = bad3 end)
    end

    -- Can't assign other Lua types.
    errmsg = "bad argument #3"
    assert_error_match(errmsg, function() msg.f = "abc" end)
    assert_error_match(errmsg, function() msg.f = true end)
    assert_error_match(errmsg, function() msg.f = false end)
    assert_error_match(errmsg, function() msg.f = nil end)
    assert_error_match(errmsg, function() msg.f = {} end)
    assert_error_match(errmsg, function() msg.f = print end)
    -- NOTE(review): no `array` is defined in this function, so unless a
    -- global of that name exists this assigns nil like the check above.
    assert_error_match(errmsg, function() msg.f = array end)
  end

  local symtab = upb.SymbolTable{
    upb.MessageDef{full_name = "TestMessage", fields = {
      upb.FieldDef{
          name = "i32", number = 1, type = upb.TYPE_INT32, default = 1},
      upb.FieldDef{
          name = "u32", number = 2, type = upb.TYPE_UINT32, default = 2},
      upb.FieldDef{
          name = "i64", number = 3, type = upb.TYPE_INT64, default = 3},
      upb.FieldDef{
          name = "u64", number = 4, type = upb.TYPE_UINT64, default = 4},
      upb.FieldDef{
          name = "dbl", number = 5, type = upb.TYPE_DOUBLE, default = 5},
      upb.FieldDef{
          name = "flt", number = 6, type = upb.TYPE_FLOAT, default = 6},
      upb.FieldDef{
          name = "bool", number = 7, type = upb.TYPE_BOOL, default = true},
      }
    }
  }

  local factory = upb.MessageFactory(symtab)
  local TestMessage = factory:get_message_class("TestMessage")
  local msg = TestMessage()

  -- Unset member returns default value.
  -- TODO(haberman): re-enable these when we have descriptor-based reflection.
  -- assert_equal(1, msg.i32)
  -- assert_equal(2, msg.u32)
  -- assert_equal(3, msg.i64)
  -- assert_equal(4, msg.u64)
  -- assert_equal(5, msg.dbl)
  -- assert_equal(6, msg.flt)
  -- assert_equal(true, msg.bool)

  -- Attempts to access non-existent fields fail.
  assert_error_match("no such field", function() msg.no_such = 1 end)

  msg.i32 = 10
  msg.u32 = 20
  msg.i64 = 30
  msg.u64 = 40
  msg.dbl = 50
  msg.flt = 60
  msg.bool = true

  assert_equal(10, msg.i32)
  assert_equal(20, msg.u32)
  assert_equal(30, msg.i64)
  assert_equal(40, msg.u64)
  assert_equal(50, msg.dbl)
  assert_equal(60, msg.flt)
  assert_equal(true, msg.bool)

  -- in-range of 64-bit types but not exactly representable as double.
  -- Defined here because it is passed below; without a definition in this
  -- scope the name evaluates to nil and the bad3 check is silently skipped.
  local bad64 = 2^68 - 1

  test_for_numeric_type(upb.TYPE_UINT32, 2^32 - 1, 2^32, -1, 5.1)
  test_for_numeric_type(upb.TYPE_UINT64, 2^62, 2^64, -1, bad64)
  test_for_numeric_type(upb.TYPE_INT32, 2^31 - 1, 2^31, -2^31 - 1, 5.1)
  test_for_numeric_type(upb.TYPE_INT64, 2^61, 2^63, -2^64, bad64)
  test_for_numeric_type(upb.TYPE_FLOAT, 2^20)
  test_for_numeric_type(upb.TYPE_DOUBLE, 10^101)
end
-- Exercises a repeated int32 message field: it starts out nil, accepts only a
-- upb.Array of the matching element type, and returns the assigned array.
function test_msg_array()
  local symtab = upb.SymbolTable{
    upb.MessageDef{full_name = "TestMessage", fields = {
      upb.FieldDef{name = "i32_array", number = 1, type = upb.TYPE_INT32,
                   label = upb.LABEL_REPEATED},
      }
    }
  }

  -- Locals, so nothing leaks into the module environment.
  local factory = upb.MessageFactory(symtab)
  local TestMessage = factory:get_message_class("TestMessage")
  local msg = TestMessage()

  assert_nil(msg.i32_array)

  -- Can't assign a scalar; array is expected.
  assert_error_match("lupb.array expected", function() msg.i32_array = 5 end)

  -- Can't assign array of the wrong type.
  local function assign_int64()
    msg.i32_array = upb.Array(upb.TYPE_INT64)
  end
  assert_error_match("Array had incorrect type", assign_int64)

  local arr = upb.Array(upb.TYPE_INT32)
  msg.i32_array = arr
  assert_equal(arr, msg.i32_array)

  -- Can't assign other Lua types.
  assert_error_match("array expected", function() msg.i32_array = "abc" end)
  assert_error_match("array expected", function() msg.i32_array = true end)
  assert_error_match("array expected", function() msg.i32_array = false end)
  assert_error_match("array expected", function() msg.i32_array = nil end)
  assert_error_match("array expected", function() msg.i32_array = {} end)
  assert_error_match("array expected", function() msg.i32_array = print end)
end
-- Exercises a sub-message field: it starts out nil, accepts only a message of
-- the declared type, and returns the assigned message.
function test_msg_submsg()
  local symtab = upb.SymbolTable{
    upb.MessageDef{full_name = "TestMessage", fields = {
      upb.FieldDef{name = "submsg", number = 1, type = upb.TYPE_MESSAGE,
                   subdef_name = ".SubMessage"},
      }
    },
    upb.MessageDef{full_name = "SubMessage"}
  }

  -- Locals, so nothing leaks into the module environment.
  local factory = upb.MessageFactory(symtab)
  local TestMessage = factory:get_message_class("TestMessage")
  local SubMessage = factory:get_message_class("SubMessage")
  local msg = TestMessage()

  assert_nil(msg.submsg)

  -- Can't assign message of the wrong type.
  local function assign_wrong_message_type()
    msg.submsg = TestMessage()
  end
  assert_error_match("Message had incorrect type", assign_wrong_message_type)

  local sub = SubMessage()
  msg.submsg = sub
  assert_equal(sub, msg.submsg)

  -- Can't assign other Lua types.
  assert_error_match("msg expected", function() msg.submsg = "abc" end)
  assert_error_match("msg expected", function() msg.submsg = true end)
  assert_error_match("msg expected", function() msg.submsg = false end)
  assert_error_match("msg expected", function() msg.submsg = nil end)
  assert_error_match("msg expected", function() msg.submsg = {} end)
  assert_error_match("msg expected", function() msg.submsg = print end)
end
-- defer(fn) arranges for fn to run as a __gc finalizer of a throwaway object.
-- Lua 5.1 and 5.2 have slightly different semantics for how a finalizer
-- can be defined in Lua: 5.2+ attaches __gc to a plain table, while the older
-- branch goes through a newproxy() userdata.
if _VERSION >= 'Lua 5.2' then
  defer = function(fn)
    local sentinel = {}
    setmetatable(sentinel, { __gc = fn })
  end
else
  defer = function(fn)
    local proxy = newproxy(true)
    getmetatable(proxy).__gc = fn
  end
end
-- Registers a finalizer (via defer) whose body calls methods on four upb
-- objects and expects every call to fail with "called into dead object",
-- then forces a garbage collection so the finalizer runs.
function test_finalizer()
-- Tests that we correctly handle a call into an already-finalized object.
-- Collectible objects are finalized in the opposite order of creation.
do
-- `t` is captured by the finalizer closure below; it is only filled with the
-- upb objects after defer() has been called.
local t = {}
defer(function()
assert_error_match("called into dead object", function()
-- Generic def call.
t[1]:full_name()
end)
assert_error_match("called into dead object", function()
-- Specific msgdef call.
t[1]:add()
end)
-- t[2] is the EnumDef created below.
assert_error_match("called into dead object", function()
t[2]:values()
end)
-- t[3] is the FieldDef created below.
assert_error_match("called into dead object", function()
t[3]:number()
end)
-- t[4] is the SymbolTable created below.
assert_error_match("called into dead object", function()
t[4]:lookup()
end)
end)
-- Created after the deferred finalizer's carrier object. Per the ordering
-- note above, these are presumably finalized before the deferred function
-- runs -- NOTE(review): confirm against the Lua version in use.
t = {
upb.MessageDef(),
upb.EnumDef(),
upb.FieldDef(),
upb.SymbolTable(),
}
end
-- Everything created in the do-block is out of scope here; run a full
-- collection cycle.
collectgarbage()
end
-- Run every registered test and raise an error if any test failed or errored.
local stats = lunit.main()
local suite_unsuccessful = stats.failed > 0 or stats.errors > 0
if suite_unsuccessful then
  error("One or more errors in test suite")
end

@ -0,0 +1,80 @@
-- Require "pb" first to ensure that the transitive require of "upb" is
-- handled properly by the "pb" module.
local pb = require "upb.pb"
local upb = require "upb"
local lunit = require "lunit"
-- Declare the "testupb_pb" lunit module, using the mechanism that matches the
-- running Lua version: an _ENV assignment on 5.2 and later, module() before.
if _VERSION >= 'Lua 5.2' then
_ENV = lunit.module("testupb_pb", "seeall")
else
module("testupb_pb", lunit.testcase, package.seeall)
end
-- File-level schema: a TestMessage with one field of each primitive type
-- exercised by test_parse_primitive (field numbers 1 through 7).
local symtab = upb.SymbolTable{
upb.MessageDef{full_name = "TestMessage", fields = {
upb.FieldDef{name = "i32", number = 1, type = upb.TYPE_INT32},
upb.FieldDef{name = "u32", number = 2, type = upb.TYPE_UINT32},
upb.FieldDef{name = "i64", number = 3, type = upb.TYPE_INT64},
upb.FieldDef{name = "u64", number = 4, type = upb.TYPE_UINT64},
upb.FieldDef{name = "dbl", number = 5, type = upb.TYPE_DOUBLE},
upb.FieldDef{name = "flt", number = 6, type = upb.TYPE_FLOAT},
upb.FieldDef{name = "bool", number = 7, type = upb.TYPE_BOOL},
}
}
}
-- Message class built from the schema above.
local factory = upb.MessageFactory(symtab);
local TestMessage = factory:get_message_class("TestMessage")
-- Decodes a hand-written binary payload into TestMessage, checks every
-- primitive field, then encodes the message and decodes the result into a
-- second message to verify that the values survive the round trip.
function test_parse_primitive()
  local binary_pb =
    "\008\128\128\128\128\002\016\128\128\128\128\004\024\128\128"
    .. "\128\128\128\128\128\002\032\128\128\128\128\128\128\128\001\041\000"
    .. "\000\000\000\000\000\248\063\053\000\000\096\064\056\001"

  -- Field values that binary_pb is expected to decode to.
  local function assert_expected_fields(m)
    assert_equal(536870912, m.i32)
    assert_equal(1073741824, m.u32)
    assert_equal(1125899906842624, m.i64)
    assert_equal(562949953421312, m.u64)
    assert_equal(1.5, m.dbl)
    assert_equal(3.5, m.flt)
    assert_equal(true, m.bool)
  end

  local msg = TestMessage()
  pb.decode(msg, binary_pb)
  assert_expected_fields(msg)

  -- Round trip: the re-decoded message (msg2) is the one that must be
  -- checked here; re-checking msg would not exercise encode/decode at all.
  local encoded = pb.encode(msg)
  local msg2 = TestMessage()
  pb.decode(msg2, encoded)
  assert_expected_fields(msg2)
end
-- Decodes a payload containing a single string field. The value check is
-- currently disabled (see TODO below), so this only verifies that decoding
-- does not raise.
function test_parse_string()
  -- Local schema; intentionally shadows the file-level symtab/factory/class.
  local symtab = upb.SymbolTable{
    upb.MessageDef{full_name = "TestMessage", fields = {
      upb.FieldDef{name = "str", number = 1, type = upb.TYPE_STRING},
      }
    }
  }
  local factory = upb.MessageFactory(symtab);
  local TestMessage = factory:get_message_class("TestMessage")

  -- Field 1, length-delimited, 5 bytes: "Hello".
  local binary_pb = "\010\005Hello"

  -- Kept local so the message does not leak out of this test as a global.
  local msg = TestMessage()
  pb.decode(msg, binary_pb)
  -- TODO(haberman): re-enable when this stuff works better.
  -- assert_equal("Hello", msg.str)
end
-- Run every registered test and raise an error if any test failed or errored.
local stats = lunit.main()
local suite_unsuccessful = stats.failed > 0 or stats.errors > 0
if suite_unsuccessful then
  error("One or more errors in test suite")
end

@ -0,0 +1,62 @@
#!/usr/bin/ruby
#
# Tests for Ruby upb extension.
require 'test/unit'
require 'set'
require 'upb'
# Returns the raw bytes of the serialized descriptor file used by the tests.
#
# File.binread opens the file in binary mode, reads it completely and closes
# the handle before returning, so no descriptor is leaked and the protobuf
# payload is not subject to text-mode or encoding conversion.
def get_descriptor
  File.binread("upb/descriptor/descriptor.pb")
end
# Builds a new Upb::SymbolTable, loads the serialized descriptor returned by
# get_descriptor into it, and returns the table.
def load_descriptor
  table = Upb::SymbolTable.new
  table.load_descriptor(get_descriptor)
  table
end
# Looks up +name+ in a freshly loaded symbol table and returns the message
# class that Upb.get_message_class produces for the result.
def get_message_class(name)
  msgdef = load_descriptor.lookup(name)
  Upb.get_message_class(msgdef)
end
# Test::Unit cases for the Ruby upb extension.
class TestRubyExtension < Test::Unit::TestCase
  # Parses the descriptor file as a FileDescriptorSet and checks that a couple
  # of well-known message names appear in it.
  def test_parsedescriptor
    msgdef = load_descriptor.lookup("google.protobuf.FileDescriptorSet")
    assert_instance_of(Upb::MessageDef, msgdef)

    descriptor_set_class = Upb.get_message_class(msgdef)
    parsed = descriptor_set_class.parse(get_descriptor)

    # A couple message types we know should exist.
    names = Set.new(["DescriptorProto", "FieldDescriptorProto"])
    parsed.file.each do |file|
      file.message_type.each do |message_type|
        names.delete(message_type.name)
      end
    end
    # Every expected name must have been seen (and therefore removed).
    assert_equal(0, names.size)
  end

  # Populates a FieldDescriptorProto, serializes it, parses the bytes back and
  # checks that the scalar fields and the nested options message survived.
  def test_parseserialize
    field_class = get_message_class("google.protobuf.FieldDescriptorProto")
    options_class = get_message_class("google.protobuf.FieldOptions")

    field = field_class.new
    field.name = "MyName"
    field.number = 5
    field.options = options_class.new
    field.options.packed = true

    serialized = Upb::Message.serialize(field)
    reparsed = field_class.parse(serialized)

    assert_equal("MyName", reparsed.name)
    assert_equal(5, reparsed.number)
    assert_equal(true, reparsed.options.packed)
  end
end

@ -0,0 +1,179 @@
/* This is a upb implementation of the upb conformance tests, see:
* https://github.com/google/protobuf/tree/master/conformance
*/
#include <errno.h>
#include <stdarg.h>
#include <stdio.h>
#include <stdlib.h>
#include <unistd.h>
#include "conformance/conformance.upb.h"
#include "src/google/protobuf/test_messages_proto3.upb.h"
int test_count = 0;
/* Reads exactly `len` bytes from `fd` into `buf`, issuing as many read()
 * calls as needed.
 *
 * Returns true once all bytes have been read (immediately if `len` is 0) and
 * false if read() reports end-of-file before that. A read() error is fatal:
 * it is reported with perror() and the process exits with status 1. */
bool CheckedRead(int fd, void *buf, size_t len) {
  char *cursor = buf;
  size_t remaining = len;

  for (; remaining > 0;) {
    ssize_t got = read(fd, cursor, remaining);
    if (got < 0) {
      perror("reading from test runner");
      exit(1);
    }
    if (got == 0) {
      /* End of file before the full request was satisfied. */
      return false;
    }
    cursor += got;
    remaining -= got;
  }
  return true;
}
/* Writes exactly `len` bytes from `buf` to `fd`.
 *
 * write() is allowed to transfer fewer bytes than requested (for example on
 * a pipe) and to fail with EINTR when interrupted by a signal; neither is an
 * error, so both are retried until everything has been written. Any other
 * failure is reported with perror() and the process exits with status 1. */
void CheckedWrite(int fd, const void *buf, size_t len) {
  const char *cursor = buf;

  while (len > 0) {
    ssize_t written = write(fd, cursor, len);
    if (written < 0 && errno == EINTR) {
      continue; /* Interrupted before any data was transferred: retry. */
    }
    if (written <= 0) {
      /* A zero-byte result for a non-empty request would make this loop spin
       * forever, so it is treated as a failure as well. */
      perror("writing to test runner");
      exit(1);
    }
    cursor += written;
    len -= (size_t)written;
  }
}
/* Returns true when `view` holds exactly the bytes of the NUL-terminated
 * string `str` (same length, same contents). */
bool strview_eql(upb_strview view, const char *str) {
  size_t str_len = strlen(str);

  if (view.size != str_len) {
    return false;
  }
  return memcmp(view.data, str, str_len) == 0;
}
static const char *proto3_msg =
"protobuf_test_messages.proto3.TestAllTypesProto3";
void DoTest(
const conformance_ConformanceRequest* request,
conformance_ConformanceResponse *response,
upb_arena *arena) {
protobuf_test_messages_proto3_TestAllTypesProto3 *test_message;
if (!strview_eql(conformance_ConformanceRequest_message_type(request),
proto3_msg)) {
static const char msg[] = "Only proto3 for now.";
conformance_ConformanceResponse_set_skipped(
response, upb_strview_make(msg, sizeof(msg)));
return;
}
switch (conformance_ConformanceRequest_payload_case(request)) {
case conformance_ConformanceRequest_payload_protobuf_payload: {
upb_strview payload = conformance_ConformanceRequest_protobuf_payload(request);
test_message = protobuf_test_messages_proto3_TestAllTypesProto3_parse(
payload.data, payload.size, arena);
if (!test_message) {
static const char msg[] = "Parse error";
conformance_ConformanceResponse_set_parse_error(
response, upb_strview_make(msg, sizeof(msg)));
return;
}
break;
}
case conformance_ConformanceRequest_payload_NOT_SET:
fprintf(stderr, "conformance_upb: Request didn't have payload.\n");
return;
default: {
static const char msg[] = "Unsupported input format.";
conformance_ConformanceResponse_set_skipped(
response, upb_strview_make(msg, sizeof(msg)));
return;
}
}
switch (conformance_ConformanceRequest_requested_output_format(request)) {
case conformance_UNSPECIFIED:
fprintf(stderr, "conformance_upb: Unspecified output format.\n");
exit(1);
case conformance_PROTOBUF: {
size_t serialized_len;
char *serialized =
protobuf_test_messages_proto3_TestAllTypesProto3_serialize(
test_message, arena, &serialized_len);
if (!serialized) {
static const char msg[] = "Error serializing.";
conformance_ConformanceResponse_set_serialize_error(
response, upb_strview_make(msg, sizeof(msg)));
return;
}
conformance_ConformanceResponse_set_protobuf_payload(
response, upb_strview_make(serialized, serialized_len));
break;
}
default: {
static const char msg[] = "Unsupported output format.";
conformance_ConformanceResponse_set_skipped(
response, upb_strview_make(msg, sizeof(msg)));
return;
}
}
return;
}
/* Handles one request/response exchange with the conformance test runner.
 *
 * Wire format, as implemented here: a 4-byte length prefix read from stdin
 * followed by that many bytes of serialized ConformanceRequest; the reply on
 * stdout is a 4-byte length prefix followed by the serialized
 * ConformanceResponse. The prefixes are transferred in host byte order.
 *
 * Returns false when stdin reaches end-of-file before a new length prefix
 * could be read, true after a response has been written. Unrecoverable
 * conditions (truncated input, allocation or serialization failure) end the
 * process with status 1. */
bool DoTestIo(void) {
  upb_arena *arena;
  char *serialized_input;
  char *serialized_output;
  uint32_t input_size;
  uint32_t output_len;
  size_t output_size;
  conformance_ConformanceRequest *request;
  conformance_ConformanceResponse *response;

  if (!CheckedRead(STDIN_FILENO, &input_size, sizeof(input_size))) {
    /* EOF. */
    return false;
  }

  arena = upb_arena_new();
  serialized_input = upb_malloc(upb_arena_alloc(arena), input_size);
  if (!serialized_input && input_size > 0) {
    fprintf(stderr, "conformance_upb: out of memory reading request.\n");
    exit(1);
  }

  if (!CheckedRead(STDIN_FILENO, serialized_input, input_size)) {
    fprintf(stderr, "conformance_upb: unexpected EOF on stdin.\n");
    exit(1);
  }

  request =
      conformance_ConformanceRequest_parse(serialized_input, input_size, arena);
  response = conformance_ConformanceResponse_new(arena);

  if (request) {
    DoTest(request, response, arena);
  } else {
    /* The parse call reports no status object, so there is no further detail
     * to print; the (empty) response is still sent below. */
    fprintf(stderr, "conformance_upb: parse of ConformanceRequest failed\n");
  }

  serialized_output = conformance_ConformanceResponse_serialize(
      response, arena, &output_size);
  if (!serialized_output) {
    fprintf(stderr, "conformance_upb: failed to serialize response.\n");
    exit(1);
  }

  /* Send the length through a real 32-bit variable: writing the first four
   * bytes of a size_t is only correct on little-endian hosts. */
  output_len = (uint32_t)output_size;
  CheckedWrite(STDOUT_FILENO, &output_len, sizeof(output_len));
  CheckedWrite(STDOUT_FILENO, serialized_output, output_size);

  test_count++;
  upb_arena_free(arena);
  return true;
}
/* Serves conformance requests until DoTestIo() reports end of input, then
 * logs how many tests were handled and exits successfully. */
int main(void) {
  while (DoTestIo()) {
    /* Keep processing requests. */
  }
  fprintf(stderr, "conformance_upb: received EOF from test runner "
                  "after %d tests, exiting\n", test_count);
  return 0;
}

@ -0,0 +1 @@
Required.ProtobufInput.PrematureEofInSubmessageValue.MESSAGE

@ -0,0 +1 @@
Corpus folder for fuzzing

@ -0,0 +1 @@
// Hello World

@ -0,0 +1,15 @@
#include <cstdint>
#include "google/protobuf/descriptor.upb.h"
#include "upb/upb.h"
// Fuzzer entry point: feeds the raw input to the FileDescriptorProto parser.
// The parse result is ignored, and all parser allocations come from an arena
// that is destroyed when this function returns.
extern "C" int LLVMFuzzerTestOneInput(const uint8_t* data, size_t size) {
  const char* bytes = reinterpret_cast<const char*>(data);
  upb::Arena arena;
  google_protobuf_FileDescriptorProto_parse(bytes, size, arena.ptr());
  return 0;
}
#ifndef HAVE_FUZZER
// Stub entry point, compiled only when HAVE_FUZZER is not defined.
int main() {
  return 0;
}
#endif

Binary file not shown.

Binary file not shown.

@ -0,0 +1,149 @@
package benchmarks;
option optimize_for = SPEED;
enum Foo {
FOO_VALUE = 1;
FOO_VALUE2 = 2;
}
message Simple {
message M2 {
optional int32 f1 = 1234567;
}
optional M2 m2 = 1;
}
message SpeedMessage1 {
required string field1 = 1;
optional string field9 = 9;
optional string field18 = 18;
optional bool field80 = 80 [default=false];
optional bool field81 = 81 [default=true];
required int32 field2 = 2;
required int32 field3 = 3;
optional int32 field280 = 280;
optional int32 field6 = 6 [default=0];
optional int64 field22 = 22;
optional string field4 = 4;
repeated fixed64 field5 = 5;
optional bool field59 = 59 [default=false];
optional string field7 = 7;
optional int32 field16 = 16;
optional int32 field130 = 130 [default=0];
optional bool field12 = 12 [default=true];
optional bool field17 = 17 [default=true];
optional bool field13 = 13 [default=true];
optional bool field14 = 14 [default=true];
optional int32 field104 = 104 [default=0];
optional int32 field100 = 100 [default=0];
optional int32 field101 = 101 [default=0];
optional string field102 = 102;
optional string field103 = 103;
optional int32 field29 = 29 [default=0];
optional bool field30 = 30 [default=false];
optional int32 field60 = 60 [default=-1];
optional int32 field271 = 271 [default=-1];
optional int32 field272 = 272 [default=-1];
optional int32 field150 = 150;
optional int32 field23 = 23 [default=0];
optional bool field24 = 24 [default=false];
optional int32 field25 = 25 [default=0];
optional SpeedMessage1SubMessage field15 = 15;
optional bool field78 = 78;
optional int32 field67 = 67 [default=0];
optional int32 field68 = 68;
optional int32 field128 = 128 [default=0];
optional string field129 = 129 [default="xxxxxxxxxxxxxxxxxxxxx"];
optional int32 field131 = 131 [default=0];
optional Foo field132 = 132 [default=FOO_VALUE];
}
message SpeedMessage1SubMessage {
optional int32 field1 = 1 [default=0];
optional int32 field2 = 2 [default=0];
optional int32 field3 = 3 [default=0];
optional string field15 = 15 [default="FOOBAR!"];
optional bool field12 = 12 [default=true];
optional int64 field13 = 13;
optional int64 field14 = 14;
optional int32 field16 = 16;
optional int32 field19 = 19 [default=2];
optional bool field20 = 20 [default=true];
optional bool field28 = 28 [default=true];
optional fixed64 field21 = 21;
optional int32 field22 = 22;
optional bool field23 = 23 [ default=false ];
optional bool field206 = 206 [default=false];
optional fixed32 field203 = 203;
optional int32 field204 = 204;
optional string field205 = 205;
optional uint64 field207 = 207;
optional uint64 field300 = 300;
}
message SpeedMessage2 {
optional string field1 = 1;
optional int64 field3 = 3;
optional int64 field4 = 4;
optional int64 field30 = 30;
optional bool field75 = 75 [default=false];
optional string field6 = 6;
optional bytes field2 = 2;
optional int32 field21 = 21 [default=0];
optional int32 field71 = 71;
optional float field25 = 25;
optional int32 field109 = 109 [default=0];
optional int32 field210 = 210 [default=0];
optional int32 field211 = 211 [default=0];
optional int32 field212 = 212 [default=0];
optional int32 field213 = 213 [default=0];
optional int32 field216 = 216 [default=0];
optional int32 field217 = 217 [default=0];
optional int32 field218 = 218 [default=0];
optional int32 field220 = 220 [default=0];
optional int32 field221 = 221 [default=0];
optional float field222 = 222 [default=0.0];
optional int32 field63 = 63;
repeated group Group1 = 10 {
required float field11 = 11;
optional float field26 = 26;
optional string field12 = 12;
optional string field13 = 13;
repeated string field14 = 14;
required uint64 field15 = 15;
optional int32 field5 = 5;
optional string field27 = 27;
optional int32 field28 = 28;
optional string field29 = 29;
optional string field16 = 16;
repeated string field22 = 22;
repeated int32 field73 = 73;
optional int32 field20 = 20 [default=0];
optional string field24 = 24;
optional SpeedMessage2GroupedMessage field31 = 31;
}
repeated string field128 = 128;
optional int64 field131 = 131;
repeated string field127 = 127;
optional int32 field129 = 129;
repeated int64 field130 = 130;
optional bool field205 = 205 [default=false];
optional bool field206 = 206 [default=false];
}
message SpeedMessage2GroupedMessage {
optional float field1 = 1;
optional float field2 = 2;
optional float field3 = 3 [default=0.0];
optional bool field4 = 4;
optional bool field5 = 5;
optional bool field6 = 6 [default=true];
optional bool field7 = 7 [default=false];
optional float field8 = 8;
optional bool field9 = 9;
optional float field10 = 10;
optional int64 field11 = 11;
}

@ -0,0 +1,9 @@
syntax = "proto2";
import "tests/json/test.proto";
package upb.test.json;
message ImportEnum {
optional MyEnum e = 1;
}

@ -0,0 +1,47 @@
syntax = "proto3";
package upb.test.json;
// Message used by the JSON tests: singular scalar/message/enum fields
// (1-9), their repeated counterparts (11-19), map fields (20-25) and a
// two-member oneof (26-27).
message TestMessage {
int32 optional_int32 = 1;
int64 optional_int64 = 2;
// NOTE(review): the next two fields are named *_uint32 / *_uint64 but are
// declared with the signed types int32 / int64, unlike repeated_uint32 and
// repeated_uint64 below. Confirm whether this is intentional before relying
// on unsigned semantics; changing the type requires regenerating any code or
// descriptors built from this file.
int32 optional_uint32 = 3;
int64 optional_uint64 = 4;
string optional_string = 5;
bytes optional_bytes = 6;
bool optional_bool = 7;
SubMessage optional_msg = 8;
MyEnum optional_enum = 9;
// Repeated counterparts of the singular fields above.
repeated int32 repeated_int32 = 11;
repeated int64 repeated_int64 = 12;
repeated uint32 repeated_uint32 = 13;
repeated uint64 repeated_uint64 = 14;
repeated string repeated_string = 15;
repeated bytes repeated_bytes = 16;
repeated bool repeated_bool = 17;
repeated SubMessage repeated_msg = 18;
repeated MyEnum repeated_enum = 19;
// Maps covering string, int32 and bool keys, and string, int32, bool and
// message values.
map<string, string> map_string_string = 20;
map<int32, string> map_int32_string = 21;
map<bool, string> map_bool_string = 22;
map<string, int32> map_string_int32 = 23;
map<string, bool> map_string_bool = 24;
map<string, SubMessage> map_string_msg = 25;
oneof o {
int32 oneof_int32 = 26;
int64 oneof_int64 = 27;
}
}
message SubMessage {
int32 foo = 1;
}
enum MyEnum {
A = 0;
B = 1;
C = 2;
}

Binary file not shown.

@ -0,0 +1,256 @@
/*
*
* A set of tests for JSON parsing and serialization.
*/
#include "tests/json/test.upbdefs.h"
#include "tests/json/test.upb.h" // Test that it compiles for C++.
#include "tests/test_util.h"
#include "tests/upb_test.h"
#include "upb/handlers.h"
#include "upb/json/parser.h"
#include "upb/json/printer.h"
#include "upb/upb.h"
#include <string>
#include "upb/port_def.inc"
// Macros for readability in test case list: allows us to give TEST("...") /
// EXPECT("...") pairs.
#define TEST(x) x
#define EXPECT_SAME NULL
#define EXPECT(x) x
#define TEST_SENTINEL { NULL, NULL }
struct TestCase {
const char* input;
const char* expected;
};
bool verbose = false;
static TestCase kTestRoundtripMessages[] = {
// Test most fields here.
{
TEST("{\"optionalInt32\":-42,\"optionalString\":\"Test\\u0001Message\","
"\"optionalMsg\":{\"foo\":42},"
"\"optionalBool\":true,\"repeatedMsg\":[{\"foo\":1},"
"{\"foo\":2}]}"),
EXPECT_SAME
},
// We must also recognize raw proto names.
{
TEST("{\"optional_int32\":-42,\"optional_string\":\"Test\\u0001Message\","
"\"optional_msg\":{\"foo\":42},"
"\"optional_bool\":true,\"repeated_msg\":[{\"foo\":1},"
"{\"foo\":2}]}"),
EXPECT("{\"optionalInt32\":-42,\"optionalString\":\"Test\\u0001Message\","
"\"optionalMsg\":{\"foo\":42},"
"\"optionalBool\":true,\"repeatedMsg\":[{\"foo\":1},"
"{\"foo\":2}]}")
},
// Test special escapes in strings.
{
TEST("{\"repeatedString\":[\"\\b\",\"\\r\",\"\\n\",\"\\f\",\"\\t\","
"\"\uFFFF\"]}"),
EXPECT_SAME
},
// Test enum symbolic names.
{
// The common case: parse and print the symbolic name.
TEST("{\"optionalEnum\":\"A\"}"),
EXPECT_SAME
},
{
// Unknown enum value: will be printed as an integer.
TEST("{\"optionalEnum\":42}"),
EXPECT_SAME
},
{
// Known enum value: we're happy to parse an integer but we will re-emit the
// symbolic name.
TEST("{\"optionalEnum\":1}"),
EXPECT("{\"optionalEnum\":\"B\"}")
},
// UTF-8 tests: escapes -> literal UTF8 in output.
{
// Note double escape on \uXXXX: we want the escape to be processed by the
// JSON parser, not by the C++ compiler!
TEST("{\"optionalString\":\"\\u007F\"}"),
EXPECT("{\"optionalString\":\"\x7F\"}")
},
{
TEST("{\"optionalString\":\"\\u0080\"}"),
EXPECT("{\"optionalString\":\"\xC2\x80\"}")
},
{
TEST("{\"optionalString\":\"\\u07FF\"}"),
EXPECT("{\"optionalString\":\"\xDF\xBF\"}")
},
{
TEST("{\"optionalString\":\"\\u0800\"}"),
EXPECT("{\"optionalString\":\"\xE0\xA0\x80\"}")
},
{
TEST("{\"optionalString\":\"\\uFFFF\"}"),
EXPECT("{\"optionalString\":\"\xEF\xBF\xBF\"}")
},
// map-field tests
{
TEST("{\"mapStringString\":{\"a\":\"value1\",\"b\":\"value2\","
"\"c\":\"value3\"}}"),
EXPECT_SAME
},
{
TEST("{\"mapInt32String\":{\"1\":\"value1\",\"-1\":\"value2\","
"\"1234\":\"value3\"}}"),
EXPECT_SAME
},
{
TEST("{\"mapBoolString\":{\"false\":\"value1\",\"true\":\"value2\"}}"),
EXPECT_SAME
},
{
TEST("{\"mapStringInt32\":{\"asdf\":1234,\"jkl;\":-1}}"),
EXPECT_SAME
},
{
TEST("{\"mapStringBool\":{\"asdf\":true,\"jkl;\":false}}"),
EXPECT_SAME
},
{
TEST("{\"mapStringMsg\":{\"asdf\":{\"foo\":42},\"jkl;\":{\"foo\":84}}}"),
EXPECT_SAME
},
TEST_SENTINEL
};
static TestCase kTestRoundtripMessagesPreserve[] = {
// Test most fields here.
{
TEST("{\"optional_int32\":-42,\"optional_string\":\"Test\\u0001Message\","
"\"optional_msg\":{\"foo\":42},"
"\"optional_bool\":true,\"repeated_msg\":[{\"foo\":1},"
"{\"foo\":2}]}"),
EXPECT_SAME
},
TEST_SENTINEL
};
// A bytes sink that accumulates everything written to it in a std::string.
//
// The upb_bytessink returned by Sink() is bound to this object's own string
// and handler table (see the constructor), so it must not be used after the
// StringSink has been destroyed.
class StringSink {
 public:
  StringSink() {
    upb_byteshandler_init(&byteshandler_);
    upb_byteshandler_setstring(&byteshandler_, &str_handler, NULL);
    // &s_ becomes the closure that str_handler receives.
    upb_bytessink_reset(&bytessink_, &byteshandler_, &s_);
  }
  ~StringSink() { }

  // The sink to hand to a producer (e.g. a JSON printer).
  upb_bytessink Sink() { return bytessink_; }

  // Everything written to the sink so far.
  const std::string& Data() { return s_; }

 private:
  // String handler: appends the chunk to the closure string and reports the
  // whole chunk as consumed.
  static size_t str_handler(void* _closure, const void* hd,
                            const char* data, size_t len,
                            const upb_bufhandle* handle) {
    UPB_UNUSED(hd);
    UPB_UNUSED(handle);
    std::string* s = static_cast<std::string*>(_closure);
    s->append(data, len);
    return len;
  }

  upb_byteshandler byteshandler_;
  upb_bytessink bytessink_;
  std::string s_;
};
// Parses `json_src`, feeds the parse events straight into a JSON printer and
// checks that the printed text equals `json_expected`.
//
// json_src:           JSON text to parse.
// json_expected:      exact text the printer must produce.
// serialize_handlers: printer handlers for the message type.
// parser_method:      parser method for the message type.
// seam:               passed to the first ParseBuffer() call; the second call
//                     is made with -1. Callers sweep it over every offset of
//                     the input.
//
// Aborts the process (after printing a diagnostic) if the output differs.
void test_json_roundtrip_message(const char* json_src,
                                 const char* json_expected,
                                 const upb::Handlers* serialize_handlers,
                                 const upb::json::ParserMethodPtr parser_method,
                                 int seam) {
  VerboseParserEnvironment env(verbose);
  StringSink data_sink;
  upb::json::PrinterPtr printer = upb::json::PrinterPtr::Create(
      env.arena(), serialize_handlers, data_sink.Sink());
  upb::json::ParserPtr parser = upb::json::ParserPtr::Create(
      env.arena(), parser_method, NULL, printer.input(), env.status(), false);
  env.ResetBytesSink(parser.input());
  env.Reset(json_src, strlen(json_src), false, false);

  bool ok = env.Start() &&
            env.ParseBuffer(seam) &&
            env.ParseBuffer(-1) &&
            env.End();

  ASSERT(ok);
  ASSERT(env.CheckConsistency());

  // Compare complete strings. A memcmp() limited to the output's length
  // accepts empty or truncated output (any prefix of the expectation) and
  // reads past the end of json_expected when the output is longer.
  const std::string& actual = data_sink.Data();
  if (actual != json_expected) {
    fprintf(stderr,
            "JSON parse/serialize roundtrip result differs:\n"
            "Original:\n%s\nExpected:\n%s\nParsed/Serialized:\n%s\n",
            json_src, json_expected, actual.c_str());
    abort();
  }
}
// Starts with a message in JSON format, parses and directly serializes again,
// and compares the result.
void test_json_roundtrip() {
upb::SymbolTable symtab;
upb::HandlerCache serialize_handlercache(
upb::json::PrinterPtr::NewCache(false));
upb::json::CodeCache parse_codecache;
upb::MessageDefPtr md(upb_test_json_TestMessage_getmsgdef(symtab.ptr()));
ASSERT(md);
const upb::Handlers* serialize_handlers = serialize_handlercache.Get(md);
const upb::json::ParserMethodPtr parser_method = parse_codecache.Get(md);
ASSERT(serialize_handlers);
for (const TestCase* test_case = kTestRoundtripMessages;
test_case->input != NULL; test_case++) {
const char *expected =
(test_case->expected == EXPECT_SAME) ?
test_case->input :
test_case->expected;
for (size_t i = 0; i < strlen(test_case->input); i++) {
test_json_roundtrip_message(test_case->input, expected,
serialize_handlers, parser_method, i);
}
}
serialize_handlercache = upb::json::PrinterPtr::NewCache(true);
serialize_handlers = serialize_handlercache.Get(md);
for (const TestCase* test_case = kTestRoundtripMessagesPreserve;
test_case->input != NULL; test_case++) {
const char *expected =
(test_case->expected == EXPECT_SAME) ?
test_case->input :
test_case->expected;
for (size_t i = 0; i < strlen(test_case->input); i++) {
test_json_roundtrip_message(test_case->input, expected,
serialize_handlers, parser_method, i);
}
}
}
extern "C" {

// C-linkage entry point for this test file; the command-line arguments are
// not used. Always returns 0.
int run_tests(int argc, char *argv[]) {
  UPB_UNUSED(argv);
  UPB_UNUSED(argc);

  test_json_roundtrip();
  return 0;
}

}

File diff suppressed because it is too large Load Diff

@ -0,0 +1,128 @@
syntax = "proto2";
enum TestEnum {
FOO = 1;
}
message Empty {}
message DecoderTest {
optional double f_double = 1;
optional float f_float = 2;
optional int64 f_int64 = 3;
optional uint64 f_uint64 = 4;
optional int32 f_int32 = 5;
optional fixed64 f_fixed64 = 6;
optional fixed32 f_fixed32 = 7;
optional bool f_bool = 8;
optional string f_string = 9;
optional DecoderTest f_message = 11;
optional bytes f_bytes = 12;
optional uint32 f_uint32 = 13;
optional TestEnum f_enum = 14;
optional sfixed32 f_sfixed32 = 15;
optional sfixed64 f_sfixed64 = 16;
optional sint32 f_sint32 = 17;
optional sint64 f_sint64 = 18;
optional string nop_field = 40;
repeated double r_double = 536869912;
repeated float r_float = 536869913;
repeated int64 r_int64 = 536869914;
repeated uint64 r_uint64 = 536869915;
repeated int32 r_int32 = 536869916;
repeated fixed64 r_fixed64 = 536869917;
repeated fixed32 r_fixed32 = 536869918;
repeated bool r_bool = 536869919;
repeated string r_string = 536869920;
repeated DecoderTest r_message = 536869922;
repeated bytes r_bytes = 536869923;
repeated uint32 r_uint32 = 536869924;
repeated TestEnum r_enum = 536869925;
repeated sfixed32 r_sfixed32 = 536869926;
repeated sfixed64 r_sfixed64 = 536869927;
repeated sint32 r_sint32 = 536869928;
repeated sint64 r_sint64 = 536869929;
optional group F_group = 10 {
optional double f_double = 1;
optional float f_float = 2;
optional int64 f_int64 = 3;
optional uint64 f_uint64 = 4;
optional int32 f_int32 = 5;
optional fixed64 f_fixed64 = 6;
optional fixed32 f_fixed32 = 7;
optional bool f_bool = 8;
optional string f_string = 9;
optional DecoderTest f_message = 11;
optional bytes f_bytes = 12;
optional uint32 f_uint32 = 13;
optional TestEnum f_enum = 14;
optional sfixed32 f_sfixed32 = 15;
optional sfixed64 f_sfixed64 = 16;
optional sint32 f_sint32 = 17;
optional sint64 f_sint64 = 18;
optional string nop_field = 40;
repeated double r_double = 536869912;
repeated float r_float = 536869913;
repeated int64 r_int64 = 536869914;
repeated uint64 r_uint64 = 536869915;
repeated int32 r_int32 = 536869916;
repeated fixed64 r_fixed64 = 536869917;
repeated fixed32 r_fixed32 = 536869918;
repeated bool r_bool = 536869919;
repeated string r_string = 536869920;
repeated DecoderTest r_message = 536869922;
repeated bytes r_bytes = 536869923;
repeated uint32 r_uint32 = 536869924;
repeated TestEnum r_enum = 536869925;
repeated sfixed32 r_sfixed32 = 536869926;
repeated sfixed64 r_sfixed64 = 536869927;
repeated sint32 r_sint32 = 536869928;
repeated sint64 r_sint64 = 536869929;
}
optional group R_group = 536869921 {
optional double f_double = 1;
optional float f_float = 2;
optional int64 f_int64 = 3;
optional uint64 f_uint64 = 4;
optional int32 f_int32 = 5;
optional fixed64 f_fixed64 = 6;
optional fixed32 f_fixed32 = 7;
optional bool f_bool = 8;
optional string f_string = 9;
optional DecoderTest f_message = 11;
optional bytes f_bytes = 12;
optional uint32 f_uint32 = 13;
optional TestEnum f_enum = 14;
optional sfixed32 f_sfixed32 = 15;
optional sfixed64 f_sfixed64 = 16;
optional sint32 f_sint32 = 17;
optional sint64 f_sint64 = 18;
optional string nop_field = 40;
repeated double r_double = 536869912;
repeated float r_float = 536869913;
repeated int64 r_int64 = 536869914;
repeated uint64 r_uint64 = 536869915;
repeated int32 r_int32 = 536869916;
repeated fixed64 r_fixed64 = 536869917;
repeated fixed32 r_fixed32 = 536869918;
repeated bool r_bool = 536869919;
repeated string r_string = 536869920;
repeated DecoderTest r_message = 536869922;
repeated bytes r_bytes = 536869923;
repeated uint32 r_uint32 = 536869924;
repeated TestEnum r_enum = 536869925;
repeated sfixed32 r_sfixed32 = 536869926;
repeated sfixed64 r_sfixed64 = 536869927;
repeated sint32 r_sint32 = 536869928;
repeated sint64 r_sint64 = 536869929;
}
}

@ -0,0 +1,48 @@
#include "tests/test_util.h"
#include "tests/upb_test.h"
#include "upb/bindings/stdc++/string.h"
#include "google/protobuf/descriptor.upb.h"
#include "google/protobuf/descriptor.upbdefs.h"
#include "upb/pb/decoder.h"
#include "upb/pb/encoder.h"
#include "upb/port_def.inc"
#include <iostream>
// Round-trips the serialized descriptor.proto through the binary decoder and
// encoder and asserts that the re-encoded bytes equal the input.
//
// Data flow: input -> decoder -> encoder -> string_sink -> output.
void test_pb_roundtrip() {
// Input: the serialized descriptor embedded in the generated upbdefinit
// structure.
std::string input(
google_protobuf_descriptor_proto_upbdefinit.descriptor.data,
google_protobuf_descriptor_proto_upbdefinit.descriptor.size);
// Prints the input size to stdout.
std::cout << input.size() << "\n";
upb::SymbolTable symtab;
// The decoder's code cache is constructed from a pointer to the encoder's
// handler cache, so encoder_cache is declared first.
upb::HandlerCache encoder_cache(upb::pb::EncoderPtr::NewCache());
upb::pb::CodeCache decoder_cache(&encoder_cache);
upb::Arena arena;
// NOTE(review): status is handed to the decoder below but is never inspected
// in this function; only the PutBuffer() result is asserted.
upb::Status status;
upb::MessageDefPtr md(
google_protobuf_FileDescriptorProto_getmsgdef(symtab.ptr()));
ASSERT(md);
const upb::Handlers *encoder_handlers = encoder_cache.Get(md);
ASSERT(encoder_handlers);
const upb::pb::DecoderMethodPtr method = decoder_cache.Get(md);
// Sink that collects the encoder's output into `output`.
std::string output;
upb::StringSink string_sink(&output);
// Chain the stages: the encoder writes to the string sink, and the decoder
// writes to the encoder.
upb::pb::EncoderPtr encoder =
upb::pb::EncoderPtr::Create(&arena, encoder_handlers, string_sink.input());
upb::pb::DecoderPtr decoder =
upb::pb::DecoderPtr::Create(&arena, method, encoder.input(), &status);
// Push the whole input through the pipeline.
bool ok = upb::PutBuffer(input, decoder.input());
ASSERT(ok);
ASSERT(input == output);
}
extern "C" {

// C-linkage entry point for this test file; the command-line arguments are
// not used. Always returns 0.
int run_tests(int argc, char *argv[]) {
  UPB_UNUSED(argv);
  UPB_UNUSED(argc);

  test_pb_roundtrip();
  return 0;
}

}

@ -0,0 +1,117 @@
#include <stdio.h>
#include "upb/pb/varint.int.h"
#include "tests/upb_test.h"
#include "upb/port_def.inc"
/* Test that we can round-trip from int->varint->int.
 *
 * decoder: the varint decoding function under test.
 * num:     the value to encode and decode.
 *
 * The value is always checked through upb_vencode64(). Values that fit in 32
 * bits are additionally checked through upb_vencode32(), whose result is a
 * 64-bit word that is copied into a byte buffer before decoding. */
static void test_varint_for_num(upb_decoderet (*decoder)(const char*),
                                uint64_t num) {
  char buf[16];
  size_t bytes;
  upb_decoderet r;

  memset(buf, 0xff, sizeof(buf));
  bytes = upb_vencode64(num, buf);

  if (num <= UINT32_MAX) {
    uint64_t encoded = upb_vencode32(num);
    char buf2[16];

    memset(buf2, 0, sizeof(buf2));
    memcpy(buf2, &encoded, sizeof(encoded));
#ifdef UPB_BIG_ENDIAN
    {
      /* Reverse the eight copied bytes in place. */
      size_t i;
      for (i = 0; i < sizeof(encoded) / 2; i++) {
        char tmp = buf2[i];
        buf2[i] = buf2[sizeof(encoded) - 1 - i];
        buf2[sizeof(encoded) - 1 - i] = tmp;
      }
    }
#endif
    /* The outer `r` is reused here; it is reassigned again below. */
    r = decoder(buf2);
    ASSERT(r.val == num);
    ASSERT(r.p == buf2 + upb_value_size(encoded));
    ASSERT(upb_zzenc_32(upb_zzdec_32(num)) == num);
  }

  r = decoder(buf);
  ASSERT(r.val == num);
  ASSERT(r.p == buf + bytes);
  ASSERT(upb_zzenc_64(upb_zzdec_64(num)) == num);
}
/* Exercises one varint decoder implementation.
 *
 * decoder: the decoding function under test.
 *
 * Checks an over-long varint, a table of fixed encodings, and a sweep of
 * values round-tripped through test_varint_for_num(). */
static void test_varint_decoder(upb_decoderet (*decoder)(const char*)) {
/* TEST(bytes, expected_val): copies the literal `bytes` (without its
 * terminator) into a buffer pre-filled with 0xff, decodes it, and asserts
 * both the decoded value and that decoding stopped right after the copied
 * bytes. */
#define TEST(bytes, expected_val) {\
size_t n = sizeof(bytes) - 1; /* for NULL */ \
char buf[UPB_PB_VARINT_MAX_LEN]; \
upb_decoderet r; \
memset(buf, 0xff, sizeof(buf)); \
memcpy(buf, bytes, n); \
r = decoder(buf); \
ASSERT(r.val == expected_val); \
ASSERT(r.p == buf + n); \
}
uint64_t num;
/* Ten bytes of -1 followed by two bytes of 1; the remaining elements of the
 * 16-byte array are zero-initialized. */
char twelvebyte[16] = {-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 1, 1};
const char *twelvebyte_buf = twelvebyte;
/* A varint that terminates before hitting the end of the provided buffer,
 * but in too many bytes (11 instead of 10). */
upb_decoderet r = decoder(twelvebyte_buf);
/* The decoder is expected to signal the failure with a NULL end pointer. */
ASSERT(r.p == NULL);
/* Fixed encodings, from one byte up to ten bytes long. */
TEST("\x00", 0ULL);
TEST("\x01", 1ULL);
TEST("\x81\x14", 0xa01ULL);
TEST("\x81\x03", 0x181ULL);
TEST("\x81\x83\x07", 0x1c181ULL);
TEST("\x81\x83\x87\x0f", 0x1e1c181ULL);
TEST("\x81\x83\x87\x8f\x1f", 0x1f1e1c181ULL);
TEST("\x81\x83\x87\x8f\x9f\x3f", 0x1f9f1e1c181ULL);
TEST("\x81\x83\x87\x8f\x9f\xbf\x7f", 0x1fdf9f1e1c181ULL);
TEST("\x81\x83\x87\x8f\x9f\xbf\xff\x01", 0x3fdf9f1e1c181ULL);
TEST("\x81\x83\x87\x8f\x9f\xbf\xff\x81\x03", 0x303fdf9f1e1c181ULL);
TEST("\x81\x83\x87\x8f\x9f\xbf\xff\x81\x83\x07", 0x8303fdf9f1e1c181ULL);
#undef TEST
/* Sweep: start at 5 and multiply by 1.5 each step (floating-point
 * arithmetic) for as long as the next value stays below UINT64_MAX. */
for (num = 5; num * 1.5 < UINT64_MAX; num *= 1.5) {
test_varint_for_num(decoder, num);
}
/* Zero is not reached by the sweep above, so test it explicitly. */
test_varint_for_num(decoder, 0);
}
/* TEST_VARINT_DECODER(name) defines two functions:
 *   _upb_vdecode_<name>(p): an out-of-line wrapper around upb_vdecode_<name>
 *   test_<name>():          runs test_varint_decoder() on that wrapper
 *
 * The final line of the macro body deliberately has no line continuation, so
 * the definition ends there no matter what follows it in the file. */
#define TEST_VARINT_DECODER(decoder) \
  /* Create non-inline versions for convenient inspection of assembly language \
   * output. */ \
  upb_decoderet _upb_vdecode_ ## decoder(const char *p) { \
    return upb_vdecode_ ## decoder(p); \
  } \
  void test_ ## decoder(void) { \
    test_varint_decoder(&_upb_vdecode_ ## decoder); \
  }

TEST_VARINT_DECODER(check2_branch32)
TEST_VARINT_DECODER(check2_branch64)
/* Entry point for this test file: runs the test generated for each varint
 * decoder variant. The command-line arguments are not used. Always returns
 * 0. */
int run_tests(int argc, char *argv[]) {
  UPB_UNUSED(argv);
  UPB_UNUSED(argc);

  test_check2_branch32();
  test_check2_branch64();
  return 0;
}

@ -0,0 +1,68 @@
// A series of messages with various kinds of cycles in them.
// +-+---+ +---+ +---+
// V | | V | V |
// A -> B-+-> C -> D---+--->E---+
// ^ |`---|--------^
// +----------+----+ F
syntax = "proto2";
message A {
optional B b = 1;
}
message B {
optional B b = 1;
optional C c = 2;
}
message C {
optional A a = 1;
optional B b = 2;
optional D d = 3;
optional E e = 4;
}
message D {
optional A a = 1;
optional D d = 2;
optional E e = 3;
}
message E {
optional E e = 1;
}
message F {
optional E e = 1;
}
// A proto with a bunch of simple primitives.
message SimplePrimitives {
optional fixed64 u64 = 1;
optional fixed32 u32 = 2;
optional double dbl = 3;
optional float flt = 5;
optional sint64 i64 = 6;
optional sint32 i32 = 7;
optional bool b = 8;
optional string str = 9;
oneof foo {
int32 oneof_int32 = 10;
string oneof_string = 11;
}
oneof bar {
int64 oneof_int64 = 13;
bytes oneof_bytes = 14;
}
message Nested {
oneof foo {
int32 oneof_int32 = 10;
string b = 11;
}
}
}

Binary file not shown.

@ -0,0 +1,957 @@
/*
*
* Tests for C++ wrappers.
*/
#include <stdio.h>
#include <string.h>
#include <fstream>
#include <iostream>
#include <set>
#include <sstream>
#include "tests/test_cpp.upbdefs.h"
#include "tests/upb_test.h"
#include "upb/def.h"
#include "upb/handlers.h"
#include "upb/pb/decoder.h"
#include "upb/pb/textprinter.h"
#include "upb/port_def.inc"
#include "upb/upb.h"
// Inserts `val` into `container` and asserts that the insertion took place.
//
// T must provide an insert() whose result has a boolean `.second` member
// (used here as the "was inserted" flag) and a nested value_type.
template <class T>
void AssertInsert(T* const container, const typename T::value_type& val) {
// The insert is performed in its own statement, outside the ASSERT macro.
bool inserted = container->insert(val).second;
ASSERT(inserted);
}
//
// Tests for registering and calling handlers in all their variants.
// This test code is very repetitive because we have to declare each
// handler function variant separately, and they all have different
// signatures so it does not lend itself well to templates.
//
// We test three handler types:
// StartMessage (no data params)
// Int32 (1 data param (int32_t))
// String Buf (2 data params (const char*, size_t))
//
// For each handler type we test all 8 handler variants:
// (handler data?) x (function/method) x (returns {void, success})
//
// The one notable thing we don't test at the moment is
// StartSequence/StartString handlers: these are different from StartMessage()
// in that they return void* for the sub-closure. But this is exercised in
// other tests.
//
// Sentinel every tester must end up with in handler_data_val_: handlers bound
// with UpbBind() copy it out of their handler data, while testers without
// handler data store it directly in Register().
static const int kExpectedHandlerData = 1232323;
class StringBufTesterBase {
public:
static const int kFieldNumber = 3;
StringBufTesterBase() : seen_(false), handler_data_val_(0) {}
void CallAndVerify(upb::Sink sink, upb::FieldDefPtr f) {
upb_selector_t start;
ASSERT(upb_handlers_getselector(f.ptr(), UPB_HANDLER_STARTSTR, &start));
upb_selector_t str;
ASSERT(upb_handlers_getselector(f.ptr(), UPB_HANDLER_STRING, &str));
ASSERT(!seen_);
upb::Sink sub;
sink.StartMessage();
sink.StartString(start, 0, &sub);
size_t ret = sub.PutStringBuffer(str, &buf_, 5, &handle_);
ASSERT(seen_);
ASSERT(len_ == 5);
ASSERT(ret == 5);
ASSERT(handler_data_val_ == kExpectedHandlerData);
}
protected:
bool seen_;
int handler_data_val_;
size_t len_;
char buf_;
upb_bufhandle handle_;
};
// Test 8 combinations of:
// (handler data?) x (buffer handle?) x (function/method)
//
// Then we add one test each for this variation: to prevent combinatorial
// explosion of these tests we don't test the full 16 combinations, but
// rely on our knowledge that the implementation processes the return wrapping
// in a second separate and independent stage:
//
// (returns {size_t, bool})
// String handler variant: void-returning member function, no bound handler
// data, no buffer-handle parameter.
class StringBufTesterVoidMethodNoHandlerDataNoHandle
    : public StringBufTesterBase {
 public:
  typedef StringBufTesterVoidMethodNoHandlerDataNoHandle ME;

  // Registers Handler() as the string handler for `f`.  No handler data is
  // bound, so the expected value is stored directly.
  void Register(upb::HandlersPtr h, upb::FieldDefPtr f) {
    ASSERT(h.SetStringHandler(f, UpbMakeHandler(&ME::Handler)));
    handler_data_val_ = kExpectedHandlerData;
  }

 private:
  // Checks the buffer pointer, then records the call and its length.
  void Handler(const char *buf, size_t len) {
    ASSERT(buf == &buf_);
    seen_ = true;
    len_ = len;
  }
};
// String handler variant: void-returning member function, no bound handler
// data, with a buffer-handle parameter.
class StringBufTesterVoidMethodNoHandlerDataWithHandle
    : public StringBufTesterBase {
 public:
  typedef StringBufTesterVoidMethodNoHandlerDataWithHandle ME;

  // Registers Handler() as the string handler for `f`.  No handler data is
  // bound, so the expected value is stored directly.
  void Register(upb::HandlersPtr h, upb::FieldDefPtr f) {
    ASSERT(h.SetStringHandler(f, UpbMakeHandler(&ME::Handler)));
    handler_data_val_ = kExpectedHandlerData;
  }

 private:
  // Checks the buffer and handle pointers, then records the call and length.
  void Handler(const char *buf, size_t len, const upb_bufhandle* handle) {
    ASSERT(buf == &buf_);
    ASSERT(handle == &handle_);
    seen_ = true;
    len_ = len;
  }
};
// String handler variant: void-returning member function, with bound handler
// data, no buffer-handle parameter.
class StringBufTesterVoidMethodWithHandlerDataNoHandle
    : public StringBufTesterBase {
 public:
  typedef StringBufTesterVoidMethodWithHandlerDataNoHandle ME;

  // Registers Handler() for `f`, binding a heap-allocated int holding the
  // expected value as handler data.
  void Register(upb::HandlersPtr h, upb::FieldDefPtr f) {
    ASSERT(h.SetStringHandler(
        f, UpbBind(&ME::Handler, new int(kExpectedHandlerData))));
  }

 private:
  // Checks the buffer pointer, then records the handler data, call and length.
  void Handler(const int* hd, const char *buf, size_t len) {
    ASSERT(buf == &buf_);
    handler_data_val_ = *hd;
    seen_ = true;
    len_ = len;
  }
};
// String handler variant: void-returning member function, with bound handler
// data and a buffer-handle parameter.
class StringBufTesterVoidMethodWithHandlerDataWithHandle
    : public StringBufTesterBase {
 public:
  typedef StringBufTesterVoidMethodWithHandlerDataWithHandle ME;

  // Registers Handler() for `f`, binding a heap-allocated int holding the
  // expected value as handler data.
  void Register(upb::HandlersPtr h, upb::FieldDefPtr f) {
    ASSERT(h.SetStringHandler(
        f, UpbBind(&ME::Handler, new int(kExpectedHandlerData))));
  }

 private:
  // Checks the buffer and handle pointers, then records the handler data,
  // call and length.
  void Handler(const int* hd, const char* buf, size_t len,
               const upb_bufhandle* handle) {
    ASSERT(buf == &buf_);
    ASSERT(handle == &handle_);
    handler_data_val_ = *hd;
    seen_ = true;
    len_ = len;
  }
};
// String handler variant: void-returning static function taking the tester
// as closure, no bound handler data, no buffer-handle parameter.
class StringBufTesterVoidFunctionNoHandlerDataNoHandle
    : public StringBufTesterBase {
 public:
  typedef StringBufTesterVoidFunctionNoHandlerDataNoHandle ME;

  // Registers Handler() as the string handler for `f`.  No handler data is
  // bound, so the expected value is stored directly.
  void Register(upb::HandlersPtr h, upb::FieldDefPtr f) {
    ASSERT(h.SetStringHandler(f, UpbMakeHandler(&ME::Handler)));
    handler_data_val_ = kExpectedHandlerData;
  }

 private:
  // Checks the buffer pointer, then records the call and length on `t`.
  static void Handler(ME* t, const char *buf, size_t len) {
    ASSERT(buf == &t->buf_);
    t->seen_ = true;
    t->len_ = len;
  }
};
// String handler variant: void-returning static function taking the tester
// as closure, no bound handler data, with a buffer-handle parameter.
class StringBufTesterVoidFunctionNoHandlerDataWithHandle
    : public StringBufTesterBase {
 public:
  typedef StringBufTesterVoidFunctionNoHandlerDataWithHandle ME;

  // Registers Handler() as the string handler for `f`.  No handler data is
  // bound, so the expected value is stored directly.
  void Register(upb::HandlersPtr h, upb::FieldDefPtr f) {
    ASSERT(h.SetStringHandler(f, UpbMakeHandler(&ME::Handler)));
    handler_data_val_ = kExpectedHandlerData;
  }

 private:
  // Checks the buffer and handle pointers, then records the call and length
  // on `t`.
  static void Handler(ME* t, const char* buf, size_t len,
                      const upb_bufhandle* handle) {
    ASSERT(buf == &t->buf_);
    ASSERT(handle == &t->handle_);
    t->seen_ = true;
    t->len_ = len;
  }
};
// String handler variant: void-returning static function taking the tester
// as closure, with bound handler data, no buffer-handle parameter.
class StringBufTesterVoidFunctionWithHandlerDataNoHandle
    : public StringBufTesterBase {
 public:
  typedef StringBufTesterVoidFunctionWithHandlerDataNoHandle ME;

  // Registers Handler() for `f`, binding a heap-allocated int holding the
  // expected value as handler data.
  void Register(upb::HandlersPtr h, upb::FieldDefPtr f) {
    ASSERT(h.SetStringHandler(
        f, UpbBind(&ME::Handler, new int(kExpectedHandlerData))));
  }

 private:
  // Checks the buffer pointer, then records the handler data, call and length
  // on `t`.
  static void Handler(ME* t, const int* hd, const char *buf, size_t len) {
    ASSERT(buf == &t->buf_);
    t->handler_data_val_ = *hd;
    t->seen_ = true;
    t->len_ = len;
  }
};
// String handler variant: void-returning static function taking the tester
// as closure, with bound handler data and a buffer-handle parameter.
class StringBufTesterVoidFunctionWithHandlerDataWithHandle
    : public StringBufTesterBase {
 public:
  typedef StringBufTesterVoidFunctionWithHandlerDataWithHandle ME;

  // Registers Handler() for `f`, binding a heap-allocated int holding the
  // expected value as handler data.
  void Register(upb::HandlersPtr h, upb::FieldDefPtr f) {
    ASSERT(h.SetStringHandler(
        f, UpbBind(&ME::Handler, new int(kExpectedHandlerData))));
  }

 private:
  // Checks the buffer and handle pointers, then records the handler data,
  // call and length on `t`.
  static void Handler(ME* t, const int* hd, const char* buf, size_t len,
                      const upb_bufhandle* handle) {
    ASSERT(buf == &t->buf_);
    ASSERT(handle == &t->handle_);
    t->handler_data_val_ = *hd;
    t->seen_ = true;
    t->len_ = len;
  }
};
// String handler variant: member function returning size_t (the length it
// was given), no bound handler data, no buffer-handle parameter.
class StringBufTesterSizeTMethodNoHandlerDataNoHandle
    : public StringBufTesterBase {
 public:
  typedef StringBufTesterSizeTMethodNoHandlerDataNoHandle ME;

  // Registers Handler() as the string handler for `f`.  No handler data is
  // bound, so the expected value is stored directly.
  void Register(upb::HandlersPtr h, upb::FieldDefPtr f) {
    ASSERT(h.SetStringHandler(f, UpbMakeHandler(&ME::Handler)));
    handler_data_val_ = kExpectedHandlerData;
  }

 private:
  // Checks the buffer pointer, records the call and length, and returns `len`.
  size_t Handler(const char *buf, size_t len) {
    ASSERT(buf == &buf_);
    seen_ = true;
    len_ = len;
    return len;
  }
};
// String handler variant: member function returning bool (always true), no
// bound handler data, no buffer-handle parameter.
class StringBufTesterBoolMethodNoHandlerDataNoHandle
    : public StringBufTesterBase {
 public:
  typedef StringBufTesterBoolMethodNoHandlerDataNoHandle ME;

  // Registers Handler() as the string handler for `f`.  No handler data is
  // bound, so the expected value is stored directly.
  void Register(upb::HandlersPtr h, upb::FieldDefPtr f) {
    ASSERT(h.SetStringHandler(f, UpbMakeHandler(&ME::Handler)));
    handler_data_val_ = kExpectedHandlerData;
  }

 private:
  // Checks the buffer pointer, records the call and length, and returns true.
  bool Handler(const char *buf, size_t len) {
    ASSERT(buf == &buf_);
    seen_ = true;
    len_ = len;
    return true;
  }
};
// Shared fixture for the StartMessage handler tests: the subclass handler
// sets seen_ (and possibly handler_data_val_), and CallAndVerify() checks
// both after issuing StartMessage on the sink.
class StartMsgTesterBase {
 public:
  // The field looked up with this number is not needed by these tests, but
  // the test harness still requires that one be provided.
  static const int kFieldNumber = 3;

  StartMsgTesterBase() : seen_(false), handler_data_val_(0) {}

  // Issues StartMessage on `sink` and asserts that the handler ran and that
  // the expected handler-data value was recorded.  `f` is ignored.
  void CallAndVerify(upb::Sink sink, upb::FieldDefPtr f) {
    UPB_UNUSED(f);

    ASSERT(!seen_);
    sink.StartMessage();
    ASSERT(seen_);
    ASSERT(handler_data_val_ == kExpectedHandlerData);
  }

 protected:
  bool seen_;             // Set by the handler when it is invoked.
  int handler_data_val_;  // Handler-data value observed (or preset).
};
// Test all 8 combinations of:
// (handler data?) x (function/method) x (returns {void, bool})
class StartMsgTesterVoidFunctionNoHandlerData : public StartMsgTesterBase {
public:
typedef StartMsgTesterVoidFunctionNoHandlerData ME;
void Register(upb::HandlersPtr h, upb::FieldDefPtr f) {
UPB_UNUSED(f);
ASSERT(h.SetStartMessageHandler(UpbMakeHandler(&Handler)));
handler_data_val_ = kExpectedHandlerData;
}
private:
//static void Handler(ME* t) {
static void Handler(ME* t) {
t->seen_ = true;
}
};
class StartMsgTesterBoolFunctionNoHandlerData : public StartMsgTesterBase {
public:
typedef StartMsgTesterBoolFunctionNoHandlerData ME;
void Register(upb::HandlersPtr h, upb::FieldDefPtr f) {
UPB_UNUSED(f);
ASSERT(h.SetStartMessageHandler(UpbMakeHandler(&Handler)));
handler_data_val_ = kExpectedHandlerData;
}
private:
static bool Handler(ME* t) {
t->seen_ = true;
return true;
}
};
// StartMessage handler variant: member function returning void, no bound
// handler data.
class StartMsgTesterVoidMethodNoHandlerData : public StartMsgTesterBase {
 public:
  using ME = StartMsgTesterVoidMethodNoHandlerData;

  // Registers Handler() as the start-message handler; `f` is not needed.
  void Register(upb::HandlersPtr h, upb::FieldDefPtr f) {
    UPB_UNUSED(f);
    ASSERT(h.SetStartMessageHandler(UpbMakeHandler(&ME::Handler)));
    // No handler data is bound, so store the expected value directly.
    handler_data_val_ = kExpectedHandlerData;
  }

 private:
  // Marks this tester as having been called.
  void Handler() { seen_ = true; }
};
// StartMessage handler variant: member function returning bool, no bound
// handler data.
class StartMsgTesterBoolMethodNoHandlerData : public StartMsgTesterBase {
 public:
  using ME = StartMsgTesterBoolMethodNoHandlerData;

  // Registers Handler() as the start-message handler; `f` is not needed.
  void Register(upb::HandlersPtr h, upb::FieldDefPtr f) {
    UPB_UNUSED(f);
    ASSERT(h.SetStartMessageHandler(UpbMakeHandler(&ME::Handler)));
    // No handler data is bound, so store the expected value directly.
    handler_data_val_ = kExpectedHandlerData;
  }

 private:
  // Marks this tester as called and reports success.
  bool Handler() {
    seen_ = true;
    return true;
  }
};
class StartMsgTesterVoidFunctionWithHandlerData : public StartMsgTesterBase {
public:
typedef StartMsgTesterVoidFunctionWithHandlerData ME;
void Register(upb::HandlersPtr h, upb::FieldDefPtr f) {
UPB_UNUSED(f);
ASSERT(h.SetStartMessageHandler(
UpbBind(&Handler, new int(kExpectedHandlerData))));
}
private:
static void Handler(ME* t, const int* hd) {
t->handler_data_val_ = *hd;
t->seen_ = true;
}
};
class StartMsgTesterBoolFunctionWithHandlerData : public StartMsgTesterBase {
public:
typedef StartMsgTesterBoolFunctionWithHandlerData ME;
void Register(upb::HandlersPtr h, upb::FieldDefPtr f) {
UPB_UNUSED(f);
ASSERT(h.SetStartMessageHandler(
UpbBind(&Handler, new int(kExpectedHandlerData))));
}
private:
static bool Handler(ME* t, const int* hd) {
t->handler_data_val_ = *hd;
t->seen_ = true;
return true;
}
};
// StartMessage handler variant: member function returning void, with bound
// handler data.
class StartMsgTesterVoidMethodWithHandlerData : public StartMsgTesterBase {
 public:
  using ME = StartMsgTesterVoidMethodWithHandlerData;

  // Registers Handler() with a heap-allocated int holding the expected value
  // bound as handler data; `f` is not needed.
  void Register(upb::HandlersPtr h, upb::FieldDefPtr f) {
    UPB_UNUSED(f);
    ASSERT(h.SetStartMessageHandler(
        UpbBind(&ME::Handler, new int(kExpectedHandlerData))));
  }

 private:
  // Records the call and the bound handler-data value.
  void Handler(const int* hd) {
    seen_ = true;
    handler_data_val_ = *hd;
  }
};
// StartMessage handler variant: member function returning bool, with bound
// handler data.
class StartMsgTesterBoolMethodWithHandlerData : public StartMsgTesterBase {
 public:
  using ME = StartMsgTesterBoolMethodWithHandlerData;

  // Registers Handler() with a heap-allocated int holding the expected value
  // bound as handler data; `f` is not needed.
  void Register(upb::HandlersPtr h, upb::FieldDefPtr f) {
    UPB_UNUSED(f);
    ASSERT(h.SetStartMessageHandler(
        UpbBind(&ME::Handler, new int(kExpectedHandlerData))));
  }

 private:
  // Records the call and the bound handler-data value; reports success.
  bool Handler(const int* hd) {
    seen_ = true;
    handler_data_val_ = *hd;
    return true;
  }
};
// Shared fixture for the int32 value handler tests: the subclass handler
// records the value it received, and CallAndVerify() pushes the value 5
// through the sink and checks the recorded state.
class Int32ValueTesterBase {
 public:
  static const int kFieldNumber = 1;

  Int32ValueTesterBase() : seen_(false), val_(0), handler_data_val_(0) {}

  // Looks up the INT32 selector for `f`, sends 5 through `sink`, and asserts
  // that the handler ran, recorded the expected handler data, and saw 5.
  void CallAndVerify(upb::Sink sink, upb::FieldDefPtr f) {
    upb_selector_t s;
    ASSERT(upb_handlers_getselector(f.ptr(), UPB_HANDLER_INT32, &s));

    ASSERT(!seen_);
    sink.PutInt32(s, 5);
    ASSERT(seen_);
    ASSERT(handler_data_val_ == kExpectedHandlerData);
    ASSERT(val_ == 5);
  }

 protected:
  bool seen_;             // Set by the handler when it is invoked.
  int32_t val_;           // Value the handler received.
  int handler_data_val_;  // Handler-data value observed (or preset).
};
// Test all 8 combinations of:
// (handler data?) x (function/method) x (returns {void, bool})
class ValueTesterInt32VoidFunctionNoHandlerData
: public Int32ValueTesterBase {
public:
typedef ValueTesterInt32VoidFunctionNoHandlerData ME;
void Register(upb::HandlersPtr h, upb::FieldDefPtr f) {
ASSERT(h.SetInt32Handler(f, UpbMakeHandler(&Handler)));
handler_data_val_ = kExpectedHandlerData;
}
private:
static void Handler(ME* t, int32_t val) {
t->val_ = val;
t->seen_ = true;
}
};
class ValueTesterInt32BoolFunctionNoHandlerData
: public Int32ValueTesterBase {
public:
typedef ValueTesterInt32BoolFunctionNoHandlerData ME;
void Register(upb::HandlersPtr h, upb::FieldDefPtr f) {
ASSERT(h.SetInt32Handler(f, UpbMakeHandler(&Handler)));
handler_data_val_ = kExpectedHandlerData;
}
private:
static bool Handler(ME* t, int32_t val) {
t->val_ = val;
t->seen_ = true;
return true;
}
};
// Int32 handler variant: member function returning void, no bound handler
// data.
class ValueTesterInt32VoidMethodNoHandlerData : public Int32ValueTesterBase {
 public:
  using ME = ValueTesterInt32VoidMethodNoHandlerData;

  // Registers Handler() as the int32 handler for `f`.
  void Register(upb::HandlersPtr h, upb::FieldDefPtr f) {
    ASSERT(h.SetInt32Handler(f, UpbMakeHandler(&ME::Handler)));
    // No handler data is bound, so store the expected value directly.
    handler_data_val_ = kExpectedHandlerData;
  }

 private:
  // Records the call and the received value.
  void Handler(int32_t val) {
    seen_ = true;
    val_ = val;
  }
};
// Int32 handler variant: member function returning bool, no bound handler
// data.
class ValueTesterInt32BoolMethodNoHandlerData : public Int32ValueTesterBase {
 public:
  using ME = ValueTesterInt32BoolMethodNoHandlerData;

  // Registers Handler() as the int32 handler for `f`.
  void Register(upb::HandlersPtr h, upb::FieldDefPtr f) {
    ASSERT(h.SetInt32Handler(f, UpbMakeHandler(&ME::Handler)));
    // No handler data is bound, so store the expected value directly.
    handler_data_val_ = kExpectedHandlerData;
  }

 private:
  // Records the call and the received value; reports success.
  bool Handler(int32_t val) {
    seen_ = true;
    val_ = val;
    return true;
  }
};
class ValueTesterInt32VoidFunctionWithHandlerData
: public Int32ValueTesterBase {
public:
typedef ValueTesterInt32VoidFunctionWithHandlerData ME;
void Register(upb::HandlersPtr h, upb::FieldDefPtr f) {
ASSERT(h.SetInt32Handler(
f, UpbBind(&Handler, new int(kExpectedHandlerData))));
}
private:
static void Handler(ME* t, const int* hd, int32_t val) {
t->val_ = val;
t->handler_data_val_ = *hd;
t->seen_ = true;
}
};
class ValueTesterInt32BoolFunctionWithHandlerData
: public Int32ValueTesterBase {
public:
typedef ValueTesterInt32BoolFunctionWithHandlerData ME;
void Register(upb::HandlersPtr h, upb::FieldDefPtr f) {
ASSERT(h.SetInt32Handler(
f, UpbBind(&Handler, new int(kExpectedHandlerData))));
}
private:
static bool Handler(ME* t, const int* hd, int32_t val) {
t->val_ = val;
t->handler_data_val_ = *hd;
t->seen_ = true;
return true;
}
};
// Int32 handler variant: member function returning void, with bound handler
// data.
class ValueTesterInt32VoidMethodWithHandlerData : public Int32ValueTesterBase {
 public:
  using ME = ValueTesterInt32VoidMethodWithHandlerData;

  // Registers Handler() for `f`, binding a heap-allocated int holding the
  // expected value as handler data.
  void Register(upb::HandlersPtr h, upb::FieldDefPtr f) {
    ASSERT(h.SetInt32Handler(
        f, UpbBind(&ME::Handler, new int(kExpectedHandlerData))));
  }

 private:
  // Records the call, the bound handler data and the received value.
  void Handler(const int* hd, int32_t val) {
    seen_ = true;
    handler_data_val_ = *hd;
    val_ = val;
  }
};
// Int32 handler variant: member function returning bool, with bound handler
// data.
class ValueTesterInt32BoolMethodWithHandlerData : public Int32ValueTesterBase {
 public:
  using ME = ValueTesterInt32BoolMethodWithHandlerData;

  // Registers Handler() for `f`, binding a heap-allocated int holding the
  // expected value as handler data.
  void Register(upb::HandlersPtr h, upb::FieldDefPtr f) {
    ASSERT(h.SetInt32Handler(
        f, UpbBind(&ME::Handler, new int(kExpectedHandlerData))));
  }

 private:
  // Records the call, the bound handler data and the received value; reports
  // success.
  bool Handler(const int* hd, int32_t val) {
    seen_ = true;
    handler_data_val_ = *hd;
    val_ = val;
    return true;
  }
};
// HandlerCache callback: `closure` is the tester of type T.  Looks up the
// field numbered T::kFieldNumber on the message being registered and lets the
// tester install its handler on it.
template <class T>
void RegisterHandlers(const void* closure, upb::Handlers* h_ptr) {
  upb::HandlersPtr h(h_ptr);
  upb::FieldDefPtr f = h.message_def().FindFieldByNumber(T::kFieldNumber);
  ASSERT(f);

  // The callback signature hands the closure over as const; the tester has to
  // be mutable because Register() records state on it.
  T* tester = const_cast<T*>(static_cast<const T*>(closure));
  tester->Register(h, f);
}
// Runs one handler-variant test: builds handlers for TestMessage through a
// HandlerCache that calls RegisterHandlers<T>, wraps them in a sink whose
// closure is the tester, and lets the tester drive and verify the call.
template <class T>
void TestHandler() {
  T tester;
  upb::SymbolTable symtab;
  upb::HandlerCache cache(&RegisterHandlers<T>, &tester);

  upb::MessageDefPtr md(upb_test_TestMessage_getmsgdef(symtab.ptr()));
  ASSERT(md);
  upb::FieldDefPtr f = md.FindFieldByNumber(T::kFieldNumber);
  ASSERT(f);

  upb::Sink sink(cache.Get(md), &tester);
  tester.CallAndVerify(sink, f);
}
// Two distinct empty types used purely as closure / return types in the
// mismatched-type registration tests below.
class T1 {};
class T2 {};
// No-op handler taking only a closure of type C; used where just the closure
// type of the registered handler matters.
template <class C>
void DoNothingHandler(C* closure) { UPB_UNUSED(closure); }
// No-op int32 value handler with closure type C.
template <class C>
void DoNothingInt32Handler(C* closure, int32_t val) {
  UPB_UNUSED(val);
  UPB_UNUSED(closure);
}
// No-op Start* handlers parameterised on both the returned sub-closure type R
// (class template parameter) and the incoming closure type C (member template
// parameter).
template <class R>
class DoNothingStartHandler {
 public:
  // These functions live inside a class for a somewhat annoying reason:
  // UpbMakeHandler() is a macro, so we cannot write
  //   UpbMakeHandler(DoNothingStartHandler<T1, T2>)
  // because the preprocessor treats the comma as separating two macro
  // arguments.  The usual workaround of adding parentheses,
  //   UpbMakeHandler((DoNothingStartHandler<T1, T2>))
  // does not help either: the macro then expands correctly but passes the
  // parenthesized expression as a template parameter, i.e. Type<(F)>, which
  // is not legal C++ (Clang compiles it but warns: "address non-type template
  // argument cannot be surrounded by parentheses").
  //
  // Splitting the parameters across two levels lets us pass both without any
  // comma:
  //   UpbMakeHandler(DoNothingStartHandler<T1>::Handler<T2>)

  // Start handler taking only a closure; always returns a null sub-closure.
  template <class C>
  static R* Handler(C* closure) {
    UPB_UNUSED(closure);
    return nullptr;
  }

  // Start-string handler (closure plus size hint); always returns a null
  // sub-closure.
  template <class C>
  static R* String(C* closure, size_t size_len) {
    UPB_UNUSED(size_len);
    UPB_UNUSED(closure);
    return nullptr;
  }
};
// No-op string-buffer handler with closure type C.
template <class C>
void DoNothingStringBufHandler(C* closure, const char *buf, size_t len) {
  UPB_UNUSED(len);
  UPB_UNUSED(buf);
  UPB_UNUSED(closure);
}
// No-op end-message handler with closure type C.
template <class C>
void DoNothingEndMessageHandler(C* closure, upb_status *status) {
  UPB_UNUSED(status);
  UPB_UNUSED(closure);
}
void RegisterMismatchedTypes(const void* closure, upb::Handlers* h_ptr) {
upb::HandlersPtr h(h_ptr);
upb::MessageDefPtr md(h.message_def());
ASSERT(md);
upb::FieldDefPtr i32 = md.FindFieldByName("i32");
upb::FieldDefPtr r_i32 = md.FindFieldByName("r_i32");
upb::FieldDefPtr str = md.FindFieldByName("str");
upb::FieldDefPtr r_str = md.FindFieldByName("r_str");
upb::FieldDefPtr msg = md.FindFieldByName("msg");
upb::FieldDefPtr r_msg = md.FindFieldByName("r_msg");
ASSERT(i32);
ASSERT(r_i32);
ASSERT(str);
ASSERT(r_str);
ASSERT(msg);
ASSERT(r_msg);
// Establish T1 as the top-level closure type.
ASSERT(h.SetInt32Handler(i32, UpbMakeHandler(DoNothingInt32Handler<T1>)));
// Now any other attempt to set another handler with T2 as the top-level
// closure should fail. But setting these same handlers with T1 as the
// top-level closure will succeed.
ASSERT(!h.SetStartMessageHandler(UpbMakeHandler(DoNothingHandler<T2>)));
ASSERT(h.SetStartMessageHandler(UpbMakeHandler(DoNothingHandler<T1>)));
ASSERT(
!h.SetEndMessageHandler(UpbMakeHandler(DoNothingEndMessageHandler<T2>)));
ASSERT(
h.SetEndMessageHandler(UpbMakeHandler(DoNothingEndMessageHandler<T1>)));
ASSERT(!h.SetStartStringHandler(
str, UpbMakeHandler(DoNothingStartHandler<T1>::String<T2>)));
ASSERT(h.SetStartStringHandler(
str, UpbMakeHandler(DoNothingStartHandler<T1>::String<T1>)));
ASSERT(!h.SetEndStringHandler(str, UpbMakeHandler(DoNothingHandler<T2>)));
ASSERT(h.SetEndStringHandler(str, UpbMakeHandler(DoNothingHandler<T1>)));
ASSERT(!h.SetStartSubMessageHandler(
msg, UpbMakeHandler(DoNothingStartHandler<T1>::Handler<T2>)));
ASSERT(h.SetStartSubMessageHandler(
msg, UpbMakeHandler(DoNothingStartHandler<T1>::Handler<T1>)));
ASSERT(
!h.SetEndSubMessageHandler(msg, UpbMakeHandler(DoNothingHandler<T2>)));
ASSERT(
h.SetEndSubMessageHandler(msg, UpbMakeHandler(DoNothingHandler<T1>)));
ASSERT(!h.SetStartSequenceHandler(
r_i32, UpbMakeHandler(DoNothingStartHandler<T1>::Handler<T2>)));
ASSERT(h.SetStartSequenceHandler(
r_i32, UpbMakeHandler(DoNothingStartHandler<T1>::Handler<T1>)));
ASSERT(!h.SetEndSequenceHandler(
r_i32, UpbMakeHandler(DoNothingHandler<T2>)));
ASSERT(h.SetEndSequenceHandler(
r_i32, UpbMakeHandler(DoNothingHandler<T1>)));
ASSERT(!h.SetStartSequenceHandler(
r_msg, UpbMakeHandler(DoNothingStartHandler<T1>::Handler<T2>)));
ASSERT(h.SetStartSequenceHandler(
r_msg, UpbMakeHandler(DoNothingStartHandler<T1>::Handler<T1>)));
ASSERT(!h.SetEndSequenceHandler(
r_msg, UpbMakeHandler(DoNothingHandler<T2>)));
ASSERT(h.SetEndSequenceHandler(
r_msg, UpbMakeHandler(DoNothingHandler<T1>)));
ASSERT(!h.SetStartSequenceHandler(
r_str, UpbMakeHandler(DoNothingStartHandler<T1>::Handler<T2>)));
ASSERT(h.SetStartSequenceHandler(
r_str, UpbMakeHandler(DoNothingStartHandler<T1>::Handler<T1>)));
ASSERT(!h.SetEndSequenceHandler(
r_str, UpbMakeHandler(DoNothingHandler<T2>)));
ASSERT(h.SetEndSequenceHandler(
r_str, UpbMakeHandler(DoNothingHandler<T1>)));
// By setting T1 as the return type for the Start* handlers we have
// established T1 as the type of the sequence and string frames.
// Setting callbacks that use T2 should fail, but T1 should succeed.
ASSERT(
!h.SetStringHandler(str, UpbMakeHandler(DoNothingStringBufHandler<T2>)));
ASSERT(
h.SetStringHandler(str, UpbMakeHandler(DoNothingStringBufHandler<T1>)));
ASSERT(!h.SetInt32Handler(r_i32, UpbMakeHandler(DoNothingInt32Handler<T2>)));
ASSERT(h.SetInt32Handler(r_i32, UpbMakeHandler(DoNothingInt32Handler<T1>)));
ASSERT(!h.SetStartSubMessageHandler(
r_msg, UpbMakeHandler(DoNothingStartHandler<T1>::Handler<T2>)));
ASSERT(h.SetStartSubMessageHandler(
r_msg, UpbMakeHandler(DoNothingStartHandler<T1>::Handler<T1>)));
ASSERT(!h.SetEndSubMessageHandler(r_msg,
UpbMakeHandler(DoNothingHandler<T2>)));
ASSERT(h.SetEndSubMessageHandler(r_msg,
UpbMakeHandler(DoNothingHandler<T1>)));
ASSERT(!h.SetStartStringHandler(
r_str, UpbMakeHandler(DoNothingStartHandler<T1>::String<T2>)));
ASSERT(h.SetStartStringHandler(
r_str, UpbMakeHandler(DoNothingStartHandler<T1>::String<T1>)));
ASSERT(
!h.SetEndStringHandler(r_str, UpbMakeHandler(DoNothingHandler<T2>)));
ASSERT(h.SetEndStringHandler(r_str, UpbMakeHandler(DoNothingHandler<T1>)));
ASSERT(!h.SetStringHandler(r_str,
UpbMakeHandler(DoNothingStringBufHandler<T2>)));
ASSERT(h.SetStringHandler(r_str,
UpbMakeHandler(DoNothingStringBufHandler<T1>)));
}
void RegisterMismatchedTypes2(const void* closure, upb::Handlers* h_ptr) {
upb::HandlersPtr h(h_ptr);
upb::MessageDefPtr md(h.message_def());
ASSERT(md);
upb::FieldDefPtr i32 = md.FindFieldByName("i32");
upb::FieldDefPtr r_i32 = md.FindFieldByName("r_i32");
upb::FieldDefPtr str = md.FindFieldByName("str");
upb::FieldDefPtr r_str = md.FindFieldByName("r_str");
upb::FieldDefPtr msg = md.FindFieldByName("msg");
upb::FieldDefPtr r_msg = md.FindFieldByName("r_msg");
ASSERT(i32);
ASSERT(r_i32);
ASSERT(str);
ASSERT(r_str);
ASSERT(msg);
ASSERT(r_msg);
// For our second test we do the same in reverse. We directly set the type of
// the frame and then observe failures at registering a Start* handler that
// returns a different type.
// First establish the type of a sequence frame directly.
ASSERT(h.SetInt32Handler(r_i32, UpbMakeHandler(DoNothingInt32Handler<T1>)));
// Now setting a StartSequence callback that returns a different type should
// fail.
ASSERT(!h.SetStartSequenceHandler(
r_i32, UpbMakeHandler(DoNothingStartHandler<T2>::Handler<T1>)));
ASSERT(h.SetStartSequenceHandler(
r_i32, UpbMakeHandler(DoNothingStartHandler<T1>::Handler<T1>)));
// Establish a string frame directly.
ASSERT(h.SetStringHandler(r_str,
UpbMakeHandler(DoNothingStringBufHandler<T1>)));
// Fail setting a StartString callback that returns a different type.
ASSERT(!h.SetStartStringHandler(
r_str, UpbMakeHandler(DoNothingStartHandler<T2>::String<T1>)));
ASSERT(h.SetStartStringHandler(
r_str, UpbMakeHandler(DoNothingStartHandler<T1>::String<T1>)));
// The previous established T1 as the frame for the r_str sequence.
ASSERT(!h.SetStartSequenceHandler(
r_str, UpbMakeHandler(DoNothingStartHandler<T2>::Handler<T1>)));
ASSERT(h.SetStartSequenceHandler(
r_str, UpbMakeHandler(DoNothingStartHandler<T1>::Handler<T1>)));
}
// Runs both mismatched-type registration callbacks against TestMessage.  All
// assertions live inside the callbacks; fetching the handlers from each cache
// is what invokes them.
void TestMismatchedTypes() {
  // First create a schema for our test.
  upb::SymbolTable symtab;
  upb::HandlerCache handler_cache(&RegisterMismatchedTypes, nullptr);
  upb::HandlerCache handler_cache2(&RegisterMismatchedTypes2, nullptr);
  const upb::MessageDefPtr md(upb_test_TestMessage_getmsgdef(symtab.ptr()));

  // Now test the type-checking in handler registration.
  handler_cache.Get(md);
  handler_cache2.Get(md);
}
// Handler-data object that makes its own lifetime observable: the pointed-to
// counter is incremented on construction and decremented on destruction.
class IntIncrementer {
 public:
  explicit IntIncrementer(int* x) : x_(x) { ++*x_; }
  ~IntIncrementer() { --*x_; }

  // No-op int32 handler; exists only so an IntIncrementer can be bound as
  // handler data.
  static void Handler(void* closure, const IntIncrementer* incrementer,
                      int32_t x) {
    UPB_UNUSED(x);
    UPB_UNUSED(incrementer);
    UPB_UNUSED(closure);
  }

 private:
  int* x_;  // Counter tracking this object's lifetime; not owned.
};
void RegisterIncrementor(const void* closure, upb::Handlers* h_ptr) {
const int* x = static_cast<const int*>(closure);
upb::HandlersPtr h(h_ptr);
upb::FieldDefPtr f = h.message_def().FindFieldByName("i32");
h.SetInt32Handler(f, UpbBind(&IntIncrementer::Handler,
new IntIncrementer(const_cast<int*>(x))));
}
// Verifies that bound handler data is destroyed together with the objects
// that hold the handlers: the counter is 1 while they are alive and back to 0
// once the inner scope has ended.
void TestHandlerDataDestruction() {
  int x = 0;

  {
    upb::SymbolTable symtab;
    upb::HandlerCache cache(&RegisterIncrementor, &x);
    upb::MessageDefPtr md(upb_test_TestMessage_getmsgdef(symtab.ptr()));
    cache.Get(md);
    ASSERT(x == 1);
  }

  ASSERT(x == 0);
}
// Checks that range-based for over a message's fields and oneofs visits
// exactly field_count() and oneof_count() elements respectively.
void TestIteration() {
  upb::SymbolTable symtab;
  upb::MessageDefPtr md(upb_test_TestMessage_getmsgdef(symtab.ptr()));

  // Test range-based for on both fields and oneofs (with the iterator
  // adaptor).
  int field_count = 0;
  for (auto field : md.fields()) {
    UPB_UNUSED(field);
    ++field_count;
  }
  ASSERT(field_count == md.field_count());

  int oneof_count = 0;
  for (auto oneof : md.oneofs()) {
    UPB_UNUSED(oneof);
    ++oneof_count;
  }
  ASSERT(oneof_count == md.oneof_count());
}
extern "C" {
int run_tests(int argc, char *argv[]) {
TestHandler<ValueTesterInt32VoidFunctionNoHandlerData>();
TestHandler<ValueTesterInt32BoolFunctionNoHandlerData>();
TestHandler<ValueTesterInt32VoidMethodNoHandlerData>();
TestHandler<ValueTesterInt32BoolMethodNoHandlerData>();
TestHandler<ValueTesterInt32VoidFunctionWithHandlerData>();
TestHandler<ValueTesterInt32BoolFunctionWithHandlerData>();
TestHandler<ValueTesterInt32VoidMethodWithHandlerData>();
TestHandler<ValueTesterInt32BoolMethodWithHandlerData>();
TestHandler<StartMsgTesterVoidFunctionNoHandlerData>();
TestHandler<StartMsgTesterBoolFunctionNoHandlerData>();
TestHandler<StartMsgTesterVoidMethodNoHandlerData>();
TestHandler<StartMsgTesterBoolMethodNoHandlerData>();
TestHandler<StartMsgTesterVoidFunctionWithHandlerData>();
TestHandler<StartMsgTesterBoolFunctionWithHandlerData>();
TestHandler<StartMsgTesterVoidMethodWithHandlerData>();
TestHandler<StartMsgTesterBoolMethodWithHandlerData>();
TestHandler<StringBufTesterVoidMethodNoHandlerDataNoHandle>();
TestHandler<StringBufTesterVoidMethodNoHandlerDataWithHandle>();
TestHandler<StringBufTesterVoidMethodWithHandlerDataNoHandle>();
TestHandler<StringBufTesterVoidMethodWithHandlerDataWithHandle>();
TestHandler<StringBufTesterVoidFunctionNoHandlerDataNoHandle>();
TestHandler<StringBufTesterVoidFunctionNoHandlerDataWithHandle>();
TestHandler<StringBufTesterVoidFunctionWithHandlerDataNoHandle>();
TestHandler<StringBufTesterVoidFunctionWithHandlerDataWithHandle>();
TestHandler<StringBufTesterSizeTMethodNoHandlerDataNoHandle>();
TestHandler<StringBufTesterBoolMethodNoHandlerDataNoHandle>();
TestMismatchedTypes();
TestHandlerDataDestruction();
TestIteration();
return 0;
}
}

@ -0,0 +1,12 @@
syntax = "proto2";
package upb.test;
// Schema used by the C++ handler tests: one optional and one repeated field
// each of int32, string and (recursive) sub-message type.
message TestMessage {
optional int32 i32 = 1;
repeated int32 r_i32 = 2;
optional string str = 3;
repeated string r_str = 4;
// Self-referential sub-message fields.
optional TestMessage msg = 5;
repeated TestMessage r_msg = 6;
}

@ -0,0 +1,679 @@
/*
*
* Tests for upb_table.
*/
#include <limits.h>
#include <string.h>
#include <sys/resource.h>
#include <iostream>
#include <map>
#include <set>
#include <string>
#include <unordered_map>
#include <vector>
#include "tests/upb_test.h"
#include "upb/table.int.h"
#include "upb/port_def.inc"
// Convenience interface for C++. We don't put this in upb itself because
// the table is not exposed to users.
namespace upb {
// Primary templates, declared only: each supported C++ type gets explicit
// specializations from the FUNCS macro below.
//   MakeUpbValue<T>(val)  wraps `val` in a upb_value.
//   GetUpbValue<T>(val)   extracts a T from a upb_value.
//   GetUpbValueType<T>()  returns the upb_ctype_t enumerator for T.
template <class T> upb_value MakeUpbValue(T val);
template <class T> T GetUpbValue(upb_value val);
template <class T> upb_ctype_t GetUpbValueType();
// FUNCS(name, type_t, enumval) defines the three specializations for `type_t`
// in terms of upb_value_<name>() and upb_value_get<name>(), with `enumval` as
// the reported type.
#define FUNCS(name, type_t, enumval) \
template<> upb_value MakeUpbValue<type_t>(type_t val) { return upb_value_ ## name(val); } \
template<> type_t GetUpbValue<type_t>(upb_value val) { return upb_value_get ## name(val); } \
template<> upb_ctype_t GetUpbValueType<type_t>() { return enumval; }
FUNCS(int32, int32_t, UPB_CTYPE_INT32)
FUNCS(int64, int64_t, UPB_CTYPE_INT64)
FUNCS(uint32, uint32_t, UPB_CTYPE_UINT32)
FUNCS(uint64, uint64_t, UPB_CTYPE_UINT64)
FUNCS(bool, bool, UPB_CTYPE_BOOL)
FUNCS(cstr, char*, UPB_CTYPE_CSTR)
FUNCS(ptr, void*, UPB_CTYPE_PTR)
FUNCS(constptr, const void*, UPB_CTYPE_CONSTPTR)
FUNCS(fptr, upb_func*, UPB_CTYPE_FPTR)
// The macro is only needed for the instantiations above.
#undef FUNCS
class IntTable {
public:
IntTable(upb_ctype_t value_type) { upb_inttable_init(&table_, value_type); }
~IntTable() { upb_inttable_uninit(&table_); }
size_t count() { return upb_inttable_count(&table_); }
bool Insert(uintptr_t key, upb_value val) {
return upb_inttable_insert(&table_, key, val);
}
bool Replace(uintptr_t key, upb_value val) {
return upb_inttable_replace(&table_, key, val);
}
std::pair<bool, upb_value> Remove(uintptr_t key) {
std::pair<bool, upb_value> ret;
ret.first = upb_inttable_remove(&table_, key, &ret.second);
return ret;
}
std::pair<bool, upb_value> Lookup(uintptr_t key) const {
std::pair<bool, upb_value> ret;
ret.first = upb_inttable_lookup(&table_, key, &ret.second);
return ret;
}
std::pair<bool, upb_value> Lookup32(uint32_t key) const {
std::pair<bool, upb_value> ret;
ret.first = upb_inttable_lookup32(&table_, key, &ret.second);
return ret;
}
void Compact() { upb_inttable_compact(&table_); }
class iterator : public std::iterator<std::forward_iterator_tag,
std::pair<uintptr_t, upb_value> > {
public:
explicit iterator(IntTable* table) {
upb_inttable_begin(&iter_, &table->table_);
}
static iterator end(IntTable* table) {
iterator iter(table);
upb_inttable_iter_setdone(&iter.iter_);
return iter;
}
void operator++() {
return upb_inttable_next(&iter_);
}
std::pair<uintptr_t, upb_value> operator*() const {
std::pair<uintptr_t, upb_value> ret;
ret.first = upb_inttable_iter_key(&iter_);
ret.second = upb_inttable_iter_value(&iter_);
return ret;
}
bool operator==(const iterator& other) const {
return upb_inttable_iter_isequal(&iter_, &other.iter_);
}
bool operator!=(const iterator& other) const {
return !(*this == other);
}
private:
upb_inttable_iter iter_;
};
upb_inttable table_;
};
class StrTable {
public:
StrTable(upb_ctype_t value_type) { upb_strtable_init(&table_, value_type); }
~StrTable() { upb_strtable_uninit(&table_); }
size_t count() { return upb_strtable_count(&table_); }
bool Insert(const std::string& key, upb_value val) {
return upb_strtable_insert2(&table_, key.c_str(), key.size(), val);
}
std::pair<bool, upb_value> Remove(const std::string& key) {
std::pair<bool, upb_value> ret;
ret.first =
upb_strtable_remove2(&table_, key.c_str(), key.size(), &ret.second);
return ret;
}
std::pair<bool, upb_value> Lookup(const std::string& key) const {
std::pair<bool, upb_value> ret;
ret.first =
upb_strtable_lookup2(&table_, key.c_str(), key.size(), &ret.second);
return ret;
}
void Resize(size_t size_lg2) {
upb_strtable_resize(&table_, size_lg2, &upb_alloc_global);
}
class iterator : public std::iterator<std::forward_iterator_tag,
std::pair<std::string, upb_value> > {
public:
explicit iterator(StrTable* table) {
upb_strtable_begin(&iter_, &table->table_);
}
static iterator end(StrTable* table) {
iterator iter(table);
upb_strtable_iter_setdone(&iter.iter_);
return iter;
}
void operator++() {
return upb_strtable_next(&iter_);
}
std::pair<std::string, upb_value> operator*() const {
std::pair<std::string, upb_value> ret;
ret.first.assign(upb_strtable_iter_key(&iter_));
ret.second = upb_strtable_iter_value(&iter_);
return ret;
}
bool operator==(const iterator& other) const {
return upb_strtable_iter_isequal(&iter_, &other.iter_);
}
bool operator!=(const iterator& other) const {
return !(*this == other);
}
private:
upb_strtable_iter iter_;
};
upb_strtable table_;
};
template <class T> class TypedStrTable {
public:
TypedStrTable() : table_(GetUpbValueType<T>()) {}
size_t count() { return table_.count(); }
bool Insert(const std::string &key, T val) {
return table_.Insert(key, MakeUpbValue<T>(val));
}
std::pair<bool, T> Remove(const std::string& key) {
std::pair<bool, upb_value> found = table_.Remove(key);
std::pair<bool, T> ret;
ret.first = found.first;
if (ret.first) {
ret.second = GetUpbValue<T>(found.second);
}
return ret;
}
std::pair<bool, T> Lookup(const std::string& key) const {
std::pair<bool, upb_value> found = table_.Lookup(key);
std::pair<bool, T> ret;
ret.first = found.first;
if (ret.first) {
ret.second = GetUpbValue<T>(found.second);
}
return ret;
}
void Resize(size_t size_lg2) {
table_.Resize(size_lg2);
}
class iterator : public std::iterator<std::forward_iterator_tag, std::pair<std::string, T> > {
public:
explicit iterator(TypedStrTable* table) : iter_(&table->table_) {}
static iterator end(TypedStrTable* table) {
iterator iter(table);
iter.iter_ = StrTable::iterator::end(&table->table_);
return iter;
}
void operator++() { ++iter_; }
std::pair<std::string, T> operator*() const {
std::pair<std::string, upb_value> val = *iter_;
std::pair<std::string, T> ret;
ret.first = val.first;
ret.second = GetUpbValue<T>(val.second);
return ret;
}
bool operator==(const iterator& other) const {
return iter_ == other.iter_;
}
bool operator!=(const iterator& other) const {
return iter_ != other.iter_;
}
private:
StrTable::iterator iter_;
};
iterator begin() { return iterator(this); }
iterator end() { return iterator::end(this); }
StrTable table_;
};
// Typed view over IntTable: values of type T are boxed into upb_value with
// MakeUpbValue<T>() on the way in and unboxed with GetUpbValue<T>() on the
// way out.  Keys are uintptr_t.
template <class T> class TypedIntTable {
 public:
  TypedIntTable() : table_(GetUpbValueType<T>()) {}

  size_t count() { return table_.count(); }

  // Forwards to IntTable::Insert() and returns its result.
  bool Insert(uintptr_t key, T val) {
    return table_.Insert(key, MakeUpbValue<T>(val));
  }

  // Forwards to IntTable::Replace() and returns its result.
  bool Replace(uintptr_t key, T val) {
    return table_.Replace(key, MakeUpbValue<T>(val));
  }

  // Removes |key|; .first tells whether it was present, .second is the value
  // that was removed (only meaningful when .first is true).
  std::pair<bool, T> Remove(uintptr_t key) {
    std::pair<bool, upb_value> found = table_.Remove(key);
    std::pair<bool, T> ret;
    ret.first = found.first;
    if (ret.first) {
      ret.second = GetUpbValue<T>(found.second);
    }
    return ret;
  }

  // Looks up |key|; .first tells whether it was present, .second is the
  // stored value (only meaningful when .first is true).
  std::pair<bool, T> Lookup(uintptr_t key) const {
    std::pair<bool, upb_value> found = table_.Lookup(key);
    std::pair<bool, T> ret;
    ret.first = found.first;
    if (ret.first) {
      ret.second = GetUpbValue<T>(found.second);
    }
    return ret;
  }

  void Compact() { table_.Compact(); }

  // Forward iterator yielding (key, T) pairs by value; wraps
  // IntTable::iterator and unboxes each value on dereference.
  class iterator : public std::iterator<std::forward_iterator_tag, std::pair<uintptr_t, T> > {
   public:
    explicit iterator(TypedIntTable* table) : iter_(&table->table_) {}

    // Builds the end sentinel.  The untyped end iterator has to be stored
    // into a typed wrapper: returning the IntTable::iterator directly does
    // not compile once this member is instantiated, because there is no
    // conversion from IntTable::iterator to this class.
    static iterator end(TypedIntTable* table) {
      iterator iter(table);
      iter.iter_ = IntTable::iterator::end(&table->table_);
      return iter;
    }

    void operator++() { ++iter_; }

    std::pair<uintptr_t, T> operator*() const {
      std::pair<uintptr_t, upb_value> val = *iter_;
      std::pair<uintptr_t, T> ret;
      ret.first = val.first;
      ret.second = GetUpbValue<T>(val.second);
      return ret;
    }

    bool operator==(const iterator& other) const {
      return iter_ == other.iter_;
    }

    bool operator!=(const iterator& other) const {
      return iter_ != other.iter_;
    }

   private:
    IntTable::iterator iter_;
  };

  iterator begin() { return iterator(this); }
  iterator end() { return iterator::end(this); }

  // Public so tests can reach the underlying table directly.
  IntTable table_;
};
}
/* Set to true by run_tests() when "benchmark" is passed on the command line;
 * test_inttable() skips its timing section while this is false. */
bool benchmark = false;
/* User-CPU seconds (as measured by get_usertime()) that each benchmark loop
 * in test_inttable() runs for before MAYBE_BREAK leaves it. */
#define CPU_TIME_PER_TEST 0.5
using std::vector;
/* Returns the user-mode CPU time consumed so far by this process, in
 * seconds, as reported by getrusage(RUSAGE_SELF). */
double get_usertime() {
  struct rusage self_usage;
  getrusage(RUSAGE_SELF, &self_usage);
  const double microseconds_as_seconds =
      self_usage.ru_utime.tv_usec / 1000000.0;
  return self_usage.ru_utime.tv_sec + microseconds_as_seconds;
}
/* Checks upb::TypedStrTable<int32_t> against std::map / std::set, which are
 * assumed to be correct.
 *
 *   keys:          candidate keys; every one of them is looked up.
 *   num_to_insert: number of leading entries of |keys| that are inserted.
 *                  Must not exceed keys.size().
 *
 * Each inserted key is stored with the value key[0]. */
void test_strtable(const vector<std::string>& keys, uint32_t num_to_insert) {
  /* Reading keys[i] below would run off the end of the vector otherwise. */
  ASSERT(num_to_insert <= keys.size());

  /* Initialize structures. */
  std::map<std::string, int32_t> m;
  typedef upb::TypedStrTable<int32_t> Table;
  Table table;
  std::set<std::string> all;
  for(size_t i = 0; i < num_to_insert; i++) {
    const std::string& key = keys[i];
    all.insert(key);
    table.Insert(key, key[0]);
    m[key] = key[0];
  }

  /* Test correctness. */
  for(uint32_t i = 0; i < keys.size(); i++) {
    const std::string& key = keys[i];
    std::pair<bool, int32_t> found = table.Lookup(key);
    if(m.find(key) != m.end()) { /* Assume map implementation is correct. */
      ASSERT(found.first);
      ASSERT(found.second == key[0]);
      ASSERT(m[key] == key[0]);
    } else {
      ASSERT(!found.first);
    }
  }

  /* Iteration must visit every inserted key exactly once: each visited key
   * is removed from |all|, which must end up empty. */
  for (Table::iterator it = table.begin(); it != table.end(); ++it) {
    std::set<std::string>::iterator i = all.find((*it).first);
    ASSERT(i != all.end());
    all.erase(i);
  }
  ASSERT(all.empty());

  // Test iteration with resizes.
  for (int i = 0; i < 10; i++) {
    for (Table::iterator it = table.begin(); it != table.end(); ++it) {
      // Even if we invalidate the iterator it should only return real elements.
      ASSERT((*it).second == m[(*it).first]);

      // Force a resize even though the size isn't changing.
      // Also forces the table size to grow so some new buckets end up empty.
      int new_lg2 = table.table_.table_.t.size_lg2 + 1;
      // Don't use more than 64k tables, to avoid exhausting memory.
      new_lg2 = UPB_MIN(new_lg2, 16);
      table.Resize(new_lg2);
    }
  }
}
/* Checks upb::TypedIntTable<uint32_t> against std::map and
 * std::unordered_map (assumed correct) and, when the global |benchmark| flag
 * is set, prints lookup throughput for all three containers.
 *
 *   keys:        array of |num_entries| keys to insert.
 *   num_entries: number of keys.  Must be a power of 2 for the benchmark
 *                section, which uses (num_entries - 1) as an index mask.
 *   desc:        heading printed before the benchmark figures. */
void test_inttable(int32_t *keys, uint16_t num_entries, const char *desc) {
  /* Initialize structures. */
  typedef upb::TypedIntTable<uint32_t> Table;
  Table table;
  uint32_t largest_key = 0;
  std::map<uint32_t, uint32_t> m;
  std::unordered_map<uint32_t, uint32_t> hm;
  for(size_t i = 0; i < num_entries; i++) {
    int32_t key = keys[i];
    largest_key = UPB_MAX((int32_t)largest_key, key);
    table.Insert(key, key * 2);
    m[key] = key*2;
    hm[key] = key*2;
  }

  /* Test correctness. */
  for(uint32_t i = 0; i <= largest_key; i++) {
    std::pair<bool, uint32_t> found = table.Lookup(i);
    if(m.find(i) != m.end()) { /* Assume map implementation is correct. */
      ASSERT(found.first);
      ASSERT(found.second == i*2);
      ASSERT(m[i] == i*2);
      ASSERT(hm[i] == i*2);
    } else {
      ASSERT(!found.first);
    }
  }

  /* Remove every other key from all three containers. */
  for(uint16_t i = 0; i < num_entries; i += 2) {
    std::pair<bool, uint32_t> found = table.Remove(keys[i]);
    ASSERT(found.first == (m.erase(keys[i]) == 1));
    if (found.first) ASSERT(found.second == (uint32_t)keys[i] * 2);
    hm.erase(keys[i]);
    m.erase(keys[i]);  /* Already erased inside the ASSERT above; harmless. */
  }

  ASSERT(table.count() == hm.size());

  /* Test correctness. */
  for(uint32_t i = 0; i <= largest_key; i++) {
    std::pair<bool, uint32_t> found = table.Lookup(i);
    if(m.find(i) != m.end()) { /* Assume map implementation is correct. */
      ASSERT(found.first);
      ASSERT(found.second == i*2);
      ASSERT(m[i] == i*2);
      ASSERT(hm[i] == i*2);
    } else {
      ASSERT(!found.first);
    }
  }

  // Test replace.
  for(uint32_t i = 0; i <= largest_key; i++) {
    bool replaced = table.Replace(i, i*3);
    if(m.find(i) != m.end()) { /* Assume map implementation is correct. */
      ASSERT(replaced);
      m[i] = i * 3;
      hm[i] = i * 3;
    } else {
      ASSERT(!replaced);
    }
  }

  // Compact and test correctness again.
  table.Compact();
  for(uint32_t i = 0; i <= largest_key; i++) {
    std::pair<bool, uint32_t> found = table.Lookup(i);
    if(m.find(i) != m.end()) { /* Assume map implementation is correct. */
      ASSERT(found.first);
      ASSERT(found.second == i*3);
      ASSERT(m[i] == i*3);
      ASSERT(hm[i] == i*3);
    } else {
      ASSERT(!found.first);
    }
  }

  if(!benchmark) {
    return;
  }

  /* Everything below indexes with (i & mask) and shuffles starting from
   * num_entries - 1, so enforce the documented precondition here. */
  ASSERT(num_entries > 0 && (num_entries & (num_entries - 1)) == 0);

  printf("%s\n", desc);

  /* Test performance. We only test lookups for keys that are known to exist. */
  uint16_t *rand_order = new uint16_t[num_entries];
  for(uint16_t i = 0; i < num_entries; i++) {
    rand_order[i] = i;
  }
  /* Shuffle the index array to get a random visiting order. */
  for(uint16_t i = num_entries - 1; i >= 1; i--) {
    uint16_t rand_i = (random() / (double)RAND_MAX) * i;
    ASSERT(rand_i <= i);
    uint16_t tmp = rand_order[rand_i];
    rand_order[rand_i] = rand_order[i];
    rand_order[i] = tmp;
  }

  /* |x| accumulates every lookup result so the loops have an observable
   * effect; it is inspected once at the very end. */
  uintptr_t x = 0;
  const int mask = num_entries - 1;
  int time_mask = 0xffff;

  printf("upb_inttable(seq): ");
  fflush(stdout);
  double before = get_usertime();
  unsigned int i;

/* Leaves the enclosing loop once CPU_TIME_PER_TEST seconds of user time have
 * elapsed; the clock is only consulted when (i & time_mask) == 0. */
#define MAYBE_BREAK \
if ((i & time_mask) == 0 && (get_usertime() - before) > CPU_TIME_PER_TEST) \
break;
  for(i = 0; true; i++) {
    MAYBE_BREAK;
    int32_t key = keys[i & mask];
    upb_value v;
    bool ok = upb_inttable_lookup32(&table.table_.table_, key, &v);
    x += (uintptr_t)ok;
  }
  double total = get_usertime() - before;
  printf("%ld/s\n", (long)(i/total));
  /* Divide in floating point so the baseline is not truncated. */
  double upb_seq_i = i / 100.0;  // For later percentage calculation.

  printf("upb_inttable(rand): ");
  fflush(stdout);
  before = get_usertime();
  for(i = 0; true; i++) {
    MAYBE_BREAK;
    int32_t key = keys[rand_order[i & mask]];
    upb_value v;
    bool ok = upb_inttable_lookup32(&table.table_.table_, key, &v);
    x += (uintptr_t)ok;
  }
  total = get_usertime() - before;
  printf("%ld/s\n", (long)(i/total));
  double upb_rand_i = i / 100.0;  // For later percentage calculation.

  printf("std::map<int32_t, int32_t>(seq): ");
  fflush(stdout);
  before = get_usertime();
  for(i = 0; true; i++) {
    MAYBE_BREAK;
    int32_t key = keys[i & mask];
    x += m[key];
  }
  total = get_usertime() - before;
  printf("%ld/s (%0.1f%% of upb)\n", (long)(i/total), i / upb_seq_i);

  printf("std::map<int32_t, int32_t>(rand): ");
  fflush(stdout);
  before = get_usertime();
  for(i = 0; true; i++) {
    MAYBE_BREAK;
    int32_t key = keys[rand_order[i & mask]];
    x += m[key];
  }
  total = get_usertime() - before;
  printf("%ld/s (%0.1f%% of upb)\n", (long)(i/total), i / upb_rand_i);

  printf("std::unordered_map<uint32_t, uint32_t>(seq): ");
  fflush(stdout);
  before = get_usertime();
  for(i = 0; true; i++) {
    MAYBE_BREAK;
    /* Sequential order, matching the other "(seq)" measurements. */
    int32_t key = keys[i & mask];
    x += hm[key];
  }
  total = get_usertime() - before;
  printf("%ld/s (%0.1f%% of upb)\n", (long)(i/total), i / upb_seq_i);

  printf("std::unordered_map<uint32_t, uint32_t>(rand): ");
  fflush(stdout);
  before = get_usertime();
  for(i = 0; true; i++) {
    MAYBE_BREAK;
    int32_t key = keys[rand_order[i & mask]];
    x += hm[key];
  }
  total = get_usertime() - before;
  if (x == INT_MAX) abort();
  printf("%ld/s (%0.1f%% of upb)\n\n", (long)(i/total), i / upb_rand_i);
  delete[] rand_order;
}
/*
 * Placeholder for a test that can't pass right now because the table can't
 * store a value of (uint64_t)-1.  The intended body is kept below, commented
 * out, so the function currently does nothing.
 */
void test_int64_max_value() {
/*
typedef upb::TypedIntTable<uint64_t> Table;
Table table;
uintptr_t uint64_max = (uint64_t)-1;
table.Insert(1, uint64_max);
std::pair<bool, uint64_t> found = table.Lookup(1);
ASSERT(found.first);
ASSERT(found.second == uint64_max);
*/
}
/* Allocates (with new[]) and returns an array holding 0, 1, ..., num - 1.
 * The caller owns the array and must release it with delete[]. */
int32_t *get_contiguous_keys(int32_t num) {
  int32_t *keys = new int32_t[num];
  int32_t next = 0;
  while (next < num) {
    keys[next] = next;
    ++next;
  }
  return keys;
}
/* Inserts keys 0, 2 and 4, compacts the table, removes all three again and
 * then asserts that iterating the table yields no entries. */
void test_delete() {
  static const uintptr_t kKeys[] = {0, 2, 4};
  const size_t kNumKeys = sizeof(kKeys) / sizeof(kKeys[0]);

  upb_inttable t;
  upb_inttable_init(&t, UPB_CTYPE_BOOL);

  for (size_t n = 0; n < kNumKeys; n++) {
    upb_inttable_insert(&t, kKeys[n], upb_value_bool(true));
  }
  upb_inttable_compact(&t);
  for (size_t n = 0; n < kNumKeys; n++) {
    upb_inttable_remove(&t, kKeys[n], NULL);
  }

  upb_inttable_iter iter;
  upb_inttable_begin(&iter, &t);
  while (!upb_inttable_done(&iter)) {
    /* Any entry reached here is one that should have been removed. */
    ASSERT(false);
    upb_inttable_next(&iter);
  }

  upb_inttable_uninit(&t);
}
extern "C" {

/* Entry point called by the shared test main().  Passing "benchmark" as any
 * argument enables the timing section of test_inttable().  Returns 0; failed
 * assertions abort the process instead of returning. */
int run_tests(int argc, char *argv[]) {
  for (int arg = 1; arg < argc; arg++) {
    if (strcmp(argv[arg], "benchmark") == 0) benchmark = true;
  }

  static const char *const kMessageNames[] = {
    "google.protobuf.FileDescriptorSet",
    "google.protobuf.FileDescriptorProto",
    "google.protobuf.DescriptorProto",
    "google.protobuf.DescriptorProto.ExtensionRange",
    "google.protobuf.FieldDescriptorProto",
    "google.protobuf.EnumDescriptorProto",
    "google.protobuf.EnumValueDescriptorProto",
    "google.protobuf.ServiceDescriptorProto",
    "google.protobuf.MethodDescriptorProto",
    "google.protobuf.FileOptions",
    "google.protobuf.MessageOptions",
    "google.protobuf.FieldOptions",
    "google.protobuf.EnumOptions",
    "google.protobuf.EnumValueOptions",
    "google.protobuf.ServiceOptions",
    "google.protobuf.MethodOptions",
    "google.protobuf.UninterpretedOption",
    "google.protobuf.UninterpretedOption.NamePart",
  };
  const size_t kNumNames = sizeof(kMessageNames) / sizeof(kMessageNames[0]);

  vector<std::string> keys;
  for (size_t n = 0; n < kNumNames; n++) {
    keys.push_back(kMessageNames[n]);
  }

  for (int round = 0; round < 10; round++) {
    test_strtable(keys, 18);
  }

  int32_t *keys1 = get_contiguous_keys(8);
  test_inttable(keys1, 8, "Table size: 8, keys: 1-8 ====");
  delete[] keys1;

  int32_t *keys2 = get_contiguous_keys(64);
  test_inttable(keys2, 64, "Table size: 64, keys: 1-64 ====\n");
  delete[] keys2;

  int32_t *keys3 = get_contiguous_keys(512);
  test_inttable(keys3, 512, "Table size: 512, keys: 1-512 ====\n");
  delete[] keys3;

  /* A dense low range followed by a second range far away from it. */
  int32_t *keys4 = new int32_t[64];
  for (int32_t slot = 0; slot < 64; slot++) {
    keys4[slot] = (slot < 32) ? slot + 1 : 10101 + slot;
  }
  test_inttable(keys4, 64, "Table size: 64, keys: 1-32 and 10133-10164 ====\n");
  delete[] keys4;

  test_delete();
  test_int64_max_value();
  return 0;
}

}

@ -0,0 +1,230 @@
/*
** Common functionality for tests.
**/
#ifndef UPB_TEST_UTIL_H_
#define UPB_TEST_UTIL_H_
#include <stdio.h>
#include <math.h>
#include "tests/upb_test.h"
#include "upb/sink.h"
#include "upb/port_def.inc"
#ifdef __cplusplus
upb_bufhandle global_handle;
/* A convenience class for parser tests.  Provides some useful features:
 *
 *   - can support multiple calls to parse, to test the parser's handling
 *     of buffer seams.
 *
 *   - can output verbose output about each parse call when requested, for
 *     ease of debugging.
 *
 *   - can pass NULL for skipped regions of the input if requested.
 *
 *   - allocates and passes a separate buffer for each parsed region, to
 *     ensure that the parser is not erroneously overreading its buffer.
 */
class VerboseParserEnvironment {
 public:
  /* Pass verbose=true to print detailed diagnostics to stderr.
   *
   * Every member is given a defined value here so that the accessors and
   * CheckConsistency() are safe to call even before the first Reset(). */
  VerboseParserEnvironment(bool verbose)
      : buf_(NULL),
        len_(0),
        verbose_(verbose),
        ofs_(0),
        subc_(NULL),
        expect_error_(false),
        end_ok_(false),
        end_ok_set_(false),
        skip_until_(-1),
        skipped_with_null_(false) {}

  /* Points the environment at a new input buffer and clears the per-parse
   * state.  |may_skip| enables NULL-buffer skipping (see skip_until_);
   * |expect_error| is what CheckConsistency() later checks against. */
  void Reset(const char *buf, size_t len, bool may_skip, bool expect_error) {
    buf_ = buf;
    len_ = len;
    ofs_ = 0;
    expect_error_ = expect_error;
    end_ok_set_ = false;
    skip_until_ = may_skip ? 0 : -1;
    skipped_with_null_ = false;
  }

  /* The user should call a series of:
   *
   * Reset(buf, len, may_skip, expect_error);
   * Start()
   * ParseBuffer(X);
   * ParseBuffer(Y);
   * // Repeat ParseBuffer as desired, but last call should pass -1.
   * ParseBuffer(-1);
   * End();
   */

  bool Start() {
    if (verbose_) {
      fprintf(stderr, "Calling start()\n");
    }
    return sink_.Start(len_, &subc_);
  }

  bool End() {
    if (verbose_) {
      fprintf(stderr, "Calling end()\n");
    }
    end_ok_ = sink_.End();
    end_ok_set_ = true;

    return end_ok_;
  }

  /* Cross-checks the recorded status against End()'s result and against the
   * expect_error flag given to Reset().  Returns false (after printing the
   * reason to stderr) on any mismatch. */
  bool CheckConsistency() {
    /* If we called end (which we should only do when previous bytes are fully
     * accepted), then end() should return true iff there were no errors. */
    if (end_ok_set_ && end_ok_ != status_.ok()) {
      fprintf(stderr, "End() status and saw_error didn't match.\n");
      return false;
    }

    if (expect_error_ && status_.ok()) {
      fprintf(stderr, "Expected error but saw none.\n");
      return false;
    }

    if (!status_.ok()) {
      if (expect_error_ && verbose_) {
        fprintf(stderr, "Encountered error, as expected: %s",
                status_.error_message());
      } else if (!expect_error_) {
        fprintf(stderr, "Encountered unexpected error: %s",
                status_.error_message());
        return false;
      }
    }

    return true;
  }

  /* Feeds the next |bytes| bytes of the input to the sink (all remaining
   * bytes when |bytes| is negative).  Returns false if the status is not ok
   * after the call. */
  bool ParseBuffer(int bytes) {
    if (bytes < 0) {
      bytes = len_ - ofs_;
    }

    ASSERT((size_t)bytes <= (len_ - ofs_));

    /* Copy buffer into a separate, temporary buffer.
     * This is necessary to verify that the parser is not erroneously
     * reading outside the specified bounds. */
    char *buf2 = NULL;

    if ((int)(ofs_ + bytes) <= skip_until_) {
      skipped_with_null_ = true;
    } else {
      /* malloc(0) is allowed to return NULL, which would trip the assertion
       * below for a perfectly valid empty region; always request at least
       * one byte so an empty region still gets a real (non-NULL) buffer. */
      buf2 = (char*)malloc(bytes > 0 ? (size_t)bytes : 1);
      UPB_ASSERT(buf2);
      memcpy(buf2, buf_ + ofs_, bytes);
    }

    if (buf2 == NULL && bytes == 0) {
      /* Decoders don't support buf=NULL, bytes=0. */
      return true;
    }

    if (verbose_) {
      fprintf(stderr, "Calling parse(%u) for bytes %u-%u of the input\n",
              (unsigned)bytes, (unsigned)ofs_, (unsigned)(ofs_ + bytes));
    }

    int parsed = sink_.PutBuffer(subc_, buf2, bytes, &global_handle);
    free(buf2);

    if (verbose_) {
      if (parsed == bytes) {
        fprintf(stderr,
                "parse(%u) = %u, complete byte count indicates success\n",
                (unsigned)bytes, (unsigned)bytes);
      } else if (parsed > bytes) {
        fprintf(stderr,
                "parse(%u) = %u, long byte count indicates success and skip "
                "of the next %u bytes\n",
                (unsigned)bytes, (unsigned)parsed, (unsigned)(parsed - bytes));
      } else {
        fprintf(stderr,
                "parse(%u) = %u, short byte count indicates failure; "
                "last %u bytes were not consumed\n",
                (unsigned)bytes, (unsigned)parsed, (unsigned)(bytes - parsed));
      }
    }

    if (!status_.ok()) {
      return false;
    }

    if (parsed > bytes && skip_until_ >= 0) {
      skip_until_ = ofs_ + parsed;
    }

    ofs_ += UPB_MIN(parsed, bytes);

    return true;
  }

  void ResetBytesSink(upb::BytesSink sink) {
    sink_ = sink;
  }

  size_t ofs() { return ofs_; }

  bool SkippedWithNull() { return skipped_with_null_; }

  upb::Arena* arena() { return &arena_; }
  upb::Status* status() { return &status_; }

 private:
  upb::Arena arena_;
  upb::Status status_;
  upb::BytesSink sink_;
  const char* buf_;
  size_t len_;
  bool verbose_;
  size_t ofs_;
  void *subc_;
  bool expect_error_;
  bool end_ok_;
  bool end_ok_set_;

  /* When our parse call returns a value greater than the number of bytes
   * we passed in, the decoder is indicating to us that the next N bytes
   * in the stream are not needed and can be skipped.  The user is allowed
   * to pass a NULL buffer for those N bytes.
   *
   * skip_until_ is initially set to 0 if we should do this NULL-buffer
   * skipping or -1 if we should not.  If we are open to doing NULL-buffer
   * skipping and we get an opportunity to do it, we set skip_until to the
   * stream offset where we can skip until.  The user can then test whether
   * this happened by testing SkippedWithNull(). */
  int skip_until_;
  bool skipped_with_null_;
};
#endif /* __cplusplus */
/* Reads the whole of |filename| into a freshly malloc()ed, NUL-terminated
 * buffer.
 *
 *   filename: path of the file; opened in binary mode.
 *   len:      if non-NULL, receives the file size in bytes (the terminating
 *             NUL is not counted).
 *
 * Returns the buffer, which the caller must free(), or NULL if the file
 * could not be opened, measured, allocated for or read in full. */
UPB_INLINE char *upb_readfile(const char *filename, size_t *len) {
  long size;
  char *buf = NULL;
  FILE *f = fopen(filename, "rb");
  if(!f) return NULL;
  if(fseek(f, 0, SEEK_END) != 0) goto error;
  size = ftell(f);
  if(size < 0) goto error;
  if(fseek(f, 0, SEEK_SET) != 0) goto error;
  buf = (char*)malloc((size_t)size + 1);
  if(!buf) goto error;
  if(size && fread(buf, size, 1, f) != 1) goto error;
  fclose(f);
  if (len) *len = size;
  buf[size] = '\0';
  return buf;

error:
  /* buf is still NULL if we failed before allocating; free(NULL) is a no-op.
   * Releasing it here keeps a short read from leaking the buffer. */
  free(buf);
  fclose(f);
  return NULL;
}
#include "upb/port_undef.inc"
#endif /* UPB_TEST_UTIL_H_ */

@ -0,0 +1,16 @@
#include <stdlib.h>
#ifdef USE_GOOGLE
#include "base/init_google.h"
#endif
extern "C" {
int run_tests(int argc, char *argv[]);
}
/* Shared test driver: optionally initializes the Google runtime, then hands
 * control to the test's run_tests() and uses its result as the process exit
 * status, so a non-zero result is not silently reported as success. */
int main(int argc, char *argv[]) {
#ifdef USE_GOOGLE
  InitGoogle(NULL, &argc, &argv, true);
#endif
  return run_tests(argc, argv);
}

@ -0,0 +1,53 @@
#ifndef UPB_TEST_H_
#define UPB_TEST_H_
#include <stdio.h>
#include <stdlib.h>
#include <stdint.h>
#ifdef __cplusplus
extern "C" {
#endif
/* Running count of assertions evaluated through ASSERT / ASSERT_STATUS.
 * NOTE(review): this and testhash are definitions, not extern declarations,
 * so this header presumably must be included by only one translation unit
 * per test binary -- confirm. */
int num_assertions = 0;
/* When non-zero, PRINT_FAILURE also prints this value as the hash of the
 * test that was running. */
uint32_t testhash = 0;
/* Prints the failing file/line and expression text to stderr.  Expands to
 * several statements (it is not wrapped in do/while), so it is only safe
 * inside a braced block, as in the ASSERT* macros below. */
#define PRINT_FAILURE(expr) \
fprintf(stderr, "Assertion failed: %s:%d\n", __FILE__, __LINE__); \
fprintf(stderr, "expr: %s\n", #expr); \
if (testhash) { \
fprintf(stderr, "assertion failed running test %x. " \
"Run with the arg %x to run only this test.\n", \
testhash, testhash); \
}
/* Counts the assertion, and if |expr| is false reports it and abort()s. */
#define ASSERT(expr) do { \
++num_assertions; \
if (!(expr)) { \
PRINT_FAILURE(expr) \
abort(); \
} \
} while (0)
/* Same as ASSERT but does not increment num_assertions. */
#define ASSERT_NOCOUNT(expr) do { \
if (!(expr)) { \
PRINT_FAILURE(expr) \
abort(); \
} \
} while (0)
/* Same as ASSERT, and additionally prints upb_status_errmsg(status) on
 * failure.  upb_status_errmsg is not declared by this header; the including
 * file has to provide it. */
#define ASSERT_STATUS(expr, status) do { \
++num_assertions; \
if (!(expr)) { \
PRINT_FAILURE(expr) \
fprintf(stderr, "failed status: %s\n", upb_status_errmsg(status)); \
abort(); \
} \
} while (0)
#ifdef __cplusplus
} /* extern "C" */
#endif
#endif /* UPB_TEST_H_ */

@ -0,0 +1,32 @@
Lunit License
-------------
Lunit is written by Michael Roth <mroth@nessie.de> and is licensed
under the terms of the MIT license reproduced below.
========================================================================
Copyright (c) 2004-2010 Michael Roth <mroth@nessie.de>
Permission is hereby granted, free of charge, to any person
obtaining a copy of this software and associated documentation
files (the "Software"), to deal in the Software without restriction,
including without limitation the rights to use, copy, modify, merge,
publish, distribute, sublicense, and/or sell copies of the Software,
and to permit persons to whom the Software is furnished to do so,
subject to the following conditions:
The above copyright notice and this permission notice shall be
included in all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
========================================================================

@ -0,0 +1,9 @@
URL: https://github.com/dcurrie/lunit
Version: 0.5
License: MIT
License File: LICENSE
Description:
A unit testing library for Lua.
Local Modifications:
Extracted the two files we actually need from the distribution.

@ -0,0 +1,156 @@
--[[--------------------------------------------------------------------------
This file is part of lunit 0.5.
For Details about lunit look at: http://www.mroth.net/lunit/
Author: Michael Roth <mroth@nessie.de>
Copyright (c) 2006-2008 Michael Roth <mroth@nessie.de>
Permission is hereby granted, free of charge, to any person
obtaining a copy of this software and associated documentation
files (the "Software"), to deal in the Software without restriction,
including without limitation the rights to use, copy, modify, merge,
publish, distribute, sublicense, and/or sell copies of the Software,
and to permit persons to whom the Software is furnished to do so,
subject to the following conditions:
The above copyright notice and this permission notice shall be
included in all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
--]]--------------------------------------------------------------------------
--[[
begin()
run(testcasename, testname)
err(fullname, message, traceback)
fail(fullname, where, message, usermessage)
pass(testcasename, testname)
done()
Fullname:
testcase.testname
testcase.testname:setupname
testcase.testname:teardownname
--]]
lunit = require "lunit"
local lunit_console
if _VERSION >= 'Lua 5.2' then
lunit_console = setmetatable({},{__index = _ENV})
_ENV = lunit_console
else
module( "lunit-console", package.seeall )
lunit_console = _M
end
-- Formats the arguments with string.format and writes the result to the
-- default output; no newline is appended.
local function printformat(format, ...)
  local text = string.format(format, ...)
  io.write(text)
end
local columns_printed = 0
-- Writes one status character and flushes.  A line gets a leading indent
-- before its first character, and a new indented line is started once 60
-- characters have been written on the current one.
local function writestatus(char)
  if columns_printed == 0 then
    io.write(" ")
  elseif columns_printed == 60 then
    io.write("\n ")
    columns_printed = 0
  end
  io.write(char)
  io.flush()
  columns_printed = columns_printed + 1
end
local msgs = {}
-- Runner callback "begin": clears the collected messages, counts the
-- registered testcases and their tests, and prints a summary line.
function begin()
  msgs = {}
  local testcase_count, test_count = 0, 0
  for tcname in lunit.testcases() do
    testcase_count = testcase_count + 1
    for _ in lunit.tests(tcname) do
      test_count = test_count + 1
    end
  end
  printformat("Loaded testsuite with %d tests in %d testcases.\n\n", test_count, testcase_count)
end
-- Runner callback "run": this console runner deliberately does nothing here.
function run(testcasename, testname)
-- NOP
end
-- Runner callback "err": prints an "E" status character and queues an error
-- report made of the message followed by the tab-indented traceback lines.
function err(fullname, message, traceback)
  writestatus("E")
  local frames = table.concat(traceback, "\n\t")
  local report_text = "Error! ("..fullname.."):\n"..message.."\n\t"..frames.."\n"
  msgs[#msgs+1] = report_text
end
-- Runner callback "fail": prints an "F" status character and queues a
-- failure report; the optional user message is added on its own line.
function fail(fullname, where, message, usermessage)
  writestatus("F")
  local parts = {
    "Failure ("..fullname.."):\n",
    where..": "..message.."\n",
  }
  if usermessage then
    parts[#parts+1] = where..": "..usermessage.."\n"
  end
  msgs[#msgs+1] = table.concat(parts)
end
-- Runner callback "pass": prints a "." status character; both arguments are
-- unused.
function pass(testcasename, testname)
writestatus(".")
end
-- Runner callback "done": prints the assertion count, every queued
-- failure/error report (numbered), and the final pass/fail/error totals.
function done()
  local totals = lunit.stats
  printformat("\n\n%d Assertions checked.\n", totals.assertions )
  print()
  for index = 1, #msgs do
    printformat( "%3d) %s\n", index, msgs[index] )
  end
  printformat("Testsuite finished (%d passed, %d failed, %d errors).\n",
      totals.passed, totals.failed, totals.errors )
end
return lunit_console

@ -0,0 +1,725 @@
--[[--------------------------------------------------------------------------
This file is part of lunit 0.5.
For Details about lunit look at: http://www.mroth.net/lunit/
Author: Michael Roth <mroth@nessie.de>
Copyright (c) 2004, 2006-2010 Michael Roth <mroth@nessie.de>
Permission is hereby granted, free of charge, to any person
obtaining a copy of this software and associated documentation
files (the "Software"), to deal in the Software without restriction,
including without limitation the rights to use, copy, modify, merge,
publish, distribute, sublicense, and/or sell copies of the Software,
and to permit persons to whom the Software is furnished to do so,
subject to the following conditions:
The above copyright notice and this permission notice shall be
included in all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
--]]--------------------------------------------------------------------------
local orig_assert = assert
local pairs = pairs
local ipairs = ipairs
local next = next
local type = type
local error = error
local tostring = tostring
local setmetatable = setmetatable
local pcall = pcall
local xpcall = xpcall
local require = require
local loadfile = loadfile
local string_sub = string.sub
local string_gsub = string.gsub
local string_format = string.format
local string_lower = string.lower
local string_find = string.find
local table_concat = table.concat
local debug_getinfo = debug.getinfo
local _G = _G
local lunit
if _VERSION >= 'Lua 5.2' then
lunit = {}
_ENV = lunit
else
module("lunit")
lunit = _M
end
local __failure__ = {} -- Type tag for failed assertions
local typenames = { "nil", "boolean", "number", "string", "table", "function", "thread", "userdata" }
local traceback_hide -- Traceback function which hides lunit internals
local mypcall -- Protected call to a function with own traceback
do
local _tb_hide = setmetatable( {}, {__mode="k"} )
-- Registers func in the weak-keyed _tb_hide set; my_traceback leaves
-- registered functions out of the tracebacks it builds.
function traceback_hide(func)
_tb_hide[func] = true
end
-- Message handler that mypcall passes to xpcall.
--
-- For a failure object (a table whose type field is __failure__) it only
-- records errobj.where as "source:line", taken from a fixed stack level.
-- Any other error value is replaced by a table { msg = tostring(value),
-- tb = {...} } whose tb lists one formatted line per stack frame, skipping
-- functions registered with traceback_hide.
local function my_traceback(errobj)
if is_table(errobj) and errobj.type == __failure__ then
-- The level is tied to the exact call depth between the failing assertion
-- and this handler; adding or removing a call layer in between breaks it.
local info = debug_getinfo(5, "Sl") -- FIXME: Hardcoded integers are bad...
errobj.where = string_format( "%s:%d", info.short_src, info.currentline)
else
errobj = { msg = tostring(errobj) }
errobj.tb = {}
local i = 2
while true do
local info = debug_getinfo(i, "Snlf")
-- debug_getinfo returns a non-table once i is past the last frame.
if not is_table(info) then
break
end
if not _tb_hide[info.func] then
local line = {} -- Ripped from ldblib.c...
line[#line+1] = string_format("%s:", info.short_src)
if info.currentline > 0 then
line[#line+1] = string_format("%d:", info.currentline)
end
if info.namewhat ~= "" then
line[#line+1] = string_format(" in function '%s'", info.name)
else
if info.what == "main" then
line[#line+1] = " in main chunk"
elseif info.what == "C" or info.what == "tail" then
line[#line+1] = " ?"
else
line[#line+1] = string_format(" in function <%s:%d>", info.short_src, info.linedefined)
end
end
errobj.tb[#errobj.tb+1] = table_concat(line)
end
i = i + 1
end
end
return errobj
end
-- Calls func under xpcall with my_traceback as the message handler.
-- Returns nothing when func succeeds, otherwise the error object produced
-- by my_traceback.  func must be a function (checked with the original,
-- non-lunit assert).
function mypcall(func)
orig_assert( is_function(func) )
local ok, errobj = xpcall(func, my_traceback)
if not ok then
return errobj
end
end
traceback_hide(mypcall)
end
-- Type check functions
-- Defines lunit.is_nil, lunit.is_boolean, ... one predicate per entry of
-- typenames; each returns whether type(x) equals that type name.
for index = 1, #typenames do
  local typename = typenames[index]
  lunit["is_"..typename] = function(value)
    local matches = (type(value) == typename)
    return matches
  end
end
local is_nil = is_nil
local is_boolean = is_boolean
local is_number = is_number
local is_string = is_string
local is_table = is_table
local is_function = is_function
local is_thread = is_thread
local is_userdata = is_userdata
-- Raises a failure object tagged with __failure__.  defaultmsg is a
-- string.format pattern filled in from the extra arguments; usermsg is the
-- caller-supplied message and may be nil.  This function never returns.
local function failure(name, usermsg, defaultmsg, ...)
  local formatted = string_format(defaultmsg, ...)
  local errobj = {}
  errobj.type = __failure__
  errobj.name = name
  errobj.msg = formatted
  errobj.usermsg = usermsg
  error(errobj, 0)
end
traceback_hide( failure )
-- Renders a value for use in failure messages: strings are wrapped in
-- single quotes, numbers/booleans/nil are shown via tostring, and every
-- other type is shown via tostring inside square brackets.
local function format_arg(arg)
  local argtype = type(arg)
  if argtype == "string" then
    return "'"..arg.."'"
  end
  local text = tostring(arg)
  if argtype == "number" or argtype == "boolean" or argtype == "nil" then
    return text
  end
  return "["..text.."]"
end
-- Returns true when no map is given.  Otherwise converts every value of map
-- with lunitpat2luapat and returns what in_patternmap reports for name.
local function selected(map, name)
  if map then
    local converted = {}
    for key, lunitpat in pairs(map) do
      converted[key] = lunitpat2luapat(lunitpat)
    end
    return in_patternmap(converted, name)
  end
  return true
end
-- Counts one assertion and unconditionally raises a failure carrying msg
-- as the user message.
function fail(msg)
stats.assertions = stats.assertions + 1
failure( "fail", msg, "failure" )
end
traceback_hide( fail )
-- Counts one assertion; fails when assertion is false or nil, otherwise
-- returns assertion unchanged.
function assert(assertion, msg)
  stats.assertions = stats.assertions + 1
  if assertion then
    return assertion
  end
  failure( "assert", msg, "assertion failed" )
  return assertion
end
traceback_hide( assert )
-- Counts one assertion; fails unless actual is exactly true, otherwise
-- returns actual.
function assert_true(actual, msg)
  stats.assertions = stats.assertions + 1
  if actual == true then
    return actual
  end
  failure( "assert_true", msg, "true expected but was %s", format_arg(actual) )
  return actual
end
traceback_hide( assert_true )
-- Counts one assertion; fails unless actual is exactly false, otherwise
-- returns actual.
function assert_false(actual, msg)
  stats.assertions = stats.assertions + 1
  if actual == false then
    return actual
  end
  failure( "assert_false", msg, "false expected but was %s", format_arg(actual) )
  return actual
end
traceback_hide( assert_false )
-- Counts one assertion; fails when expected and actual differ under ==,
-- otherwise returns actual.
function assert_equal(expected, actual, msg)
  stats.assertions = stats.assertions + 1
  if expected == actual then
    return actual
  end
  failure( "assert_equal", msg, "expected %s but was %s", format_arg(expected), format_arg(actual) )
  return actual
end
traceback_hide( assert_equal )
-- Counts one assertion; fails when actual equals unexpected under ==,
-- otherwise returns actual.
function assert_not_equal(unexpected, actual, msg)
  stats.assertions = stats.assertions + 1
  if unexpected ~= actual then
    return actual
  end
  failure( "assert_not_equal", msg, "%s not expected but was one", format_arg(unexpected) )
  return actual
end
traceback_hide( assert_not_equal )
-- Counts one assertion; fails unless pattern and actual are both strings
-- and string.find locates pattern in actual.  Returns actual on success.
-- The checks can be chained because failure() never returns.
function assert_match(pattern, actual, msg)
  stats.assertions = stats.assertions + 1
  if type(pattern) ~= "string" then
    failure( "assert_match", msg, "expected a string as pattern but was %s", format_arg(pattern) )
  elseif type(actual) ~= "string" then
    failure( "assert_match", msg, "expected a string to match pattern '%s' but was a %s", pattern, format_arg(actual) )
  elseif not string_find(actual, pattern) then
    failure( "assert_match", msg, "expected '%s' to match pattern '%s' but doesn't", actual, pattern )
  end
  return actual
end
traceback_hide( assert_match )
-- Counts one assertion; fails unless pattern and actual are both strings
-- and string.find does NOT locate pattern in actual.  Returns actual on
-- success.  The checks can be chained because failure() never returns.
function assert_not_match(pattern, actual, msg)
  stats.assertions = stats.assertions + 1
  if type(pattern) ~= "string" then
    failure( "assert_not_match", msg, "expected a string as pattern but was %s", format_arg(pattern) )
  elseif type(actual) ~= "string" then
    failure( "assert_not_match", msg, "expected a string to not match pattern '%s' but was %s", pattern, format_arg(actual) )
  elseif string_find(actual, pattern) then
    failure( "assert_not_match", msg, "expected '%s' to not match pattern '%s' but it does", actual, pattern )
  end
  return actual
end
traceback_hide( assert_not_match )
-- Counts one assertion; fails unless calling func raises an error.
-- May be called as assert_error(func) or assert_error(msg, func).
function assert_error(msg, func)
  stats.assertions = stats.assertions + 1
  if func == nil then
    -- Single-argument form: the only argument is the function.
    func = msg
    msg = nil
  end
  if type(func) ~= "function" then
    failure( "assert_error", msg, "expected a function as last argument but was %s", format_arg(func) )
  end
  local completed = pcall(func)
  if completed then
    failure( "assert_error", msg, "error expected but no error occurred" )
  end
end
traceback_hide( assert_error )
-- Counts one assertion; fails unless calling func raises a string error
-- that string.find matches against pattern.  May be called as
-- assert_error_match(pattern, func) or assert_error_match(msg, pattern, func).
function assert_error_match(msg, pattern, func)
  stats.assertions = stats.assertions + 1
  if func == nil then
    -- Two-argument form: shift the arguments one slot to the right.
    func = pattern
    pattern = msg
    msg = nil
  end
  if type(pattern) ~= "string" then
    failure( "assert_error_match", msg, "expected the pattern as a string but was %s", format_arg(pattern) )
  end
  if type(func) ~= "function" then
    failure( "assert_error_match", msg, "expected a function as last argument but was %s", format_arg(func) )
  end
  local completed, errmsg = pcall(func)
  if completed then
    failure( "assert_error_match", msg, "error expected but no error occurred" )
  elseif type(errmsg) ~= "string" then
    failure( "assert_error_match", msg, "error as string expected but was %s", format_arg(errmsg) )
  elseif not string_find(errmsg, pattern) then
    failure( "assert_error_match", msg, "expected error '%s' to match pattern '%s' but doesn't", errmsg, pattern )
  end
end
traceback_hide( assert_error_match )
-- Counts one assertion; fails if calling func raises an error.
-- May be called as assert_pass(func) or assert_pass(msg, func).
function assert_pass(msg, func)
  stats.assertions = stats.assertions + 1
  if func == nil then
    func, msg = msg, nil
  end
  if type(func) ~= "function" then
    failure( "assert_pass", msg, "expected a function as last argument but was %s", format_arg(func) )
  end
  local ok, errmsg = pcall(func)
  if not ok then
    -- The error value need not be a string (error() accepts any value).
    -- Lua 5.1's string.format rejects non-strings for %s, which would
    -- replace this failure with an unrelated formatting error, so convert
    -- explicitly.  For string errors the message is unchanged.
    failure( "assert_pass", msg, "no error expected but error was: '%s'", tostring(errmsg) )
  end
end
traceback_hide( assert_pass )
-- lunit.assert_typename functions
-- Defines lunit.assert_nil, lunit.assert_boolean, ... one per entry of
-- typenames.  Each counts one assertion, fails unless type(actual) is that
-- type name, and returns actual.  Every generated function is hidden from
-- tracebacks.
for index = 1, #typenames do
  local typename = typenames[index]
  local assert_typename = "assert_"..typename
  local function check(actual, msg)
    stats.assertions = stats.assertions + 1
    if type(actual) == typename then
      return actual
    end
    failure( assert_typename, msg, "%s expected but was %s", typename, format_arg(actual) )
    return actual
  end
  lunit[assert_typename] = check
  traceback_hide( lunit[assert_typename] )
end
-- lunit.assert_not_typename functions
-- Defines lunit.assert_not_nil, lunit.assert_not_boolean, ... one per entry
-- of typenames.  Each counts one assertion and fails when type(actual) is
-- that type name; nothing is returned.  Every generated function is hidden
-- from tracebacks.
for index = 1, #typenames do
  local typename = typenames[index]
  local assert_not_typename = "assert_not_"..typename
  local function check(actual, msg)
    stats.assertions = stats.assertions + 1
    if type(actual) ~= typename then
      return
    end
    failure( assert_not_typename, msg, typename.." not expected but was one" )
  end
  lunit[assert_not_typename] = check
  traceback_hide( lunit[assert_not_typename] )
end
-- Replaces `stats` with a fresh table in which every counter is zero.
function lunit.clearstats()
  stats = { assertions = 0, passed = 0, failed = 0, errors = 0 }
end
-- Test-runner plumbing: runner registration on `lunit`, plus the file-local
-- helpers `report` and `reporterrobj` that forward events to the runner.
local report, reporterrobj
do
  -- The installed runner table, or nil when none has been set.
  local testrunner

  -- Installs `newrunner` (a table or nil) and returns the previous runner.
  function lunit.setrunner(newrunner)
    if not is_table(newrunner) and not is_nil(newrunner) then
      return error("lunit.setrunner: Invalid argument", 0)
    end
    local previous = testrunner
    testrunner = newrunner
    return previous
  end

  -- Loads the runner module `name` with require() and installs it.
  function lunit.loadrunner(name)
    if not is_string(name) then
      return error("lunit.loadrunner: Invalid argument", 0)
    end
    local loaded, result = pcall( require, name )
    if not loaded then
      return error("lunit.loadrunner: Can't load test runner: "..result, 0)
    end
    return setrunner(result)
  end

  -- Returns the installed runner (nil if none).
  function lunit.getrunner()
    return testrunner
  end

  -- Invokes the runner's handler for `event`, if it has one.  The handler is
  -- run under pcall, so an error raised inside it is discarded.
  function report(event, ...)
    local handler = testrunner and testrunner[event]
    if not is_function(handler) then
      return
    end
    pcall(handler, ...)
  end

  -- Records a failed or errored call in `stats` and reports it.  For the
  -- "setup"/"teardown" contexts the fixture function's name is appended to
  -- the reported test name.
  function reporterrobj(context, tcname, testname, errobj)
    local fullname = tcname .. "." .. testname
    if context == "setup" then
      fullname = fullname .. ":" .. setupname(tcname, testname)
    elseif context == "teardown" then
      fullname = fullname .. ":" .. teardownname(tcname, testname)
    end

    local is_failure = (errobj.type == __failure__)
    if is_failure then
      stats.failed = stats.failed + 1
      report("fail", fullname, errobj.where, errobj.msg, errobj.usermsg)
    else
      stats.errors = stats.errors + 1
      report("err", fullname, errobj.msg, errobj.tb)
    end
  end
end
-- Iterator step that yields only the next key of `t` (next()'s value result
-- is dropped).
local function key_iter(t, k)
  local following_key = next(t, k)
  return following_key
end
local testcase
do
  -- All registered testcases, keyed by testcase name.
  local _testcases = {}

  -- Marks a module as a testcase.
  -- Applied over a module from module("xyz", lunit.testcase).
  function lunit.testcase(m)
    orig_assert( is_table(m) )
    --orig_assert( m._M == m )
    orig_assert( is_string(m._NAME) )
    --orig_assert( is_string(m._PACKAGE) )

    -- Register the module under its name.
    _testcases[m._NAME] = m

    -- Export lunit itself, fail, and every assert*/is_* function into the
    -- module so tests can call them unqualified.
    m.lunit = lunit
    m.fail = lunit.fail
    for funcname, func in pairs(lunit) do
      local exported = string_sub(funcname, 1, 6) == "assert"
                    or string_sub(funcname, 1, 3) == "is_"
      if exported then
        m[funcname] = func
      end
    end
  end

  -- Creates, registers and returns a new testcase table called `name`.
  -- Passing "seeall" makes globals readable through the new table.
  function lunit.module(name,seeall)
    local m = {}
    if seeall == "seeall" then
      setmetatable(m, { __index = _G })
    end
    m._NAME = name
    lunit.testcase(m)
    return m
  end

  -- Iterator (testcasename) over all Testcases
  function lunit.testcases()
    -- Iterate over a snapshot of the names so that testcases defined while
    -- iterating cannot confuse the iterator.
    local names = {}
    for name in pairs(_testcases) do
      names[name] = true
    end
    return key_iter, names, nil
  end

  -- Looks up a registered testcase by name (nil if unknown).
  function testcase(tcname)
    return _testcases[tcname]
  end
end
do
  -- Returns the key of the function in testcase `tcname` whose lower-cased
  -- name equals `name`, or nothing when there is no such function.
  local function findfuncname(tcname, name)
    for key, value in pairs(testcase(tcname)) do
      local candidate = is_string(key) and is_function(value)
      if candidate and string_lower(key) == name then
        return key
      end
    end
  end

  function lunit.setupname(tcname)
    return findfuncname(tcname, "setup")
  end

  function lunit.teardownname(tcname)
    return findfuncname(tcname, "teardown")
  end

  -- Iterator over all test names in a testcase: functions whose lower-cased
  -- name starts or ends with "test".
  -- Have to collect the names first in case one of the test
  -- functions creates a new global and throws off the iteration.
  function lunit.tests(tcname)
    local testnames = {}
    for key, value in pairs(testcase(tcname)) do
      if is_string(key) and is_function(value) then
        local lowered = string_lower(key)
        local has_prefix = string_sub(lowered, 1, 4) == "test"
        if has_prefix or string_sub(lowered, -4) == "test" then
          testnames[key] = true
        end
      end
    end
    return key_iter, testnames, nil
  end
end
-- Runs a single test: setup, the test itself, then teardown.  The test and
-- teardown are skipped when setup fails.  Loads the "console" runner first
-- if no runner is installed.
function lunit.runtest(tcname, testname)
  orig_assert( is_string(tcname) )
  orig_assert( is_string(testname) )

  if not getrunner() then
    loadrunner("console")
  end

  -- Calls `func` (when present) via mypcall; reports any error object under
  -- `context` and returns false, otherwise returns true.
  local function callit(context, func)
    if not func then
      return true
    end
    local err = mypcall(func)
    if err then
      reporterrobj(context, tcname, testname, err)
      return false
    end
    return true
  end
  traceback_hide(callit)

  report("run", tcname, testname)

  local tc = testcase(tcname)
  local setup = tc[setupname(tcname)]
  local test = tc[testname]
  local teardown = tc[teardownname(tcname)]

  local setup_ok = callit( "setup", setup )
  local test_ok = setup_ok and callit( "test", test )
  local teardown_ok = setup_ok and callit( "teardown", teardown )

  local everything_ok = setup_ok and test_ok and teardown_ok
  if everything_ok then
    stats.passed = stats.passed + 1
    report("pass", tcname, testname)
  end
end
traceback_hide(runtest)
-- Resets the statistics, runs every registered test accepted by
-- selected(testpatterns, testname), and returns the stats table.
function lunit.run(testpatterns)
  clearstats()
  report("begin")
  for tcname in lunit.testcases() do
    for testname in lunit.tests(tcname) do
      local wanted = selected(testpatterns, testname)
      if wanted then
        runtest(tcname, testname)
      end
    end
  end
  report("done")
  return stats
end
traceback_hide(run)
-- Resets the statistics and emits the "begin"/"done" events without running
-- any test; returns the (all-zero) stats table.
function lunit.loadonly()
  clearstats()
  report("begin")
  report("done")
  return stats
end
local lunitpat2luapat
do
  -- Replacement for each non-alphanumeric character: the wildcards "?" and
  -- "*" become their Lua-pattern equivalents, all listed magic characters are
  -- escaped with "%".
  local conv = {
    ["?"] = ".",
    ["*"] = ".*",
    ["%"] = "%%",
    ["."] = "%.",
    ["^"] = "%^",
    ["$"] = "%$",
    ["("] = "%(",
    [")"] = "%)",
    ["["] = "%[",
    ["]"] = "%]",
    ["+"] = "%+",
    ["-"] = "%-"
  }

  -- Converts a lunit wildcard pattern into a Lua pattern.
  function lunitpat2luapat(str)
    --return "^" .. string.gsub(str, "%W", conv) .. "$"
    -- Above was very annoying, if I want to run all the tests having to do with
    -- RSS, I want to be able to do "-t rss" not "-t \*rss\*".
    return string_gsub(str, "%W", conv)
  end
end
-- Returns true when `name` is a key of `map` mapped to true, or when `name`
-- matches any of the patterns stored in the array part of `map`.
local function in_patternmap(map, name)
  if map[name] == true then
    return true
  end
  for _, pat in ipairs(map) do
    if string_find(name, pat) then
      return true
    end
  end
  return false
end
-- Called from 'lunit' shell script.
-- Parses the command-line style argument array `argv`, loading every test
-- script named on it, then either runs the loaded tests or (with --loadonly)
-- only emits the begin/done events.  Returns the stats table, or nothing
-- when --help was requested.
function main(argv)
argv = argv or {}
-- FIXME: Error handling and error messages aren't nice.
-- Raises an error unless the option `optname` was followed by a string.
local function checkarg(optname, arg)
if not is_string(arg) then
return error("lunit.main: option "..optname..": argument missing.", 0)
end
end
-- Loads the Lua file `filename` and executes it; load errors are re-raised.
local function loadtestcase(filename)
if not is_string(filename) then
return error("lunit.main: invalid argument")
end
local chunk, err = loadfile(filename)
if err then
return error(err)
else
chunk()
end
end
-- Patterns collected from -t/--test; stays nil when the option is not given.
local testpatterns = nil
local doloadonly = false
local i = 0
while i < #argv do
i = i + 1
local arg = argv[i]
if arg == "--loadonly" then
doloadonly = true
elseif arg == "--runner" or arg == "-r" then
-- The option's value is the next argv entry.
local optname = arg; i = i + 1; arg = argv[i]
checkarg(optname, arg)
loadrunner(arg)
elseif arg == "--test" or arg == "-t" then
-- The option's value is the next argv entry.
local optname = arg; i = i + 1; arg = argv[i]
checkarg(optname, arg)
testpatterns = testpatterns or {}
testpatterns[#testpatterns+1] = arg
elseif arg == "--help" or arg == "-h" then
print[[
lunit 0.5
Copyright (c) 2004-2009 Michael Roth <mroth@nessie.de>
This program comes WITHOUT WARRANTY OF ANY KIND.
Usage: lua test [OPTIONS] [--] scripts
Options:
-r, --runner RUNNER Testrunner to use, defaults to 'lunit-console'.
-t, --test PATTERN Which tests to run, may contain * or ? wildcards.
--loadonly Only load the tests.
-h, --help Print this help screen.
Please report bugs to <mroth@nessie.de>.
]]
return
elseif arg == "--" then
-- Everything after "--" is treated as a script, even if it looks like an
-- option.
while i < #argv do
i = i + 1; arg = argv[i]
loadtestcase(arg)
end
else
-- Any other argument is a test script to load.
loadtestcase(arg)
end
end
if doloadonly then
return loadonly()
else
return run(testpatterns)
end
end
clearstats()
return lunit

@ -0,0 +1,81 @@
#!/usr/bin/python
import sys
import re
import os
# Matches a line consisting solely of a quoted `#include "path"` directive and
# captures the path.
INCLUDE_RE = re.compile('^#include "([^"]*)"$')


def parse_include(line):
  """Returns the path named by a quoted #include line, or None if no match."""
  found = INCLUDE_RE.match(line)
  if not found:
    return None
  return found.group(1)
class Amalgamator:
  """Merges upb sources and the headers they include into upb.h and upb.c.

  Source files are appended to `<output_path>upb.c`.  Every quoted #include
  whose path starts with "upb" or "google" is inlined, once, into
  `<output_path>upb.h`; all other lines are copied through unchanged.

  Call finish() when done: it writes the trailing port_undef.inc section and
  closes both output files.
  """

  def __init__(self, output_path):
    self.include_paths = ["."]
    # The port files are written explicitly at the start and end of each
    # output, so they are pre-marked as included to stop them being inlined.
    self.included = set(["upb/port_def.inc", "upb/port_undef.inc"])

    # Read the prologue first so that a missing input does not leave freshly
    # truncated output files behind.
    port_def = self._read_file("upb/port_def.inc")

    self.output_h = open(output_path + "upb.h", "w")
    self.output_c = open(output_path + "upb.c", "w")

    self.output_c.write("/* Amalgamated source file */\n")
    self.output_c.write('#include "upb.h"\n')
    self.output_c.write(port_def)

    self.output_h.write("/* Amalgamated source file */\n")
    # Terminate the directive so that it cannot run into the first line of
    # port_def.inc.
    self.output_h.write('#include <stdint.h>\n')
    self.output_h.write(port_def)

  @staticmethod
  def _read_file(path):
    """Returns the full contents of `path`, closing the file afterwards."""
    with open(path) as f:
      return f.read()

  def add_include_path(self, path):
    """Adds a directory to search when resolving input file names."""
    self.include_paths.append(path)

  def finish(self):
    """Writes the port_undef.inc epilogue and closes both output files."""
    try:
      port_undef = self._read_file("upb/port_undef.inc")
      self.output_c.write(port_undef)
      self.output_h.write(port_undef)
    finally:
      # Close explicitly so the data is flushed even if the interpreter is
      # torn down without finalizing the file objects.
      self.output_c.close()
      self.output_h.close()

  def _process_file(self, infile_name, outfile):
    """Copies `infile_name` to `outfile`, inlining upb/google includes.

    Raises:
      RuntimeError: if the file cannot be opened under any include path.
    """
    infile = None
    for path in self.include_paths:
      try:
        infile = open(os.path.join(path, infile_name))
        break
      except IOError:
        pass
    if infile is None:
      raise RuntimeError("Couldn't open file " + infile_name)

    with infile:
      for line in infile:
        include = parse_include(line)
        if include is not None and (include.startswith("upb") or
                                    include.startswith("google")):
          # The #include line itself is dropped; the header body is emitted
          # into upb.h the first time it is seen.
          if include not in self.included:
            self.included.add(include)
            self._add_header(include)
        else:
          outfile.write(line)

  def _add_header(self, filename):
    self._process_file(filename, self.output_h)

  def add_src(self, filename):
    """Appends the source file `filename` to the amalgamated upb.c."""
    self._process_file(filename, self.output_c)
# ---- main ----
#
# Usage: <output_path> [-I<include_dir> | <file>]...
# Header (.h/.inc) arguments are ignored; they are only pulled in through the
# #include lines of the sources.

output_path = sys.argv[1]
amalgamator = Amalgamator(output_path)
files = []

for arg in sys.argv[2:]:
  arg = arg.strip()
  if arg.startswith("-I"):
    amalgamator.add_include_path(arg[2:])
    continue
  if arg.endswith(".h") or arg.endswith(".inc"):
    continue
  files.append(arg)

for filename in files:
  amalgamator.add_src(filename)

amalgamator.finish()

@ -0,0 +1,279 @@
#!/usr/bin/env python
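"""Generates a CMakeLists.txt from the Bazel BUILD and WORKSPACE files.

Executes WORKSPACE and BUILD as Python code against stub implementations of
the Bazel functions, collects CMake commands for the cc_library() targets, and
writes the resulting CMake file to the path given as the first argument.
"""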
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import sys
import textwrap
import os
def StripColons(deps):
  """Returns a list of `deps` with the first character of each entry removed.

  Entries are expected to be package-local labels such as ":upb"; dropping the
  leading colon leaves the bare target name.
  """
  # Build a real list: on Python 3 map() returns a one-shot iterator, which
  # cannot be indexed, measured with len(), or iterated a second time.
  return [dep[1:] for dep in deps]
def IsSourceFile(name):
  """Returns True if `name` has a C (.c) or C++ (.cc) source extension."""
  return name.endswith((".c", ".cc"))
class BuildFileFunctions(object):
  """Stand-ins for the functions a BUILD file calls when it is exec()'d.

  Only cc_library() emits CMake output (appended to `converter.toplevel`).
  Every other rule is accepted and ignored so that the BUILD file runs to
  completion.
  """

  def __init__(self, converter):
    self.converter = converter

  def _add_deps(self, kwargs, keyword=""):
    """Emits target_link_libraries() for the rule if it declares "deps"."""
    if "deps" in kwargs:
      self.converter.toplevel += "target_link_libraries(%s%s\n %s)\n" % (
          kwargs["name"],
          keyword,
          "\n ".join(StripColons(kwargs["deps"]))
      )

  def load(self, *args):
    pass

  def cc_library(self, **kwargs):
    """Emits add_library() (plus link dependencies) for a cc_library rule."""
    if kwargs["name"] in ("amalgamation", "upbc_generator"):
      return

    declared = kwargs.get("srcs", []) + kwargs.get("hdrs", [])

    # Resolve each declared file either in the source tree or among the
    # checked-in generated files; anything else is only warned about.
    located = []
    for path in declared:
      generated_path = "generated_for_cmake/" + path
      if os.path.isfile(path):
        located.append(path)
      elif os.path.isfile(generated_path):
        located.append(generated_path)
      else:
        print("Warning: no such file: " + path)

    if any(IsSourceFile(path) for path in declared):
      # Has sources, make this a normal library.
      self.converter.toplevel += "add_library(%s\n %s)\n" % (
          kwargs["name"],
          "\n ".join(located)
      )
      self._add_deps(kwargs)
    else:
      # Header-only library, have to do a couple things differently.
      # For some info, see:
      #  http://mariobadr.com/creating-a-header-only-library-with-cmake.html
      self.converter.toplevel += "add_library(%s INTERFACE)\n" % (
          kwargs["name"]
      )
      self._add_deps(kwargs, " INTERFACE")

  def cc_binary(self, **kwargs):
    pass

  def cc_test(self, **kwargs):
    # Disable this until we properly support upb_proto_library().
    # self.converter.toplevel += "add_executable(%s\n %s)\n" % (
    #     kwargs["name"],
    #     "\n ".join(kwargs["srcs"])
    # )
    # self.converter.toplevel += "add_test(NAME %s COMMAND %s)\n" % (
    #     kwargs["name"],
    #     kwargs["name"],
    # )
    # if "data" in kwargs:
    #   for data_dep in kwargs["data"]:
    #     self.converter.toplevel += textwrap.dedent("""\
    #       add_custom_command(
    #       TARGET %s POST_BUILD
    #       COMMAND ${CMAKE_COMMAND} -E copy
    #       ${CMAKE_SOURCE_DIR}/%s
    #       ${CMAKE_CURRENT_BINARY_DIR}/%s)\n""" % (
    #       kwargs["name"], data_dep, data_dep
    #     ))
    # self._add_deps(kwargs)
    pass

  # --- Rules with no CMake equivalent: accepted and ignored. ---

  def py_library(self, **kwargs):
    pass

  def py_binary(self, **kwargs):
    pass

  def lua_cclibrary(self, **kwargs):
    pass

  def lua_library(self, **kwargs):
    pass

  def lua_binary(self, **kwargs):
    pass

  def lua_test(self, **kwargs):
    pass

  def sh_test(self, **kwargs):
    pass

  def make_shell_script(self, **kwargs):
    pass

  def exports_files(self, files, **kwargs):
    pass

  def proto_library(self, **kwargs):
    pass

  def generated_file_staleness_test(self, **kwargs):
    pass

  def upb_amalgamation(self, **kwargs):
    pass

  def upb_proto_library(self, **kwargs):
    pass

  def upb_proto_reflection_library(self, **kwargs):
    pass

  def upb_proto_srcs(self, **kwargs):
    pass

  def genrule(self, **kwargs):
    pass

  def config_setting(self, **kwargs):
    pass

  # --- Helpers whose result is used inside rule arguments. ---

  def select(self, arg_dict):
    # Always yields an empty list, regardless of the conditions given.
    return []

  def glob(self, *args):
    # Always yields an empty list; no patterns are expanded.
    return []

  def licenses(self, *args):
    pass

  def filegroup(self, **kwargs):
    pass

  def map_dep(self, arg):
    return arg
class WorkspaceFileFunctions(object):
  """Stand-ins for the functions a WORKSPACE file calls when it is exec()'d.

  Only workspace() has an effect: it appends a CMake project() line to
  `converter.prelude`.  All other functions are ignored.
  """

  def __init__(self, converter):
    self.converter = converter

  def load(self, *args):
    pass

  def workspace(self, **kwargs):
    project_line = "project(%s)\n" % (kwargs["name"])
    self.converter.prelude += project_line

  def http_archive(self, **kwargs):
    pass

  def git_repository(self, **kwargs):
    pass

  def bazel_version_repository(self, **kwargs):
    pass

  def upb_deps(self):
    pass
class Converter(object):
  """Accumulates CMake text and renders the final CMakeLists.txt.

  Attributes written by the BUILD/WORKSPACE stub functions:
    prelude: text placed near the top of the file (the project() line).
    toplevel: the library targets, placed at the end of the file.
  """

  def __init__(self):
    self.prelude = ""
    self.toplevel = ""
    self.if_lua = ""

  def convert(self):
    """Returns the complete CMakeLists.txt contents as a string."""
    # Read this instance's own state rather than a module-level `converter`
    # global, so the method works for any Converter object.
    return self.template % {
        "prelude": self.prelude,
        "toplevel": self.toplevel,
    }

  # The UPB_ENABLE_UBSAN section passes -fsanitize=undefined for every flag
  # variable; only the UPB_ENABLE_ASAN section enables the address sanitizer.
  template = textwrap.dedent("""\
    # This file was generated from BUILD using tools/make_cmakelists.py.
    cmake_minimum_required(VERSION 3.1)
    if(${CMAKE_VERSION} VERSION_LESS 3.12)
    cmake_policy(VERSION ${CMAKE_MAJOR_VERSION}.${CMAKE_MINOR_VERSION})
    else()
    cmake_policy(VERSION 3.12)
    endif()
    cmake_minimum_required (VERSION 3.0)
    cmake_policy(SET CMP0048 NEW)
    %(prelude)s
    # Prevent CMake from setting -rdynamic on Linux (!!).
    SET(CMAKE_SHARED_LIBRARY_LINK_C_FLAGS "")
    SET(CMAKE_SHARED_LIBRARY_LINK_CXX_FLAGS "")
    # Set default build type.
    if(NOT CMAKE_BUILD_TYPE)
    message(STATUS "Setting build type to 'RelWithDebInfo' as none was specified.")
    set(CMAKE_BUILD_TYPE "RelWithDebInfo" CACHE STRING
    "Choose the type of build, options are: Debug Release RelWithDebInfo MinSizeRel."
    FORCE)
    endif()
    # When using Ninja, compiler output won't be colorized without this.
    include(CheckCXXCompilerFlag)
    CHECK_CXX_COMPILER_FLAG(-fdiagnostics-color=always SUPPORTS_COLOR_ALWAYS)
    if(SUPPORTS_COLOR_ALWAYS)
    set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fdiagnostics-color=always")
    endif()
    # Implement ASAN/UBSAN options
    if(UPB_ENABLE_ASAN)
    set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fsanitize=address")
    set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -fsanitize=address")
    set(CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} -fsanitize=address")
    set(CMAKE_SHARED_LINKER_FLAGS "${CMAKE_SHARED_LINKER_FLAGS} -fsanitize=address")
    endif()
    if(UPB_ENABLE_UBSAN)
    set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fsanitize=undefined")
    set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -fsanitize=undefined")
    set(CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} -fsanitize=undefined")
    set(CMAKE_SHARED_LINKER_FLAGS "${CMAKE_SHARED_LINKER_FLAGS} -fsanitize=undefined")
    endif()
    include_directories(.)
    include_directories(generated_for_cmake)
    include_directories(${CMAKE_CURRENT_BINARY_DIR})
    if(APPLE)
    set(CMAKE_SHARED_LINKER_FLAGS "${CMAKE_SHARED_LINKER_FLAGS} -undefined dynamic_lookup -flat_namespace")
    elseif(UNIX)
    set(CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} -Wl,--build-id")
    endif()
    enable_testing()
    %(toplevel)s
    """)
# NOTE(review): `data` is not referenced anywhere in the visible code.
data = {}
# Shared accumulator that the WORKSPACE/BUILD stub functions append to.
converter = Converter()
def GetDict(obj):
  """Returns a dict mapping each public attribute name of `obj` to its value.

  Names starting with "_" are skipped.  The result serves as the globals
  namespace in which a BUILD or WORKSPACE file is executed.
  """
  return {
      name: getattr(obj, name)
      for name in dir(obj)
      if not name.startswith("_")
  }
globs = GetDict(converter)

# Execute WORKSPACE and BUILD as Python, with the stub objects' public methods
# as their global namespace.  The files are read inside `with` blocks so that
# their handles are closed promptly instead of being left to the garbage
# collector.
with open("WORKSPACE") as workspace_file:
  exec(workspace_file.read(), GetDict(WorkspaceFileFunctions(converter)))
with open("BUILD") as build_file:
  exec(build_file.read(), GetDict(BuildFileFunctions(converter)))

# Write the rendered CMake file to the path given as the first argument.
with open(sys.argv[1], "w") as f:
  f.write(converter.convert())

@ -0,0 +1,30 @@
"""The py_test() script for generated_file_staleness_test() rules.
Note that this file is preprocessed! The INSERT_<...> text below is replaced
with the actual list of files before we actually run the script.
"""
from __future__ import absolute_import
from tools import staleness_test_lib
import unittest
import sys
# The placeholder line below is substituted with the real whitespace-separated
# file list before this script runs (see the module docstring).
file_list = """
INSERT_FILE_LIST_HERE
""".split()
# Parsed test configuration shared by the unit test and the --fix mode.
config = staleness_test_lib.Config(file_list)
class TestFilesMatch(unittest.TestCase):
  """Fails when any checked-in file differs from its generated counterpart."""

  def testFilesMatch(self):
    mismatch_report = staleness_test_lib.CheckFilesMatch(config)
    # The report doubles as the failure message shown to the user.
    self.assertFalse(mismatch_report, mismatch_report)
# With "--fix" as the first argument the stale files are overwritten;
# otherwise the script runs as an ordinary unittest.
if sys.argv[1:2] == ["--fix"]:
  staleness_test_lib.FixFiles(config)
else:
  unittest.main()

@ -0,0 +1,158 @@
"""Shared code for validating generated_file_staleness_test() rules.
This code is used by test scripts generated from
generated_file_staleness_test() rules.
"""
from __future__ import absolute_import
from __future__ import print_function
import os
from shutil import copyfile
class _FilePair(object):
"""Represents a single (target, generated) file pair."""
def __init__(self, target, generated):
self.target = target
self.generated = generated
class Config(object):
  """Represents the configuration for a single staleness test target.

  The last three entries of the incoming list are, from the end backwards,
  the target name, the package name and the generated-file name pattern; the
  remaining entries are the files to check.
  """

  def __init__(self, file_list):
    # Work on a copy so the caller's list is left untouched.
    entries = list(file_list)

    # The file list contains a few other bits of information at the end.
    # This is packed by the code in build_defs.bzl.
    self.target_name = entries.pop()
    self.package_name = entries.pop()
    self.pattern = entries.pop()

    self.file_list = entries
def _GetFilePairs(config):
  """Generates the list of file pairs.

  Generated paths are looked up under "bazel-genfiles" when that directory
  exists in the current working directory.  A missing generated file is only
  reported on stdout; its pair is still returned.

  Args:
    config: a Config object representing this target's config.

  Returns:
    A list of _FilePair objects.
  """
  pairs = []
  use_genfiles_dir = os.path.exists("bazel-genfiles")

  for filename in config.file_list:
    target = os.path.join(config.package_name, filename)
    generated = os.path.join(config.package_name, config.pattern % filename)
    if use_genfiles_dir:
      generated = os.path.join("bazel-genfiles", generated)

    # Generated files should always exist.  Blaze should guarantee this before
    # we are run.
    generated_exists = os.path.isfile(generated)
    if not generated_exists:
      print("Generated file '%s' does not exist." % generated)
      print("Please run this command to generate it:")
      print(" bazel build %s:%s" % (config.package_name, config.target_name))
    pairs.append(_FilePair(target, generated))

  return pairs
def _GetMissingAndStaleFiles(file_pairs):
"""Generates lists of missing and stale files.
Args:
file_pairs: a list of _FilePair objects.
Returns:
missing_files: a list of _FilePair objects representing missing files.
These target files do not exist at all.
stale_files: a list of _FilePair objects representing stale files.
These target files exist but have stale contents.
"""
missing_files = []
stale_files = []
for pair in file_pairs:
if not os.path.isfile(pair.target):
missing_files.append(pair)
continue
generated = open(pair.generated).read()
target = open(pair.target).read()
if generated != target:
stale_files.append(pair)
return missing_files, stale_files
def _CopyFiles(file_pairs):
"""Copies all generated files to the corresponding target file.
The target files must be writable already.
Args:
file_pairs: a list of _FilePair objects that we want to copy.
"""
for pair in file_pairs:
target_dir = os.path.dirname(pair.target)
if not os.path.isdir(target_dir):
os.makedirs(target_dir)
copyfile(pair.generated, pair.target)
def FixFiles(config):
  """Implements the --fix option: overwrites missing or out-of-date files.

  Args:
    config: the Config object for this test.
  """
  missing, stale = _GetMissingAndStaleFiles(_GetFilePairs(config))
  # Stale files are copied first, then the missing ones.
  _CopyFiles(stale + missing)
def CheckFilesMatch(config):
  """Checks whether each target file matches the corresponding generated file.

  Args:
    config: the Config object for this test.

  Returns:
    None if everything matches, otherwise a string error message.
  """
  file_pairs = _GetFilePairs(config)
  missing_files, stale_files = _GetMissingAndStaleFiles(file_pairs)

  # Missing files are listed before stale ones.
  diff_errors = ["File %s does not exist" % pair.target
                 for pair in missing_files]
  diff_errors += ["File %s is out of date" % pair.target
                  for pair in stale_files]

  if not diff_errors:
    return None

  message_parts = [
      "Files out of date!\n\n",
      "To fix run THIS command:\n",
      " bazel-bin/%s/%s --fix\n\n" % (config.package_name,
                                      config.target_name),
      "Errors:\n",
      " " + "\n ".join(diff_errors),
  ]
  return "".join(message_parts)

@ -0,0 +1,5 @@
This directory contains code that interfaces upb with external C/C++
libraries. Right now this is:
* upb/bindings/lua:
a Lua extension that exposes upb to Lua programs via the Lua C API.

@ -0,0 +1,766 @@
#include <float.h>
#include <math.h>
#include <stdlib.h>
#include <string.h>
#include "lauxlib.h"
#include "upb/bindings/lua/upb.h"
#include "upb/def.h"
#define LUPB_ENUMDEF "lupb.enumdef"
#define LUPB_FIELDDEF "lupb.fielddef"
#define LUPB_FILEDEF "lupb.filedef"
#define LUPB_MSGDEF "lupb.msgdef"
#define LUPB_ONEOFDEF "lupb.oneof"
#define LUPB_SYMTAB "lupb.symtab"
#define LUPB_OBJCACHE "lupb.objcache"
/* Runs the statement `pred` with a freshly cleared local `upb_status status`
 * in scope (so `pred` can pass `&status`), then hands that status to
 * lupb_checkstatus().  Requires a `lua_State *L` in the enclosing scope. */
#define CHK(pred) \
do { \
upb_status status; \
upb_status_clear(&status); \
pred; \
lupb_checkstatus(L, &status); \
} while (0)
/* lupb_wrapper ***************************************************************/
/* Wrappers around upb objects. */
/* Checks type; if it matches, pulls the pointer out of the wrapper.
 *
 * Raises a Lua error if argument `narg` is not a userdata, is not a userdata
 * whose metatable is the one registered as `type`, or wraps a NULL pointer. */
void *lupb_checkwrapper(lua_State *L, int narg, const char *type) {
  void *ud = lua_touserdata(L, narg);
  void *ret;

  if (!ud) {
    luaL_typerror(L, narg, "upb wrapper");
  }

  /* Validate the type before touching the payload: a userdata of some other
   * type (or a light userdata) need not point at storage large enough to
   * hold a pointer, so reading it first could read out of bounds. */
  ud = luaL_checkudata(L, narg, type);

  memcpy(&ret, ud, sizeof(ret));
  if (!ret) {
    luaL_error(L, "called into dead object");
  }

  return ret;
}
/* Pushes the Lua wrapper for `obj` (or nil if `obj` is NULL).
 *
 * Wrappers are userdata holding a copy of the `obj` pointer, with the
 * metatable registered as `type`.  They are cached in the LUPB_OBJCACHE
 * registry table, keyed by the pointer as a light userdata, so pushing the
 * same object again yields the cached wrapper. */
void lupb_pushwrapper(lua_State *L, const void *obj, const char *type) {
  void *ud;

  if (obj == NULL) {
    lua_pushnil(L);
    return;
  }

  /* Lookup our cache in the registry (we don't put our objects in the registry
   * directly because we need our cache to be a weak table). */
  lua_getfield(L, LUA_REGISTRYINDEX, LUPB_OBJCACHE);
  UPB_ASSERT(!lua_isnil(L, -1)); /* Should have been created by luaopen_upb. */
  lua_pushlightuserdata(L, (void*)obj);
  lua_rawget(L, -2);
  /* Stack is now: objcache, cached value. */

  if (lua_isnil(L, -1)) {
    /* Remove bad cached value and push new value. */
    lua_pop(L, 1);

    /* The payload is the pointer itself, so size it by the pointer.  `ud` is
     * a `void *`, so `sizeof(*ud)` would be `sizeof(void)`: invalid in ISO C
     * and 1 as a GNU extension, which would store a single byte. */
    ud = lua_newuserdata(L, sizeof(obj));
    memcpy(ud, &obj, sizeof(obj));

    luaL_getmetatable(L, type);
    /* Should have been created by luaopen_upb. */
    lupb_assert(L, !lua_isnil(L, -1));
    lua_setmetatable(L, -2);

    /* Set it in the cache. */
    lua_pushlightuserdata(L, (void*)obj);
    lua_pushvalue(L, -2);
    lua_rawset(L, -4);
  }

  /* Stack is: objcache, wrapper.  Drop the cache and leave the wrapper. */
  lua_insert(L, -2);
  lua_pop(L, 1);
}
void lupb_msgdef_pushwrapper(lua_State *L, const upb_msgdef *m);
void lupb_oneofdef_pushwrapper(lua_State *L, const upb_oneofdef *o);
static void lupb_enumdef_pushwrapper(lua_State *L, const upb_enumdef *e);
/* lupb_fielddef **************************************************************/
/* Pushes the wrapper for fielddef `f`, using the LUPB_FIELDDEF metatable. */
void lupb_fielddef_pushwrapper(lua_State *L, const upb_fielddef *f) {
lupb_pushwrapper(L, f, LUPB_FIELDDEF);
}
/* Returns the fielddef wrapped by argument `narg`, checked against the
 * LUPB_FIELDDEF type by lupb_checkwrapper(). */
const upb_fielddef *lupb_fielddef_check(lua_State *L, int narg) {
return lupb_checkwrapper(L, narg, LUPB_FIELDDEF);
}
/* Lua method "containing_oneof": pushes the wrapper for the field's
 * containing oneof (nil when the lookup yields NULL). */
static int lupb_fielddef_containingoneof(lua_State *L) {
  const upb_fielddef *field = lupb_fielddef_check(L, 1);
  const upb_oneofdef *oneof = upb_fielddef_containingoneof(field);
  lupb_oneofdef_pushwrapper(L, oneof);
  return 1;
}
/* Lua method "containing_type": pushes the wrapper for the field's containing
 * message (nil when the lookup yields NULL). */
static int lupb_fielddef_containingtype(lua_State *L) {
  const upb_fielddef *field = lupb_fielddef_check(L, 1);
  const upb_msgdef *parent = upb_fielddef_containingtype(field);
  lupb_msgdef_pushwrapper(L, parent);
  return 1;
}
/* Lua method "default": pushes the field's default value, using the push
 * function that matches the field's type.  Raises a Lua error for message
 * fields. */
static int lupb_fielddef_default(lua_State *L) {
  const upb_fielddef *field = lupb_fielddef_check(L, 1);

  switch (upb_fielddef_type(field)) {
    case UPB_TYPE_INT32:
    case UPB_TYPE_ENUM:
      lupb_pushint32(L, upb_fielddef_defaultint32(field));
      return 1;
    case UPB_TYPE_INT64:
      lupb_pushint64(L, upb_fielddef_defaultint64(field));
      return 1;
    case UPB_TYPE_UINT32:
      lupb_pushuint32(L, upb_fielddef_defaultuint32(field));
      return 1;
    case UPB_TYPE_UINT64:
      lupb_pushuint64(L, upb_fielddef_defaultuint64(field));
      return 1;
    case UPB_TYPE_DOUBLE:
      lua_pushnumber(L, upb_fielddef_defaultdouble(field));
      return 1;
    case UPB_TYPE_FLOAT:
      lua_pushnumber(L, upb_fielddef_defaultfloat(field));
      return 1;
    case UPB_TYPE_BOOL:
      lua_pushboolean(L, upb_fielddef_defaultbool(field));
      return 1;
    case UPB_TYPE_STRING:
    case UPB_TYPE_BYTES: {
      /* Pushed with an explicit length rather than as a C string. */
      size_t size;
      const char *bytes = upb_fielddef_defaultstr(field, &size);
      lua_pushlstring(L, bytes, size);
      return 1;
    }
    case UPB_TYPE_MESSAGE:
      return luaL_error(L, "Message fields do not have explicit defaults.");
  }

  return 1;
}
/* Lua method "descriptor_type": pushes the field's descriptor type as a
 * number. */
static int lupb_fielddef_descriptortype(lua_State *L) {
  const upb_fielddef *field = lupb_fielddef_check(L, 1);
  const lua_Number descriptor_type = upb_fielddef_descriptortype(field);
  lua_pushnumber(L, descriptor_type);
  return 1;
}
/* Lua method "getsel": looks up the selector for the handler type given as
 * argument 2.  Pushes the selector on success; returns no values when
 * upb_handlers_getselector() reports failure. */
static int lupb_fielddef_getsel(lua_State *L) {
  const upb_fielddef *field = lupb_fielddef_check(L, 1);
  upb_selector_t selector;

  if (!upb_handlers_getselector(field, luaL_checknumber(L, 2), &selector)) {
    return 0;
  }

  lua_pushinteger(L, selector);
  return 1;
}
/* Lua method "has_subdef": pushes upb_fielddef_hassubdef() as a boolean. */
static int lupb_fielddef_hassubdef(lua_State *L) {
  const upb_fielddef *field = lupb_fielddef_check(L, 1);
  const int has_subdef = upb_fielddef_hassubdef(field);
  lua_pushboolean(L, has_subdef);
  return 1;
}
/* Lua method "index": pushes upb_fielddef_index() as an integer. */
static int lupb_fielddef_index(lua_State *L) {
  const upb_fielddef *field = lupb_fielddef_check(L, 1);
  const lua_Integer index = upb_fielddef_index(field);
  lua_pushinteger(L, index);
  return 1;
}
/* Lua method "is_extension": pushes upb_fielddef_isextension() as a
 * boolean. */
static int lupb_fielddef_isextension(lua_State *L) {
  const upb_fielddef *field = lupb_fielddef_check(L, 1);
  const int is_extension = upb_fielddef_isextension(field);
  lua_pushboolean(L, is_extension);
  return 1;
}
/* Lua method "label": pushes upb_fielddef_label() as an integer. */
static int lupb_fielddef_label(lua_State *L) {
  const upb_fielddef *field = lupb_fielddef_check(L, 1);
  const lua_Integer label = upb_fielddef_label(field);
  lua_pushinteger(L, label);
  return 1;
}
/* Lua method "lazy": pushes upb_fielddef_lazy() as a boolean. */
static int lupb_fielddef_lazy(lua_State *L) {
  const upb_fielddef *field = lupb_fielddef_check(L, 1);
  const int is_lazy = upb_fielddef_lazy(field);
  lua_pushboolean(L, is_lazy);
  return 1;
}
/* Lua method "name": pushes upb_fielddef_name() as a string. */
static int lupb_fielddef_name(lua_State *L) {
  const upb_fielddef *field = lupb_fielddef_check(L, 1);
  const char *name = upb_fielddef_name(field);
  lua_pushstring(L, name);
  return 1;
}
/* Lua method "number": pushes the field number, or nil when the number
 * is 0. */
static int lupb_fielddef_number(lua_State *L) {
  const upb_fielddef *field = lupb_fielddef_check(L, 1);
  const int32_t number = upb_fielddef_number(field);

  if (number == 0) {
    lua_pushnil(L);
  } else {
    lua_pushinteger(L, number);
  }
  return 1;
}
/* Lua method "packed": pushes upb_fielddef_packed() as a boolean. */
static int lupb_fielddef_packed(lua_State *L) {
  const upb_fielddef *field = lupb_fielddef_check(L, 1);
  const int is_packed = upb_fielddef_packed(field);
  lua_pushboolean(L, is_packed);
  return 1;
}
/* Lua method "msgsubdef": pushes the wrapper for the field's message subdef
 * (nil when the lookup yields NULL). */
static int lupb_fielddef_msgsubdef(lua_State *L) {
  const upb_fielddef *field = lupb_fielddef_check(L, 1);
  const upb_msgdef *subdef = upb_fielddef_msgsubdef(field);
  lupb_msgdef_pushwrapper(L, subdef);
  return 1;
}
/* Lua method "enumsubdef": pushes the wrapper for the field's enum subdef
 * (nil when the lookup yields NULL). */
static int lupb_fielddef_enumsubdef(lua_State *L) {
  const upb_fielddef *field = lupb_fielddef_check(L, 1);
  const upb_enumdef *subdef = upb_fielddef_enumsubdef(field);
  lupb_enumdef_pushwrapper(L, subdef);
  return 1;
}
/* Lua method "type": pushes the field's type as an integer, or nil when
 * upb_fielddef_typeisset() reports that no type is set. */
static int lupb_fielddef_type(lua_State *L) {
  const upb_fielddef *field = lupb_fielddef_check(L, 1);

  if (!upb_fielddef_typeisset(field)) {
    lua_pushnil(L);
  } else {
    lua_pushinteger(L, upb_fielddef_type(field));
  }
  return 1;
}
/* Table mapping Lua method names for fielddef wrappers to their C
 * implementations; terminated by a {NULL, NULL} sentinel. */
static const struct luaL_Reg lupb_fielddef_m[] = {
{"containing_oneof", lupb_fielddef_containingoneof},
{"containing_type", lupb_fielddef_containingtype},
{"default", lupb_fielddef_default},
{"descriptor_type", lupb_fielddef_descriptortype},
{"getsel", lupb_fielddef_getsel},
{"has_subdef", lupb_fielddef_hassubdef},
{"index", lupb_fielddef_index},
{"is_extension", lupb_fielddef_isextension},
{"label", lupb_fielddef_label},
{"lazy", lupb_fielddef_lazy},
{"name", lupb_fielddef_name},
{"number", lupb_fielddef_number},
{"packed", lupb_fielddef_packed},
{"msgsubdef", lupb_fielddef_msgsubdef},
{"enumsubdef", lupb_fielddef_enumsubdef},
{"type", lupb_fielddef_type},
{NULL, NULL}
};
/* lupb_oneofdef **************************************************************/
/* Pushes the wrapper for oneofdef `o`, using the LUPB_ONEOFDEF metatable. */
void lupb_oneofdef_pushwrapper(lua_State *L, const upb_oneofdef *o) {
lupb_pushwrapper(L, o, LUPB_ONEOFDEF);
}
/* Returns the oneofdef wrapped by argument `narg`, checked against the
 * LUPB_ONEOFDEF type by lupb_checkwrapper(). */
const upb_oneofdef *lupb_oneofdef_check(lua_State *L, int narg) {
return lupb_checkwrapper(L, narg, LUPB_ONEOFDEF);
}
/* Lua method "containing_type": pushes the wrapper for the oneof's containing
 * message (nil when the lookup yields NULL). */
static int lupb_oneofdef_containingtype(lua_State *L) {
  const upb_oneofdef *oneof = lupb_oneofdef_check(L, 1);
  const upb_msgdef *parent = upb_oneofdef_containingtype(oneof);
  lupb_msgdef_pushwrapper(L, parent);
  return 1;
}
/* Lua method "field": looks up a field of the oneof by number (numeric
 * argument) or by name (string argument) and pushes its wrapper, or nil if
 * the lookup yields NULL.  Any other argument type is an argument error. */
static int lupb_oneofdef_field(lua_State *L) {
  const upb_oneofdef *oneof = lupb_oneofdef_check(L, 1);
  const upb_fielddef *field;

  switch (lua_type(L, 2)) {
    case LUA_TNUMBER:
      field = upb_oneofdef_itof(oneof, lua_tointeger(L, 2));
      break;
    case LUA_TSTRING:
      field = upb_oneofdef_ntofz(oneof, lua_tostring(L, 2));
      break;
    default: {
      const char *msg = lua_pushfstring(L, "number or string expected, got %s",
                                        luaL_typename(L, 2));
      return luaL_argerror(L, 2, msg);
    }
  }

  lupb_fielddef_pushwrapper(L, field);
  return 1;
}
/* Iterator step for the "fields" method of a oneofdef: pushes the wrapper for
 * the current field and advances the iterator held in upvalue 1.  Returns no
 * values once the iterator is done. */
static int lupb_oneofiter_next(lua_State *L) {
  upb_oneof_iter *iter = lua_touserdata(L, lua_upvalueindex(1));

  if (upb_oneof_done(iter)) {
    return 0;
  }

  lupb_fielddef_pushwrapper(L, upb_oneof_iter_field(iter));
  upb_oneof_next(iter);
  return 1;
}
/* Lua method "fields": pushes an iterator closure over the oneof's fields.
 * Upvalue 1 is the iterator state, upvalue 2 the oneofdef wrapper. */
static int lupb_oneofdef_fields(lua_State *L) {
  const upb_oneofdef *oneof = lupb_oneofdef_check(L, 1);
  upb_oneof_iter *iter = lua_newuserdata(L, sizeof(*iter));

  upb_oneof_begin(iter, oneof);

  /* Keep a reference to the oneofdef wrapper in the closure so that it
   * outlives the iter. */
  lua_pushvalue(L, 1);
  lua_pushcclosure(L, &lupb_oneofiter_next, 2);
  return 1;
}
/* "__len" metamethod: pushes upb_oneofdef_numfields() as an integer. */
static int lupb_oneofdef_len(lua_State *L) {
  const upb_oneofdef *oneof = lupb_oneofdef_check(L, 1);
  const lua_Integer field_count = upb_oneofdef_numfields(oneof);
  lua_pushinteger(L, field_count);
  return 1;
}
/* Lua method "name": pushes upb_oneofdef_name() as a string. */
static int lupb_oneofdef_name(lua_State *L) {
  const upb_oneofdef *oneof = lupb_oneofdef_check(L, 1);
  const char *name = upb_oneofdef_name(oneof);
  lua_pushstring(L, name);
  return 1;
}
/* Table mapping Lua method names for oneofdef wrappers to their C
 * implementations; terminated by a {NULL, NULL} sentinel. */
static const struct luaL_Reg lupb_oneofdef_m[] = {
{"containing_type", lupb_oneofdef_containingtype},
{"field", lupb_oneofdef_field},
{"fields", lupb_oneofdef_fields},
{"name", lupb_oneofdef_name},
{NULL, NULL}
};
/* Metamethod table for oneofdef wrappers: "__len" is served by
 * lupb_oneofdef_len. */
static const struct luaL_Reg lupb_oneofdef_mm[] = {
{"__len", lupb_oneofdef_len},
{NULL, NULL}
};
/* lupb_msgdef ****************************************************************/
/* Struct holding a single msgdef pointer.
 * NOTE(review): not referenced by any of the visible code, which stores the
 * raw pointer via lupb_pushwrapper() instead -- confirm whether it is still
 * needed. */
typedef struct {
const upb_msgdef *md;
} lupb_msgdef;
/* Pushes the wrapper for msgdef `m`, using the LUPB_MSGDEF metatable. */
void lupb_msgdef_pushwrapper(lua_State *L, const upb_msgdef *m) {
lupb_pushwrapper(L, m, LUPB_MSGDEF);
}
/* Returns the msgdef wrapped by argument `narg`, checked against the
 * LUPB_MSGDEF type by lupb_checkwrapper(). */
const upb_msgdef *lupb_msgdef_check(lua_State *L, int narg) {
return lupb_checkwrapper(L, narg, LUPB_MSGDEF);
}
/* "__len" metamethod: pushes upb_msgdef_numfields() as an integer. */
static int lupb_msgdef_len(lua_State *L) {
  const upb_msgdef *msgdef = lupb_msgdef_check(L, 1);
  const lua_Integer field_count = upb_msgdef_numfields(msgdef);
  lua_pushinteger(L, field_count);
  return 1;
}
/* Lua method "field": looks up a field of the message by number (numeric
 * argument) or by name (string argument) and pushes its wrapper, or nil if
 * the lookup yields NULL.  Any other argument type is an argument error. */
static int lupb_msgdef_field(lua_State *L) {
  const upb_msgdef *msgdef = lupb_msgdef_check(L, 1);
  const upb_fielddef *field;

  switch (lua_type(L, 2)) {
    case LUA_TNUMBER:
      field = upb_msgdef_itof(msgdef, lua_tointeger(L, 2));
      break;
    case LUA_TSTRING:
      field = upb_msgdef_ntofz(msgdef, lua_tostring(L, 2));
      break;
    default: {
      const char *msg = lua_pushfstring(L, "number or string expected, got %s",
                                        luaL_typename(L, 2));
      return luaL_argerror(L, 2, msg);
    }
  }

  lupb_fielddef_pushwrapper(L, field);
  return 1;
}
/* Lua method "lookup_name": looks up `name` (argument 2) among the message's
 * fields and oneofs.  Pushes the oneofdef wrapper if the lookup produced a
 * oneof, the fielddef wrapper if it produced a field, or nil if nothing was
 * found.  Raises an argument error if `name` is not a string or number. */
static int lupb_msgdef_lookupname(lua_State *L) {
  const upb_msgdef *m = lupb_msgdef_check(L, 1);
  /* lua_tostring() returns NULL for values that are neither strings nor
   * numbers, and NULL must not be handed on as a C string; luaL_checkstring()
   * turns that case into a Lua error instead. */
  const char *name = luaL_checkstring(L, 2);
  const upb_fielddef *f;
  const upb_oneofdef *o;

  if (!upb_msgdef_lookupnamez(m, name, &f, &o)) {
    lua_pushnil(L);
  } else if (o) {
    lupb_oneofdef_pushwrapper(L, o);
  } else {
    lupb_fielddef_pushwrapper(L, f);
  }

  return 1;
}
/* Iterator step for the "fields" method of a msgdef: pushes the wrapper for
 * the current field and advances the iterator held in upvalue 1.  Returns no
 * values once the iterator is done. */
static int lupb_msgfielditer_next(lua_State *L) {
  upb_msg_field_iter *iter = lua_touserdata(L, lua_upvalueindex(1));

  if (upb_msg_field_done(iter)) {
    return 0;
  }

  lupb_fielddef_pushwrapper(L, upb_msg_iter_field(iter));
  upb_msg_field_next(iter);
  return 1;
}
/* Lua method "fields": pushes an iterator closure over the message's fields.
 * Upvalue 1 is the iterator state, upvalue 2 the msgdef wrapper. */
static int lupb_msgdef_fields(lua_State *L) {
  const upb_msgdef *msgdef = lupb_msgdef_check(L, 1);
  upb_msg_field_iter *iter = lua_newuserdata(L, sizeof(*iter));

  upb_msg_field_begin(iter, msgdef);

  /* Keep a reference to the msgdef wrapper in the closure so that it
   * outlives the iter. */
  lua_pushvalue(L, 1);
  lua_pushcclosure(L, &lupb_msgfielditer_next, 2);
  return 1;
}
/* Iterator step for the "oneofs" method of a msgdef: pushes the wrapper for
 * the current oneof and advances the iterator held in upvalue 1.  Returns no
 * values once the iterator is done. */
static int lupb_msgoneofiter_next(lua_State *L) {
  upb_msg_oneof_iter *iter = lua_touserdata(L, lua_upvalueindex(1));

  if (upb_msg_oneof_done(iter)) {
    return 0;
  }

  lupb_oneofdef_pushwrapper(L, upb_msg_iter_oneof(iter));
  upb_msg_oneof_next(iter);
  return 1;
}
/* Lua method "oneofs": pushes an iterator closure over the message's oneofs.
 * Upvalue 1 is the iterator state, upvalue 2 the msgdef wrapper. */
static int lupb_msgdef_oneofs(lua_State *L) {
  const upb_msgdef *msgdef = lupb_msgdef_check(L, 1);
  upb_msg_oneof_iter *iter = lua_newuserdata(L, sizeof(*iter));

  upb_msg_oneof_begin(iter, msgdef);

  /* Keep a reference to the msgdef wrapper in the closure so that it
   * outlives the iter. */
  lua_pushvalue(L, 1);
  lua_pushcclosure(L, &lupb_msgoneofiter_next, 2);
  return 1;
}
/* Lua method "_map_entry": pushes upb_msgdef_mapentry() as a boolean. */
static int lupb_msgdef_mapentry(lua_State *L) {
  const upb_msgdef *msgdef = lupb_msgdef_check(L, 1);
  const int is_map_entry = upb_msgdef_mapentry(msgdef);
  lua_pushboolean(L, is_map_entry);
  return 1;
}
/* Lua method "syntax": pushes upb_msgdef_syntax() as an integer. */
static int lupb_msgdef_syntax(lua_State *L) {
  const upb_msgdef *msgdef = lupb_msgdef_check(L, 1);
  const lua_Integer syntax = upb_msgdef_syntax(msgdef);
  lua_pushinteger(L, syntax);
  return 1;
}
/* Metamethod table for msgdef wrappers: "__len" is served by
 * lupb_msgdef_len. */
static const struct luaL_Reg lupb_msgdef_mm[] = {
{"__len", lupb_msgdef_len},
{NULL, NULL}
};
/* Table mapping Lua method names for msgdef wrappers to their C
 * implementations; terminated by a {NULL, NULL} sentinel. */
static const struct luaL_Reg lupb_msgdef_m[] = {
{"field", lupb_msgdef_field},
{"fields", lupb_msgdef_fields},
{"lookup_name", lupb_msgdef_lookupname},
{"oneofs", lupb_msgdef_oneofs},
{"syntax", lupb_msgdef_syntax},
{"_map_entry", lupb_msgdef_mapentry},
{NULL, NULL}
};
/* lupb_enumdef ***************************************************************/
/* Returns the enumdef held by the wrapper at stack index narg, as validated
 * by lupb_checkwrapper() against the LUPB_ENUMDEF type. */
const upb_enumdef *lupb_enumdef_check(lua_State *L, int narg) {
  const upb_enumdef *enumdef = lupb_checkwrapper(L, narg, LUPB_ENUMDEF);
  return enumdef;
}
/* Pushes a Lua wrapper of type LUPB_ENUMDEF for e via lupb_pushwrapper(). */
static void lupb_enumdef_pushwrapper(lua_State *L, const upb_enumdef *e) {
lupb_pushwrapper(L, e, LUPB_ENUMDEF);
}
/* "__len" metamethod: pushes upb_enumdef_numvals() for the enumdef. */
static int lupb_enumdef_len(lua_State *L) {
  lua_pushinteger(L, upb_enumdef_numvals(lupb_enumdef_check(L, 1)));
  return 1;
}
/* enumdef:value(key) -- bidirectional lookup.
 *   number key: pushes the value's name (upb_enumdef_iton() result).
 *   string key: pushes the value's number, or nil if the name is unknown.
 * Any other key type raises an argument error. */
static int lupb_enumdef_value(lua_State *L) {
  const upb_enumdef *enumdef = lupb_enumdef_check(L, 1);

  switch (lua_type(L, 2)) {
    case LUA_TNUMBER: {
      int32_t number = lupb_checkint32(L, 2);
      /* Pushes "nil" for a NULL pointer. */
      lua_pushstring(L, upb_enumdef_iton(enumdef, number));
      return 1;
    }
    case LUA_TSTRING: {
      const char *name = lua_tostring(L, 2);
      int32_t number;
      if (!upb_enumdef_ntoiz(enumdef, name, &number)) {
        lua_pushnil(L);
      } else {
        lua_pushinteger(L, number);
      }
      return 1;
    }
    default: {
      const char *msg = lua_pushfstring(L, "number or string expected, got %s",
                                        luaL_typename(L, 2));
      return luaL_argerror(L, 2, msg);
    }
  }
}
/* Iterator step for enumdef:values().  Upvalue 1 holds the upb_enum_iter
 * userdata.  Pushes (name, number) for the current value and advances;
 * returns no values once the iterator is done. */
static int lupb_enumiter_next(lua_State *L) {
  upb_enum_iter *iter = lua_touserdata(L, lua_upvalueindex(1));

  if (!upb_enum_done(iter)) {
    lua_pushstring(L, upb_enum_iter_name(iter));
    lua_pushinteger(L, upb_enum_iter_number(iter));
    upb_enum_next(iter);
    return 2;
  }

  return 0;
}
/* enumdef:values() -> iterator function yielding (name, number) pairs.
 * The closure carries two upvalues: the iterator userdata and the enumdef
 * itself, so the enumdef cannot be collected while the iterator is alive. */
static int lupb_enumdef_values(lua_State *L) {
  const upb_enumdef *enumdef = lupb_enumdef_check(L, 1);
  upb_enum_iter *iter = lua_newuserdata(L, sizeof(*iter));

  upb_enum_begin(iter, enumdef);

  lua_pushvalue(L, 1); /* Second upvalue: keeps the enumdef referenced. */
  lua_pushcclosure(L, &lupb_enumiter_next, 2);
  return 1;
}
/* Metamethods for enumdef wrappers; "__len" is served by lupb_enumdef_len.
 * The {NULL, NULL} entry terminates the list. */
static const struct luaL_Reg lupb_enumdef_mm[] = {
{"__len", lupb_enumdef_len},
{NULL, NULL}
};
/* Methods callable on enumdef wrappers from Lua (Lua name -> C function).
 * The {NULL, NULL} entry terminates the list. */
static const struct luaL_Reg lupb_enumdef_m[] = {
{"value", lupb_enumdef_value},
{"values", lupb_enumdef_values},
{NULL, NULL}
};
/* lupb_filedef ***************************************************************/
/* Pushes a Lua wrapper of type LUPB_FILEDEF for f via lupb_pushwrapper(). */
void lupb_filedef_pushwrapper(lua_State *L, const upb_filedef *f) {
lupb_pushwrapper(L, f, LUPB_FILEDEF);
}
/* Returns the filedef held by the wrapper at stack index narg, as validated
 * by lupb_checkwrapper() against the LUPB_FILEDEF type. */
const upb_filedef *lupb_filedef_check(lua_State *L, int narg) {
  const upb_filedef *filedef = lupb_checkwrapper(L, narg, LUPB_FILEDEF);
  return filedef;
}
/* filedef:dep(index) -> wrapper for the result of upb_filedef_dep(). */
static int lupb_filedef_dep(lua_State *L) {
  const upb_filedef *file = lupb_filedef_check(L, 1);
  const int dep_index = luaL_checkint(L, 2);

  lupb_filedef_pushwrapper(L, upb_filedef_dep(file, dep_index));
  return 1;
}
/* filedef:depcount() -> number returned by upb_filedef_depcount(). */
static int lupb_filedef_depcount(lua_State *L) {
  lua_pushnumber(L, upb_filedef_depcount(lupb_filedef_check(L, 1)));
  return 1;
}
/* filedef:enum(index) -> wrapper for the result of upb_filedef_enum(). */
static int lupb_filedef_enum(lua_State *L) {
  const upb_filedef *file = lupb_filedef_check(L, 1);
  const int enum_index = luaL_checkint(L, 2);

  lupb_enumdef_pushwrapper(L, upb_filedef_enum(file, enum_index));
  return 1;
}
/* filedef:enumcount() -> number returned by upb_filedef_enumcount(). */
static int lupb_filedef_enumcount(lua_State *L) {
  lua_pushnumber(L, upb_filedef_enumcount(lupb_filedef_check(L, 1)));
  return 1;
}
/* filedef:msg(index) -> wrapper for the result of upb_filedef_msg(). */
static int lupb_filedef_msg(lua_State *L) {
  const upb_filedef *file = lupb_filedef_check(L, 1);
  const int msg_index = luaL_checkint(L, 2);

  lupb_msgdef_pushwrapper(L, upb_filedef_msg(file, msg_index));
  return 1;
}
/* filedef:msgcount() -> number returned by upb_filedef_msgcount(). */
static int lupb_filedef_msgcount(lua_State *L) {
  lua_pushnumber(L, upb_filedef_msgcount(lupb_filedef_check(L, 1)));
  return 1;
}
/* filedef:name() -> string returned by upb_filedef_name(). */
static int lupb_filedef_name(lua_State *L) {
  lua_pushstring(L, upb_filedef_name(lupb_filedef_check(L, 1)));
  return 1;
}
/* filedef:package() -> string returned by upb_filedef_package(). */
static int lupb_filedef_package(lua_State *L) {
  lua_pushstring(L, upb_filedef_package(lupb_filedef_check(L, 1)));
  return 1;
}
/* filedef:syntax() -> number returned by upb_filedef_syntax(). */
static int lupb_filedef_syntax(lua_State *L) {
  lua_pushnumber(L, upb_filedef_syntax(lupb_filedef_check(L, 1)));
  return 1;
}
/* Methods callable on filedef wrappers from Lua (Lua name -> C function).
 * The {NULL, NULL} entry terminates the list. */
static const struct luaL_Reg lupb_filedef_m[] = {
{"dep", lupb_filedef_dep},
{"depcount", lupb_filedef_depcount},
{"enum", lupb_filedef_enum},
{"enumcount", lupb_filedef_enumcount},
{"msg", lupb_filedef_msg},
{"msgcount", lupb_filedef_msgcount},
{"name", lupb_filedef_name},
{"package", lupb_filedef_package},
{"syntax", lupb_filedef_syntax},
{NULL, NULL}
};
/* lupb_symtab ****************************************************************/
/* Userdata payload for a Lua SymbolTable object. */
typedef struct {
/* Owned symtab; set to NULL by the __gc handler after it is freed. */
upb_symtab *symtab;
} lupb_symtab;
/* Returns the upb_symtab behind the LUPB_SYMTAB userdata at index narg.
 * Raises a Lua error if the userdata's symtab pointer is NULL. */
upb_symtab *lupb_symtab_check(lua_State *L, int narg) {
  lupb_symtab *wrapper = luaL_checkudata(L, narg, LUPB_SYMTAB);
  upb_symtab *symtab = wrapper->symtab;

  if (symtab == NULL) {
    luaL_error(L, "called into dead object");
  }
  return symtab;
}
/* upb.SymbolTable() constructor: allocates the userdata, creates a fresh
 * upb_symtab for it and attaches the LUPB_SYMTAB metatable. */
static int lupb_symtab_new(lua_State *L) {
  lupb_symtab *wrapper = lua_newuserdata(L, sizeof(lupb_symtab));

  wrapper->symtab = upb_symtab_new();

  luaL_getmetatable(L, LUPB_SYMTAB);
  lua_setmetatable(L, -2);
  return 1;
}
/* "__gc" metamethod: frees the symtab and clears the pointer so that
 * lupb_symtab_check() reports the object as dead afterwards. */
static int lupb_symtab_gc(lua_State *L) {
  lupb_symtab *wrapper = luaL_checkudata(L, 1, LUPB_SYMTAB);

  upb_symtab_free(wrapper->symtab);
  wrapper->symtab = NULL;

  return 0;
}
/* TODO(haberman): perhaps this should take a message object instead of a
* serialized string once we have a good story for vending compiled-in
* messages. */
/* symtab:add(serialized) -- parses the string at argument 2 as a
 * FileDescriptorSet and adds each contained file to the symtab.
 * Raises an argument error if the descriptor cannot be parsed.
 * NOTE(review): CHK and the `status` it references are defined outside this
 * function; presumably CHK raises a Lua error on failure -- confirm. */
static int lupb_symtab_add(lua_State *L) {
  size_t len;
  size_t file_count;
  size_t idx = 0;
  upb_arena *arena;
  google_protobuf_FileDescriptorSet *set;
  const google_protobuf_FileDescriptorProto *const *files;
  upb_symtab *symtab = lupb_symtab_check(L, 1);
  const char *serialized = luaL_checklstring(L, 2, &len);

  /* The arena is fetched back from the top of the Lua stack. */
  lupb_arena_new(L);
  arena = lupb_arena_check(L, -1);

  set = google_protobuf_FileDescriptorSet_parse(serialized, len, arena);
  if (set == NULL) {
    luaL_argerror(L, 2, "failed to parse descriptor");
  }

  files = google_protobuf_FileDescriptorSet_file(set, &file_count);
  while (idx < file_count) {
    CHK(upb_symtab_addfile(symtab, files[idx], &status));
    idx++;
  }

  return 0;
}
/* symtab:lookup_msg(name) -> wrapper for upb_symtab_lookupmsg()'s result. */
static int lupb_symtab_lookupmsg(lua_State *L) {
  const upb_symtab *symtab = lupb_symtab_check(L, 1);
  const char *name = luaL_checkstring(L, 2);

  lupb_msgdef_pushwrapper(L, upb_symtab_lookupmsg(symtab, name));
  return 1;
}
/* symtab:lookup_enum(name) -> wrapper for upb_symtab_lookupenum()'s result. */
static int lupb_symtab_lookupenum(lua_State *L) {
  const upb_symtab *symtab = lupb_symtab_check(L, 1);
  const char *name = luaL_checkstring(L, 2);

  lupb_enumdef_pushwrapper(L, upb_symtab_lookupenum(symtab, name));
  return 1;
}
/* Methods callable on SymbolTable objects from Lua (Lua name -> C function).
 * The {NULL, NULL} entry terminates the list. */
static const struct luaL_Reg lupb_symtab_m[] = {
{"add", lupb_symtab_add},
{"lookup_msg", lupb_symtab_lookupmsg},
{"lookup_enum", lupb_symtab_lookupenum},
{NULL, NULL}
};
/* Metamethods for SymbolTable objects; "__gc" frees the underlying symtab.
 * The {NULL, NULL} entry terminates the list. */
static const struct luaL_Reg lupb_symtab_mm[] = {
{"__gc", lupb_symtab_gc},
{NULL, NULL}
};
/* lupb toplevel **************************************************************/
/* Sets t[field] = i, where t is the table currently on top of the stack. */
static void lupb_setfieldi(lua_State *L, const char *field, int i) {
  lua_pushstring(L, field);
  lua_pushinteger(L, i);
  lua_settable(L, -3); /* The table sits below the key and value. */
}
/* Module-level functions contributed by the def bindings.
 * The {NULL, NULL} entry terminates the list. */
static const struct luaL_Reg lupbdef_toplevel_m[] = {
{"SymbolTable", lupb_symtab_new},
{NULL, NULL}
};
/* Registers everything the def bindings contribute:
 *   - the module-level functions, into the table on top of the stack,
 *   - a metatable for each wrapped def type,
 *   - the weak-valued object cache, stored in the registry,
 *   - the integer constants, set as fields of the table on top of the stack.
 */
void lupb_def_registertypes(lua_State *L) {
  /* Constants exported to Lua, in registration order. */
  static const struct {
    const char *name;
    int value;
  } constants[] = {
    {"LABEL_OPTIONAL", UPB_LABEL_OPTIONAL},
    {"LABEL_REQUIRED", UPB_LABEL_REQUIRED},
    {"LABEL_REPEATED", UPB_LABEL_REPEATED},

    {"TYPE_DOUBLE", UPB_TYPE_DOUBLE},
    {"TYPE_FLOAT", UPB_TYPE_FLOAT},
    {"TYPE_INT64", UPB_TYPE_INT64},
    {"TYPE_UINT64", UPB_TYPE_UINT64},
    {"TYPE_INT32", UPB_TYPE_INT32},
    {"TYPE_BOOL", UPB_TYPE_BOOL},
    {"TYPE_STRING", UPB_TYPE_STRING},
    {"TYPE_MESSAGE", UPB_TYPE_MESSAGE},
    {"TYPE_BYTES", UPB_TYPE_BYTES},
    {"TYPE_UINT32", UPB_TYPE_UINT32},
    {"TYPE_ENUM", UPB_TYPE_ENUM},

    {"DESCRIPTOR_TYPE_DOUBLE", UPB_DESCRIPTOR_TYPE_DOUBLE},
    {"DESCRIPTOR_TYPE_FLOAT", UPB_DESCRIPTOR_TYPE_FLOAT},
    {"DESCRIPTOR_TYPE_INT64", UPB_DESCRIPTOR_TYPE_INT64},
    {"DESCRIPTOR_TYPE_UINT64", UPB_DESCRIPTOR_TYPE_UINT64},
    {"DESCRIPTOR_TYPE_INT32", UPB_DESCRIPTOR_TYPE_INT32},
    {"DESCRIPTOR_TYPE_FIXED64", UPB_DESCRIPTOR_TYPE_FIXED64},
    {"DESCRIPTOR_TYPE_FIXED32", UPB_DESCRIPTOR_TYPE_FIXED32},
    {"DESCRIPTOR_TYPE_BOOL", UPB_DESCRIPTOR_TYPE_BOOL},
    {"DESCRIPTOR_TYPE_STRING", UPB_DESCRIPTOR_TYPE_STRING},
    {"DESCRIPTOR_TYPE_GROUP", UPB_DESCRIPTOR_TYPE_GROUP},
    {"DESCRIPTOR_TYPE_MESSAGE", UPB_DESCRIPTOR_TYPE_MESSAGE},
    {"DESCRIPTOR_TYPE_BYTES", UPB_DESCRIPTOR_TYPE_BYTES},
    {"DESCRIPTOR_TYPE_UINT32", UPB_DESCRIPTOR_TYPE_UINT32},
    {"DESCRIPTOR_TYPE_ENUM", UPB_DESCRIPTOR_TYPE_ENUM},
    {"DESCRIPTOR_TYPE_SFIXED32", UPB_DESCRIPTOR_TYPE_SFIXED32},
    {"DESCRIPTOR_TYPE_SFIXED64", UPB_DESCRIPTOR_TYPE_SFIXED64},
    {"DESCRIPTOR_TYPE_SINT32", UPB_DESCRIPTOR_TYPE_SINT32},
    {"DESCRIPTOR_TYPE_SINT64", UPB_DESCRIPTOR_TYPE_SINT64},

    {"HANDLER_INT32", UPB_HANDLER_INT32},
    {"HANDLER_INT64", UPB_HANDLER_INT64},
    {"HANDLER_UINT32", UPB_HANDLER_UINT32},
    {"HANDLER_UINT64", UPB_HANDLER_UINT64},
    {"HANDLER_FLOAT", UPB_HANDLER_FLOAT},
    {"HANDLER_DOUBLE", UPB_HANDLER_DOUBLE},
    {"HANDLER_BOOL", UPB_HANDLER_BOOL},
    {"HANDLER_STARTSTR", UPB_HANDLER_STARTSTR},
    {"HANDLER_STRING", UPB_HANDLER_STRING},
    {"HANDLER_ENDSTR", UPB_HANDLER_ENDSTR},
    {"HANDLER_STARTSUBMSG", UPB_HANDLER_STARTSUBMSG},
    {"HANDLER_ENDSUBMSG", UPB_HANDLER_ENDSUBMSG},
    {"HANDLER_STARTSEQ", UPB_HANDLER_STARTSEQ},
    {"HANDLER_ENDSEQ", UPB_HANDLER_ENDSEQ},

    {"SYNTAX_PROTO2", UPB_SYNTAX_PROTO2},
    {"SYNTAX_PROTO3", UPB_SYNTAX_PROTO3}
  };
  size_t idx;

  lupb_setfuncs(L, lupbdef_toplevel_m);

  /* Refcounted types. */
  lupb_register_type(L, LUPB_ENUMDEF, lupb_enumdef_m, lupb_enumdef_mm);
  lupb_register_type(L, LUPB_FIELDDEF, lupb_fielddef_m, NULL);
  lupb_register_type(L, LUPB_FILEDEF, lupb_filedef_m, NULL);
  lupb_register_type(L, LUPB_MSGDEF, lupb_msgdef_m, lupb_msgdef_mm);
  lupb_register_type(L, LUPB_ONEOFDEF, lupb_oneofdef_m, lupb_oneofdef_mm);
  lupb_register_type(L, LUPB_SYMTAB, lupb_symtab_m, lupb_symtab_mm);

  /* Object cache: a table whose metatable has __mode = "v" (weak values),
   * stored in the registry under LUPB_OBJCACHE. */
  lua_newtable(L);
  lua_createtable(L, 0, 1);
  lua_pushstring(L, "v");
  lua_setfield(L, -2, "__mode");
  lua_setmetatable(L, -2);
  lua_setfield(L, LUA_REGISTRYINDEX, LUPB_OBJCACHE);

  /* Register constants. */
  for (idx = 0; idx < sizeof(constants) / sizeof(constants[0]); idx++) {
    lupb_setfieldi(L, constants[idx].name, constants[idx].value);
  }
}

File diff suppressed because it is too large Load Diff

@ -0,0 +1,245 @@
/*
** require("upb") -- A Lua extension for upb.
**
** Exposes only the core library
** (sub-libraries are exposed in other extensions).
**
** 64-bit woes: Lua can only represent numbers of type lua_Number (which is
** double unless the user specifically overrides this). Doubles can represent
** the entire range of 64-bit integers, but lose precision once the integers are
** greater than 2^53.
**
** Lua 5.3 is adding support for integers, which will allow for 64-bit
** integers (which can be interpreted as signed or unsigned).
**
** LuaJIT supports 64-bit signed and unsigned boxed representations
** through its "cdata" mechanism, but this is not portable to regular Lua.
**
** Hopefully Lua 5.3 will come soon enough that we can either use Lua 5.3
** integer support or LuaJIT 64-bit cdata for users that need the entire
** domain of [u]int64 values.
*/
#include <float.h>
#include <math.h>
#include <stdlib.h>
#include <string.h>
#include "lauxlib.h"
#include "upb/bindings/lua/upb.h"
#include "upb/handlers.h"
#include "upb/msg.h"
/* Lua compatibility code *****************************************************/
/* Lua 5.1 and Lua 5.2 have slightly incompatible APIs. A little bit of
* compatibility code can help hide the difference. Not too many people still
* use Lua 5.1 but LuaJIT uses the Lua 5.1 API in some ways. */
#if LUA_VERSION_NUM == 501
/* taken from lua 5.2's source. */
/* Backport of Lua 5.2's luaL_testudata() for Lua 5.1.
 * Returns the userdata at index ud if its metatable is the one registered
 * under tname; otherwise returns NULL.  Leaves the stack unchanged. */
void *luaL_testudata(lua_State *L, int ud, const char *tname) {
  void *udata = lua_touserdata(L, ud);
  int same_metatable;

  /* Not a userdata, or a userdata without any metatable. */
  if (udata == NULL || !lua_getmetatable(L, ud)) {
    return NULL;
  }

  luaL_getmetatable(L, tname); /* The expected metatable. */
  same_metatable = lua_rawequal(L, -1, -2);
  lua_pop(L, 2); /* Remove both metatables. */

  return same_metatable ? udata : NULL;
}
/* Lua 5.1 flavor: creates the module table through luaL_register(), passing
 * the module name along with the function list. */
static void lupb_newlib(lua_State *L, const char *name, const luaL_Reg *funcs) {
luaL_register(L, name, funcs);
}
#elif LUA_VERSION_NUM == 502
/* Lua 5.2 flavor of luaL_typerror(): raises an argument error for argument
 * narg saying that tname was expected, naming the type actually received. */
int luaL_typerror(lua_State *L, int narg, const char *tname) {
  return luaL_argerror(
      L, narg,
      lua_pushfstring(L, "%s expected, got %s", tname, luaL_typename(L, narg)));
}
/* Lua 5.2 flavor: pushes a new table and registers funcs in it.  Modules are
 * not expected to set a global variable in Lua 5.2, so "name" is ignored. */
static void lupb_newlib(lua_State *L, const char *name, const luaL_Reg *funcs) {
  UPB_UNUSED(name);

  /* luaL_newlib() is not usable here because funcs is not the actual array.
   * The initial size of 8 is a guess; counting funcs would be a possible
   * (micro-)optimization. */
  lua_createtable(L, 0, 8);
  luaL_setfuncs(L, funcs, 0);
}
#else
#error Only Lua 5.1 and 5.2 are supported
#endif
/* Shims for upcoming Lua 5.3 functionality. */
/* Stand-in for Lua 5.3's lua_isinteger(): always reports "not an integer",
 * ignoring both arguments. */
bool lua_isinteger(lua_State *L, int argn) {
  UPB_UNUSED(argn);
  UPB_UNUSED(L);
  return false;
}
/* Utility functions **********************************************************/
/* We store our module table in the registry, keyed by ptr.
* For more info about the motivation/rationale, see this thread:
* http://thread.gmane.org/gmane.comp.lang.lua.general/110632 */
/* Looks up the module table cached in the registry under the light userdata
 * key ptr.  If present, leaves it on the stack and returns true.  Otherwise
 * creates the module table with lupb_newlib(), caches it under ptr, leaves
 * it on top of the stack and returns false. */
bool lupb_openlib(lua_State *L, void *ptr, const char *name,
                  const luaL_Reg *funcs) {
  lua_pushlightuserdata(L, ptr);
  lua_rawget(L, LUA_REGISTRYINDEX);

  if (lua_isnil(L, -1)) {
    /* Cache miss: build the table, then store registry[ptr] = table. */
    lupb_newlib(L, name, funcs);

    lua_pushlightuserdata(L, ptr);
    lua_pushvalue(L, -2);
    lua_rawset(L, LUA_REGISTRYINDEX);
    return false;
  }

  return true;
}
/* Raises a Lua error carrying the status' error message unless s is ok. */
void lupb_checkstatus(lua_State *L, upb_status *s) {
  if (upb_ok(s)) {
    return;
  }

  lua_pushstring(L, upb_status_errmsg(s));
  lua_error(L);
}
/* Scalar type mapping ********************************************************/
/* Functions that convert scalar/primitive values (numbers, strings, bool)
* between Lua and C/upb. Handles type/range checking. */
bool lupb_checkbool(lua_State *L, int narg) {
if (!lua_isboolean(L, narg)) {
luaL_error(L, "must be true or false");
}
return lua_toboolean(L, narg);
}
/* Unlike luaL_checkstring(), this does not allow implicit conversion to
* string. */
/* Returns the string at index narg and stores its length through len.
 * Raises a Lua error unless the value's type is exactly LUA_TSTRING. */
const char *lupb_checkstring(lua_State *L, int narg, size_t *len) {
  const int actual_type = lua_type(L, narg);

  if (actual_type != LUA_TSTRING) {
    luaL_error(L, "Expected string");
  }

  return lua_tolstring(L, narg, len);
}
/* Unlike luaL_checkinteger, these do not implicitly convert from string or
* round an existing double value. We allow floating-point input, but only if
* the actual value is integral. */
/* INTCHECK(type, ctype) defines two functions:
 *
 *   ctype lupb_check<type>(L, narg)
 *     Returns the number at index narg as a ctype.  Strings are rejected (no
 *     implicit conversion).  Raises a Lua error if the value has a fractional
 *     part, is NaN, or lies outside ctype's range.
 *
 *   void lupb_push<type>(L, val)
 *     Pushes val as a lua_Number.
 *
 * The range is validated BEFORE converting to ctype: converting a double whose
 * truncated value is not representable in the integer type is undefined
 * behavior in C, so the "(double)i != n" round-trip test alone is not a safe
 * way to detect out-of-range input. */
#define INTCHECK(type, ctype)                                                  \
  ctype lupb_check##type(lua_State *L, int narg) {                             \
    /* ctype's range as doubles: [lo, hi), where hi is a power of two */       \
    /* and therefore exactly representable. */                                 \
    const int is_signed = ((ctype)-1 < (ctype)0);                              \
    const double hi = ldexp(1.0, (int)(sizeof(ctype) * 8) - is_signed);        \
    const double lo = is_signed ? -hi : 0.0;                                   \
    double n;                                                                  \
    ctype i;                                                                   \
    if (lua_isinteger(L, narg)) {                                              \
      return lua_tointeger(L, narg);                                           \
    }                                                                          \
                                                                               \
    /* Prevent implicit conversion from string. */                             \
    luaL_checktype(L, narg, LUA_TNUMBER);                                      \
    n = lua_tonumber(L, narg);                                                 \
                                                                               \
    /* Written so that NaN fails the test too. */                              \
    if (!(n >= lo && n < hi)) {                                                \
      luaL_error(L, "number %f was not an integer or out of range for " #type, \
                 n);                                                           \
    }                                                                          \
                                                                               \
    i = (ctype)n;                                                              \
    if ((double)i != n) {                                                      \
      /* double -> ctype truncated or rounded. */                              \
      luaL_error(L, "number %f was not an integer or out of range for " #type, \
                 n);                                                           \
    }                                                                          \
    return i;                                                                  \
  }                                                                            \
  void lupb_push##type(lua_State *L, ctype val) {                              \
    /* TODO: push integer for Lua >= 5.3, 64-bit cdata for LuaJIT. */          \
    /* This is lossy for some [u]int64 values, which isn't great, but */       \
    /* crashing when we encounter these values seems worse. */                 \
    lua_pushnumber(L, val);                                                    \
  }
INTCHECK(int64, int64_t)
INTCHECK(int32, int32_t)
INTCHECK(uint64, uint64_t)
INTCHECK(uint32, uint32_t)
/* Returns the number at index narg as a double; raises a Lua error if the
 * value is not of type number (strings are not converted).
 *
 * No attempt is made to reject integers that lack an exact double
 * representation: doubles are not generally expected to be exact, and such a
 * check would make failures depend on the particular input data. */
double lupb_checkdouble(lua_State *L, int narg) {
  double value;

  luaL_checktype(L, narg, LUA_TNUMBER); /* lua_tonumber() auto-converts. */
  value = lua_tonumber(L, narg);
  return value;
}
/* Returns the number at index narg narrowed to float; raises a Lua error if
 * the value is not of type number.  As with lupb_checkdouble(), exactness of
 * the conversion is not checked. */
float lupb_checkfloat(lua_State *L, int narg) {
  luaL_checktype(L, narg, LUA_TNUMBER); /* lua_tonumber() auto-converts. */
  return (float)lua_tonumber(L, narg);
}
/* Pushes d onto the Lua stack as a number. */
void lupb_pushdouble(lua_State *L, double d) {
lua_pushnumber(L, d);
}
/* Pushes d onto the Lua stack as a number. */
void lupb_pushfloat(lua_State *L, float d) {
lua_pushnumber(L, d);
}
/* Function list passed to lupb_openlib() for the module table.  It holds only
 * the {NULL, NULL} terminator, i.e. it registers no functions itself. */
static const struct luaL_Reg lupb_toplevel_m[] = {
{NULL, NULL}
};
/* Creates the metatable registered under name.  Metamethods from mm (if any)
 * are installed directly in the metatable; methods from m (if any) go into a
 * fresh table stored as the metatable's __index.  The stack is left as it
 * was on entry. */
void lupb_register_type(lua_State *L, const char *name, const luaL_Reg *m,
                        const luaL_Reg *mm) {
  luaL_newmetatable(L, name);

  if (mm != NULL) {
    lupb_setfuncs(L, mm);
  }

  if (m != NULL) {
    /* Methods are reached through __index, so a type can't both implement
     * its own __index metamethod and have methods.  Verify that mm did not
     * install one. */
    lua_getfield(L, -1, "__index");
    lupb_assert(L, lua_isnil(L, -1));
    lua_pop(L, 1);

    lua_createtable(L, 0, 0);
    lupb_setfuncs(L, m);
    lua_setfield(L, -2, "__index");
  }

  lua_pop(L, 1); /* Drop the metatable. */
}
/* Module entry point for "upb_c".  Leaves the module table on the stack and
 * returns it.  Type registration runs only when lupb_openlib() reports that
 * the module had not been opened in this lua_State before. */
int luaopen_upb_c(lua_State *L) {
  static char module_key; /* Its address is the registry key. */

  if (!lupb_openlib(L, &module_key, "upb_c", lupb_toplevel_m)) {
    lupb_def_registertypes(L);
    lupb_msg_registertypes(L);
  }

  return 1; /* Return package table. */
}

@ -0,0 +1,127 @@
/*
** Shared definitions for upb Lua modules.
*/
#ifndef UPB_LUA_UPB_H_
#define UPB_LUA_UPB_H_
#include "lauxlib.h"
#include "upb/def.h"
#include "upb/handlers.h"
#include "upb/msg.h"
#include "upb/msgfactory.h"
/* Lua 5.1/5.2 compatibility code. */
#if LUA_VERSION_NUM == 501
#define lua_rawlen lua_objlen
/* Lua >= 5.2's getuservalue/setuservalue functions do not exist in prior
* versions but the older function lua_getfenv() can provide 100% of its
* capabilities (the reverse is not true). */
#define lua_getuservalue(L, index) lua_getfenv(L, index)
#define lua_setuservalue(L, index) lua_setfenv(L, index)
void *luaL_testudata(lua_State *L, int ud, const char *tname);
#define lupb_setfuncs(L, l) luaL_register(L, NULL, l)
#elif LUA_VERSION_NUM == 502
int luaL_typerror(lua_State *L, int narg, const char *tname);
#define lupb_setfuncs(L, l) luaL_setfuncs(L, l, 0)
#else
#error Only Lua 5.1 and 5.2 are supported
#endif
/* Raises a Lua "internal error" naming the failed predicate and its source
 * location when predicate is false.
 *
 * Wrapped in do { ... } while (0) so that "lupb_assert(L, x);" is exactly one
 * statement: a bare "if" followed by the caller's semicolon would break (or
 * silently re-bind the else) when used as the body of an if/else. */
#define lupb_assert(L, predicate)                                      \
  do {                                                                 \
    if (!(predicate)) {                                                \
      luaL_error(L, "internal error: %s, %s:%d ", #predicate, __FILE__, \
                 __LINE__);                                            \
    }                                                                  \
  } while (0)
/* Function for initializing the core library. This function is idempotent,
* and should be called at least once before calling any of the functions that
* construct core upb types. */
int luaopen_upb(lua_State *L);
/* Gets or creates a package table for a C module that is uniquely identified by
* "ptr". The easiest way to supply a unique "ptr" is to pass the address of a
* static variable private in the module's .c file.
*
* If this module has already been registered in this lua_State, pushes it and
* returns true.
*
* Otherwise, creates a new module table for this module with the given name,
* pushes it, and registers the given top-level functions in it. It also sets
* it as a global variable, but only if the current version of Lua expects that
* (ie Lua 5.1/LuaJIT).
*
* If "false" is returned, the caller is guaranteed that this lib has not been
* registered in this Lua state before (regardless of any funny business the
* user might have done to the global state), so the caller can safely perform
* one-time initialization. */
bool lupb_openlib(lua_State *L, void *ptr, const char *name,
const luaL_Reg *funcs);
/* Custom check/push functions. Unlike the Lua equivalents, they are pinned to
* specific types (instead of lua_Number, etc), and do not allow any implicit
* conversion or data loss. */
/* Integer checkers: accept only values of type number whose value is
 * integral and in range for the target type; raise a Lua error otherwise. */
int64_t lupb_checkint64(lua_State *L, int narg);
int32_t lupb_checkint32(lua_State *L, int narg);
uint64_t lupb_checkuint64(lua_State *L, int narg);
uint32_t lupb_checkuint32(lua_State *L, int narg);
/* Floating-point checkers: require type number (strings are not converted). */
double lupb_checkdouble(lua_State *L, int narg);
float lupb_checkfloat(lua_State *L, int narg);
/* Requires an actual boolean (no truthiness conversion). */
bool lupb_checkbool(lua_State *L, int narg);
/* Requires an actual string; stores its length through len. */
const char *lupb_checkstring(lua_State *L, int narg, size_t *len);
/* NOTE(review): no definition of lupb_checkname is visible alongside the
 * other checkers -- confirm where (or whether) it is implemented. */
const char *lupb_checkname(lua_State *L, int narg);
/* Push functions: each pushes val as a lua_Number, which is lossy for some
 * [u]int64 values. */
void lupb_pushint64(lua_State *L, int64_t val);
void lupb_pushint32(lua_State *L, int32_t val);
void lupb_pushuint64(lua_State *L, uint64_t val);
void lupb_pushuint32(lua_State *L, uint32_t val);
void lupb_pushdouble(lua_State *L, double val);
void lupb_pushfloat(lua_State *L, float val);
/* Registers a type with the given name, methods, and metamethods. */
void lupb_register_type(lua_State *L, const char *name, const luaL_Reg *m,
const luaL_Reg *mm);
/* Checks the given upb_status and throws a Lua error if it is not ok. */
void lupb_checkstatus(lua_State *L, upb_status *s);
/** From def.c. ***************************************************************/
upb_fieldtype_t lupb_checkfieldtype(lua_State *L, int narg);
const upb_msgdef *lupb_msgdef_check(lua_State *L, int narg);
const upb_enumdef *lupb_enumdef_check(lua_State *L, int narg);
const upb_fielddef *lupb_fielddef_check(lua_State *L, int narg);
upb_symtab *lupb_symtab_check(lua_State *L, int narg);
void lupb_def_registertypes(lua_State *L);
/** From msg.c. ***************************************************************/
struct lupb_msgclass;
typedef struct lupb_msgclass lupb_msgclass;
upb_arena *lupb_arena_check(lua_State *L, int narg);
int lupb_arena_new(lua_State *L);
upb_arena *lupb_arena_get(lua_State *L);
int lupb_msg_pushref(lua_State *L, int msgclass, void *msg);
const upb_msg *lupb_msg_checkmsg(lua_State *L, int narg,
const lupb_msgclass *lmsgclass);
upb_msg *lupb_msg_checkmsg2(lua_State *L, int narg,
const upb_msglayout **layout);
const lupb_msgclass *lupb_msgclass_check(lua_State *L, int narg);
const upb_msglayout *lupb_msgclass_getlayout(lua_State *L, int narg);
const upb_msgdef *lupb_msgclass_getmsgdef(const lupb_msgclass *lmsgclass);
upb_msgfactory *lupb_msgclass_getfactory(const lupb_msgclass *lmsgclass);
void lupb_msg_registertypes(lua_State *L);
#endif /* UPB_LUA_UPB_H_ */

@ -0,0 +1,172 @@
-- Before calling require on "upb_c", we need to load the same library
-- as RTLD_GLOBAL, for the benefit of other C extensions that depend on
-- C functions in the core.
--
-- This has to happen *before* the require call, because if the module
-- is loaded RTLD_LOCAL first, a subsequent load as RTLD_GLOBAL won't
-- have the proper effect, at least on some platforms.
-- package.searchpath is absent on some Lua versions, so probe for it first.
local so
if package.searchpath then
  so = package.searchpath("upb_c", package.cpath)
end
if so then
  -- "*" only links the library (globally) without looking up a function.
  package.loadlib(so, "*")
end
local upb = require("upb_c")
-- A convenience function for building/linking/freezing defs
-- while maintaining their original order.
--
-- Sample usage:
-- local m1, m2 = upb.build_defs{
-- upb.MessageDef{full_name = "M1", fields = {
-- upb.FieldDef{
-- name = "m2",
-- number = 1,
-- type = upb.TYPE_MESSAGE,
-- subdef_name = ".M2"
-- },
-- }
-- },
-- upb.MessageDef{full_name = "M2"}
-- }
upb.build_defs = function(defs)
  -- Building a symbol table from the defs is done purely for its effect on
  -- them; the table itself is discarded.
  upb.SymbolTable(defs)
  -- Lua 5.2 puts unpack in the table library.
  local unpack_fn = unpack or table.unpack
  return unpack_fn(defs)
end
-- Stateless iterator step: given the previous index, returns the next index
-- and its element, or nil once the index would pass #array.
local ipairs_iter = function(array, last_index)
  local index = last_index + 1
  if index <= #array then
    return index, array[index]
  end
  return nil
end
-- For iterating over the indexes and values of a upb.Array.
--
-- for i, val in upb.ipairs(array) do
-- -- ...
-- end
upb.ipairs = function(array)
  -- Generic-for triple: iterator function, invariant state, initial index.
  local start_index = 0
  return ipairs_iter, array, start_index
end
-- For every key/value pair in init, calls obj:set_<key>(value).
-- Raises an error if obj has no such setter.
local set_named = function(obj, init)
  for key, value in pairs(init) do
    local setter = obj["set_" .. key]
    if setter then
      setter(obj, value)
    else
      error("Cannot set member: " .. key)
    end
  end
end
-- Capture references to the functions we're wrapping.
-- These are read from the C module's table before the wrappers below replace
-- the same keys.
-- NOTE(review): any key the C module does not define is captured as nil here,
-- and calling the corresponding wrapper would then fail -- confirm which of
-- these constructors "upb_c" actually provides.
local RealFieldDef = upb.FieldDef
local RealEnumDef = upb.EnumDef
local RealMessageDef = upb.MessageDef
local RealOneofDef = upb.OneofDef
local RealSymbolTable = upb.SymbolTable
-- FieldDef constructor; a wrapper around the real constructor that can
-- set initial properties.
--
-- User can specify initialization values like so:
-- upb.FieldDef{label=upb.LABEL_REQUIRED, name="my_field", number=5,
-- type=upb.TYPE_INT32, default_value=12, type_name="Foo"}
upb.FieldDef = function(init)
  local f = RealFieldDef()
  if init then
    -- Copy the remaining properties instead of writing init.type = nil, so
    -- the caller's table is left untouched and can be reused.
    local props = {}
    for k, v in pairs(init) do
      if not (k == "type" and init.type) then
        props[k] = v
      end
    end
    -- Other members are often dependent on type, so set that first.
    if init.type then
      f:set_type(init.type)
    end
    set_named(f, props)
  end
  return f
end
-- MessageDef constructor; a wrapper around the real constructor that can
-- set initial properties.
--
-- User can specify initialization values like so:
-- upb.MessageDef{full_name="MyMessage", extstart=8000, fields={...}}
upb.MessageDef = function(init)
  local m = RealMessageDef()
  if init then
    for _, f in pairs(init.fields or {}) do
      m:add(f)
    end
    -- Forward every property except "fields".  A copy is used instead of
    -- writing init.fields = nil, so the caller's table is left untouched.
    local props = {}
    for k, v in pairs(init) do
      if k ~= "fields" then
        props[k] = v
      end
    end
    set_named(m, props)
  end
  return m
end
-- EnumDef constructor; a wrapper around the real constructor that can
-- set initial properties.
--
-- User can specify initialization values like so:
-- upb.EnumDef{full_name="MyEnum",
-- values={
-- {"FOO_VALUE_1", 1},
-- {"FOO_VALUE_2", 2}
-- }
-- }
upb.EnumDef = function(init)
  local e = RealEnumDef()
  if init then
    -- Each entry is a {name, number} pair.
    for _, val in pairs(init.values or {}) do
      e:add(val[1], val[2])
    end
    -- Forward every property except "values".  A copy is used instead of
    -- writing init.values = nil, so the caller's table is left untouched.
    local props = {}
    for k, v in pairs(init) do
      if k ~= "values" then
        props[k] = v
      end
    end
    set_named(e, props)
  end
  return e
end
-- OneofDef constructor; a wrapper around the real constructor that can
-- set initial properties.
--
-- User can specify initialization values like so:
-- upb.OneofDef{name="foo", fields={...}}
upb.OneofDef = function(init)
  local o = RealOneofDef()
  if init then
    for _, val in pairs(init.fields or {}) do
      o:add(val)
    end
    -- Forward every property except "fields".  A copy is used instead of
    -- writing init.fields = nil, so the caller's table is left untouched.
    local props = {}
    for k, v in pairs(init) do
      if k ~= "fields" then
        props[k] = v
      end
    end
    set_named(o, props)
  end
  return o
end
-- SymbolTable constructor; a wrapper around the real constructor that can
-- add an initial set of defs.
upb.SymbolTable = function(defs)
  local symtab = RealSymbolTable()
  if not defs then
    return symtab
  end
  symtab:add(defs)
  return symtab
end
return upb

@ -0,0 +1,56 @@
/*
** require("upb.pb") -- A Lua extension for upb.pb.
**
** Exposes all the types defined in upb/pb/{*}.h
** Also defines a few convenience functions on top.
*/
#include "upb/bindings/lua/upb.h"
#include "upb/decode.h"
#include "upb/encode.h"
#define LUPB_PBDECODERMETHOD "lupb.pb.decodermethod"
/* decode(msg, bytes): parses the serialized data in argument 2 into the
 * message at argument 1, using the arena from lupb_arena_get().
 * Raises an argument error if argument 2 is not a string (or a number, which
 * Lua converts to a string).  Returns nothing to Lua. */
static int lupb_pb_decode(lua_State *L) {
  size_t len;
  const upb_msglayout *layout;
  upb_msg *msg = lupb_msg_checkmsg2(L, 1, &layout);
  /* lua_tolstring() returns NULL for values that are neither strings nor
   * numbers, and that NULL would be handed straight to upb_decode().
   * luaL_checklstring() accepts the same inputs but raises an error instead. */
  const char *pb = luaL_checklstring(L, 2, &len);
  upb_decode(pb, len, msg, layout, lupb_arena_get(L));
  /* TODO(haberman): check for error. */
  return 0;
}
/* encode(msg) -> string: serializes the message at argument 1 using a
 * temporary arena and returns the bytes as a Lua string.
 * NOTE(review): the result of upb_encode() is used without a failure check;
 * see the TODO below. */
static int lupb_pb_encode(lua_State *L) {
  const upb_msglayout *layout;
  const upb_msg *msg = lupb_msg_checkmsg2(L, 1, &layout);
  upb_arena *scratch = upb_arena_new();
  size_t size;
  char *encoded = upb_encode(msg, (const void*)layout, scratch, &size);

  /* Copy the bytes into a Lua string first, then release the arena that was
   * passed to upb_encode(). */
  lua_pushlstring(L, encoded, size);
  upb_arena_free(scratch);

  /* TODO(haberman): check for error. */
  return 1;
}
/* Module-level functions of "upb.pb_c" (Lua name -> C function).
 * The {NULL, NULL} entry terminates the list. */
static const struct luaL_Reg toplevel_m[] = {
{"decode", lupb_pb_decode},
{"encode", lupb_pb_encode},
{NULL, NULL}
};
/* Module entry point for "upb.pb_c".  lupb_openlib() leaves the module table
 * on the stack whether or not it already existed, and there is no one-time
 * initialization to run, so its result does not change what happens next. */
int luaopen_upb_pb_c(lua_State *L) {
  static char module_key; /* Its address is the registry key. */

  (void)lupb_openlib(L, &module_key, "upb.pb_c", toplevel_m);
  return 1;
}

@ -0,0 +1,3 @@
-- Load the core "upb" module first, then return the C module "upb.pb_c" as
-- this module's value.
require "upb"
return require "upb.pb_c"

@ -0,0 +1,69 @@
#ifndef UPB_STDCPP_H_
#define UPB_STDCPP_H_
#include "upb/sink.h"
#include "upb/port_def.inc"
namespace upb {
// Installs bytes-handler callbacks that fill a string-like object of type T.
// The closure pointer handed to the callbacks is cast to T*; T must provide
// clear() and append(const char*, size_t).
template <class T>
class FillStringHandler {
 public:
  // Registers the start-string and string callbacks on |handler|, with no
  // handler data.
  static void SetHandler(upb_byteshandler* handler) {
    upb_byteshandler_setstartstr(handler, &FillStringHandler::StartString,
                                 NULL);
    upb_byteshandler_setstring(handler, &FillStringHandler::StringBuf, NULL);
  }

 private:
  // TODO(haberman): add UpbBind/UpbMakeHandler support to BytesHandler so these
  // can be prettier callbacks.

  // Empties the target and returns the same closure.
  static void* StartString(void* c, const void* hd, size_t size) {
    UPB_UNUSED(hd);
    UPB_UNUSED(size);
    static_cast<T*>(c)->clear();
    return c;
  }

  // Appends |n| bytes from |buf| to the target.  Returns n on success, or 0
  // if append() threw a std::exception.
  static size_t StringBuf(void* c, const void* hd, const char* buf, size_t n,
                          const upb_bufhandle* h) {
    UPB_UNUSED(hd);
    UPB_UNUSED(h);
    T* target = static_cast<T*>(c);
    try {
      target->append(buf, n);
    } catch (const std::exception&) {
      return 0;
    }
    return n;
  }
};
// Owns a bytes handler wired up by FillStringHandler and exposes a BytesSink
// that writes into the target object supplied at construction.
class StringSink {
 public:
  // |target| is stored only as the sink's closure pointer.
  template <class T>
  explicit StringSink(T* target) {
    // TODO(haberman): we need to avoid rebuilding a new handler every time,
    // but with class globals disallowed for google3 C++ this is tricky.
    upb_byteshandler_init(&handler_);
    FillStringHandler<T>::SetHandler(&handler_);
    input_.Reset(&handler_, target);
  }

  // Returns the sink by value.
  BytesSink input() { return input_; }

 private:
  upb_byteshandler handler_;
  BytesSink input_;
};
} // namespace upb
#include "upb/port_undef.inc"
#endif // UPB_STDCPP_H_

@ -0,0 +1,604 @@
#include <string.h>
#include "upb/upb.h"
#include "upb/decode.h"
#include "upb/port_def.inc"
/* Maps descriptor type -> upb field type.
 * The table is indexed by descriptor type number; each row's trailing comment
 * names the descriptor type it corresponds to.  Several descriptor types map
 * to the same field type (e.g. FIXED64 and UINT64 both map to
 * UPB_TYPE_UINT64).
 * NOTE(review): the first slot holds UPB_WIRE_TYPE_END_GROUP, which is not a
 * field type; presumably a placeholder for the unused index 0 -- confirm. */
const uint8_t upb_desctype_to_fieldtype[] = {
UPB_WIRE_TYPE_END_GROUP, /* ENDGROUP */
UPB_TYPE_DOUBLE, /* DOUBLE */
UPB_TYPE_FLOAT, /* FLOAT */
UPB_TYPE_INT64, /* INT64 */
UPB_TYPE_UINT64, /* UINT64 */
UPB_TYPE_INT32, /* INT32 */
UPB_TYPE_UINT64, /* FIXED64 */
UPB_TYPE_UINT32, /* FIXED32 */
UPB_TYPE_BOOL, /* BOOL */
UPB_TYPE_STRING, /* STRING */
UPB_TYPE_MESSAGE, /* GROUP */
UPB_TYPE_MESSAGE, /* MESSAGE */
UPB_TYPE_BYTES, /* BYTES */
UPB_TYPE_UINT32, /* UINT32 */
UPB_TYPE_ENUM, /* ENUM */
UPB_TYPE_INT32, /* SFIXED32 */
UPB_TYPE_INT64, /* SFIXED64 */
UPB_TYPE_INT32, /* SINT32 */
UPB_TYPE_INT64, /* SINT64 */
};
/* Data pertaining to the parse. */
typedef struct {
const char *ptr; /* Current parsing position. */
const char *field_start; /* Start of this field. */
const char *limit; /* End of delimited region or end of buffer. */
upb_arena *arena; /* Arena used for arrays and unknown-field data. */
int depth;
uint32_t end_group; /* Set to field number of END_GROUP tag, if any. */
} upb_decstate;
/* Data passed by value to each parsing function. */
typedef struct {
char *msg; /* Message being filled; fields are addressed by byte offset. */
const upb_msglayout *layout; /* Layout describing msg. */
upb_decstate *state; /* Shared parse state. */
} upb_decframe;
/* Returns 0 (false / NULL) from the enclosing function when x is false.
 * Wrapped in do { ... } while (0) so that "CHK(x);" is exactly one statement
 * and stays correct as the body of an if/else. */
#define CHK(x)     \
  do {             \
    if (!(x)) {    \
      return 0;    \
    }              \
  } while (0)
static bool upb_skip_unknowngroup(upb_decstate *d, int field_number);
static bool upb_decode_message(upb_decstate *d, char *msg,
const upb_msglayout *l);
/* Decodes a base-128 varint starting at *ptr into *val.
 * Fails (returns false, *ptr unchanged) if the input ends before the final
 * byte or if more than ten bytes carry the continuation bit.  On success,
 * advances *ptr past the varint. */
static bool upb_decode_varint(const char **ptr, const char *limit,
                              uint64_t *val) {
  const char *cur = *ptr;
  uint8_t byte;
  int shift;

  *val = 0;
  for (shift = 0;; shift += 7) {
    CHK(shift < 70 && cur < limit);
    byte = *cur++;
    *val |= (uint64_t)(byte & 0x7F) << shift;
    if (!(byte & 0x80)) {
      break; /* No continuation bit: this was the last byte. */
    }
  }

  *ptr = cur;
  return true;
}
/* Decodes a varint and requires the value to fit in 32 bits. */
static bool upb_decode_varint32(const char **ptr, const char *limit,
                                uint32_t *val) {
  uint64_t wide;

  CHK(upb_decode_varint(ptr, limit, &wide));
  CHK(wide <= UINT32_MAX);

  *val = (uint32_t)wide;
  return true;
}
/* Copies the next 8 bytes of input into *val (byte order as stored) and
 * advances *ptr.  Fails if fewer than 8 bytes remain before limit. */
static bool upb_decode_64bit(const char **ptr, const char *limit,
                             uint64_t *val) {
  const char *src = *ptr;

  CHK(limit - src >= 8);
  memcpy(val, src, 8);
  *ptr = src + 8;
  return true;
}
/* Copies the next 4 bytes of input into *val (byte order as stored) and
 * advances *ptr.  Fails if fewer than 4 bytes remain before limit. */
static bool upb_decode_32bit(const char **ptr, const char *limit,
                             uint32_t *val) {
  const char *src = *ptr;

  CHK(limit - src >= 4);
  memcpy(val, src, 4);
  *ptr = src + 4;
  return true;
}
/* ZigZag-decodes a 32-bit value: 0 -> 0, 1 -> -1, 2 -> 1, 3 -> -2, ... */
static int32_t upb_zzdecode_32(uint32_t n) {
  const uint32_t magnitude = n >> 1;
  /* All ones when the low (sign) bit is set, zero otherwise. */
  const int32_t sign_mask = -(int32_t)(n & 1);
  return magnitude ^ sign_mask;
}
/* ZigZag-decodes a 64-bit value: 0 -> 0, 1 -> -1, 2 -> 1, 3 -> -2, ... */
static int64_t upb_zzdecode_64(uint64_t n) {
  const uint64_t magnitude = n >> 1;
  /* All ones when the low (sign) bit is set, zero otherwise. */
  const int64_t sign_mask = -(int64_t)(n & 1);
  return magnitude ^ sign_mask;
}
/* Reads a varint length prefix and checks that that many bytes remain before
 * limit.  On success, *ptr points at the first payload byte and *outlen holds
 * the length; the payload itself is not consumed. */
static bool upb_decode_string(const char **ptr, const char *limit,
                              int *outlen) {
  uint32_t len;

  CHK(upb_decode_varint32(ptr, limit, &len));
  CHK(len < INT32_MAX);
  CHK(limit - *ptr >= (int32_t)len);

  *outlen = len;
  return true;
}
/* Stores val at byte offset ofs inside msg.  memcpy is used so the
 * destination does not need to be aligned. */
static void upb_set32(void *msg, size_t ofs, uint32_t val) {
  char *dest = (char*)msg + ofs;
  memcpy(dest, &val, sizeof(uint32_t));
}
/* Hands the raw bytes of the current field, [field_start, ptr), to
 * upb_msg_addunknown() for the frame's message.  Always returns true. */
static bool upb_append_unknown(upb_decstate *d, upb_decframe *frame) {
  const char *start = d->field_start;

  upb_msg_addunknown(frame->msg, start, d->ptr - start, d->arena);
  return true;
}
/* Skips the payload that follows tag, selected by the tag's wire type (its
 * low three bits).  For an END_GROUP tag nothing is consumed; the result is
 * whether the tag's field number equals group_fieldnum.  Returns false for
 * malformed data or an unrecognized wire type. */
static bool upb_skip_unknownfielddata(upb_decstate *d, uint32_t tag,
                                      uint32_t group_fieldnum) {
  const uint32_t field_number = tag >> 3;

  switch (tag & 7) {
    case UPB_WIRE_TYPE_VARINT: {
      uint64_t ignored;
      return upb_decode_varint(&d->ptr, d->limit, &ignored);
    }
    case UPB_WIRE_TYPE_32BIT: {
      uint32_t ignored;
      return upb_decode_32bit(&d->ptr, d->limit, &ignored);
    }
    case UPB_WIRE_TYPE_64BIT: {
      uint64_t ignored;
      return upb_decode_64bit(&d->ptr, d->limit, &ignored);
    }
    case UPB_WIRE_TYPE_DELIMITED: {
      int payload_len;
      CHK(upb_decode_string(&d->ptr, d->limit, &payload_len));
      d->ptr += payload_len;
      return true;
    }
    case UPB_WIRE_TYPE_START_GROUP:
      return upb_skip_unknowngroup(d, field_number);
    case UPB_WIRE_TYPE_END_GROUP:
      return field_number == group_fieldnum;
  }

  return false;
}
/* Skips fields of an unknown group until the input is exhausted or
 * d->end_group becomes non-zero.  Succeeds only if d->end_group then equals
 * field_number, in which case the marker is cleared before returning. */
static bool upb_skip_unknowngroup(upb_decstate *d, int field_number) {
  for (;;) {
    uint32_t tag = 0;

    if (d->ptr >= d->limit || d->end_group != 0) {
      break;
    }
    CHK(upb_decode_varint32(&d->ptr, d->limit, &tag));
    CHK(upb_skip_unknownfielddata(d, tag, field_number));
  }

  CHK(d->end_group == field_number);
  d->end_group = 0;
  return true;
}
/* Enlarges arr so that it can hold at least arr->len + elements items of
 * elem_size bytes each.  Capacity starts from max(arr->size, 8) and doubles
 * until it is large enough.  Only the arr->len elements in use are reported
 * to upb_realloc() as the old size.  Returns false if reallocation fails, in
 * which case arr is left unchanged. */
static bool upb_array_grow(upb_array *arr, size_t elements, size_t elem_size,
                           upb_arena *arena) {
  upb_alloc *alloc = upb_arena_alloc(arena);
  size_t min_capacity = arr->len + elements;
  size_t capacity = UPB_MAX(arr->size, 8);
  void *grown;

  for (; capacity < min_capacity; capacity *= 2) {
    /* keep doubling */
  }

  grown = upb_realloc(alloc, arr->data, arr->len * elem_size,
                      capacity * elem_size);
  CHK(grown);

  arr->data = grown;
  arr->size = capacity;
  return true;
}
/* Makes sure arr has room for `elements` more items and returns a pointer to
 * the first unused slot (arr->data + arr->len * elem_size).  arr->len is not
 * changed.  Returns NULL if the array had to grow and growing failed. */
static void *upb_array_reserve(upb_array *arr, size_t elements,
                               size_t elem_size, upb_arena *arena) {
  size_t spare = arr->size - arr->len;

  if (spare < elements) {
    CHK(upb_array_grow(arr, elements, elem_size, arena));
  }

  return (char *)arr->data + arr->len * elem_size;
}
/* Appends `elements` items of elem_size bytes, copied from data, to the end
 * of arr, growing it through arena when necessary.  Returns false (leaving
 * arr->len untouched) if space could not be reserved. */
bool upb_array_add(upb_array *arr, size_t elements, size_t elem_size,
                   const void *data, upb_arena *arena) {
  void *tail = upb_array_reserve(arr, elements, elem_size, arena);

  CHK(tail);
  memcpy(tail, data, elements * elem_size);
  arr->len += elements;
  return true;
}
/* Loads the upb_array pointer stored at field->offset inside frame->msg.
 * The field must be repeated (asserted). */
static upb_array *upb_getarr(upb_decframe *frame,
                             const upb_msglayout_field *field) {
  upb_array **slot;

  UPB_ASSERT(field->label == UPB_LABEL_REPEATED);
  slot = (upb_array **)&frame->msg[field->offset];
  return *slot;
}
/* Returns the array for a repeated field, creating it in the decoder's arena
 * and storing it in the message on first use.  Returns NULL if a new array
 * could not be created. */
static upb_array *upb_getorcreatearr(upb_decframe *frame,
                                     const upb_msglayout_field *field) {
  upb_array *existing = upb_getarr(frame, field);

  if (existing) {
    return existing;
  }

  existing = upb_array_new(frame->state->arena);
  CHK(existing);
  *(upb_array **)&frame->msg[field->offset] = existing;
  return existing;
}
/* Returns the submessage stored in a non-repeated message field, creating it
 * in the decoder's arena on first use.  *subm receives the submessage layout
 * looked up via field->submsg_index.  Returns NULL if a new message could not
 * be created. */
static upb_msg *upb_getorcreatemsg(upb_decframe *frame,
                                   const upb_msglayout_field *field,
                                   const upb_msglayout **subm) {
  upb_msg **slot = (void *)(frame->msg + field->offset);

  *subm = frame->layout->submsgs[field->submsg_index];
  UPB_ASSERT(field->label != UPB_LABEL_REPEATED);

  if (*slot == NULL) {
    *slot = upb_msg_new(*subm, frame->state->arena);
    CHK(*slot);
  }

  return *slot;
}
/* Creates a new submessage and appends it to a repeated message field.
 *
 * *subm always receives the submessage layout looked up via
 * field->submsg_index.  Returns the new message, or NULL if the array, the
 * message, or the array slot could not be allocated.
 *
 * Both the array pointer and the append result are checked: previously a
 * failed upb_getorcreatearr() was passed on to upb_array_add(), and a failed
 * append still returned a message that was not stored in the array. */
static upb_msg *upb_addmsg(upb_decframe *frame,
                           const upb_msglayout_field *field,
                           const upb_msglayout **subm) {
  upb_msg *submsg;
  upb_array *arr = upb_getorcreatearr(frame, field);

  /* Publish the layout first so callers see it even on the failure paths. */
  *subm = frame->layout->submsgs[field->submsg_index];
  CHK(arr);

  submsg = upb_msg_new(*subm, frame->state->arena);
  CHK(submsg);
  CHK(upb_array_add(arr, 1, sizeof(submsg), &submsg, frame->state->arena));
  return submsg;
}
/* Sets the has-bit for field in the message.  field->presence (asserted to
 * be positive) is used as a bit index from the start of frame->msg. */
static void upb_sethasbit(upb_decframe *frame,
                          const upb_msglayout_field *field) {
  int32_t bit_index = field->presence;
  char *byte;

  UPB_ASSERT(field->presence > 0);
  byte = &frame->msg[bit_index / 8];
  *byte |= (1 << (bit_index % 8));
}
/* Records field->number as the active case of the field's oneof.  For oneof
 * members field->presence is negative (asserted) and its bitwise complement
 * is the byte offset at which the case value is written. */
static void upb_setoneofcase(upb_decframe *frame,
                             const upb_msglayout_field *field) {
  UPB_ASSERT(field->presence < 0);
  upb_set32(frame->msg, ~field->presence, field->number);
}
/* Copies `size` bytes from val into the storage for field.
 *
 * For a repeated field the bytes go into the next free array slot; the slot
 * is only reserved here and arr->len is left unchanged.  For any other field
 * the bytes go directly into the message at field->offset.  Returns false if
 * the array could not be obtained or grown. */
static bool upb_decode_addval(upb_decframe *frame,
                              const upb_msglayout_field *field, void *val,
                              size_t size) {
  char *dest;

  if (field->label == UPB_LABEL_REPEATED) {
    upb_array *arr = upb_getorcreatearr(frame, field);
    CHK(arr);
    dest = upb_array_reserve(arr, 1, size, frame->state->arena);
    CHK(dest);
  } else {
    dest = frame->msg + field->offset;
  }

  memcpy(dest, val, size);
  return true;
}
/* Marks a value that was just written for field as present:
 *   - repeated field: bumps the array length by one (a free slot is asserted);
 *   - negative presence: records the oneof case;
 *   - positive presence: sets the has-bit;
 *   - presence == 0: nothing to record. */
static void upb_decode_setpresent(upb_decframe *frame,
                                  const upb_msglayout_field *field) {
  if (field->label == UPB_LABEL_REPEATED) {
    upb_array *arr = upb_getarr(frame, field);
    UPB_ASSERT(arr->len < arr->size);
    arr->len++;
    return;
  }

  if (field->presence < 0) {
    upb_setoneofcase(frame, field);
    return;
  }

  if (field->presence > 0) {
    upb_sethasbit(frame, field);
  }
}
/* Decodes a length-delimited submessage of `limit` bytes starting at d->ptr
 * into msg using layout.
 *
 * The decoder's limit is temporarily narrowed to the end of the submessage
 * and its depth budget is decremented for the duration of the call.
 *
 * Returns false if the depth budget is exhausted, if decoding the submessage
 * fails, or if the submessage ended with a pending END_GROUP marker.  The
 * result of upb_decode_message() is now propagated; it used to be discarded,
 * which let the caller keep parsing after a malformed submessage. */
static bool upb_decode_msgfield(upb_decstate *d, upb_msg *msg,
                                const upb_msglayout *layout, int limit) {
  const char *saved_limit = d->limit;
  bool ok;

  d->limit = d->ptr + limit;
  CHK(--d->depth >= 0);
  ok = upb_decode_message(d, msg, layout);
  /* Restore decoder state before reporting the outcome. */
  d->depth++;
  d->limit = saved_limit;
  CHK(ok);
  CHK(d->end_group == 0);
  return true;
}
/* Decodes the body of a group whose START_GROUP tag has just been read into
 * msg using layout.  The depth budget is decremented for the duration of the
 * call.
 *
 * Returns false if the depth budget is exhausted, if decoding the body fails,
 * or if the END_GROUP marker recorded in d->end_group does not equal
 * field_number.  On success the marker is cleared.  The result of
 * upb_decode_message() is now propagated instead of being discarded. */
static bool upb_decode_groupfield(upb_decstate *d, upb_msg *msg,
                                  const upb_msglayout *layout,
                                  int field_number) {
  bool ok;

  CHK(--d->depth >= 0);
  ok = upb_decode_message(d, msg, layout);
  d->depth++;
  CHK(ok);
  CHK(d->end_group == field_number);
  d->end_group = 0;
  return true;
}
/* Decodes a varint-encoded value for a known field and stores it.
 *
 * The raw 64-bit varint is converted according to field->descriptortype:
 * kept as-is for (U)INT64, truncated to 32 bits for (U)INT32 and ENUM,
 * collapsed to true/false for BOOL, and zigzag-decoded for SINT32/SINT64.
 * For any other descriptor type the bytes are preserved as an unknown field
 * instead.  Returns false on a malformed varint or a failed store. */
static bool upb_decode_varintfield(upb_decstate *d, upb_decframe *frame,
                                   const upb_msglayout_field *field) {
  /* Every member starts at the same address, so &slot can be passed to
   * upb_decode_addval() together with the width of the active member. */
  union {
    uint64_t u64;
    int64_t i64;
    uint32_t u32;
    int32_t i32;
    bool b;
  } slot;
  size_t width;
  uint64_t raw;

  CHK(upb_decode_varint(&d->ptr, d->limit, &raw));

  switch (field->descriptortype) {
    case UPB_DESCRIPTOR_TYPE_INT64:
    case UPB_DESCRIPTOR_TYPE_UINT64:
      slot.u64 = raw;
      width = sizeof(slot.u64);
      break;
    case UPB_DESCRIPTOR_TYPE_INT32:
    case UPB_DESCRIPTOR_TYPE_UINT32:
    case UPB_DESCRIPTOR_TYPE_ENUM:
      slot.u32 = (uint32_t)raw;
      width = sizeof(slot.u32);
      break;
    case UPB_DESCRIPTOR_TYPE_BOOL:
      slot.b = raw != 0;
      width = sizeof(slot.b);
      break;
    case UPB_DESCRIPTOR_TYPE_SINT32:
      slot.i32 = upb_zzdecode_32((uint32_t)raw);
      width = sizeof(slot.i32);
      break;
    case UPB_DESCRIPTOR_TYPE_SINT64:
      slot.i64 = upb_zzdecode_64(raw);
      width = sizeof(slot.i64);
      break;
    default:
      return upb_append_unknown(d, frame);
  }

  CHK(upb_decode_addval(frame, field, &slot, width));
  upb_decode_setpresent(frame, field);
  return true;
}
/* Decodes a fixed 64-bit value for a known field.  DOUBLE, FIXED64 and
 * SFIXED64 fields store the 8 bytes verbatim; for any other descriptor type
 * the bytes are preserved as an unknown field.  Returns false on truncated
 * input or a failed store. */
static bool upb_decode_64bitfield(upb_decstate *d, upb_decframe *frame,
                                  const upb_msglayout_field *field) {
  uint64_t fixed;

  CHK(upb_decode_64bit(&d->ptr, d->limit, &fixed));

  switch (field->descriptortype) {
    case UPB_DESCRIPTOR_TYPE_DOUBLE:
    case UPB_DESCRIPTOR_TYPE_FIXED64:
    case UPB_DESCRIPTOR_TYPE_SFIXED64:
      break;
    default:
      return upb_append_unknown(d, frame);
  }

  CHK(upb_decode_addval(frame, field, &fixed, sizeof(fixed)));
  upb_decode_setpresent(frame, field);
  return true;
}
/* Decodes a fixed 32-bit value for a known field.  FLOAT, FIXED32 and
 * SFIXED32 fields store the 4 bytes verbatim; for any other descriptor type
 * the bytes are preserved as an unknown field.  Returns false on truncated
 * input or a failed store. */
static bool upb_decode_32bitfield(upb_decstate *d, upb_decframe *frame,
                                  const upb_msglayout_field *field) {
  uint32_t fixed;

  CHK(upb_decode_32bit(&d->ptr, d->limit, &fixed));

  switch (field->descriptortype) {
    case UPB_DESCRIPTOR_TYPE_FLOAT:
    case UPB_DESCRIPTOR_TYPE_FIXED32:
    case UPB_DESCRIPTOR_TYPE_SFIXED32:
      break;
    default:
      return upb_append_unknown(d, frame);
  }

  CHK(upb_decode_addval(frame, field, &fixed, sizeof(fixed)));
  upb_decode_setpresent(frame, field);
  return true;
}
/* Appends a packed run of fixed-width values to arr by copying `len` bytes
 * straight from the input.  Fails if len is not an exact multiple of
 * elem_size or if the array cannot grow; on success d->ptr is advanced by
 * len. */
static bool upb_decode_fixedpacked(upb_decstate *d, upb_array *arr,
                                   uint32_t len, int elem_size) {
  size_t count = len / elem_size;

  /* Reject payloads with a trailing partial element. */
  CHK((size_t)(count * elem_size) == len);
  CHK(upb_array_add(arr, count, elem_size, d->ptr, d->arena));
  d->ptr += len;
  return true;
}
/* Builds a string view over the next `len` input bytes and advances d->ptr
 * past them.  No copy is made: the view's data pointer refers directly into
 * the input buffer. */
static upb_strview upb_decode_strfield(upb_decstate *d, uint32_t len) {
  upb_strview view;

  view.size = len;
  view.data = d->ptr;
  d->ptr += len;
  return view;
}
/* Decodes one length-delimited occurrence of a repeated field whose payload
 * of `len` bytes starts at d->ptr, appending the result to the field's array.
 *
 *   - STRING/BYTES: appends one string view over the payload.
 *   - fixed-width numeric types: payload is a packed run, copied verbatim.
 *   - varint numeric types: payload is a packed run of varints, each
 *     converted to the element type and appended.
 *   - MESSAGE: appends a new submessage and decodes the payload into it.
 *   - GROUP: a group is not valid with this wire type, so the whole field
 *     (tag, length and payload) is preserved as unknown.
 *
 * Returns false on allocation failure or malformed payload.
 *
 * The GROUP case now skips the payload before recording the unknown field,
 * matching the non-repeated path in upb_decode_delimitedfield(); previously
 * the payload bytes were left in the stream and re-parsed as sibling fields.
 * VARINT_CASE_EX is also undefined at the end so neither helper macro leaks
 * out of this function. */
static bool upb_decode_toarray(upb_decstate *d, upb_decframe *frame,
                               const upb_msglayout_field *field, int len) {
  upb_array *arr = upb_getorcreatearr(frame, field);
  CHK(arr);

#define VARINT_CASE(ctype, decode) \
  VARINT_CASE_EX(ctype, decode, decode)

#define VARINT_CASE_EX(ctype, decode, dtype)                           \
  {                                                                    \
    const char *ptr = d->ptr;                                          \
    const char *limit = ptr + len;                                     \
    while (ptr < limit) {                                              \
      uint64_t val;                                                    \
      ctype decoded;                                                   \
      CHK(upb_decode_varint(&ptr, limit, &val));                       \
      decoded = (decode)((dtype)val);                                  \
      CHK(upb_array_add(arr, 1, sizeof(decoded), &decoded, d->arena)); \
    }                                                                  \
    d->ptr = ptr;                                                      \
    return true;                                                       \
  }

  switch (field->descriptortype) {
    case UPB_DESCRIPTOR_TYPE_STRING:
    case UPB_DESCRIPTOR_TYPE_BYTES: {
      upb_strview str = upb_decode_strfield(d, len);
      return upb_array_add(arr, 1, sizeof(str), &str, d->arena);
    }
    case UPB_DESCRIPTOR_TYPE_FLOAT:
    case UPB_DESCRIPTOR_TYPE_FIXED32:
    case UPB_DESCRIPTOR_TYPE_SFIXED32:
      return upb_decode_fixedpacked(d, arr, len, sizeof(int32_t));
    case UPB_DESCRIPTOR_TYPE_DOUBLE:
    case UPB_DESCRIPTOR_TYPE_FIXED64:
    case UPB_DESCRIPTOR_TYPE_SFIXED64:
      return upb_decode_fixedpacked(d, arr, len, sizeof(int64_t));
    case UPB_DESCRIPTOR_TYPE_INT32:
    case UPB_DESCRIPTOR_TYPE_UINT32:
    case UPB_DESCRIPTOR_TYPE_ENUM:
      VARINT_CASE(uint32_t, uint32_t);
    case UPB_DESCRIPTOR_TYPE_INT64:
    case UPB_DESCRIPTOR_TYPE_UINT64:
      VARINT_CASE(uint64_t, uint64_t);
    case UPB_DESCRIPTOR_TYPE_BOOL:
      VARINT_CASE(bool, bool);
    case UPB_DESCRIPTOR_TYPE_SINT32:
      VARINT_CASE_EX(int32_t, upb_zzdecode_32, uint32_t);
    case UPB_DESCRIPTOR_TYPE_SINT64:
      VARINT_CASE_EX(int64_t, upb_zzdecode_64, uint64_t);
    case UPB_DESCRIPTOR_TYPE_MESSAGE: {
      const upb_msglayout *subm;
      upb_msg *submsg = upb_addmsg(frame, field, &subm);
      CHK(submsg);
      return upb_decode_msgfield(d, submsg, subm, len);
    }
    case UPB_DESCRIPTOR_TYPE_GROUP:
      /* Consume the payload so the unknown record covers the whole field. */
      d->ptr += len;
      return upb_append_unknown(d, frame);
  }
#undef VARINT_CASE
#undef VARINT_CASE_EX
  UPB_UNREACHABLE();
}
/* Decodes a length-delimited value for a known field.
 *
 * Repeated fields are delegated to upb_decode_toarray().  For singular
 * fields, STRING/BYTES store a view over the payload and MESSAGE decodes the
 * payload into the (possibly newly created) submessage; any other descriptor
 * type has its payload skipped and preserved as an unknown field.  Returns
 * false on a bad length prefix, allocation failure or malformed submessage. */
static bool upb_decode_delimitedfield(upb_decstate *d, upb_decframe *frame,
                                      const upb_msglayout_field *field) {
  int len;

  CHK(upb_decode_string(&d->ptr, d->limit, &len));

  if (field->label == UPB_LABEL_REPEATED) {
    return upb_decode_toarray(d, frame, field, len);
  }

  switch (field->descriptortype) {
    case UPB_DESCRIPTOR_TYPE_STRING:
    case UPB_DESCRIPTOR_TYPE_BYTES: {
      upb_strview view = upb_decode_strfield(d, len);
      CHK(upb_decode_addval(frame, field, &view, sizeof(view)));
      break;
    }
    case UPB_DESCRIPTOR_TYPE_MESSAGE: {
      const upb_msglayout *sublayout;
      upb_msg *child = upb_getorcreatemsg(frame, field, &sublayout);
      CHK(child);
      CHK(upb_decode_msgfield(d, child, sublayout, len));
      break;
    }
    default:
      /* TODO(haberman): should we accept the last element of a packed? */
      d->ptr += len;
      return upb_append_unknown(d, frame);
  }

  upb_decode_setpresent(frame, field);
  return true;
}
static const upb_msglayout_field *upb_find_field(const upb_msglayout *l,
uint32_t field_number) {
/* Lots of optimization opportunities here. */
int i;
for (i = 0; i < l->field_count; i++) {
if (l->fields[i].number == field_number) {
return &l->fields[i];
}
}
return NULL; /* Unknown field. */
}
/* Decodes one field (tag plus payload) at d->ptr into frame->msg.
 *
 * d->field_start is set to the first byte of the tag so that the field can be
 * preserved verbatim if it turns out to be unknown.  Known fields are
 * dispatched on wire type; an END_GROUP tag for a known field number is
 * recorded in d->end_group.  Fields whose number is not in the layout are
 * skipped and preserved as unknown (field number 0 is rejected).
 *
 * Returns false on malformed input, unsupported wire type or allocation
 * failure.
 *
 * START_GROUP handling is hardened in two ways:
 *   - a field that is neither GROUP nor MESSAGE typed has no submessage
 *     layout, so it is skipped and preserved as unknown instead of indexing
 *     submsgs[] with its submsg_index;
 *   - the group message pointer is checked before decoding into it, since
 *     creating it can fail. */
static bool upb_decode_field(upb_decstate *d, upb_decframe *frame) {
  uint32_t tag;
  const upb_msglayout_field *field;
  int field_number;

  d->field_start = d->ptr;
  CHK(upb_decode_varint32(&d->ptr, d->limit, &tag));
  field_number = tag >> 3;
  field = upb_find_field(frame->layout, field_number);

  if (field) {
    switch (tag & 7) {
      case UPB_WIRE_TYPE_VARINT:
        return upb_decode_varintfield(d, frame, field);
      case UPB_WIRE_TYPE_32BIT:
        return upb_decode_32bitfield(d, frame, field);
      case UPB_WIRE_TYPE_64BIT:
        return upb_decode_64bitfield(d, frame, field);
      case UPB_WIRE_TYPE_DELIMITED:
        return upb_decode_delimitedfield(d, frame, field);
      case UPB_WIRE_TYPE_START_GROUP: {
        const upb_msglayout *layout;
        upb_msg *group;

        if (field->descriptortype != UPB_DESCRIPTOR_TYPE_GROUP &&
            field->descriptortype != UPB_DESCRIPTOR_TYPE_MESSAGE) {
          /* Wire type does not match the declared type: keep as unknown. */
          CHK(upb_skip_unknownfielddata(d, tag, -1));
          return upb_append_unknown(d, frame);
        }

        if (field->label == UPB_LABEL_REPEATED) {
          group = upb_addmsg(frame, field, &layout);
        } else {
          group = upb_getorcreatemsg(frame, field, &layout);
        }
        CHK(group);

        return upb_decode_groupfield(d, group, layout, field_number);
      }
      case UPB_WIRE_TYPE_END_GROUP:
        d->end_group = field_number;
        return true;
      default:
        return false;
    }
  } else {
    CHK(field_number != 0);
    CHK(upb_skip_unknownfielddata(d, tag, -1));
    CHK(upb_append_unknown(d, frame));
    return true;
  }
}
/* Decodes fields from d->ptr into msg (laid out according to l) until the
 * current limit is reached or an END_GROUP tag has been recorded in
 * d->end_group.  Returns false as soon as any field fails to decode.
 *
 * Stopping at a recorded END_GROUP keeps the bytes that follow a group's
 * terminator from being parsed as members of that group.  Callers that are
 * not decoding a group already reject a non-zero d->end_group afterwards, so
 * for them the early exit only shortens the path to the same failure. */
static bool upb_decode_message(upb_decstate *d, char *msg, const upb_msglayout *l) {
  upb_decframe frame;
  frame.msg = msg;
  frame.layout = l;
  frame.state = d;

  while (d->ptr < d->limit && d->end_group == 0) {
    CHK(upb_decode_field(d, &frame));
  }

  return true;
}
/* Public entry point: parses the `size` bytes at buf into msg according to
 * layout l, allocating from arena.  Nesting is limited by a depth budget of
 * 64.  Returns true only if every field decoded successfully and no
 * END_GROUP marker was left pending at the top level. */
bool upb_decode(const char *buf, size_t size, void *msg, const upb_msglayout *l,
                upb_arena *arena) {
  upb_decstate state;

  state.arena = arena;
  state.depth = 64;
  state.end_group = 0;
  state.ptr = buf;
  state.limit = buf + size;

  return upb_decode_message(&state, msg, l) && state.end_group == 0;
}
#undef CHK

@ -0,0 +1,21 @@
/*
** upb_decode: parsing into a upb_msg using a upb_msglayout.
*/
#ifndef UPB_DECODE_H_
#define UPB_DECODE_H_
#include "upb/msg.h"
#ifdef __cplusplus
extern "C" {
#endif
/* Parses the `size` bytes of wire-format data at buf into msg, which is laid
 * out according to l.  Arrays and submessages created while parsing are
 * allocated from arena.  String and bytes fields are stored as views that
 * point into buf rather than copies, so buf must remain valid for as long as
 * those fields are read.  Returns false if the input is malformed or an
 * allocation fails. */
bool upb_decode(const char *buf, size_t size, upb_msg *msg,
const upb_msglayout *l, upb_arena *arena);
#ifdef __cplusplus
} /* extern "C" */
#endif
#endif /* UPB_DECODE_H_ */

File diff suppressed because it is too large Load Diff

@ -0,0 +1,909 @@
/*
** Defs are upb's internal representation of the constructs that can appear
** in a .proto file:
**
** - upb::MessageDefPtr (upb_msgdef): describes a "message" construct.
** - upb::FieldDefPtr (upb_fielddef): describes a message field.
** - upb::FileDefPtr (upb_filedef): describes a .proto file and its defs.
** - upb::EnumDefPtr (upb_enumdef): describes an enum.
** - upb::OneofDefPtr (upb_oneofdef): describes a oneof.
**
** TODO: definitions of services.
**
** This is a mixed C/C++ interface that offers a full API to both languages.
** See the top-level README for more information.
*/
#ifndef UPB_DEF_H_
#define UPB_DEF_H_
#include "upb/upb.h"
#include "upb/table.int.h"
#include "google/protobuf/descriptor.upb.h"
#ifdef __cplusplus
#include <cstring>
#include <memory>
#include <string>
#include <vector>
namespace upb {
class EnumDefPtr;
class FieldDefPtr;
class FileDefPtr;
class MessageDefPtr;
class OneofDefPtr;
class SymbolTable;
}
#endif
#include "upb/port_def.inc"
struct upb_enumdef;
typedef struct upb_enumdef upb_enumdef;
struct upb_fielddef;
typedef struct upb_fielddef upb_fielddef;
struct upb_filedef;
typedef struct upb_filedef upb_filedef;
struct upb_msgdef;
typedef struct upb_msgdef upb_msgdef;
struct upb_oneofdef;
typedef struct upb_oneofdef upb_oneofdef;
struct upb_symtab;
typedef struct upb_symtab upb_symtab;
/* Protobuf syntax version (proto2 or proto3).  Each enumerator's numeric
 * value equals its version number. */
typedef enum {
UPB_SYNTAX_PROTO2 = 2,
UPB_SYNTAX_PROTO3 = 3
} upb_syntax_t;
/* All the different kinds of well-known type messages. To keep checks simple,
 * number wrappers and string wrappers are grouped together. Make sure the
 * order and members of these groups are not changed.
 */
typedef enum {
/* Used for messages that are not a well-known type. */
UPB_WELLKNOWN_UNSPECIFIED,
UPB_WELLKNOWN_ANY,
UPB_WELLKNOWN_FIELDMASK,
UPB_WELLKNOWN_DURATION,
UPB_WELLKNOWN_TIMESTAMP,
/* number wrappers */
UPB_WELLKNOWN_DOUBLEVALUE,
UPB_WELLKNOWN_FLOATVALUE,
UPB_WELLKNOWN_INT64VALUE,
UPB_WELLKNOWN_UINT64VALUE,
UPB_WELLKNOWN_INT32VALUE,
UPB_WELLKNOWN_UINT32VALUE,
/* string wrappers */
UPB_WELLKNOWN_STRINGVALUE,
UPB_WELLKNOWN_BYTESVALUE,
/* NOTE(review): presumably the string-wrapper group ends at BYTESVALUE and
 * the entries below belong to no group -- confirm against the code that
 * range-checks these values. */
UPB_WELLKNOWN_BOOLVALUE,
UPB_WELLKNOWN_VALUE,
UPB_WELLKNOWN_LISTVALUE,
UPB_WELLKNOWN_STRUCT
} upb_wellknowntype_t;
/* upb_fielddef ***************************************************************/
/* Maximum field number allowed for FieldDefs. This is an inherent limit of the
* protobuf wire format. */
#define UPB_MAX_FIELDNUMBER ((1 << 29) - 1)
#ifdef __cplusplus
extern "C" {
#endif
const char *upb_fielddef_fullname(const upb_fielddef *f);
upb_fieldtype_t upb_fielddef_type(const upb_fielddef *f);
upb_descriptortype_t upb_fielddef_descriptortype(const upb_fielddef *f);
upb_label_t upb_fielddef_label(const upb_fielddef *f);
uint32_t upb_fielddef_number(const upb_fielddef *f);
const char *upb_fielddef_name(const upb_fielddef *f);
bool upb_fielddef_isextension(const upb_fielddef *f);
bool upb_fielddef_lazy(const upb_fielddef *f);
bool upb_fielddef_packed(const upb_fielddef *f);
size_t upb_fielddef_getjsonname(const upb_fielddef *f, char *buf, size_t len);
const upb_msgdef *upb_fielddef_containingtype(const upb_fielddef *f);
const upb_oneofdef *upb_fielddef_containingoneof(const upb_fielddef *f);
uint32_t upb_fielddef_index(const upb_fielddef *f);
bool upb_fielddef_issubmsg(const upb_fielddef *f);
bool upb_fielddef_isstring(const upb_fielddef *f);
bool upb_fielddef_isseq(const upb_fielddef *f);
bool upb_fielddef_isprimitive(const upb_fielddef *f);
bool upb_fielddef_ismap(const upb_fielddef *f);
int64_t upb_fielddef_defaultint64(const upb_fielddef *f);
int32_t upb_fielddef_defaultint32(const upb_fielddef *f);
uint64_t upb_fielddef_defaultuint64(const upb_fielddef *f);
uint32_t upb_fielddef_defaultuint32(const upb_fielddef *f);
bool upb_fielddef_defaultbool(const upb_fielddef *f);
float upb_fielddef_defaultfloat(const upb_fielddef *f);
double upb_fielddef_defaultdouble(const upb_fielddef *f);
const char *upb_fielddef_defaultstr(const upb_fielddef *f, size_t *len);
bool upb_fielddef_hassubdef(const upb_fielddef *f);
bool upb_fielddef_haspresence(const upb_fielddef *f);
const upb_msgdef *upb_fielddef_msgsubdef(const upb_fielddef *f);
const upb_enumdef *upb_fielddef_enumsubdef(const upb_fielddef *f);
/* Internal only. */
uint32_t upb_fielddef_selectorbase(const upb_fielddef *f);
#ifdef __cplusplus
} /* extern "C" */
/* A upb_fielddef describes a single field in a message. It is most often
* found as a part of a upb_msgdef, but can also stand alone to represent
* an extension. */
/* Non-owning, copyable handle around a `const upb_fielddef*`.  Every accessor
 * forwards to the corresponding upb_fielddef_* C function.  A
 * default-constructed handle holds a null pointer and converts to false. */
class upb::FieldDefPtr {
 public:
  FieldDefPtr() : ptr_(nullptr) {}
  explicit FieldDefPtr(const upb_fielddef *ptr) : ptr_(ptr) {}

  /* The wrapped C pointer (may be null). */
  const upb_fielddef* ptr() const { return ptr_; }
  explicit operator bool() const { return ptr_ != nullptr; }

  typedef upb_fieldtype_t Type;
  typedef upb_label_t Label;
  typedef upb_descriptortype_t DescriptorType;

  const char* full_name() const { return upb_fielddef_fullname(ptr_); }

  Type type() const { return upb_fielddef_type(ptr_); }
  Label label() const { return upb_fielddef_label(ptr_); }
  const char* name() const { return upb_fielddef_name(ptr_); }
  uint32_t number() const { return upb_fielddef_number(ptr_); }
  bool is_extension() const { return upb_fielddef_isextension(ptr_); }

  /* Copies the JSON name for this field into the given buffer.  Returns the
   * actual size of the JSON name, including the NULL terminator.  If the
   * return value is 0, the JSON name is unset.  If the return value is
   * greater than len, the JSON name was truncated.  The buffer is always
   * NULL-terminated if len > 0.
   *
   * The JSON name always defaults to a camelCased version of the regular
   * name.  However if the regular name is unset, the JSON name will be unset
   * also.
   */
  size_t GetJsonName(char *buf, size_t len) const {
    return upb_fielddef_getjsonname(ptr_, buf, len);
  }

  /* Convenience version of the above function which copies the JSON name
   * into the given string, returning false if the name is not set.
   *
   * Declared const like every other accessor so that it can be called on a
   * const FieldDefPtr; it only writes to *str.
   *
   * NOTE(review): the string is resized to the size reported by the
   * two-argument overload, which is documented to include the terminator, so
   * *str presumably ends with an embedded '\0' -- confirm whether callers
   * rely on that. */
  template <class T>
  bool GetJsonName(T* str) const {
    str->resize(GetJsonName(nullptr, 0));
    GetJsonName(&(*str)[0], str->size());
    return str->size() > 0;
  }

  /* For UPB_TYPE_MESSAGE fields only where is_tag_delimited() == false,
   * indicates whether this field should have lazy parsing handlers that yield
   * the unparsed string for the submessage.
   *
   * TODO(haberman): I think we want to move this into a FieldOptions container
   * when we add support for custom options (the FieldOptions struct will
   * contain both regular FieldOptions like "lazy" *and* custom options). */
  bool lazy() const { return upb_fielddef_lazy(ptr_); }

  /* For non-string, non-submessage fields, this indicates whether binary
   * protobufs are encoded in packed or non-packed format.
   *
   * TODO(haberman): see note above about putting options like this into a
   * FieldOptions container. */
  bool packed() const { return upb_fielddef_packed(ptr_); }

  /* An integer that can be used as an index into an array of fields for
   * whatever message this field belongs to.  Guaranteed to be less than
   * f->containing_type()->field_count().  May only be accessed once the def has
   * been finalized. */
  uint32_t index() const { return upb_fielddef_index(ptr_); }

  /* The MessageDef to which this field belongs.
   *
   * If this field has been added to a MessageDef, that message can be retrieved
   * directly (this is always the case for frozen FieldDefs).
   *
   * If the field has not yet been added to a MessageDef, you can set the name
   * of the containing type symbolically instead.  This is mostly useful for
   * extensions, where the extension is declared separately from the message. */
  MessageDefPtr containing_type() const;

  /* The OneofDef to which this field belongs, or NULL if this field is not part
   * of a oneof. */
  OneofDefPtr containing_oneof() const;

  /* The field's type according to the enum in descriptor.proto.  This is not
   * the same as UPB_TYPE_*, because it distinguishes between (for example)
   * INT32 and SINT32, whereas our "type" enum does not.  This return of
   * descriptor_type() is a function of type(), integer_format(), and
   * is_tag_delimited(). */
  DescriptorType descriptor_type() const {
    return upb_fielddef_descriptortype(ptr_);
  }

  /* Convenient field type tests. */
  bool IsSubMessage() const { return upb_fielddef_issubmsg(ptr_); }
  bool IsString() const { return upb_fielddef_isstring(ptr_); }
  bool IsSequence() const { return upb_fielddef_isseq(ptr_); }
  bool IsPrimitive() const { return upb_fielddef_isprimitive(ptr_); }
  bool IsMap() const { return upb_fielddef_ismap(ptr_); }

  /* Returns the non-string default value for this fielddef, which may either
   * be something the client set explicitly or the "default default" (0 for
   * numbers, empty for strings).  The field's type indicates the type of the
   * returned value, except for enum fields that are still mutable.
   *
   * Requires that the given function matches the field's current type. */
  int64_t default_int64() const { return upb_fielddef_defaultint64(ptr_); }
  int32_t default_int32() const { return upb_fielddef_defaultint32(ptr_); }
  uint64_t default_uint64() const { return upb_fielddef_defaultuint64(ptr_); }
  uint32_t default_uint32() const { return upb_fielddef_defaultuint32(ptr_); }
  bool default_bool() const { return upb_fielddef_defaultbool(ptr_); }
  float default_float() const { return upb_fielddef_defaultfloat(ptr_); }
  double default_double() const { return upb_fielddef_defaultdouble(ptr_); }

  /* The resulting string is always NULL-terminated.  If non-NULL, the length
   * will be stored in *len. */
  const char *default_string(size_t * len) const {
    return upb_fielddef_defaultstr(ptr_, len);
  }

  /* Returns the enum or submessage def for this field, if any.  The field's
   * type must match (ie. you may only call enum_subdef() for fields where
   * type() == UPB_TYPE_ENUM). */
  EnumDefPtr enum_subdef() const;
  MessageDefPtr message_subdef() const;

 private:
  const upb_fielddef *ptr_;
};
#endif /* __cplusplus */
/* upb_oneofdef ***************************************************************/
#ifdef __cplusplus
extern "C" {
#endif
typedef upb_inttable_iter upb_oneof_iter;
const char *upb_oneofdef_name(const upb_oneofdef *o);
const upb_msgdef *upb_oneofdef_containingtype(const upb_oneofdef *o);
int upb_oneofdef_numfields(const upb_oneofdef *o);
uint32_t upb_oneofdef_index(const upb_oneofdef *o);
/* Oneof lookups:
* - ntof: look up a field by name.
* - ntofz: look up a field by name (as a null-terminated string).
* - itof: look up a field by number. */
const upb_fielddef *upb_oneofdef_ntof(const upb_oneofdef *o,
const char *name, size_t length);
/* Convenience wrapper around upb_oneofdef_ntof() for NUL-terminated names;
 * the length is taken with strlen(). */
UPB_INLINE const upb_fielddef *upb_oneofdef_ntofz(const upb_oneofdef *o,
                                                  const char *name) {
  size_t name_len = strlen(name);
  return upb_oneofdef_ntof(o, name, name_len);
}
const upb_fielddef *upb_oneofdef_itof(const upb_oneofdef *o, uint32_t num);
/* upb_oneof_iter i;
* for(upb_oneof_begin(&i, e); !upb_oneof_done(&i); upb_oneof_next(&i)) {
* // ...
* }
*/
void upb_oneof_begin(upb_oneof_iter *iter, const upb_oneofdef *o);
void upb_oneof_next(upb_oneof_iter *iter);
bool upb_oneof_done(upb_oneof_iter *iter);
upb_fielddef *upb_oneof_iter_field(const upb_oneof_iter *iter);
void upb_oneof_iter_setdone(upb_oneof_iter *iter);
bool upb_oneof_iter_isequal(const upb_oneof_iter *iter1,
const upb_oneof_iter *iter2);
#ifdef __cplusplus
} /* extern "C" */
/* Class that represents a oneof. */
/* Class that represents a oneof.
 *
 * Non-owning, copyable handle around a `const upb_oneofdef*`; every accessor
 * forwards to the corresponding upb_oneofdef_* / upb_oneof_* C function.  A
 * default-constructed handle holds a null pointer and converts to false. */
class upb::OneofDefPtr {
 public:
  OneofDefPtr() : ptr_(nullptr) {}
  explicit OneofDefPtr(const upb_oneofdef *ptr) : ptr_(ptr) {}

  /* The wrapped C pointer (may be null). */
  const upb_oneofdef* ptr() const { return ptr_; }

  /* const-qualified, like the equivalent operators on FieldDefPtr and
   * MessageDefPtr, so that a const OneofDefPtr can be tested for null. */
  explicit operator bool() const { return ptr_ != nullptr; }

  /* Returns the MessageDef that owns this OneofDef. */
  MessageDefPtr containing_type() const;

  /* Returns the name of this oneof.  This is the name used to look up the oneof
   * by name once added to a message def. */
  const char* name() const { return upb_oneofdef_name(ptr_); }

  /* Returns the number of fields currently defined in the oneof. */
  int field_count() const { return upb_oneofdef_numfields(ptr_); }

  /* Looks up by name. */
  FieldDefPtr FindFieldByName(const char *name, size_t len) const {
    return FieldDefPtr(upb_oneofdef_ntof(ptr_, name, len));
  }
  FieldDefPtr FindFieldByName(const char* name) const {
    return FieldDefPtr(upb_oneofdef_ntofz(ptr_, name));
  }

  /* Overload for string-like types exposing c_str() and size(). */
  template <class T>
  FieldDefPtr FindFieldByName(const T& str) const {
    return FindFieldByName(str.c_str(), str.size());
  }

  /* Looks up by tag number. */
  FieldDefPtr FindFieldByNumber(uint32_t num) const {
    return FieldDefPtr(upb_oneofdef_itof(ptr_, num));
  }

  /* Forward iterator over the fields of the oneof, built on upb_oneof_iter.
   * Only OneofDefPtr can construct one (see begin()/end() below). */
  class const_iterator
      : public std::iterator<std::forward_iterator_tag, FieldDefPtr> {
   public:
    void operator++() { upb_oneof_next(&iter_); }

    FieldDefPtr operator*() const {
      return FieldDefPtr(upb_oneof_iter_field(&iter_));
    }

    bool operator!=(const const_iterator& other) const {
      return !upb_oneof_iter_isequal(&iter_, &other.iter_);
    }

    bool operator==(const const_iterator& other) const {
      return upb_oneof_iter_isequal(&iter_, &other.iter_);
    }

   private:
    friend class OneofDefPtr;

    const_iterator() {}
    explicit const_iterator(OneofDefPtr o) {
      upb_oneof_begin(&iter_, o.ptr());
    }
    /* Builds the past-the-end iterator by marking a fresh one as done. */
    static const_iterator end() {
      const_iterator iter;
      upb_oneof_iter_setdone(&iter.iter_);
      return iter;
    }

    upb_oneof_iter iter_;
  };

  const_iterator begin() const { return const_iterator(*this); }
  const_iterator end() const { return const_iterator::end(); }

 private:
  const upb_oneofdef *ptr_;
};
/* Defined out of class because OneofDefPtr must be complete at this point.
 * Wraps the result of upb_fielddef_containingoneof(). */
inline upb::OneofDefPtr upb::FieldDefPtr::containing_oneof() const {
  const upb_oneofdef *oneof = upb_fielddef_containingoneof(ptr_);
  return OneofDefPtr(oneof);
}
#endif /* __cplusplus */
/* upb_msgdef *****************************************************************/
typedef upb_inttable_iter upb_msg_field_iter;
typedef upb_strtable_iter upb_msg_oneof_iter;
/* Well-known field tag numbers for map-entry messages. */
#define UPB_MAPENTRY_KEY 1
#define UPB_MAPENTRY_VALUE 2
/* Well-known field tag numbers for Any messages. */
#define UPB_ANY_TYPE 1
#define UPB_ANY_VALUE 2
/* Well-known field tag numbers for duration messages. */
#define UPB_DURATION_SECONDS 1
#define UPB_DURATION_NANOS 2
/* Well-known field tag numbers for timestamp messages. */
#define UPB_TIMESTAMP_SECONDS 1
#define UPB_TIMESTAMP_NANOS 2
#ifdef __cplusplus
extern "C" {
#endif
const char *upb_msgdef_fullname(const upb_msgdef *m);
const upb_filedef *upb_msgdef_file(const upb_msgdef *m);
const char *upb_msgdef_name(const upb_msgdef *m);
int upb_msgdef_numoneofs(const upb_msgdef *m);
upb_syntax_t upb_msgdef_syntax(const upb_msgdef *m);
bool upb_msgdef_mapentry(const upb_msgdef *m);
upb_wellknowntype_t upb_msgdef_wellknowntype(const upb_msgdef *m);
bool upb_msgdef_isnumberwrapper(const upb_msgdef *m);
bool upb_msgdef_setsyntax(upb_msgdef *m, upb_syntax_t syntax);
const upb_fielddef *upb_msgdef_itof(const upb_msgdef *m, uint32_t i);
const upb_fielddef *upb_msgdef_ntof(const upb_msgdef *m, const char *name,
size_t len);
const upb_oneofdef *upb_msgdef_ntoo(const upb_msgdef *m, const char *name,
size_t len);
int upb_msgdef_numfields(const upb_msgdef *m);
int upb_msgdef_numoneofs(const upb_msgdef *m);
/* Convenience wrapper around upb_msgdef_ntoo() for NUL-terminated names;
 * the length is taken with strlen(). */
UPB_INLINE const upb_oneofdef *upb_msgdef_ntooz(const upb_msgdef *m,
                                                const char *name) {
  size_t name_len = strlen(name);
  return upb_msgdef_ntoo(m, name, name_len);
}
/* Convenience wrapper around upb_msgdef_ntof() for NUL-terminated names;
 * the length is taken with strlen(). */
UPB_INLINE const upb_fielddef *upb_msgdef_ntofz(const upb_msgdef *m,
                                                const char *name) {
  size_t name_len = strlen(name);
  return upb_msgdef_ntof(m, name, name_len);
}
/* Internal-only. */
size_t upb_msgdef_selectorcount(const upb_msgdef *m);
uint32_t upb_msgdef_submsgfieldcount(const upb_msgdef *m);
/* Lookup of either field or oneof by name. Returns whether either was found.
* If the return is true, then the found def will be set, and the non-found
* one set to NULL. */
bool upb_msgdef_lookupname(const upb_msgdef *m, const char *name, size_t len,
const upb_fielddef **f, const upb_oneofdef **o);
/* Convenience wrapper around upb_msgdef_lookupname() for NUL-terminated
 * names; the length is taken with strlen(). */
UPB_INLINE bool upb_msgdef_lookupnamez(const upb_msgdef *m, const char *name,
                                       const upb_fielddef **f,
                                       const upb_oneofdef **o) {
  size_t name_len = strlen(name);
  return upb_msgdef_lookupname(m, name, name_len, f, o);
}
/* Iteration over fields and oneofs. For example:
*
* upb_msg_field_iter i;
* for(upb_msg_field_begin(&i, m);
* !upb_msg_field_done(&i);
* upb_msg_field_next(&i)) {
* upb_fielddef *f = upb_msg_iter_field(&i);
* // ...
* }
*
* For C we don't have separate iterators for const and non-const.
* It is the caller's responsibility to cast the upb_fielddef* to
* const if the upb_msgdef* is const. */
void upb_msg_field_begin(upb_msg_field_iter *iter, const upb_msgdef *m);
void upb_msg_field_next(upb_msg_field_iter *iter);
bool upb_msg_field_done(const upb_msg_field_iter *iter);
upb_fielddef *upb_msg_iter_field(const upb_msg_field_iter *iter);
void upb_msg_field_iter_setdone(upb_msg_field_iter *iter);
bool upb_msg_field_iter_isequal(const upb_msg_field_iter * iter1,
const upb_msg_field_iter * iter2);
/* Similar to above, we also support iterating through the oneofs in a
* msgdef. */
void upb_msg_oneof_begin(upb_msg_oneof_iter * iter, const upb_msgdef *m);
void upb_msg_oneof_next(upb_msg_oneof_iter * iter);
bool upb_msg_oneof_done(const upb_msg_oneof_iter *iter);
const upb_oneofdef *upb_msg_iter_oneof(const upb_msg_oneof_iter *iter);
void upb_msg_oneof_iter_setdone(upb_msg_oneof_iter * iter);
bool upb_msg_oneof_iter_isequal(const upb_msg_oneof_iter *iter1,
const upb_msg_oneof_iter *iter2);
#ifdef __cplusplus
} /* extern "C" */
/* Structure that describes a single .proto message type. */
class upb::MessageDefPtr {
public:
MessageDefPtr() : ptr_(nullptr) {}
explicit MessageDefPtr(const upb_msgdef *ptr) : ptr_(ptr) {}
const upb_msgdef *ptr() const { return ptr_; }
explicit operator bool() const { return ptr_ != nullptr; }
const char* full_name() const { return upb_msgdef_fullname(ptr_); }
const char* name() const { return upb_msgdef_name(ptr_); }
/* The number of fields that belong to the MessageDef. */
int field_count() const { return upb_msgdef_numfields(ptr_); }
/* The number of oneofs that belong to the MessageDef. */
int oneof_count() const { return upb_msgdef_numoneofs(ptr_); }
upb_syntax_t syntax() const { return upb_msgdef_syntax(ptr_); }
/* These return null pointers if the field is not found. */
FieldDefPtr FindFieldByNumber(uint32_t number) const {
return FieldDefPtr(upb_msgdef_itof(ptr_, number));
}
FieldDefPtr FindFieldByName(const char* name, size_t len) const {
return FieldDefPtr(upb_msgdef_ntof(ptr_, name, len));
}
FieldDefPtr FindFieldByName(const char *name) const {
return FieldDefPtr(upb_msgdef_ntofz(ptr_, name));
}
template <class T>
FieldDefPtr FindFieldByName(const T& str) const {
return FindFieldByName(str.c_str(), str.size());
}
OneofDefPtr FindOneofByName(const char* name, size_t len) const {
return OneofDefPtr(upb_msgdef_ntoo(ptr_, name, len));
}
OneofDefPtr FindOneofByName(const char *name) const {
return OneofDefPtr(upb_msgdef_ntooz(ptr_, name));
}
template <class T>
OneofDefPtr FindOneofByName(const T &str) const {
return FindOneofByName(str.c_str(), str.size());
}
/* Is this message a map entry? */
bool mapentry() const { return upb_msgdef_mapentry(ptr_); }
/* Return the type of well known type message. UPB_WELLKNOWN_UNSPECIFIED for
* non-well-known message. */
upb_wellknowntype_t wellknowntype() const {
return upb_msgdef_wellknowntype(ptr_);
}
/* Whether is a number wrapper. */
bool isnumberwrapper() const { return upb_msgdef_isnumberwrapper(ptr_); }
/* Iteration over fields. The order is undefined. */
class const_field_iterator
: public std::iterator<std::forward_iterator_tag, FieldDefPtr> {
public:
void operator++() { upb_msg_field_next(&iter_); }
FieldDefPtr operator*() const {
return FieldDefPtr(upb_msg_iter_field(&iter_));
}
bool operator!=(const const_field_iterator &other) const {
return !upb_msg_field_iter_isequal(&iter_, &other.iter_);
}
bool operator==(const const_field_iterator &other) const {
return upb_msg_field_iter_isequal(&iter_, &other.iter_);
}
private:
friend class MessageDefPtr;
explicit const_field_iterator() {}
explicit const_field_iterator(MessageDefPtr msg) {
upb_msg_field_begin(&iter_, msg.ptr());
}
static const_field_iterator end() {
const_field_iterator iter;
upb_msg_field_iter_setdone(&iter.iter_);
return iter;
}
upb_msg_field_iter iter_;
};
/* Forward iterator over the oneofs of a message. The order is undefined.
 *
 * Instances are created only by MessageDefPtr (a friend), through
 * oneof_begin() / oneof_end(). */
class const_oneof_iterator
: public std::iterator<std::forward_iterator_tag, OneofDefPtr> {
public:
/* Advances to the next oneof. Returns void, so the result of ++it cannot be
 * used as an expression. */
void operator++() { upb_msg_oneof_next(&iter_); }
/* Returns the oneof at the current position, by value. */
OneofDefPtr operator*() const {
return OneofDefPtr(upb_msg_iter_oneof(&iter_));
}
/* Comparisons delegate to upb_msg_oneof_iter_isequal(). */
bool operator!=(const const_oneof_iterator& other) const {
return !upb_msg_oneof_iter_isequal(&iter_, &other.iter_);
}
bool operator==(const const_oneof_iterator &other) const {
return upb_msg_oneof_iter_isequal(&iter_, &other.iter_);
}
private:
friend class MessageDefPtr;
/* Leaves iter_ uninitialized; only used by end(), which immediately marks
 * the iterator as done. */
const_oneof_iterator() {}
/* Positions the iterator at the first oneof of msg. */
explicit const_oneof_iterator(MessageDefPtr msg) {
upb_msg_oneof_begin(&iter_, msg.ptr());
}
/* Builds the past-the-end iterator. */
static const_oneof_iterator end() {
const_oneof_iterator iter;
upb_msg_oneof_iter_setdone(&iter.iter_);
return iter;
}
upb_msg_oneof_iter iter_;
};
/* Small range object exposing begin()/end() over a message's fields, so the
 * result of MessageDefPtr::fields() can be used in a range-based for loop.
 * Holds only the raw upb_msgdef pointer. */
class ConstFieldAccessor {
public:
explicit ConstFieldAccessor(const upb_msgdef* md) : md_(md) {}
const_field_iterator begin() { return MessageDefPtr(md_).field_begin(); }
const_field_iterator end() { return MessageDefPtr(md_).field_end(); }
private:
const upb_msgdef* md_;
};
/* Small range object exposing begin()/end() over a message's oneofs, so the
 * result of MessageDefPtr::oneofs() can be used in a range-based for loop.
 * Holds only the raw upb_msgdef pointer. */
class ConstOneofAccessor {
public:
explicit ConstOneofAccessor(const upb_msgdef* md) : md_(md) {}
const_oneof_iterator begin() { return MessageDefPtr(md_).oneof_begin(); }
const_oneof_iterator end() { return MessageDefPtr(md_).oneof_end(); }
private:
const upb_msgdef* md_;
};
/* Iterator positioned at the first field of this message. */
const_field_iterator field_begin() const {
return const_field_iterator(*this);
}
/* Past-the-end iterator for fields. */
const_field_iterator field_end() const { return const_field_iterator::end(); }
/* Iterator positioned at the first oneof of this message. */
const_oneof_iterator oneof_begin() const {
return const_oneof_iterator(*this);
}
/* Past-the-end iterator for oneofs. */
const_oneof_iterator oneof_end() const { return const_oneof_iterator::end(); }
/* Range objects over fields / oneofs, usable with range-based for. */
ConstFieldAccessor fields() const { return ConstFieldAccessor(ptr()); }
ConstOneofAccessor oneofs() const { return ConstOneofAccessor(ptr()); }
private:
const upb_msgdef* ptr_;
};
/* Out-of-class definitions of the FieldDefPtr / OneofDefPtr members that
 * return a MessageDefPtr by value. Each wraps the pointer returned by the
 * corresponding C accessor. */
inline upb::MessageDefPtr upb::FieldDefPtr::message_subdef() const {
return MessageDefPtr(upb_fielddef_msgsubdef(ptr_));
}
inline upb::MessageDefPtr upb::FieldDefPtr::containing_type() const {
return MessageDefPtr(upb_fielddef_containingtype(ptr_));
}
inline upb::MessageDefPtr upb::OneofDefPtr::containing_type() const {
return MessageDefPtr(upb_oneofdef_containingtype(ptr_));
}
#endif /* __cplusplus */
/* upb_enumdef ****************************************************************/
/* Iterator over the name/number pairs of an enum; an alias for the string
 * table iterator. */
typedef upb_strtable_iter upb_enum_iter;
/* Basic accessors. upb::EnumDefPtr exposes these as full_name(), name(),
 * default_value() and value_count(). */
const char *upb_enumdef_fullname(const upb_enumdef *e);
const char *upb_enumdef_name(const upb_enumdef *e);
const upb_filedef *upb_enumdef_file(const upb_enumdef *e);
int32_t upb_enumdef_default(const upb_enumdef *e);
int upb_enumdef_numvals(const upb_enumdef *e);
/* Enum lookups:
 * - ntoi: look up a name with specified length.
 * - ntoiz: look up a name provided as a null-terminated string.
 * - iton: look up an integer, returning the name as a null-terminated
 * string. */
bool upb_enumdef_ntoi(const upb_enumdef *e, const char *name, size_t len,
int32_t *num);
/* Computes the length with strlen(), so name must not be NULL. */
UPB_INLINE bool upb_enumdef_ntoiz(const upb_enumdef *e,
const char *name, int32_t *num) {
return upb_enumdef_ntoi(e, name, strlen(name), num);
}
const char *upb_enumdef_iton(const upb_enumdef *e, int32_t num);
/* Iteration over the values of an enum:
 *
 * upb_enum_iter i;
 * for(upb_enum_begin(&i, e); !upb_enum_done(&i); upb_enum_next(&i)) {
 * // ...
 * }
 */
void upb_enum_begin(upb_enum_iter *iter, const upb_enumdef *e);
void upb_enum_next(upb_enum_iter *iter);
bool upb_enum_done(upb_enum_iter *iter);
/* Name and number of the entry the iterator currently points at. */
const char *upb_enum_iter_name(upb_enum_iter *iter);
int32_t upb_enum_iter_number(upb_enum_iter *iter);
#ifdef __cplusplus
/* Thin, copyable, non-owning C++ wrapper around a const upb_enumdef*. Every
 * method forwards to the C function of the corresponding name. */
class upb::EnumDefPtr {
public:
/* Default-constructed wrappers hold a null pointer and test false. */
EnumDefPtr() : ptr_(nullptr) {}
explicit EnumDefPtr(const upb_enumdef* ptr) : ptr_(ptr) {}
const upb_enumdef* ptr() const { return ptr_; }
/* True when a non-null enumdef is wrapped. */
explicit operator bool() const { return ptr_ != nullptr; }
const char* full_name() const { return upb_enumdef_fullname(ptr_); }
const char* name() const { return upb_enumdef_name(ptr_); }
/* The value that is used as the default when no field default is specified.
 * If not set explicitly, the first value that was added will be used.
 * The default value must be a member of the enum.
 * Requires that value_count() > 0. */
int32_t default_value() const { return upb_enumdef_default(ptr_); }
/* Returns the number of values currently defined in the enum. Note that
 * multiple names can refer to the same number, so this may be greater than
 * the total number of unique numbers. */
int value_count() const { return upb_enumdef_numvals(ptr_); }
/* Looks up a null-terminated name; returns true if found. The number is
 * passed back through *num by upb_enumdef_ntoiz(). */
bool FindValueByName(const char *name, int32_t *num) const {
return upb_enumdef_ntoiz(ptr_, name, num);
}
/* Finds the name corresponding to the given number, or NULL if none was
 * found. If more than one name corresponds to this number, returns the
 * first one that was added. */
const char *FindValueByNumber(int32_t num) const {
return upb_enumdef_iton(ptr_, num);
}
/* Iteration over name/value pairs. The order is undefined.
 * Adding an enum val invalidates any iterators.
 *
 * Usage: for (Iterator i(e); !i.Done(); i.Next()) { i.name(); i.number(); }
 *
 * TODO: make compatible with range-for, with elements as pairs? */
class Iterator {
public:
explicit Iterator(EnumDefPtr e) { upb_enum_begin(&iter_, e.ptr()); }
int32_t number() { return upb_enum_iter_number(&iter_); }
const char *name() { return upb_enum_iter_name(&iter_); }
bool Done() { return upb_enum_done(&iter_); }
void Next() { return upb_enum_next(&iter_); }
private:
upb_enum_iter iter_;
};
private:
const upb_enumdef *ptr_;
};
/* Out-of-class definition of FieldDefPtr::enum_subdef(): wraps the pointer
 * returned by upb_fielddef_enumsubdef() in an EnumDefPtr. */
inline upb::EnumDefPtr upb::FieldDefPtr::enum_subdef() const {
return EnumDefPtr(upb_fielddef_enumsubdef(ptr_));
}
#endif /* __cplusplus */
/* upb_filedef ****************************************************************/
#ifdef __cplusplus
extern "C" {
#endif
/* File-level string/enum properties. upb::FileDefPtr exposes these as name(),
 * package(), phpprefix(), phpnamespace() and syntax(). */
const char *upb_filedef_name(const upb_filedef *f);
const char *upb_filedef_package(const upb_filedef *f);
const char *upb_filedef_phpprefix(const upb_filedef *f);
const char *upb_filedef_phpnamespace(const upb_filedef *f);
upb_syntax_t upb_filedef_syntax(const upb_filedef *f);
/* Counts of the file's dependencies, messages and enums. */
int upb_filedef_depcount(const upb_filedef *f);
int upb_filedef_msgcount(const upb_filedef *f);
int upb_filedef_enumcount(const upb_filedef *f);
/* Indexed access to the file's dependencies, messages and enums.
 * NOTE(review): presumably i must be in [0, matching count) -- confirm
 * against the implementation. */
const upb_filedef *upb_filedef_dep(const upb_filedef *f, int i);
const upb_msgdef *upb_filedef_msg(const upb_filedef *f, int i);
const upb_enumdef *upb_filedef_enum(const upb_filedef *f, int i);
#ifdef __cplusplus
} /* extern "C" */
/* Class that represents a .proto file with some things defined in it.
*
* Many users won't care about FileDefs, but they are necessary if you want to
* read the values of file-level options. */
/* Thin, copyable, non-owning C++ wrapper around a const upb_filedef*. All
 * methods are read-only accessors forwarding to the C API. */
class upb::FileDefPtr {
public:
explicit FileDefPtr(const upb_filedef *ptr) : ptr_(ptr) {}
const upb_filedef* ptr() const { return ptr_; }
/* True when a non-null filedef is wrapped. */
explicit operator bool() const { return ptr_ != nullptr; }
/* Name of the file (eg. "foo/bar.proto"). */
const char* name() const { return upb_filedef_name(ptr_); }
/* Package name for definitions inside the file (eg. "foo.bar"). */
const char* package() const { return upb_filedef_package(ptr_); }
/* The php class prefix which is prepended to all php generated classes
 * from this .proto. Default is empty. */
const char* phpprefix() const { return upb_filedef_phpprefix(ptr_); }
/* The namespace of php generated classes. Default is empty. When this
 * option is empty, the package name will be used for determining the
 * namespace. */
const char* phpnamespace() const { return upb_filedef_phpnamespace(ptr_); }
/* Syntax for the file. Defaults to proto2. */
upb_syntax_t syntax() const { return upb_filedef_syntax(ptr_); }
/* Get the list of dependencies from the file. These are returned in the
 * order that they were added to the file. */
int dependency_count() const { return upb_filedef_depcount(ptr_); }
const FileDefPtr dependency(int index) const {
return FileDefPtr(upb_filedef_dep(ptr_, index));
}
private:
const upb_filedef* ptr_;
};
#endif /* __cplusplus */
/* upb_symtab *****************************************************************/
#ifdef __cplusplus
extern "C" {
#endif
/* Creation / destruction of a symbol table. upb::SymbolTable pairs these two
 * calls through a std::unique_ptr deleter. */
upb_symtab *upb_symtab_new(void);
void upb_symtab_free(upb_symtab* s);
/* Lookups by symbol name. The C++ wrapper documents that a lookup with no
 * exact match returns NULL. lookupmsg2 takes an explicit length instead of a
 * null-terminated string. */
const upb_msgdef *upb_symtab_lookupmsg(const upb_symtab *s, const char *sym);
const upb_msgdef *upb_symtab_lookupmsg2(
const upb_symtab *s, const char *sym, size_t len);
const upb_enumdef *upb_symtab_lookupenum(const upb_symtab *s, const char *sym);
const upb_filedef *upb_symtab_lookupfile(const upb_symtab *s, const char *name);
int upb_symtab_filecount(const upb_symtab *s);
/* Adds the definitions from a FileDescriptorProto message to the table.
 * NOTE(review): presumably returns NULL and fills *status on failure --
 * confirm against the implementation. */
const upb_filedef *upb_symtab_addfile(
upb_symtab *s, const google_protobuf_FileDescriptorProto *file,
upb_status *status);
/* For generated code only: loads a generated descriptor. */
typedef struct upb_def_init {
struct upb_def_init **deps;  /* Initializers of the files this one depends on. */
const char *filename;
upb_strview descriptor;  /* NOTE(review): presumably the serialized descriptor bytes. */
} upb_def_init;
bool _upb_symtab_loaddefinit(upb_symtab *s, const upb_def_init *init);
#ifdef __cplusplus
} /* extern "C" */
/* Non-const methods in upb::SymbolTable are NOT thread-safe. */
/* Owning C++ wrapper around a upb_symtab. The table is held in a
 * std::unique_ptr whose deleter is upb_symtab_free, so it is released when
 * the SymbolTable is destroyed and the class is move-only. */
class upb::SymbolTable {
public:
/* Creates a fresh, empty symbol table. */
SymbolTable() : ptr_(upb_symtab_new(), upb_symtab_free) {}
/* Takes ownership of an existing symbol table. */
explicit SymbolTable(upb_symtab* s) : ptr_(s, upb_symtab_free) {}
const upb_symtab* ptr() const { return ptr_.get(); }
upb_symtab* ptr() { return ptr_.get(); }
/* Finds an entry in the symbol table with this exact name. If not found,
 * the returned wrapper holds NULL (and tests false). */
MessageDefPtr LookupMessage(const char *sym) const {
return MessageDefPtr(upb_symtab_lookupmsg(ptr_.get(), sym));
}
EnumDefPtr LookupEnum(const char *sym) const {
return EnumDefPtr(upb_symtab_lookupenum(ptr_.get(), sym));
}
FileDefPtr LookupFile(const char *name) const {
return FileDefPtr(upb_symtab_lookupfile(ptr_.get(), name));
}
/* TODO: iteration? */
/* Adds the given FileDescriptorProto message to the pool by calling
 * upb_symtab_addfile(); status is passed through to that call. */
FileDefPtr AddFile(const google_protobuf_FileDescriptorProto *file_proto,
Status *status) {
return FileDefPtr(
upb_symtab_addfile(ptr_.get(), file_proto, status->ptr()));
}
private:
std::unique_ptr<upb_symtab, decltype(&upb_symtab_free)> ptr_;
};
/* Returns str.c_str() after asserting that the string contains no embedded
 * NUL bytes (its size must equal the strlen() of its C string). The returned
 * pointer is owned by str. */
UPB_INLINE const char* upb_safecstr(const std::string& str) {
UPB_ASSERT(str.size() == std::strlen(str.c_str()));
return str.c_str();
}
#endif /* __cplusplus */
#include "upb/port_undef.inc"
#endif /* UPB_DEF_H_ */

@ -0,0 +1,378 @@
/* We encode backwards, to avoid pre-computing lengths (one-pass encode). */
#include "upb/encode.h"
#include <string.h>
#include "upb/msg.h"
#include "upb/upb.h"
#include "upb/port_def.inc"
/* Upper bound on the encoded size of a varint: a 64-bit value needs at most
 * ten 7-bit groups. */
#define UPB_PB_VARINT_MAX_LEN 10
/* Early-exit helper: makes the enclosing function return false when x is
 * false. Only usable inside functions that return bool. */
#define CHK(x) do { if (!(x)) { return false; } } while(0)
/* Writes val to buf as a base-128 varint, least-significant 7-bit group
 * first, with the high bit of each byte set when more bytes follow.
 * Returns the number of bytes written (between 1 and 10). */
static size_t upb_encode_varint(uint64_t val, char *buf) {
  size_t n = 0;
  do {
    uint8_t group = val & 0x7fU;
    val >>= 7;
    if (val != 0) group |= 0x80U; /* continuation bit */
    buf[n++] = group;
  } while (val != 0);
  return n;
}
/* ZigZag-encodes a signed 32-bit value: 0, -1, 1, -2, ... map to
 * 0, 1, 2, 3, ... so that small magnitudes produce short varints. */
static uint32_t upb_zzencode_32(int32_t n) {
  uint32_t doubled = (uint32_t)n << 1;
  uint32_t sign_mask = (uint32_t)(n >> 31); /* all ones when n is negative */
  return doubled ^ sign_mask;
}
/* ZigZag-encodes a signed 64-bit value: 0, -1, 1, -2, ... map to
 * 0, 1, 2, 3, ... so that small magnitudes produce short varints. */
static uint64_t upb_zzencode_64(int64_t n) {
  uint64_t doubled = (uint64_t)n << 1;
  uint64_t sign_mask = (uint64_t)(n >> 63); /* all ones when n is negative */
  return doubled ^ sign_mask;
}
/* Encoder state. Output is written back-to-front (see the comment at the top
 * of this file), so the bytes encoded so far occupy [ptr, limit) inside the
 * allocation [buf, limit). */
typedef struct {
upb_alloc *alloc;  /* Allocator used to grow the buffer. */
char *buf, *ptr, *limit;  /* Buffer start, write position, buffer end. */
} upb_encstate;
/* Returns the smallest power of two that is >= bytes and >= 128.
 *
 * If no such power of two is representable in size_t (bytes is larger than
 * the highest power of two), returns bytes itself instead of doubling past
 * the top of the type, which would wrap to zero and loop forever. */
static size_t upb_roundup_pow2(size_t bytes) {
  size_t ret = 128;
  while (ret < bytes) {
    if (ret > (size_t)-1 / 2) {
      /* Doubling would overflow; the request itself is the best we can do. */
      return bytes;
    }
    ret *= 2;
  }
  return ret;
}
/* Grows the output buffer so that at least "bytes" more bytes fit in front of
 * the data already written. Returns false if the allocation fails, in which
 * case the encoder state is left untouched.
 *
 * Because encoding proceeds back-to-front, the existing contents must end up
 * at the END of the new buffer. */
static bool upb_encode_growbuffer(upb_encstate *e, size_t bytes) {
  size_t old_size = e->limit - e->buf;
  /* Measure the used region before reallocating: afterwards the old pointers
   * may no longer be valid. */
  size_t used = e->limit - e->ptr;
  size_t new_size = upb_roundup_pow2(bytes + used);
  char *new_buf = upb_realloc(e->alloc, e->buf, old_size, new_size);
  CHK(new_buf);

  /* We want previous data at the end, realloc() put it at the beginning.
   * Read it from new_buf: the old block (e->buf) may have been released. */
  if (old_size > 0) {
    memmove(new_buf + new_size - old_size, new_buf, old_size);
  }

  e->ptr = new_buf + new_size - used;
  e->limit = new_buf + new_size;
  e->buf = new_buf;
  return true;
}
/* Makes room for "bytes" bytes immediately before the current write position
 * and moves e->ptr back by that amount, growing the buffer if the free space
 * in front of e->ptr is too small. Returns false if the buffer could not be
 * grown. */
static bool upb_encode_reserve(upb_encstate *e, size_t bytes) {
  size_t avail = (size_t)(e->ptr - e->buf);
  if (!UPB_LIKELY(avail >= bytes)) {
    if (!upb_encode_growbuffer(e, bytes)) {
      return false;
    }
  }
  e->ptr -= bytes;
  return true;
}
/* Writes the given bytes to the buffer, handling reserve/advance. Returns
 * false if the buffer could not be grown.
 *
 * A zero-length write is a no-op. It is handled explicitly because memcpy()
 * requires valid pointers even when the size is zero, and both e->ptr (before
 * the first allocation) and data (an empty string view) may be NULL here. */
static bool upb_put_bytes(upb_encstate *e, const void *data, size_t len) {
  if (len == 0) {
    return true;
  }
  CHK(upb_encode_reserve(e, len));
  memcpy(e->ptr, data, len);
  return true;
}
/* Writes val as 8 raw bytes in host byte order. Returns false on allocation
 * failure. */
static bool upb_put_fixed64(upb_encstate *e, uint64_t val) {
  /* TODO(haberman): byte-swap for big endian. */
  const void *raw = &val;
  return upb_put_bytes(e, raw, sizeof(val));
}
/* Writes val as 4 raw bytes in host byte order. Returns false on allocation
 * failure. */
static bool upb_put_fixed32(upb_encstate *e, uint32_t val) {
  /* TODO(haberman): byte-swap for big endian. */
  const void *raw = &val;
  return upb_put_bytes(e, raw, sizeof(val));
}
/* Writes val as a varint in front of the data already in the buffer.
 *
 * The maximum varint length is reserved first; the value is encoded at the
 * start of that scratch area and then slid to its end, so that it sits flush
 * against the previously written bytes. e->ptr is left pointing at the first
 * byte of the varint. Returns false on allocation failure. */
static bool upb_put_varint(upb_encstate *e, uint64_t val) {
  char *scratch;
  char *dest;
  size_t n;

  if (!upb_encode_reserve(e, UPB_PB_VARINT_MAX_LEN)) {
    return false;
  }

  scratch = e->ptr;
  n = upb_encode_varint(val, scratch);
  dest = scratch + UPB_PB_VARINT_MAX_LEN - n;
  memmove(dest, scratch, n);
  e->ptr = dest;
  return true;
}
/* Writes a double as a fixed 64-bit value by copying its object
 * representation. Returns false on allocation failure. */
static bool upb_put_double(upb_encstate *e, double d) {
  uint64_t bits;
  UPB_ASSERT(sizeof(double) == sizeof(uint64_t));
  memcpy(&bits, &d, sizeof(bits));
  return upb_put_fixed64(e, bits);
}
/* Writes a float as a fixed 32-bit value by copying its object
 * representation. Returns false on allocation failure. */
static bool upb_put_float(upb_encstate *e, float d) {
  uint32_t bits;
  UPB_ASSERT(sizeof(float) == sizeof(uint32_t));
  memcpy(&bits, &d, sizeof(bits));
  return upb_put_fixed32(e, bits);
}
/* Reads the 32-bit oneof case value for field f from msg. For oneof members
 * the bitwise complement of f->presence is the byte offset of the case. */
static uint32_t upb_readcase(const char *msg, const upb_msglayout_field *f) {
  uint32_t case_ofs = ~f->presence;
  uint32_t active;
  memcpy(&active, msg + case_ofs, sizeof(active));
  return active;
}
/* Returns whether the hasbit of field f is set in msg. f->presence must be
 * positive; it is the index of the bit, counted from the start of msg. */
static bool upb_readhasbit(const char *msg, const upb_msglayout_field *f) {
  uint32_t bit_index = f->presence;
  UPB_ASSERT(f->presence > 0);
  return (msg[bit_index / 8] & (1 << (bit_index % 8))) != 0;
}
/* Writes a field tag, i.e. the varint (field_number << 3) | wire_type.
 * Returns false on allocation failure.
 *
 * The tag is computed in unsigned 32-bit arithmetic: shifting a signed int
 * overflows for field numbers >= 2^28 (valid numbers go up to 2^29 - 1), and
 * the resulting negative value would be sign-extended into a 10-byte
 * varint. */
static bool upb_put_tag(upb_encstate *e, int field_number, int wire_type) {
  uint32_t tag = ((uint32_t)field_number << 3) | (uint32_t)wire_type;
  return upb_put_varint(e, tag);
}
/* Writes the payload of a packed array of fixed-width elements: the raw
 * element bytes, preceded (in output order) by their total length as a
 * varint. "size" is the width of one element. Returns false on allocation
 * failure. */
static bool upb_put_fixedarray(upb_encstate *e, const upb_array *arr,
                               size_t size) {
  size_t payload = arr->len * size;
  if (!upb_put_bytes(e, arr->data, payload)) {
    return false;
  }
  return upb_put_varint(e, payload);
}
/* Forward declaration: the array and scalar field encoders below call this
 * recursively for sub-messages and groups. */
bool upb_encode_message(upb_encstate *e, const char *msg,
const upb_msglayout *m, size_t *size);
/* Encodes the repeated field f, whose upb_array pointer is stored at
 * field_mem. m is the layout of the containing message (used to find
 * sub-message layouts). A NULL or empty array produces no output.
 *
 * Like everything in this file, output is produced back-to-front: elements
 * are visited from last to first, and for each item the payload is written
 * before its length and tag. Returns false on allocation failure. */
static bool upb_encode_array(upb_encstate *e, const char *field_mem,
const upb_msglayout *m,
const upb_msglayout_field *f) {
const upb_array *arr = *(const upb_array**)field_mem;
if (arr == NULL || arr->len == 0) {
return true;
}
/* Writes every element of a ctype array as a varint (last element first),
 * then the total byte length of those varints. Ends with "break", so control
 * continues at the shared tag write after the switch. */
#define VARINT_CASE(ctype, encode) { \
ctype *start = arr->data; \
ctype *ptr = start + arr->len; \
size_t pre_len = e->limit - e->ptr; \
do { \
ptr--; \
CHK(upb_put_varint(e, encode)); \
} while (ptr != start); \
CHK(upb_put_varint(e, e->limit - e->ptr - pre_len)); \
} \
break; \
do { ; } while(0)
switch (f->descriptortype) {
/* Fixed-width primitives: raw element bytes plus length, tag added below. */
case UPB_DESCRIPTOR_TYPE_DOUBLE:
CHK(upb_put_fixedarray(e, arr, sizeof(double)));
break;
case UPB_DESCRIPTOR_TYPE_FLOAT:
CHK(upb_put_fixedarray(e, arr, sizeof(float)));
break;
case UPB_DESCRIPTOR_TYPE_SFIXED64:
case UPB_DESCRIPTOR_TYPE_FIXED64:
CHK(upb_put_fixedarray(e, arr, sizeof(uint64_t)));
break;
case UPB_DESCRIPTOR_TYPE_FIXED32:
case UPB_DESCRIPTOR_TYPE_SFIXED32:
CHK(upb_put_fixedarray(e, arr, sizeof(uint32_t)));
break;
/* Varint primitives: packed payload plus length, tag added below. */
case UPB_DESCRIPTOR_TYPE_INT64:
case UPB_DESCRIPTOR_TYPE_UINT64:
VARINT_CASE(uint64_t, *ptr);
case UPB_DESCRIPTOR_TYPE_UINT32:
VARINT_CASE(uint32_t, *ptr);
case UPB_DESCRIPTOR_TYPE_INT32:
case UPB_DESCRIPTOR_TYPE_ENUM:
/* Widened to int64_t first so that negative values are sign-extended. */
VARINT_CASE(int32_t, (int64_t)*ptr);
case UPB_DESCRIPTOR_TYPE_BOOL:
VARINT_CASE(bool, *ptr);
case UPB_DESCRIPTOR_TYPE_SINT32:
VARINT_CASE(int32_t, upb_zzencode_32(*ptr));
case UPB_DESCRIPTOR_TYPE_SINT64:
VARINT_CASE(int64_t, upb_zzencode_64(*ptr));
/* Non-primitive element types are never packed: each element gets its own
 * tag, and these cases return directly instead of reaching the shared tag
 * write below. */
case UPB_DESCRIPTOR_TYPE_STRING:
case UPB_DESCRIPTOR_TYPE_BYTES: {
upb_strview *start = arr->data;
upb_strview *ptr = start + arr->len;
do {
ptr--;
CHK(upb_put_bytes(e, ptr->data, ptr->size) &&
upb_put_varint(e, ptr->size) &&
upb_put_tag(e, f->number, UPB_WIRE_TYPE_DELIMITED));
} while (ptr != start);
return true;
}
case UPB_DESCRIPTOR_TYPE_GROUP: {
void **start = arr->data;
void **ptr = start + arr->len;
const upb_msglayout *subm = m->submsgs[f->submsg_index];
do {
size_t size;
ptr--;
/* END_GROUP is written first because output is built backwards. */
CHK(upb_put_tag(e, f->number, UPB_WIRE_TYPE_END_GROUP) &&
upb_encode_message(e, *ptr, subm, &size) &&
upb_put_tag(e, f->number, UPB_WIRE_TYPE_START_GROUP));
} while (ptr != start);
return true;
}
case UPB_DESCRIPTOR_TYPE_MESSAGE: {
void **start = arr->data;
void **ptr = start + arr->len;
const upb_msglayout *subm = m->submsgs[f->submsg_index];
do {
size_t size;
ptr--;
CHK(upb_encode_message(e, *ptr, subm, &size) &&
upb_put_varint(e, size) &&
upb_put_tag(e, f->number, UPB_WIRE_TYPE_DELIMITED));
} while (ptr != start);
return true;
}
}
#undef VARINT_CASE
/* We encode all primitive arrays as packed, regardless of what was specified
 * in the .proto file. Could special case 1-sized arrays. */
CHK(upb_put_tag(e, f->number, UPB_WIRE_TYPE_DELIMITED));
return true;
}
/* Encodes the non-repeated field f, whose value is stored at field_mem. m is
 * the layout of the containing message (used to find sub-message layouts).
 *
 * When skip_zero_value is true, a value equal to zero (or an empty
 * string/bytes view) produces no output. NULL sub-message and group pointers
 * never produce output. Output is built back-to-front, so the value is
 * written before its tag. Returns false on allocation failure. */
static bool upb_encode_scalarfield(upb_encstate *e, const char *field_mem,
const upb_msglayout *m,
const upb_msglayout_field *f,
bool skip_zero_value) {
/* Loads the value as ctype, optionally skips zero, then writes the value with
 * upb_put_<type>() followed by the tag. Always returns from the function.
 * NOTE(review): for float/double, -0.0 compares equal to 0 and is therefore
 * skipped as well -- confirm this is intended. */
#define CASE(ctype, type, wire_type, encodeval) do { \
ctype val = *(ctype*)field_mem; \
if (skip_zero_value && val == 0) { \
return true; \
} \
return upb_put_ ## type(e, encodeval) && \
upb_put_tag(e, f->number, wire_type); \
} while(0)
switch (f->descriptortype) {
case UPB_DESCRIPTOR_TYPE_DOUBLE:
CASE(double, double, UPB_WIRE_TYPE_64BIT, val);
case UPB_DESCRIPTOR_TYPE_FLOAT:
CASE(float, float, UPB_WIRE_TYPE_32BIT, val);
case UPB_DESCRIPTOR_TYPE_INT64:
case UPB_DESCRIPTOR_TYPE_UINT64:
CASE(uint64_t, varint, UPB_WIRE_TYPE_VARINT, val);
case UPB_DESCRIPTOR_TYPE_UINT32:
CASE(uint32_t, varint, UPB_WIRE_TYPE_VARINT, val);
case UPB_DESCRIPTOR_TYPE_INT32:
case UPB_DESCRIPTOR_TYPE_ENUM:
/* Widened to int64_t first so that negative values are sign-extended. */
CASE(int32_t, varint, UPB_WIRE_TYPE_VARINT, (int64_t)val);
case UPB_DESCRIPTOR_TYPE_SFIXED64:
case UPB_DESCRIPTOR_TYPE_FIXED64:
CASE(uint64_t, fixed64, UPB_WIRE_TYPE_64BIT, val);
case UPB_DESCRIPTOR_TYPE_FIXED32:
case UPB_DESCRIPTOR_TYPE_SFIXED32:
CASE(uint32_t, fixed32, UPB_WIRE_TYPE_32BIT, val);
case UPB_DESCRIPTOR_TYPE_BOOL:
CASE(bool, varint, UPB_WIRE_TYPE_VARINT, val);
case UPB_DESCRIPTOR_TYPE_SINT32:
CASE(int32_t, varint, UPB_WIRE_TYPE_VARINT, upb_zzencode_32(val));
case UPB_DESCRIPTOR_TYPE_SINT64:
CASE(int64_t, varint, UPB_WIRE_TYPE_VARINT, upb_zzencode_64(val));
case UPB_DESCRIPTOR_TYPE_STRING:
case UPB_DESCRIPTOR_TYPE_BYTES: {
upb_strview view = *(upb_strview*)field_mem;
if (skip_zero_value && view.size == 0) {
return true;
}
return upb_put_bytes(e, view.data, view.size) &&
upb_put_varint(e, view.size) &&
upb_put_tag(e, f->number, UPB_WIRE_TYPE_DELIMITED);
}
case UPB_DESCRIPTOR_TYPE_GROUP: {
size_t size;
void *submsg = *(void **)field_mem;
const upb_msglayout *subm = m->submsgs[f->submsg_index];
if (submsg == NULL) {
return true;
}
/* END_GROUP is written first because output is built backwards. */
return upb_put_tag(e, f->number, UPB_WIRE_TYPE_END_GROUP) &&
upb_encode_message(e, submsg, subm, &size) &&
upb_put_tag(e, f->number, UPB_WIRE_TYPE_START_GROUP);
}
case UPB_DESCRIPTOR_TYPE_MESSAGE: {
size_t size;
void *submsg = *(void **)field_mem;
const upb_msglayout *subm = m->submsgs[f->submsg_index];
if (submsg == NULL) {
return true;
}
return upb_encode_message(e, submsg, subm, &size) &&
upb_put_varint(e, size) &&
upb_put_tag(e, f->number, UPB_WIRE_TYPE_DELIMITED);
}
}
#undef CASE
/* Every handled descriptor type returns above. */
UPB_UNREACHABLE();
}
/* Encodes the message msg, laid out according to m, into e and stores the
 * number of bytes it produced in *size. Returns false on allocation failure,
 * in which case *size is not written.
 *
 * Output is built back-to-front: fields are visited from last to first, and
 * the unknown-field bytes are written last, so they end up in front of the
 * known fields in the output. */
bool upb_encode_message(upb_encstate *e, const char *msg,
                        const upb_msglayout *m, size_t *size) {
  int i;
  size_t pre_len = e->limit - e->ptr;
  const char *unknown;
  size_t unknown_size;

  for (i = m->field_count - 1; i >= 0; i--) {
    const upb_msglayout_field *f = &m->fields[i];

    if (f->label == UPB_LABEL_REPEATED) {
      CHK(upb_encode_array(e, msg + f->offset, m, f));
    } else {
      bool skip_empty = false;
      if (f->presence == 0) {
        /* Proto3 presence. */
        skip_empty = true;
      } else if (f->presence > 0) {
        /* Proto2 presence: hasbit. */
        if (!upb_readhasbit(msg, f)) {
          continue;
        }
      } else {
        /* Field is in a oneof. */
        if (upb_readcase(msg, f) != f->number) {
          continue;
        }
      }
      CHK(upb_encode_scalarfield(e, msg + f->offset, m, f, skip_empty));
    }
  }

  unknown = upb_msg_getunknown(msg, &unknown_size);
  if (unknown) {
    /* Propagate allocation failure instead of silently dropping the unknown
     * fields and reporting success. */
    CHK(upb_put_bytes(e, unknown, unknown_size));
  }

  *size = (e->limit - e->ptr) - pre_len;
  return true;
}
/* Serializes msg (laid out according to m) into memory obtained from arena.
 *
 * On success returns a pointer to the encoded bytes and stores their count in
 * *size. An empty encoding yields a non-NULL pointer to a static byte with
 * *size == 0. On failure returns NULL and sets *size to 0. */
char *upb_encode(const void *msg, const upb_msglayout *m, upb_arena *arena,
                 size_t *size) {
  static char ch; /* Non-NULL result for an empty encoding. */
  upb_encstate e;

  e.alloc = upb_arena_alloc(arena);
  e.buf = NULL;
  e.limit = NULL;
  e.ptr = NULL;

  if (!upb_encode_message(&e, msg, m, size)) {
    *size = 0;
    return NULL;
  }

  *size = e.limit - e.ptr;
  if (*size == 0) {
    return &ch;
  }

  UPB_ASSERT(e.ptr);
  return e.ptr;
}
#undef CHK

@ -0,0 +1,21 @@
/*
** upb_encode: serializing a upb_msg to protobuf binary format using a
** upb_msglayout.
*/
#ifndef UPB_ENCODE_H_
#define UPB_ENCODE_H_
#include "upb/msg.h"
#ifdef __cplusplus
extern "C" {
#endif
/* Encodes msg, whose memory layout is described by l, using memory from
 * arena. The number of encoded bytes is stored in *size and a pointer to
 * them is returned; NULL is returned (with *size set to 0) on failure. */
char *upb_encode(const void *msg, const upb_msglayout *l, upb_arena *arena,
size_t *size);
#ifdef __cplusplus
} /* extern "C" */
#endif
#endif /* UPB_ENCODE_H_ */

@ -0,0 +1,105 @@
/*
** Functions for use by generated code. These are not public and users must
** not call them directly.
*/
#ifndef UPB_GENERATED_UTIL_H_
#define UPB_GENERATED_UTIL_H_
#include <stdint.h>
#include "upb/msg.h"
#include "upb/port_def.inc"
/* Yields a type* pointing ofs bytes past msg. Note that it casts away any
 * const on msg, and that its arguments are not parenthesized. Undefined again
 * at the end of this header. */
#define PTR_AT(msg, ofs, type) (type*)((const char*)msg + ofs)
/* Read accessor for a repeated field: loads the upb_array pointer stored at
 * byte offset ofs of msg and returns its element storage. If size is
 * non-NULL it receives the element count. An unset (NULL) array yields NULL
 * with a count of 0. */
UPB_INLINE const void *_upb_array_accessor(const void *msg, size_t ofs,
                                           size_t *size) {
  const upb_array *arr = *PTR_AT(msg, ofs, const upb_array*);

  if (!arr) {
    if (size) *size = 0;
    return NULL;
  }

  if (size) *size = arr->len;
  return arr->data;
}
/* Mutable accessor for a repeated field: loads the upb_array pointer stored
 * at byte offset ofs of msg and returns its element storage. If size is
 * non-NULL it receives the element count. An unset (NULL) array yields NULL
 * with a count of 0. */
UPB_INLINE void *_upb_array_mutable_accessor(void *msg, size_t ofs,
                                             size_t *size) {
  upb_array *arr = *PTR_AT(msg, ofs, upb_array*);

  if (!arr) {
    if (size) *size = 0;
    return NULL;
  }

  if (size) *size = arr->len;
  return arr->data;
}
/* Resizes the repeated field stored at byte offset ofs of msg to exactly
 * "size" elements of elem_size bytes each, creating the array in arena if
 * the field is unset. Capacity grows by doubling, starting from at least 4
 * elements. Returns the element storage, or NULL if an allocation fails.
 *
 * On reallocation failure the array is left exactly as it was: the result of
 * the realloc is checked before it replaces arr->data, so the array never
 * ends up with a NULL data pointer alongside a non-zero length.
 *
 * TODO(haberman): this is a mess. It will improve when upb_array no longer
 * carries reflective state (type, elem_size). */
UPB_INLINE void *_upb_array_resize_accessor(void *msg, size_t ofs, size_t size,
                                            size_t elem_size,
                                            upb_fieldtype_t type,
                                            upb_arena *arena) {
  upb_array *arr = *PTR_AT(msg, ofs, upb_array*);

  if (!arr) {
    arr = upb_array_new(arena);
    if (!arr) return NULL;
    *PTR_AT(msg, ofs, upb_array*) = arr;
  }

  if (size > arr->size) {
    size_t new_size = UPB_MAX(arr->size, 4);
    size_t old_bytes = arr->size * elem_size;
    size_t new_bytes;
    void *new_data;
    while (new_size < size) new_size *= 2;
    new_bytes = new_size * elem_size;
    new_data = upb_arena_realloc(arena, arr->data, old_bytes, new_bytes);
    if (!new_data) {
      return NULL;
    }
    arr->data = new_data;
    arr->size = new_size;
  }

  arr->len = size;
  return arr->data;
}
/* Appends one element to the repeated field stored at byte offset ofs of msg:
 * grows the array by one via _upb_array_resize_accessor() and copies
 * elem_size bytes from value into the new last slot. Returns false if the
 * resize fails. */
UPB_INLINE bool _upb_array_append_accessor(void *msg, size_t ofs,
                                           size_t elem_size,
                                           upb_fieldtype_t type,
                                           const void *value,
                                           upb_arena *arena) {
  upb_array *existing = *PTR_AT(msg, ofs, upb_array*);
  size_t old_len = 0;
  void *elems;

  if (existing) {
    old_len = existing->len;
  }

  elems = _upb_array_resize_accessor(msg, ofs, old_len + 1, elem_size, type,
                                     arena);
  if (!elems) {
    return false;
  }

  memcpy((char*)elems + old_len * elem_size, value, elem_size);
  return true;
}
/* Returns whether hasbit number idx is set in msg. Bits are counted from the
 * start of the message, eight per byte, least-significant bit first. */
UPB_INLINE bool _upb_has_field(const void *msg, size_t idx) {
  const char *bytes = (const char*)msg;
  return (bytes[idx / 8] & (1 << (idx % 8))) != 0;
}
/* Sets hasbit number idx in msg. Although msg is declared const, the byte is
 * modified in place (PTR_AT casts the const away). The return value is the
 * updated byte converted to bool, i.e. whether any bit of that byte is set,
 * which is always true here. */
UPB_INLINE bool _upb_sethas(const void *msg, size_t idx) {
return (*PTR_AT(msg, idx / 8, char)) |= (char)(1 << (idx % 8));
}
/* Clears hasbit number idx in msg, again writing through the const pointer.
 * The return value is the updated byte converted to bool, i.e. whether any
 * OTHER hasbit sharing that byte is still set; it does not report the state
 * of bit idx.
 * NOTE(review): the bool results look incidental -- confirm no caller relies
 * on them. */
UPB_INLINE bool _upb_clearhas(const void *msg, size_t idx) {
return (*PTR_AT(msg, idx / 8, char)) &= (char)(~(1 << (idx % 8)));
}
/* Returns whether the oneof whose 32-bit case value lives at byte offset
 * case_ofs of msg currently holds the field numbered num. */
UPB_INLINE bool _upb_has_oneof_field(const void *msg, size_t case_ofs, int32_t num) {
  const int32_t *active_case = PTR_AT(msg, case_ofs, int32_t);
  return *active_case == num;
}
#undef PTR_AT
#include "upb/port_undef.inc"
#endif /* UPB_GENERATED_UTIL_H_ */

@ -0,0 +1,923 @@
/*
** Inline definitions for handlers.h, which are particularly long and a bit
** tricky.
*/
#ifndef UPB_HANDLERS_INL_H_
#define UPB_HANDLERS_INL_H_
#include <limits.h>
#include <stddef.h>
#include "upb/handlers.h"
#include "upb/port_def.inc"
#ifdef __cplusplus
/* Type detection and typedefs for integer types.
* For platforms where there are multiple 32-bit or 64-bit types, we need to be
* able to enumerate them so we can properly create overloads for all variants.
*
* If any platform existed where there were three integer types with the same
* size, this would have to become more complicated. For example, short, int,
* and long could all be 32-bits. Even more diabolically, short, int, long,
* and long long could all be 64 bits and still be standard-compliant.
* However, few platforms are this strange, and it's unlikely that upb will be
* used on the strangest ones. */
/* Can't count on stdint.h limits like INT32_MAX, because in C++ these are
* only defined when __STDC_LIMIT_MACROS are defined before the *first* include
* of stdint.h. We can't guarantee that someone else didn't include these first
* without defining __STDC_LIMIT_MACROS. */
/* Local copies of the 32/64-bit limits, used only for the width checks
 * below and undefined again afterwards. */
#define UPB_INT32_MAX 0x7fffffffLL
#define UPB_INT32_MIN (-UPB_INT32_MAX - 1)
#define UPB_INT64_MAX 0x7fffffffffffffffLL
#define UPB_INT64_MIN (-UPB_INT64_MAX - 1)
/* Detect which of int / long / long long are 32 or 64 bits wide by comparing
 * their <limits.h> ranges with the limits above. */
#if INT_MAX == UPB_INT32_MAX && INT_MIN == UPB_INT32_MIN
#define UPB_INT_IS_32BITS 1
#endif
#if LONG_MAX == UPB_INT32_MAX && LONG_MIN == UPB_INT32_MIN
#define UPB_LONG_IS_32BITS 1
#endif
#if LONG_MAX == UPB_INT64_MAX && LONG_MIN == UPB_INT64_MIN
#define UPB_LONG_IS_64BITS 1
#endif
#if LLONG_MAX == UPB_INT64_MAX && LLONG_MIN == UPB_INT64_MIN
#define UPB_LLONG_IS_64BITS 1
#endif
/* We use macros instead of typedefs so we can undefine them later and avoid
 * leaking them outside this header file. */
/* 32-bit types: prefer int. If long is 32 bits as well, expose it as the
 * "ALT" type and set UPB_TWO_32BIT_TYPES. */
#if UPB_INT_IS_32BITS
#define UPB_INT32_T int
#define UPB_UINT32_T unsigned int
#if UPB_LONG_IS_32BITS
#define UPB_TWO_32BIT_TYPES 1
#define UPB_INT32ALT_T long
#define UPB_UINT32ALT_T unsigned long
#endif /* UPB_LONG_IS_32BITS */
#elif UPB_LONG_IS_32BITS /* && !UPB_INT_IS_32BITS */
#define UPB_INT32_T long
#define UPB_UINT32_T unsigned long
#endif /* UPB_INT_IS_32BITS */
/* 64-bit types: prefer long. If long long is 64 bits as well, expose it as
 * the "ALT" type and set UPB_TWO_64BIT_TYPES. */
#if UPB_LONG_IS_64BITS
#define UPB_INT64_T long
#define UPB_UINT64_T unsigned long
#if UPB_LLONG_IS_64BITS
#define UPB_TWO_64BIT_TYPES 1
#define UPB_INT64ALT_T long long
#define UPB_UINT64ALT_T unsigned long long
#endif /* UPB_LLONG_IS_64BITS */
#elif UPB_LLONG_IS_64BITS /* && !UPB_LONG_IS_64BITS */
#define UPB_INT64_T long long
#define UPB_UINT64_T unsigned long long
#endif /* UPB_LONG_IS_64BITS */
/* The detection helpers are no longer needed; only the UPB_*_T type macros
 * and the UPB_TWO_*_TYPES flags remain defined past this point. */
#undef UPB_INT32_MAX
#undef UPB_INT32_MIN
#undef UPB_INT64_MAX
#undef UPB_INT64_MIN
#undef UPB_INT_IS_32BITS
#undef UPB_LONG_IS_32BITS
#undef UPB_LONG_IS_64BITS
#undef UPB_LLONG_IS_64BITS
namespace upb {
/* Signature of a function that releases a piece of bound handler data. */
typedef void CleanupFunc(void *ptr);
/* Template to remove "const" from "const T*" and just return "T*".
 *
 * We define a nonsense default because otherwise it will fail to instantiate as
 * a function parameter type even in cases where we don't expect any caller to
 * actually match the overload. */
class CouldntRemoveConst {};
template <class T> struct remove_constptr { typedef CouldntRemoveConst type; };
template <class T> struct remove_constptr<const T *> { typedef T *type; };
/* Template that we use below to remove a template specialization from
 * consideration if it matches a specific type: the nested Type only exists
 * when T and U differ. */
template <class T, class U> struct disable_if_same { typedef void Type; };
template <class T> struct disable_if_same<T, T> {};
/* CleanupFunc implementation that casts p to the pointer type T and deletes
 * it. */
template <class T> void DeletePointer(void *p) { delete static_cast<T>(p); }
/* Type selector: "value" names T1, unless T1 is void or bool, in which case
 * it names T2. (Despite its name, "value" is a type.) */
template <class T1, class T2>
struct FirstUnlessVoidOrBool {
typedef T1 value;
};
template <class T2>
struct FirstUnlessVoidOrBool<void, T2> {
typedef T2 value;
};
template <class T2>
struct FirstUnlessVoidOrBool<bool, T2> {
typedef T2 value;
};
/* Local equivalent of std::is_same: value is true when both template
 * arguments are the same type and false otherwise. Note that value is a
 * mutable static data member initialized out of class below, not a
 * compile-time constant. */
template<class T, class U>
struct is_same {
static bool value;
};
template<class T>
struct is_same<T, T> {
static bool value;
};
template<class T, class U>
bool is_same<T, U>::value = false;
template<class T>
bool is_same<T, T>::value = true;
/* FuncInfo *******************************************************************/
/* Info about the user's original, pre-wrapped function. Carries types only;
 * it has no data members. The return type defaults to void. */
template <class C, class R = void>
struct FuncInfo {
/* The type of the closure that the function takes (its first param). */
typedef C Closure;
/* The return type. */
typedef R Return;
};
/* Func ***********************************************************************/
/* Func1, Func2, Func3: Template classes representing a function and its
* signature.
*
* Since the function is a template parameter, calling the function can be
* inlined at compile-time and does not require a function pointer at runtime.
* These functions are not bound to a handler data so have no data or cleanup
* handler. */
/* Common base of Func1..Func5: an unbound function carries no handler data
 * and therefore needs no cleanup. */
struct UnboundFunc {
CleanupFunc *GetCleanup() { return nullptr; }
void *GetData() { return nullptr; }
};
/* FuncN<R, P1..PN, F, I> describes the function F with return type R and
 * parameter types P1..PN; I is the FuncInfo of the user's original function.
 * Call() forwards its arguments directly to F. */
template <class R, class P1, R F(P1), class I>
struct Func1 : public UnboundFunc {
typedef R Return;
typedef I FuncInfo;
static R Call(P1 p1) { return F(p1); }
};
template <class R, class P1, class P2, R F(P1, P2), class I>
struct Func2 : public UnboundFunc {
typedef R Return;
typedef I FuncInfo;
static R Call(P1 p1, P2 p2) { return F(p1, p2); }
};
template <class R, class P1, class P2, class P3, R F(P1, P2, P3), class I>
struct Func3 : public UnboundFunc {
typedef R Return;
typedef I FuncInfo;
static R Call(P1 p1, P2 p2, P3 p3) { return F(p1, p2, p3); }
};
template <class R, class P1, class P2, class P3, class P4, R F(P1, P2, P3, P4),
class I>
struct Func4 : public UnboundFunc {
typedef R Return;
typedef I FuncInfo;
static R Call(P1 p1, P2 p2, P3 p3, P4 p4) { return F(p1, p2, p3, p4); }
};
template <class R, class P1, class P2, class P3, class P4, class P5,
R F(P1, P2, P3, P4, P5), class I>
struct Func5 : public UnboundFunc {
typedef R Return;
typedef I FuncInfo;
static R Call(P1 p1, P2 p2, P3 p3, P4 p4, P5 p5) {
return F(p1, p2, p3, p4, p5);
}
};
/* BoundFunc ******************************************************************/
/* BoundFunc2, BoundFunc3: Like Func2/Func3 except also contains a value that
* shall be bound to the function's second parameter.
*
* Note that the second parameter is a const pointer, but our stored bound value
* is non-const so we can free it when the handlers are destroyed. */
/* Common base of BoundFunc2..BoundFunc5. T is the type of the bound (second)
 * parameter; the stored value has the "const" stripped from its pointee (see
 * remove_constptr). GetCleanup() returns the DeletePointer instantiation that
 * deletes the stored value. */
template <class T>
struct BoundFunc {
typedef typename remove_constptr<T>::type MutableP2;
explicit BoundFunc(MutableP2 data_) : data(data_) {}
CleanupFunc *GetCleanup() { return &DeletePointer<MutableP2>; }
MutableP2 GetData() { return data; }
MutableP2 data;
};
/* BoundFuncN<R, P1..PN, F, I> describes the function F together with a value
 * bound to its second parameter (P2). Unlike FuncN, these structs define no
 * Call() member and no Return typedef. */
template <class R, class P1, class P2, R F(P1, P2), class I>
struct BoundFunc2 : public BoundFunc<P2> {
typedef BoundFunc<P2> Base;
typedef I FuncInfo;
explicit BoundFunc2(typename Base::MutableP2 arg) : Base(arg) {}
};
template <class R, class P1, class P2, class P3, R F(P1, P2, P3), class I>
struct BoundFunc3 : public BoundFunc<P2> {
typedef BoundFunc<P2> Base;
typedef I FuncInfo;
explicit BoundFunc3(typename Base::MutableP2 arg) : Base(arg) {}
};
template <class R, class P1, class P2, class P3, class P4, R F(P1, P2, P3, P4),
class I>
struct BoundFunc4 : public BoundFunc<P2> {
typedef BoundFunc<P2> Base;
typedef I FuncInfo;
explicit BoundFunc4(typename Base::MutableP2 arg) : Base(arg) {}
};
template <class R, class P1, class P2, class P3, class P4, class P5,
R F(P1, P2, P3, P4, P5), class I>
struct BoundFunc5 : public BoundFunc<P2> {
typedef BoundFunc<P2> Base;
typedef I FuncInfo;
explicit BoundFunc5(typename Base::MutableP2 arg) : Base(arg) {}
};
/* FuncSig ********************************************************************/
/* FuncSig1, FuncSig2, FuncSig3: template classes reflecting a function
* *signature*, but without a specific function attached.
*
* These classes contain member functions that can be invoked with a
* specific function to return a Func/BoundFunc class. */
template <class R, class P1>
struct FuncSig1 {
template <R F(P1)>
Func1<R, P1, F, FuncInfo<P1, R> > GetFunc() {
return Func1<R, P1, F, FuncInfo<P1, R> >();
}
};
template <class R, class P1, class P2>
struct FuncSig2 {
template <R F(P1, P2)>
Func2<R, P1, P2, F, FuncInfo<P1, R> > GetFunc() {
return Func2<R, P1, P2, F, FuncInfo<P1, R> >();
}
template <R F(P1, P2)>
BoundFunc2<R, P1, P2, F, FuncInfo<P1, R> > GetFunc(
typename remove_constptr<P2>::type param2) {
return BoundFunc2<R, P1, P2, F, FuncInfo<P1, R> >(param2);
}
};
template <class R, class P1, class P2, class P3>
struct FuncSig3 {
template <R F(P1, P2, P3)>
Func3<R, P1, P2, P3, F, FuncInfo<P1, R> > GetFunc() {
return Func3<R, P1, P2, P3, F, FuncInfo<P1, R> >();
}
template <R F(P1, P2, P3)>
BoundFunc3<R, P1, P2, P3, F, FuncInfo<P1, R> > GetFunc(
typename remove_constptr<P2>::type param2) {
return BoundFunc3<R, P1, P2, P3, F, FuncInfo<P1, R> >(param2);
}
};
/* Signature descriptor for R(P1, P2, P3, P4).  GetFunc<F>() yields an unbound
 * Func4; GetFunc<F>(param2) yields a BoundFunc4 carrying param2 as the value
 * bound to the second parameter. */
template <class R, class P1, class P2, class P3, class P4>
struct FuncSig4 {
/* Unbound form. */
template <R F(P1, P2, P3, P4)>
Func4<R, P1, P2, P3, P4, F, FuncInfo<P1, R> > GetFunc() {
return Func4<R, P1, P2, P3, P4, F, FuncInfo<P1, R> >();
}
/* Bound form: the argument type is P2 passed through remove_constptr. */
template <R F(P1, P2, P3, P4)>
BoundFunc4<R, P1, P2, P3, P4, F, FuncInfo<P1, R> > GetFunc(
typename remove_constptr<P2>::type param2) {
return BoundFunc4<R, P1, P2, P3, P4, F, FuncInfo<P1, R> >(param2);
}
};
/* Signature descriptor for R(P1, P2, P3, P4, P5).  GetFunc<F>() yields an
 * unbound Func5; GetFunc<F>(param2) yields a BoundFunc5 carrying param2 as
 * the value bound to the second parameter. */
template <class R, class P1, class P2, class P3, class P4, class P5>
struct FuncSig5 {
/* Unbound form. */
template <R F(P1, P2, P3, P4, P5)>
Func5<R, P1, P2, P3, P4, P5, F, FuncInfo<P1, R> > GetFunc() {
return Func5<R, P1, P2, P3, P4, P5, F, FuncInfo<P1, R> >();
}
/* Bound form: the argument type is P2 passed through remove_constptr. */
template <R F(P1, P2, P3, P4, P5)>
BoundFunc5<R, P1, P2, P3, P4, P5, F, FuncInfo<P1, R> > GetFunc(
typename remove_constptr<P2>::type param2) {
return BoundFunc5<R, P1, P2, P3, P4, P5, F, FuncInfo<P1, R> >(param2);
}
};
/* MatchFunc (free-function overloads): given a pointer to a function of one
 * to five parameters, returns the FuncSigN object whose template arguments
 * mirror that function's return and parameter types.  The pointer value is
 * never read; it exists purely so the compiler can deduce R and P1..Pn, which
 * is why the parameter is left unnamed. */
template <class R, class P1>
inline FuncSig1<R, P1> MatchFunc(R (*)(P1)) {
  typedef FuncSig1<R, P1> Sig;
  return Sig();
}
template <class R, class P1, class P2>
inline FuncSig2<R, P1, P2> MatchFunc(R (*)(P1, P2)) {
  typedef FuncSig2<R, P1, P2> Sig;
  return Sig();
}
template <class R, class P1, class P2, class P3>
inline FuncSig3<R, P1, P2, P3> MatchFunc(R (*)(P1, P2, P3)) {
  typedef FuncSig3<R, P1, P2, P3> Sig;
  return Sig();
}
template <class R, class P1, class P2, class P3, class P4>
inline FuncSig4<R, P1, P2, P3, P4> MatchFunc(R (*)(P1, P2, P3, P4)) {
  typedef FuncSig4<R, P1, P2, P3, P4> Sig;
  return Sig();
}
template <class R, class P1, class P2, class P3, class P4, class P5>
inline FuncSig5<R, P1, P2, P3, P4, P5> MatchFunc(R (*)(P1, P2, P3, P4, P5)) {
  typedef FuncSig5<R, P1, P2, P3, P4, P5> Sig;
  return Sig();
}
/* MethodSig ******************************************************************/
/* CallMethod*: a function template that calls a given method. */
/* Free-function trampoline for a zero-argument member function: invokes F on
 * *obj and returns its result. */
template <class R, class C, R (C::*F)()>
R CallMethod0(C *obj) {
  return (obj->*F)();
}
/* Free-function trampoline for a one-argument member function: invokes F on
 * *obj with arg1 and returns its result. */
template <class R, class C, class P1, R (C::*F)(P1)>
R CallMethod1(C *obj, P1 arg1) {
  return (obj->*F)(arg1);
}
/* Free-function trampoline for a two-argument member function: invokes F on
 * *obj with (arg1, arg2) and returns its result. */
template <class R, class C, class P1, class P2, R (C::*F)(P1, P2)>
R CallMethod2(C *obj, P1 arg1, P2 arg2) {
  return (obj->*F)(arg1, arg2);
}
/* Free-function trampoline for a three-argument member function: invokes F
 * on *obj with (arg1, arg2, arg3) and returns its result. */
template <class R, class C, class P1, class P2, class P3, R (C::*F)(P1, P2, P3)>
R CallMethod3(C *obj, P1 arg1, P2 arg2, P3 arg3) {
  return (obj->*F)(arg1, arg2, arg3);
}
/* Free-function trampoline for a four-argument member function: invokes F on
 * *obj with (arg1, arg2, arg3, arg4) and returns its result. */
template <class R, class C, class P1, class P2, class P3, class P4,
          R (C::*F)(P1, P2, P3, P4)>
R CallMethod4(C *obj, P1 arg1, P2 arg2, P3 arg3, P4 arg4) {
  return (obj->*F)(arg1, arg2, arg3, arg4);
}
/* MethodSig: like FuncSig, but for member functions.
*
* GetFunc() returns a normal FuncN object, so after calling GetFunc() no
* more logic is required to special-case methods. */
/* Signature descriptor for the member function R (C::*)().  GetFunc<F>()
 * returns a Func1 whose function is the CallMethod0 trampoline for F and
 * whose first parameter is C* (the object the method is invoked on). */
template <class R, class C>
struct MethodSig0 {
template <R (C::*F)()>
Func1<R, C *, CallMethod0<R, C, F>, FuncInfo<C *, R> > GetFunc() {
return Func1<R, C *, CallMethod0<R, C, F>, FuncInfo<C *, R> >();
}
};
/* Signature descriptor for the member function R (C::*)(P1).  The method is
 * exposed as a two-parameter free function (C*, P1) through the CallMethod1
 * trampoline, so the method's P1 occupies the "second parameter" slot that
 * BoundFunc2 binds. */
template <class R, class C, class P1>
struct MethodSig1 {
/* Unbound form. */
template <R (C::*F)(P1)>
Func2<R, C *, P1, CallMethod1<R, C, P1, F>, FuncInfo<C *, R> > GetFunc() {
return Func2<R, C *, P1, CallMethod1<R, C, P1, F>, FuncInfo<C *, R> >();
}
/* Bound form: param1 is bound to the method's first argument. */
template <R (C::*F)(P1)>
BoundFunc2<R, C *, P1, CallMethod1<R, C, P1, F>, FuncInfo<C *, R> > GetFunc(
typename remove_constptr<P1>::type param1) {
return BoundFunc2<R, C *, P1, CallMethod1<R, C, P1, F>, FuncInfo<C *, R> >(
param1);
}
};
/* Signature descriptor for the member function R (C::*)(P1, P2).  The method
 * is exposed as a three-parameter free function (C*, P1, P2) through the
 * CallMethod2 trampoline. */
template <class R, class C, class P1, class P2>
struct MethodSig2 {
/* Unbound form. */
template <R (C::*F)(P1, P2)>
Func3<R, C *, P1, P2, CallMethod2<R, C, P1, P2, F>, FuncInfo<C *, R> >
GetFunc() {
return Func3<R, C *, P1, P2, CallMethod2<R, C, P1, P2, F>,
FuncInfo<C *, R> >();
}
/* Bound form: param1 is bound to the method's first argument. */
template <R (C::*F)(P1, P2)>
BoundFunc3<R, C *, P1, P2, CallMethod2<R, C, P1, P2, F>, FuncInfo<C *, R> >
GetFunc(typename remove_constptr<P1>::type param1) {
return BoundFunc3<R, C *, P1, P2, CallMethod2<R, C, P1, P2, F>,
FuncInfo<C *, R> >(param1);
}
};
/* Signature descriptor for the member function R (C::*)(P1, P2, P3).  The
 * method is exposed as a four-parameter free function (C*, P1, P2, P3)
 * through the CallMethod3 trampoline. */
template <class R, class C, class P1, class P2, class P3>
struct MethodSig3 {
/* Unbound form. */
template <R (C::*F)(P1, P2, P3)>
Func4<R, C *, P1, P2, P3, CallMethod3<R, C, P1, P2, P3, F>, FuncInfo<C *, R> >
GetFunc() {
return Func4<R, C *, P1, P2, P3, CallMethod3<R, C, P1, P2, P3, F>,
FuncInfo<C *, R> >();
}
/* Bound form: param1 is bound to the method's first argument. */
template <R (C::*F)(P1, P2, P3)>
BoundFunc4<R, C *, P1, P2, P3, CallMethod3<R, C, P1, P2, P3, F>,
FuncInfo<C *, R> >
GetFunc(typename remove_constptr<P1>::type param1) {
return BoundFunc4<R, C *, P1, P2, P3, CallMethod3<R, C, P1, P2, P3, F>,
FuncInfo<C *, R> >(param1);
}
};
/* Signature descriptor for the member function R (C::*)(P1, P2, P3, P4).  The
 * method is exposed as a five-parameter free function (C*, P1, P2, P3, P4)
 * through the CallMethod4 trampoline. */
template <class R, class C, class P1, class P2, class P3, class P4>
struct MethodSig4 {
/* Unbound form. */
template <R (C::*F)(P1, P2, P3, P4)>
Func5<R, C *, P1, P2, P3, P4, CallMethod4<R, C, P1, P2, P3, P4, F>,
FuncInfo<C *, R> >
GetFunc() {
return Func5<R, C *, P1, P2, P3, P4, CallMethod4<R, C, P1, P2, P3, P4, F>,
FuncInfo<C *, R> >();
}
/* Bound form: param1 is bound to the method's first argument. */
template <R (C::*F)(P1, P2, P3, P4)>
BoundFunc5<R, C *, P1, P2, P3, P4, CallMethod4<R, C, P1, P2, P3, P4, F>,
FuncInfo<C *, R> >
GetFunc(typename remove_constptr<P1>::type param1) {
return BoundFunc5<R, C *, P1, P2, P3, P4,
CallMethod4<R, C, P1, P2, P3, P4, F>, FuncInfo<C *, R> >(
param1);
}
};
/* MatchFunc (member-function overloads): given a pointer to a non-const
 * member function taking zero to four arguments, returns the MethodSigN
 * object mirroring its class, return and parameter types.  The pointer value
 * is only used for template argument deduction, so the parameter is left
 * unnamed. */
template <class R, class C>
inline MethodSig0<R, C> MatchFunc(R (C::*)()) {
  typedef MethodSig0<R, C> Sig;
  return Sig();
}
template <class R, class C, class P1>
inline MethodSig1<R, C, P1> MatchFunc(R (C::*)(P1)) {
  typedef MethodSig1<R, C, P1> Sig;
  return Sig();
}
template <class R, class C, class P1, class P2>
inline MethodSig2<R, C, P1, P2> MatchFunc(R (C::*)(P1, P2)) {
  typedef MethodSig2<R, C, P1, P2> Sig;
  return Sig();
}
template <class R, class C, class P1, class P2, class P3>
inline MethodSig3<R, C, P1, P2, P3> MatchFunc(R (C::*)(P1, P2, P3)) {
  typedef MethodSig3<R, C, P1, P2, P3> Sig;
  return Sig();
}
template <class R, class C, class P1, class P2, class P3, class P4>
inline MethodSig4<R, C, P1, P2, P3, P4> MatchFunc(R (C::*)(P1, P2, P3, P4)) {
  typedef MethodSig4<R, C, P1, P2, P3, P4> Sig;
  return Sig();
}
/* MaybeWrapReturn ************************************************************/
/* Template class that attempts to wrap the return value of the function so it
* matches the expected type. There are two main adjustments it may make:
*
* 1. If the function returns void, make it return the expected type and with
* a value that always indicates success.
* 2. If the function returns bool, make it return the expected type with a
* value that indicates success or failure.
*
* The "expected type" for return is:
* 1. void* for start handlers. If the closure parameter has a different type
* we will cast it to void* for the return in the success case.
* 2. size_t for string buffer handlers.
* 3. bool for everything else. */
/* Template parameters are FuncN type and desired return type.  The third
 * parameter (Enable) defaults to void and is only named by the partial
 * specializations that need to switch themselves off for some types.  The
 * primary template is declared but never defined, so a FuncN/return-type
 * combination with no matching specialization is a compile-time error. */
template <class F, class R, class Enable = void>
struct MaybeWrapReturn;
/* If the return type matches, return the given function unwrapped. */
template <class F>
struct MaybeWrapReturn<F, typename F::Return> {
typedef F Func;
};
/* ReturnTrue2 / ReturnTrue3: adapt a void-returning function F to a
 * bool-returning one.  The arguments are forwarded to F unchanged and the
 * wrapper then unconditionally reports success (true). */
template <class P1, class P2, void F(P1, P2)>
bool ReturnTrue2(P1 a1, P2 a2) {
  F(a1, a2);
  return true;
}
template <class P1, class P2, class P3, void F(P1, P2, P3)>
bool ReturnTrue3(P1 a1, P2 a2, P3 a3) {
  F(a1, a2, a3);
  return true;
}
/* ReturnClosure2 / ReturnClosure3: adapt a void-returning function F to one
 * returning void*.  After calling F, the wrapper hands back its own first
 * argument (implicitly converted to void*). */
template <class P1, class P2, void F(P1, P2)>
void *ReturnClosure2(P1 a1, P2 a2) {
  F(a1, a2);
  return a1;
}
template <class P1, class P2, class P3, void F(P1, P2, P3)>
void *ReturnClosure3(P1 a1, P2 a2, P3 a3) {
  F(a1, a2, a3);
  return a1;
}
/* CastReturnToVoidPtr2 / CastReturnToVoidPtr3: call F and return its result
 * as void*, relying on the implicit conversion from R to void*. */
template <class R, class P1, class P2, R F(P1, P2)>
void *CastReturnToVoidPtr2(P1 a1, P2 a2) {
  void *erased = F(a1, a2);
  return erased;
}
template <class R, class P1, class P2, class P3, R F(P1, P2, P3)>
void *CastReturnToVoidPtr3(P1 a1, P2 a2, P3 a3) {
  void *erased = F(a1, a2, a3);
  return erased;
}
/* ReturnClosureOrBreak2 / ReturnClosureOrBreak3: adapt a bool-returning
 * function F to one returning void*.  A true result maps to the wrapper's
 * first argument; a false result maps to UPB_BREAK. */
template <class P1, class P2, bool F(P1, P2)>
void *ReturnClosureOrBreak2(P1 a1, P2 a2) {
  if (F(a1, a2)) {
    return a1;
  }
  return UPB_BREAK;
}
template <class P1, class P2, class P3, bool F(P1, P2, P3)>
void *ReturnClosureOrBreak3(P1 a1, P2 a2, P3 a3) {
  if (F(a1, a2, a3)) {
    return a1;
  }
  return UPB_BREAK;
}
/* Adapts a void-returning five-parameter string callback F to one returning
 * size_t: F is called with every argument forwarded, then the wrapper returns
 * the length argument it was given. */
template <class P1, class P2,
          void F(P1, P2, const char *, size_t, const upb_bufhandle *)>
size_t ReturnStringLen(P1 a1, P2 a2, const char *buf, size_t len,
                       const upb_bufhandle *handle) {
  F(a1, a2, buf, len, handle);
  return len;
}
/* Adapts a bool-returning five-parameter string callback F to one returning
 * size_t: the wrapper returns the length argument when F returns true and 0
 * when F returns false. */
template <class P1, class P2,
          bool F(P1, P2, const char *, size_t, const upb_bufhandle *)>
size_t ReturnNOr0(P1 a1, P2 a2, const char *buf, size_t len,
                  const upb_bufhandle *handle) {
  if (F(a1, a2, buf, len, handle)) {
    return len;
  }
  return 0;
}
/* Partial specializations of MaybeWrapReturn.  Each one pattern-matches a
 * FuncN whose return type differs from the desired one and names, as ::Func,
 * a FuncN of the same arity whose function is the matching wrapper template
 * instantiated on the original function F.  The FuncInfo argument I is
 * passed through unchanged in every case. */
/* If we have a function returning void but want a function returning bool, wrap
* it in a function that returns true. */
template <class P1, class P2, void F(P1, P2), class I>
struct MaybeWrapReturn<Func2<void, P1, P2, F, I>, bool> {
typedef Func2<bool, P1, P2, ReturnTrue2<P1, P2, F>, I> Func;
};
template <class P1, class P2, class P3, void F(P1, P2, P3), class I>
struct MaybeWrapReturn<Func3<void, P1, P2, P3, F, I>, bool> {
typedef Func3<bool, P1, P2, P3, ReturnTrue3<P1, P2, P3, F>, I> Func;
};
/* If our function returns void but we want one returning void*, wrap it in a
* function that returns the first argument. */
template <class P1, class P2, void F(P1, P2), class I>
struct MaybeWrapReturn<Func2<void, P1, P2, F, I>, void *> {
typedef Func2<void *, P1, P2, ReturnClosure2<P1, P2, F>, I> Func;
};
template <class P1, class P2, class P3, void F(P1, P2, P3), class I>
struct MaybeWrapReturn<Func3<void, P1, P2, P3, F, I>, void *> {
typedef Func3<void *, P1, P2, P3, ReturnClosure3<P1, P2, P3, F>, I> Func;
};
/* If our function returns R* but we want one returning void*, wrap it in a
* function that casts to void*.  The Enable slot is filled with
* disable_if_same<R *, void *>::Type; presumably this removes the
* specialization when R* is already void*, leaving that case to the
* "return type matches" specialization -- confirm against disable_if_same. */
template <class R, class P1, class P2, R *F(P1, P2), class I>
struct MaybeWrapReturn<Func2<R *, P1, P2, F, I>, void *,
typename disable_if_same<R *, void *>::Type> {
typedef Func2<void *, P1, P2, CastReturnToVoidPtr2<R *, P1, P2, F>, I> Func;
};
template <class R, class P1, class P2, class P3, R *F(P1, P2, P3), class I>
struct MaybeWrapReturn<Func3<R *, P1, P2, P3, F, I>, void *,
typename disable_if_same<R *, void *>::Type> {
typedef Func3<void *, P1, P2, P3, CastReturnToVoidPtr3<R *, P1, P2, P3, F>, I>
Func;
};
/* If our function returns bool but we want one returning void*, wrap it in a
* function that returns either the first param or UPB_BREAK. */
template <class P1, class P2, bool F(P1, P2), class I>
struct MaybeWrapReturn<Func2<bool, P1, P2, F, I>, void *> {
typedef Func2<void *, P1, P2, ReturnClosureOrBreak2<P1, P2, F>, I> Func;
};
template <class P1, class P2, class P3, bool F(P1, P2, P3), class I>
struct MaybeWrapReturn<Func3<bool, P1, P2, P3, F, I>, void *> {
typedef Func3<void *, P1, P2, P3, ReturnClosureOrBreak3<P1, P2, P3, F>, I>
Func;
};
/* If our function returns void but we want one returning size_t, wrap it in a
* function that returns the size argument. */
template <class P1, class P2,
void F(P1, P2, const char *, size_t, const upb_bufhandle *), class I>
struct MaybeWrapReturn<
Func5<void, P1, P2, const char *, size_t, const upb_bufhandle *, F, I>,
size_t> {
typedef Func5<size_t, P1, P2, const char *, size_t, const upb_bufhandle *,
ReturnStringLen<P1, P2, F>, I> Func;
};
/* If our function returns bool but we want one returning size_t, wrap it in a
* function that returns either 0 or the buf size. */
template <class P1, class P2,
bool F(P1, P2, const char *, size_t, const upb_bufhandle *), class I>
struct MaybeWrapReturn<
Func5<bool, P1, P2, const char *, size_t, const upb_bufhandle *, F, I>,
size_t> {
typedef Func5<size_t, P1, P2, const char *, size_t, const upb_bufhandle *,
ReturnNOr0<P1, P2, F>, I> Func;
};
/* ConvertParams **************************************************************/
/* Template class that converts the function parameters if necessary, and
* ignores the HandlerData parameter if appropriate.
*
* Template parameters: F is the FuncN/BoundFuncN type describing the user's
* function; T is the handler function-pointer type being targeted (some
* specializations pattern-match it as R2 (*)(P1_2, P2_2, P3_2) to pick up the
* handler's value-parameter type).  Only the specializations are defined. */
template <class F, class T>
struct ConvertParams;
/* IgnoreHandlerData*: adapters from the (void *closure, const void *hd, ...)
 * calling convention to a user function F that takes no handler data.  Each
 * one static_casts the closure to P1, drops the handler-data pointer (left
 * unnamed because it is never read), and forwards the remaining arguments. */
template <class R, class P1, R F(P1)>
R IgnoreHandlerData2(void *closure, const void * /* hd */) {
  return F(static_cast<P1>(closure));
}
/* P2Wrapper is the type this adapter receives; P2Wrapped is the type F takes.
 * The value is passed straight through, so any conversion is implicit. */
template <class R, class P1, class P2Wrapper, class P2Wrapped,
          R F(P1, P2Wrapped)>
R IgnoreHandlerData3(void *closure, const void * /* hd */, P2Wrapper value) {
  return F(static_cast<P1>(closure), value);
}
template <class R, class P1, class P2, class P3, R F(P1, P2, P3)>
R IgnoreHandlerData4(void *closure, const void * /* hd */, P2 a2, P3 a3) {
  return F(static_cast<P1>(closure), a2, a3);
}
template <class R, class P1, class P2, class P3, class P4, R F(P1, P2, P3, P4)>
R IgnoreHandlerData5(void *closure, const void * /* hd */, P2 a2, P3 a3,
                     P4 a4) {
  return F(static_cast<P1>(closure), a2, a3, a4);
}
/* String-buffer variant: drops both the handler data and the buffer handle,
 * forwarding only the closure, the data pointer and the length. */
template <class R, class P1, R F(P1, const char*, size_t)>
R IgnoreHandlerDataIgnoreHandle(void *closure, const void * /* hd */,
                                const char *buf, size_t len,
                                const upb_bufhandle * /* handle */) {
  return F(static_cast<P1>(closure), buf, len);
}
/* CastHandlerData*: adapters from the (void *closure, const void *hd, ...)
 * calling convention to a user function F that wants the handler data as its
 * second parameter.  The closure is static_cast to P1 and the handler data
 * to P2; any further arguments are forwarded as received. */
template <class R, class P1, class P2, R F(P1, P2)>
R CastHandlerData2(void *closure, const void *data) {
  return F(static_cast<P1>(closure), static_cast<P2>(data));
}
/* P3Wrapper is the type this adapter receives; P3Wrapped is the type F takes.
 * The value is passed straight through, so any conversion is implicit. */
template <class R, class P1, class P2, class P3Wrapper, class P3Wrapped,
          R F(P1, P2, P3Wrapped)>
R CastHandlerData3(void *closure, const void *data, P3Wrapper value) {
  return F(static_cast<P1>(closure), static_cast<P2>(data), value);
}
template <class R, class P1, class P2, class P3, class P4, class P5,
          R F(P1, P2, P3, P4, P5)>
R CastHandlerData5(void *closure, const void *data, P3 a3, P4 a4, P5 a5) {
  return F(static_cast<P1>(closure), static_cast<P2>(data), a3, a4, a5);
}
/* String-buffer variant: casts the handler data but drops the buffer handle,
 * which is left unnamed because it is never read. */
template <class R, class P1, class P2, R F(P1, P2, const char *, size_t)>
R CastHandlerDataIgnoreHandle(void *closure, const void *data, const char *buf,
                              size_t len, const upb_bufhandle * /* handle */) {
  return F(static_cast<P1>(closure), static_cast<P2>(data), buf, len);
}
/* Partial specializations of ConvertParams.  Every ::Func named below has the
 * parameter list (void *, const void *, ...), i.e. closure first and handler
 * data second.  Unbound FuncN inputs are routed through an IgnoreHandlerData*
 * adapter; BoundFuncN inputs are routed through a CastHandlerData* adapter
 * that passes the handler data to the user's second parameter. */
/* For unbound functions, ignore the handler data. */
template <class R, class P1, R F(P1), class I, class T>
struct ConvertParams<Func1<R, P1, F, I>, T> {
typedef Func2<R, void *, const void *, IgnoreHandlerData2<R, P1, F>, I> Func;
};
/* Value handlers: the third parameter type of the target handler signature
 * (P3_2) becomes the adapter's value type, while the user's own P2 is what F
 * is declared to take. */
template <class R, class P1, class P2, R F(P1, P2), class I,
class R2, class P1_2, class P2_2, class P3_2>
struct ConvertParams<Func2<R, P1, P2, F, I>,
R2 (*)(P1_2, P2_2, P3_2)> {
typedef Func3<R, void *, const void *, P3_2,
IgnoreHandlerData3<R, P1, P3_2, P2, F>, I> Func;
};
/* For StringBuffer only; this ignores both the handler data and the
* upb_bufhandle. */
template <class R, class P1, R F(P1, const char *, size_t), class I, class T>
struct ConvertParams<Func3<R, P1, const char *, size_t, F, I>, T> {
typedef Func5<R, void *, const void *, const char *, size_t,
const upb_bufhandle *, IgnoreHandlerDataIgnoreHandle<R, P1, F>,
I> Func;
};
template <class R, class P1, class P2, class P3, class P4, R F(P1, P2, P3, P4),
class I, class T>
struct ConvertParams<Func4<R, P1, P2, P3, P4, F, I>, T> {
typedef Func5<R, void *, const void *, P2, P3, P4,
IgnoreHandlerData5<R, P1, P2, P3, P4, F>, I> Func;
};
/* For bound functions, cast the handler data. */
template <class R, class P1, class P2, R F(P1, P2), class I, class T>
struct ConvertParams<BoundFunc2<R, P1, P2, F, I>, T> {
typedef Func2<R, void *, const void *, CastHandlerData2<R, P1, P2, F>, I>
Func;
};
/* Bound value handlers: as in the unbound case, the adapter's value type is
 * taken from the target handler signature (P3_2). */
template <class R, class P1, class P2, class P3, R F(P1, P2, P3), class I,
class R2, class P1_2, class P2_2, class P3_2>
struct ConvertParams<BoundFunc3<R, P1, P2, P3, F, I>,
R2 (*)(P1_2, P2_2, P3_2)> {
typedef Func3<R, void *, const void *, P3_2,
CastHandlerData3<R, P1, P2, P3_2, P3, F>, I> Func;
};
/* For StringBuffer only; this ignores the upb_bufhandle. */
template <class R, class P1, class P2, R F(P1, P2, const char *, size_t),
class I, class T>
struct ConvertParams<BoundFunc4<R, P1, P2, const char *, size_t, F, I>, T> {
typedef Func5<R, void *, const void *, const char *, size_t,
const upb_bufhandle *,
CastHandlerDataIgnoreHandle<R, P1, P2, F>, I>
Func;
};
template <class R, class P1, class P2, class P3, class P4, class P5,
R F(P1, P2, P3, P4, P5), class I, class T>
struct ConvertParams<BoundFunc5<R, P1, P2, P3, P4, P5, F, I>, T> {
typedef Func5<R, void *, const void *, P3, P4, P5,
CastHandlerData5<R, P1, P2, P3, P4, P5, F>, I> Func;
};
/* TYPE_METHODS(utype, ltype, ctype, vtype) emits two explicit specializations
 * for one value type:
 *   - CanonicalType<vtype>, whose ::Type is ctype;
 *   - HandlersPtr::SetValueHandler<vtype>, which first calls
 *     handler.AddCleanup(ptr()) and then registers the handler through the C
 *     function upb_handlers_set<ltype>().
 *
 * utype/ltype are upper/lower-case, ctype is canonical C type, vtype is
 * variant C type. */
#define TYPE_METHODS(utype, ltype, ctype, vtype) \
template <> \
struct CanonicalType<vtype> { \
typedef ctype Type; \
}; \
template <> \
inline bool HandlersPtr::SetValueHandler<vtype>( \
FieldDefPtr f, const HandlersPtr::utype##Handler &handler) { \
handler.AddCleanup(ptr()); \
return upb_handlers_set##ltype(ptr(), f.ptr(), handler.handler(), \
&handler.attr()); \
}
TYPE_METHODS(Double, double, double, double)
TYPE_METHODS(Float, float, float, float)
TYPE_METHODS(UInt64, uint64, uint64_t, UPB_UINT64_T)
TYPE_METHODS(UInt32, uint32, uint32_t, UPB_UINT32_T)
TYPE_METHODS(Int64, int64, int64_t, UPB_INT64_T)
TYPE_METHODS(Int32, int32, int32_t, UPB_INT32_T)
TYPE_METHODS(Bool, bool, bool, bool)
/* Extra expansions for the alternate 32-bit variant types, emitted only when
 * UPB_TWO_32BIT_TYPES is defined. */
#ifdef UPB_TWO_32BIT_TYPES
TYPE_METHODS(Int32, int32, int32_t, UPB_INT32ALT_T)
TYPE_METHODS(UInt32, uint32, uint32_t, UPB_UINT32ALT_T)
#endif
/* Likewise for the alternate 64-bit variant types. */
#ifdef UPB_TWO_64BIT_TYPES
TYPE_METHODS(Int64, int64, int64_t, UPB_INT64ALT_T)
TYPE_METHODS(UInt64, uint64, uint64_t, UPB_UINT64ALT_T)
#endif
#undef TYPE_METHODS
/* Status* is its own canonical type (no alternate spelling to normalize). */
template <> struct CanonicalType<Status*> {
typedef Status* Type;
};
/* ReturnOf<F>::Return is the return type of the function-pointer type F.
 * Specializations are provided for pointers to functions taking two to five
 * parameters; any other F hits the undefined primary template and fails to
 * compile. */
template <class F> struct ReturnOf;
template <class R, class P1, class P2>
struct ReturnOf<R (*)(P1, P2)> {
typedef R Return;
};
template <class R, class P1, class P2, class P3>
struct ReturnOf<R (*)(P1, P2, P3)> {
typedef R Return;
};
template <class R, class P1, class P2, class P3, class P4>
struct ReturnOf<R (*)(P1, P2, P3, P4)> {
typedef R Return;
};
template <class R, class P1, class P2, class P3, class P4, class P5>
struct ReturnOf<R (*)(P1, P2, P3, P4, P5)> {
typedef R Return;
};
/* Builds a Handler<T> from a FuncN/BoundFuncN object `func`.
 *
 * T is the handler function-pointer type being targeted.  The constructor:
 *   1. records func's data pointer and cleanup function so AddCleanup() can
 *      register them later, and stores the data pointer as the handler data;
 *   2. adapts the user's function to T's shape at compile time, first with
 *      ConvertParams (parameter list) and then with MaybeWrapReturn (return
 *      type), and stores the resulting function's Call member;
 *   3. fills in the handler attributes that can be derived statically from
 *      F::FuncInfo. */
template <class T>
template <class F>
inline Handler<T>::Handler(F func)
: registered_(false),
cleanup_data_(func.GetData()),
cleanup_func_(func.GetCleanup()) {
attr_.handler_data = func.GetData();
/* Return type the handler slot expects, taken from T itself. */
typedef typename ReturnOf<T>::Return Return;
typedef typename ConvertParams<F, T>::Func ConvertedParamsFunc;
typedef typename MaybeWrapReturn<ConvertedParamsFunc, Return>::Func
ReturnWrappedFunc;
handler_ = ReturnWrappedFunc().Call;
/* Set attributes based on what templates can statically tell us about the
* user's function. */
/* If the original function returns void, then we know that we wrapped it to
* always return ok. */
bool always_ok = is_same<typename F::FuncInfo::Return, void>::value;
attr_.alwaysok = always_ok;
/* Closure parameter and return type. */
attr_.closure_type = UniquePtrForType<typename F::FuncInfo::Closure>();
/* We use the closure type (from the first parameter) if the return type is
* void or bool, since these are the two cases we wrap to return the closure's
* type anyway.
*
* This is all nonsense for non START* handlers, but it doesn't matter because
* in that case the value will be ignored. */
typedef typename FirstUnlessVoidOrBool<typename F::FuncInfo::Return,
typename F::FuncInfo::Closure>::value
EffectiveReturn;
attr_.return_closure_type = UniquePtrForType<EffectiveReturn>();
}
/* Marks this handler as registered and, if it carries a cleanup function,
 * registers (cleanup_data_, cleanup_func_) with the handlers object `h`.
 * Asserts that the handler has not been registered before and that the
 * cleanup registration succeeds. */
template <class T>
inline void Handler<T>::AddCleanup(upb_handlers* h) const {
  UPB_ASSERT(!registered_);
  registered_ = true;
  if (!cleanup_func_) {
    return; /* Nothing to release later. */
  }
  bool added = upb_handlers_addcleanup(h, cleanup_data_, cleanup_func_);
  UPB_ASSERT(added);
}
} /* namespace upb */
#endif /* __cplusplus */
#undef UPB_TWO_32BIT_TYPES
#undef UPB_TWO_64BIT_TYPES
#undef UPB_INT32_T
#undef UPB_UINT32_T
#undef UPB_INT32ALT_T
#undef UPB_UINT32ALT_T
#undef UPB_INT64_T
#undef UPB_UINT64_T
#undef UPB_INT64ALT_T
#undef UPB_UINT64ALT_T
#include "upb/port_undef.inc"
#endif /* UPB_HANDLERS_INL_H_ */

@ -0,0 +1,567 @@
/*
** TODO(haberman): it's unclear whether a lot of the consistency checks should
** UPB_ASSERT() or return false.
*/
#include "upb/handlers.h"
#include <string.h>
#include "upb/sink.h"
#include "upb/port_def.inc"
/* The set of handlers registered for one message type.  Instances are
 * created by upb_handlers_new(), which over-allocates the struct so that
 * `table` has one entry per selector of the message. */
struct upb_handlers {
/* Cache that created this object; cleanup registrations are forwarded to it. */
upb_handlercache *cache;
/* Message definition these handlers apply to. */
const upb_msgdef *msg;
/* One slot per submessage field, indexed by upb_fielddef_index(); NULL when
 * the message has no submessage fields. */
const upb_handlers **sub;
/* Closure type established for handlers that run directly in this message's
 * frame (NULL until some handler specifies one). */
const void *top_closure_type;
upb_handlers_tabent table[1]; /* Dynamically-sized field handler array. */
};
/* Allocates `size` bytes from `arena` and zero-fills them.  Returns NULL,
 * without touching any memory, when the underlying allocation fails. */
static void *upb_calloc(upb_arena *arena, size_t size) {
  void *block = upb_malloc(upb_arena_alloc(arena), size);
  if (block == NULL) {
    return NULL;
  }
  return memset(block, 0, size); /* memset() returns `block`. */
}
/* Defined for the sole purpose of having a unique pointer value for
 * UPB_NO_CLOSURE. */
char _upb_noclosure;

/* SUBH(h, i): lvalue naming slot `i` of h's subhandlers array.  Both
 * arguments are parenthesized so that any expression (e.g. `&obj`, or an
 * index written with a low-precedence operator) expands correctly. */
#define SUBH(h, selector) ((h)->sub[(selector)])

/* SUBH_F(h, f): the subhandlers slot for submessage field `f`.  The slot
 * index for a submessage field is the field index. */
#define SUBH_F(h, f) SUBH(h, upb_fielddef_index(f))
/* Looks up the selector for handler `type` on field `f`.
 *
 * Asserts that `f` belongs to the message `h` was built for and that the
 * field/type combination is valid.  Returns the selector on success.  If the
 * lookup fails and assertions are compiled out, returns -1 instead of an
 * uninitialized value, so callers can detect the failure. */
static int32_t trygetsel(upb_handlers *h, const upb_fielddef *f,
                         upb_handlertype_t type) {
  upb_selector_t sel = 0;
  bool ok = upb_handlers_getselector(f, type, &sel);

  UPB_ASSERT(upb_handlers_msgdef(h) == upb_fielddef_containingtype(f));
  UPB_ASSERT(ok);
  if (!ok) {
    return -1;
  }
  return (int32_t)sel;
}
/* Like trygetsel(), but for callers that require the lookup to succeed: the
 * result is asserted to be non-negative and returned as a upb_selector_t. */
static upb_selector_t handlers_getsel(upb_handlers *h, const upb_fielddef *f,
                                      upb_handlertype_t type) {
  const int32_t raw = trygetsel(h, f, type);
  UPB_ASSERT(raw >= 0);
  return (upb_selector_t)raw;
}
/* Returns the address of the return_closure_type attribute stored in h's
 * table entry for handler `type` on field `f`, so the caller can read or
 * update it in place. */
static const void **returntype(upb_handlers *h, const upb_fielddef *f,
                               upb_handlertype_t type) {
  upb_selector_t sel = handlers_getsel(h, f, type);
  upb_handlerattr *entry_attr = &h->table[sel].attr;
  return &entry_attr->return_closure_type;
}
/* Installs `func` (with attributes `attr`, or defaults when attr is NULL) in
 * table slot `sel` of `h`.  The slot must be empty.
 *
 * `f` and `type` identify the handler being set; `f` may be NULL for the
 * message-level handlers.  Returns false, leaving the slot empty, when the
 * closure type or return closure type in `attr` conflicts with a type that
 * was already established; returns true once the handler is stored. */
static bool doset(upb_handlers *h, int32_t sel, const upb_fielddef *f,
                  upb_handlertype_t type, upb_func *func,
                  const upb_handlerattr *attr) {
  upb_handlerattr new_attr = UPB_HANDLERATTR_INIT;
  const void **established; /* Closure type already fixed for this context. */
  const void *wanted;       /* Closure type this handler declares. */

  UPB_ASSERT(!h->table[sel].func);

  if (attr) {
    new_attr = *attr;
  }
  wanted = new_attr.closure_type;

  /* Find which established closure type this handler must agree with: the
   * STARTSTR return type for string data, the STARTSEQ return type for
   * handlers that run inside a sequence, and the top-level type otherwise. */
  if (type == UPB_HANDLER_STRING) {
    established = returntype(h, f, UPB_HANDLER_STARTSTR);
  } else {
    bool inside_seq = f && upb_fielddef_isseq(f) &&
                      type != UPB_HANDLER_STARTSEQ &&
                      type != UPB_HANDLER_ENDSEQ;
    established = inside_seq ? returntype(h, f, UPB_HANDLER_STARTSEQ)
                             : &h->top_closure_type;
  }

  if (wanted) {
    if (*established && *established != wanted) {
      return false;
    }
    /* First handler to name a type (or a matching one) establishes it. */
    *established = wanted;
  }

  /* A STARTSEQ/STARTSTR slot may already hold a return type recorded by an
   * inner handler that was set first; it must match, and it is inherited when
   * this handler does not declare one. */
  if (type == UPB_HANDLER_STARTSEQ || type == UPB_HANDLER_STARTSTR) {
    const void *prior = h->table[sel].attr.return_closure_type;
    if (prior) {
      if (!new_attr.return_closure_type) {
        new_attr.return_closure_type = prior;
      } else if (new_attr.return_closure_type != prior) {
        return false;
      }
    }
  }

  h->table[sel].func = func;
  h->table[sel].attr = new_attr;
  return true;
}
/* Returns the closure type that is in effect when the handler identified by
 * (f, type) runs.  This is the message's top-level closure type unless the
 * handler runs inside a frame whose START* handler is registered, in which
 * case it is that handler's return closure type.
 *
 * UPB_HANDLER_STRING is asserted against because callers do not need it at
 * the moment.  Returns NULL if the effective closure type is unspecified
 * (either no handler was registered to specify it, or the handler that was
 * registered did not specify the closure type).  Submessage frames are not
 * considered yet. */
const void *effective_closure_type(upb_handlers *h, const upb_fielddef *f,
                                   upb_handlertype_t type) {
  const void *closure = h->top_closure_type;
  upb_selector_t sel;

  UPB_ASSERT(type != UPB_HANDLER_STRING);

  if (upb_fielddef_isseq(f) && type != UPB_HANDLER_STARTSEQ &&
      type != UPB_HANDLER_ENDSEQ) {
    sel = handlers_getsel(h, f, UPB_HANDLER_STARTSEQ);
    if (h->table[sel].func) {
      closure = h->table[sel].attr.return_closure_type;
    }
  }

  /* Only reachable when the assertion above is compiled out. */
  if (type == UPB_HANDLER_STRING) {
    sel = handlers_getsel(h, f, UPB_HANDLER_STARTSTR);
    if (h->table[sel].func) {
      closure = h->table[sel].attr.return_closure_type;
    }
  }

  return closure;
}
/* Checks whether the START* handler specified by f & type is missing even
 * though it is required to convert the established type of an outer frame
 * ("closure_type") into the established type of an inner frame (represented
 * in the return closure type of this handler's attr).
 *
 * Returns true when the handler is present, or when it is absent but the two
 * types do not conflict; returns false on a conflict.
 *
 * NOTE: `status` is accepted for interface compatibility but no error message
 * is recorded in it on failure. */
bool checkstart(upb_handlers *h, const upb_fielddef *f, upb_handlertype_t type,
                upb_status *status) {
  upb_selector_t sel = handlers_getsel(h, f, type);
  const void *closure_type;
  const void *return_closure_type;

  (void)status; /* Intentionally unused; see the note above. */

  if (h->table[sel].func) {
    return true;
  }

  closure_type = effective_closure_type(h, f, type);
  return_closure_type = h->table[sel].attr.return_closure_type;

  if (closure_type && return_closure_type &&
      closure_type != return_closure_type) {
    return false;
  }
  return true;
}
/* Allocates a zero-initialized upb_handlers for message `md` from `arena`.
 *
 * The struct is over-allocated so that `table` holds one entry per selector
 * of `md` (the struct itself already contains the first entry).  A `sub`
 * array with one slot per submessage field is allocated when the message has
 * any.  Returns NULL if either allocation fails; nothing needs to be freed in
 * that case because all memory belongs to the arena. */
static upb_handlers *upb_handlers_new(const upb_msgdef *md,
                                      upb_handlercache *cache,
                                      upb_arena *arena) {
  /* Computed in size_t: the product of sizeof and a selector count must not
   * be narrowed through a signed int. */
  size_t extra = sizeof(upb_handlers_tabent) *
                 ((size_t)upb_msgdef_selectorcount(md) - 1);
  size_t sub_count = (size_t)upb_msgdef_submsgfieldcount(md);
  upb_handlers *h = upb_calloc(arena, sizeof(*h) + extra);

  if (!h) return NULL;

  h->cache = cache;
  h->msg = md;

  if (sub_count > 0) {
    h->sub = upb_calloc(arena, sub_count * sizeof(*h->sub));
    if (!h->sub) return NULL;
  } else {
    h->sub = NULL;
  }

  /* upb_calloc() above initialized all handlers to NULL. */
  return h;
}
/* Public interface ***********************************************************/
#define SETTER(name, handlerctype, handlertype) \
bool upb_handlers_set##name(upb_handlers *h, const upb_fielddef *f, \
handlerctype func, \
const upb_handlerattr *attr) { \
int32_t sel = trygetsel(h, f, handlertype); \
return doset(h, sel, f, handlertype, (upb_func *)func, attr); \
}
SETTER(int32, upb_int32_handlerfunc*, UPB_HANDLER_INT32)
SETTER(int64, upb_int64_handlerfunc*, UPB_HANDLER_INT64)
SETTER(uint32, upb_uint32_handlerfunc*, UPB_HANDLER_UINT32)
SETTER(uint64, upb_uint64_handlerfunc*, UPB_HANDLER_UINT64)
SETTER(float, upb_float_handlerfunc*, UPB_HANDLER_FLOAT)
SETTER(double, upb_double_handlerfunc*, UPB_HANDLER_DOUBLE)
SETTER(bool, upb_bool_handlerfunc*, UPB_HANDLER_BOOL)
SETTER(startstr, upb_startstr_handlerfunc*, UPB_HANDLER_STARTSTR)
SETTER(string, upb_string_handlerfunc*, UPB_HANDLER_STRING)
SETTER(endstr, upb_endfield_handlerfunc*, UPB_HANDLER_ENDSTR)
SETTER(startseq, upb_startfield_handlerfunc*, UPB_HANDLER_STARTSEQ)
SETTER(startsubmsg, upb_startfield_handlerfunc*, UPB_HANDLER_STARTSUBMSG)
SETTER(endsubmsg, upb_endfield_handlerfunc*, UPB_HANDLER_ENDSUBMSG)
SETTER(endseq, upb_endfield_handlerfunc*, UPB_HANDLER_ENDSEQ)
#undef SETTER
/* Registers the handler stored in the UPB_UNKNOWN_SELECTOR slot.  No field is
 * involved, so doset() is given a NULL field and UPB_HANDLER_INT32 as the
 * handler type.  Returns doset()'s result. */
bool upb_handlers_setunknown(upb_handlers *h, upb_unknown_handlerfunc *func,
                             const upb_handlerattr *attr) {
  upb_func *generic = (upb_func *)func;
  return doset(h, UPB_UNKNOWN_SELECTOR, NULL, UPB_HANDLER_INT32, generic,
               attr);
}
/* Registers the handler stored in the UPB_STARTMSG_SELECTOR slot.  No field
 * is involved, so doset() is given a NULL field and UPB_HANDLER_INT32 as the
 * handler type.  Returns doset()'s result. */
bool upb_handlers_setstartmsg(upb_handlers *h, upb_startmsg_handlerfunc *func,
                              const upb_handlerattr *attr) {
  upb_func *generic = (upb_func *)func;
  return doset(h, UPB_STARTMSG_SELECTOR, NULL, UPB_HANDLER_INT32, generic,
               attr);
}
/* Registers the handler stored in the UPB_ENDMSG_SELECTOR slot.  No field is
 * involved, so doset() is given a NULL field and UPB_HANDLER_INT32 as the
 * handler type.  Returns doset()'s result. */
bool upb_handlers_setendmsg(upb_handlers *h, upb_endmsg_handlerfunc *func,
                            const upb_handlerattr *attr) {
  upb_func *generic = (upb_func *)func;
  return doset(h, UPB_ENDMSG_SELECTOR, NULL, UPB_HANDLER_INT32, generic, attr);
}
/* Attaches `sub` as the handlers for submessage field `f` of `h`.
 *
 * `sub` must be non-NULL and `f` must be a submessage field (both asserted).
 * Returns false if subhandlers were already set for `f` (they cannot be
 * replaced) or if `sub` was built for a different message type than the one
 * `f` refers to; returns true after storing `sub`. */
bool upb_handlers_setsubhandlers(upb_handlers *h, const upb_fielddef *f,
                                 const upb_handlers *sub) {
  UPB_ASSERT(sub);
  UPB_ASSERT(upb_fielddef_issubmsg(f));

  if (SUBH_F(h, f) != NULL ||
      upb_handlers_msgdef(sub) != upb_fielddef_msgsubdef(f)) {
    return false;
  }

  SUBH_F(h, f) = sub;
  return true;
}
/* Returns the subhandlers stored for submessage field `f` of `h`, or NULL if
 * none have been set.  `f` must be a submessage field (asserted). */
const upb_handlers *upb_handlers_getsubhandlers(const upb_handlers *h,
                                                const upb_fielddef *f) {
  const upb_handlers *found;
  UPB_ASSERT(upb_fielddef_issubmsg(f));
  found = SUBH_F(h, f);
  return found;
}
/* Returns the function registered in slot `s` of `h`, or NULL if the slot is
 * empty.  When a function is present and `handler_data` is non-NULL, the
 * slot's handler data pointer is also written to *handler_data; otherwise
 * *handler_data is left untouched. */
upb_func *upb_handlers_gethandler(const upb_handlers *h, upb_selector_t s,
                                  const void **handler_data) {
  upb_func *fn = (upb_func *)h->table[s].func;
  if (fn != NULL && handler_data != NULL) {
    *handler_data = h->table[s].attr.handler_data;
  }
  return fn;
}
/* Copies the attributes of the handler in slot `sel` into *attr.  Returns
 * false, without writing *attr, when no handler is registered in that slot. */
bool upb_handlers_getattr(const upb_handlers *h, upb_selector_t sel,
                          upb_handlerattr *attr) {
  if (upb_handlers_gethandler(h, sel, NULL) == NULL) {
    return false;
  }
  *attr = h->table[sel].attr;
  return true;
}
/* Returns the subhandlers for the submessage field whose STARTSUBMSG selector
 * is `sel`.  STARTSUBMSG selectors follow the static selectors in the handler
 * table, so subtracting UPB_STATIC_SELECTOR_COUNT yields the index into the
 * subhandlers array. */
const upb_handlers *upb_handlers_getsubhandlers_sel(const upb_handlers *h,
                                                    upb_selector_t sel) {
  const upb_handlers *found = SUBH(h, sel - UPB_STATIC_SELECTOR_COUNT);
  return found;
}
/* Returns the message definition that `h` was created for. */
const upb_msgdef *upb_handlers_msgdef(const upb_handlers *h) {
  const upb_msgdef *md = h->msg;
  return md;
}
/* Registers cleanup function `func` with argument `p` on the handler cache
 * that owns `h`.  Returns the cache's result. */
bool upb_handlers_addcleanup(upb_handlers *h, void *p, upb_handlerfree *func) {
  upb_handlercache *owner = h->cache;
  return upb_handlercache_addcleanup(owner, p, func);
}
/* Maps the type of primitive field `f` to the handler type used for its
 * values.  Enum fields share the int32 handler.  Any other field type is
 * invalid input: the function asserts and returns -1. */
upb_handlertype_t upb_handlers_getprimitivehandlertype(const upb_fielddef *f) {
  switch (upb_fielddef_type(f)) {
    case UPB_TYPE_BOOL:
      return UPB_HANDLER_BOOL;
    case UPB_TYPE_FLOAT:
      return UPB_HANDLER_FLOAT;
    case UPB_TYPE_DOUBLE:
      return UPB_HANDLER_DOUBLE;
    case UPB_TYPE_ENUM:
    case UPB_TYPE_INT32:
      return UPB_HANDLER_INT32;
    case UPB_TYPE_UINT32:
      return UPB_HANDLER_UINT32;
    case UPB_TYPE_INT64:
      return UPB_HANDLER_INT64;
    case UPB_TYPE_UINT64:
      return UPB_HANDLER_UINT64;
    default:
      break;
  }
  UPB_ASSERT(false);
  return -1; /* Invalid input. */
}
/* Computes the selector (handler-table index) for handler `type` on field
 * `f` and stores it in *s.
 *
 * Returns false, leaving *s untouched, when the combination is invalid: a
 * value handler whose type does not match the field, a string handler on a
 * field that is neither a string nor lazy, a sequence handler on a
 * non-sequence field, a submessage handler on a non-submessage field, or a
 * `type` that is not a known handler type.
 *
 * Layout relative to the field's selector base:
 *   base - 2  STARTSEQ            base + 1  STARTSTR
 *   base - 1  ENDSEQ              base + 2  ENDSTR
 *   base      value / STRING / ENDSUBMSG
 *   base + 3  STRING for a lazy (non-string) field
 * STARTSUBMSG is the exception: it is field index + UPB_STATIC_SELECTOR_COUNT. */
bool upb_handlers_getselector(const upb_fielddef *f, upb_handlertype_t type,
                              upb_selector_t *s) {
  uint32_t selector_base = upb_fielddef_selectorbase(f);
  switch (type) {
    case UPB_HANDLER_INT32:
    case UPB_HANDLER_INT64:
    case UPB_HANDLER_UINT32:
    case UPB_HANDLER_UINT64:
    case UPB_HANDLER_FLOAT:
    case UPB_HANDLER_DOUBLE:
    case UPB_HANDLER_BOOL:
      if (!upb_fielddef_isprimitive(f) ||
          upb_handlers_getprimitivehandlertype(f) != type)
        return false;
      *s = selector_base;
      break;
    case UPB_HANDLER_STRING:
      if (upb_fielddef_isstring(f)) {
        *s = selector_base;
      } else if (upb_fielddef_lazy(f)) {
        *s = selector_base + 3;
      } else {
        return false;
      }
      break;
    case UPB_HANDLER_STARTSTR:
      if (upb_fielddef_isstring(f) || upb_fielddef_lazy(f)) {
        *s = selector_base + 1;
      } else {
        return false;
      }
      break;
    case UPB_HANDLER_ENDSTR:
      if (upb_fielddef_isstring(f) || upb_fielddef_lazy(f)) {
        *s = selector_base + 2;
      } else {
        return false;
      }
      break;
    case UPB_HANDLER_STARTSEQ:
      if (!upb_fielddef_isseq(f)) return false;
      *s = selector_base - 2;
      break;
    case UPB_HANDLER_ENDSEQ:
      if (!upb_fielddef_isseq(f)) return false;
      *s = selector_base - 1;
      break;
    case UPB_HANDLER_STARTSUBMSG:
      if (!upb_fielddef_issubmsg(f)) return false;
      /* Selectors for STARTSUBMSG are at the beginning of the table so that the
       * selector can also be used as an index into the "sub" array of
       * subhandlers.  The indexes into these two tables are the same, except
       * that in the handler table the static selectors come first. */
      *s = upb_fielddef_index(f) + UPB_STATIC_SELECTOR_COUNT;
      break;
    case UPB_HANDLER_ENDSUBMSG:
      if (!upb_fielddef_issubmsg(f)) return false;
      *s = selector_base;
      break;
    default:
      /* Unknown handler type: report failure rather than falling through to
       * the bounds check below with *s never written. */
      return false;
  }
  UPB_ASSERT((size_t)*s < upb_msgdef_selectorcount(upb_fielddef_containingtype(f)));
  return true;
}
/* upb_handlercache ***********************************************************/
/* State behind the opaque upb_handlercache type: a table of already-built
 * upb_handlers keyed by message definition, plus what is needed to build new
 * ones on demand. */
struct upb_handlercache {
upb_arena *arena; /* passed to upb_handlers_new(); released by upb_handlercache_free(). */
upb_inttable tab; /* maps upb_msgdef* -> upb_handlers*. */
upb_handlers_callback *callback; /* called once for each newly created upb_handlers. */
const void *closure; /* first argument handed to "callback". */
};
/* Returns the handlers for message definition "md", creating them on first
 * use.  A newly created object is stored in the cache table, passed to the
 * user callback, and then linked to the handlers of every submessage field
 * (which are fetched or created through this same function).
 *
 * Returns NULL if creating the handlers, inserting them into the table, or
 * obtaining any submessage's handlers fails. */
const upb_handlers *upb_handlercache_get(upb_handlercache *c,
                                         const upb_msgdef *md) {
  upb_value cached;
  upb_handlers *handlers;
  upb_msg_field_iter iter;

  /* Already built for this msgdef: hand back the cached object. */
  if (upb_inttable_lookupptr(&c->tab, md, &cached)) {
    return upb_value_getptr(cached);
  }

  handlers = upb_handlers_new(md, c, c->arena);
  if (!handlers) return NULL;

  /* The entry goes into the table before we recurse into submessages, so a
   * message that (directly or indirectly) contains itself finds this entry
   * instead of recursing again. */
  if (!upb_inttable_insertptr(&c->tab, md, upb_value_ptr(handlers))) {
    return NULL;
  }

  c->callback(c->closure, handlers);

  /* For each submessage field, get or create a handlers object and set it as
   * the subhandlers. */
  upb_msg_field_begin(&iter, md);
  while (!upb_msg_field_done(&iter)) {
    upb_fielddef *field = upb_msg_iter_field(&iter);

    if (upb_fielddef_issubmsg(field)) {
      const upb_msgdef *sub_md = upb_fielddef_msgsubdef(field);
      const upb_handlers *sub_handlers = upb_handlercache_get(c, sub_md);

      if (!sub_handlers) return NULL;
      upb_handlers_setsubhandlers(handlers, field, sub_handlers);
    }

    upb_msg_field_next(&iter);
  }

  return handlers;
}
/* Creates an empty handler cache.
 *
 * "callback" is invoked (with "closure" as its first argument) each time
 * upb_handlercache_get() builds a new upb_handlers object.
 *
 * Returns NULL if any allocation fails; in that case everything allocated so
 * far, including the arena, is released before returning. */
upb_handlercache *upb_handlercache_new(upb_handlers_callback *callback,
                                       const void *closure) {
  upb_handlercache *cache = upb_gmalloc(sizeof(*cache));

  if (!cache) return NULL;

  cache->callback = callback;
  cache->closure = closure;

  /* The arena allocation can fail too; do not hand out a cache without one. */
  cache->arena = upb_arena_new();
  if (!cache->arena) goto oom_cache;

  if (!upb_inttable_init(&cache->tab, UPB_CTYPE_PTR)) goto oom_arena;

  return cache;

oom_arena:
  /* The table was never initialized, so only the arena needs undoing. */
  upb_arena_free(cache->arena);
oom_cache:
  upb_gfree(cache);
  return NULL;
}
/* Destroys a cache created by upb_handlercache_new(): tears down the lookup
 * table, frees the arena, then frees the cache object itself.
 *
 * Passing NULL is a no-op, mirroring free(), so callers can release the result
 * of a failed upb_handlercache_new() unconditionally. */
void upb_handlercache_free(upb_handlercache *cache) {
  if (!cache) return;
  upb_inttable_uninit(&cache->tab);
  upb_arena_free(cache->arena);
  upb_gfree(cache);
}
/* Registers cleanup function "func" for pointer "p" on the cache's arena by
 * forwarding to upb_arena_addcleanup(), and returns that call's result.
 * NOTE(review): presumably func(p) runs when the arena is freed by
 * upb_handlercache_free() -- confirm against the arena implementation. */
bool upb_handlercache_addcleanup(upb_handlercache *c, void *p,
                                 upb_handlerfree *func) {
  const bool registered = upb_arena_addcleanup(c->arena, p, func);
  return registered;
}
/* upb_byteshandler ***********************************************************/
/* Installs "func" in the UPB_STARTSTR_SELECTOR slot of "h" and records "d" as
 * that slot's handler data.  Always returns true. */
bool upb_byteshandler_setstartstr(upb_byteshandler *h,
                                  upb_startstr_handlerfunc *func, void *d) {
  const int slot = UPB_STARTSTR_SELECTOR;

  h->table[slot].attr.handler_data = d;
  h->table[slot].func = (upb_func*)func;
  return true;
}
/* Installs "func" in the UPB_STRING_SELECTOR slot of "h" and records "d" as
 * that slot's handler data.  Always returns true. */
bool upb_byteshandler_setstring(upb_byteshandler *h,
                                upb_string_handlerfunc *func, void *d) {
  const int slot = UPB_STRING_SELECTOR;

  h->table[slot].attr.handler_data = d;
  h->table[slot].func = (upb_func*)func;
  return true;
}
/* Installs "func" in the UPB_ENDSTR_SELECTOR slot of "h" and records "d" as
 * that slot's handler data.  Always returns true. */
bool upb_byteshandler_setendstr(upb_byteshandler *h,
                                upb_endfield_handlerfunc *func, void *d) {
  const int slot = UPB_ENDSTR_SELECTOR;

  h->table[slot].attr.handler_data = d;
  h->table[slot].func = (upb_func*)func;
  return true;
}
/** Handlers for upb_msg ******************************************************/
/* Data bound to each fallback scalar writer: where in the message the value
 * is stored and which has-bit (if any) is set alongside it. */
typedef struct {
  size_t offset;   /* Byte offset of the field from the message pointer. */
  int32_t hasbit;  /* Bit index of the has-bit; values <= 0 mean "none". */
} upb_msg_handlerdata;

/* Fallback implementation if the handler is not specialized by the producer.
 *
 * MSG_WRITER(type, ctype) defines upb_msg_set<type>(c, hd, val), where "c" is
 * the message pointer and "hd" points to a upb_msg_handlerdata.  The function
 * sets the has-bit when hd->hasbit > 0 (bits are addressed low to high within
 * each byte), stores "val" at hd->offset, and always returns true.
 *
 * The last line of the macro must NOT end in a backslash: a trailing
 * continuation would splice the following source line into the macro body. */
#define MSG_WRITER(type, ctype)                                       \
  bool upb_msg_set ## type (void *c, const void *hd, ctype val) {     \
    uint8_t *m = c;                                                   \
    const upb_msg_handlerdata *d = hd;                                \
    if (d->hasbit > 0)                                                \
      *(uint8_t*)&m[d->hasbit / 8] |= 1 << (d->hasbit % 8);           \
    *(ctype*)&m[d->offset] = val;                                     \
    return true;                                                      \
  }

MSG_WRITER(double, double)
MSG_WRITER(float,  float)
MSG_WRITER(int32,  int32_t)
MSG_WRITER(int64,  int64_t)
MSG_WRITER(uint32, uint32_t)
MSG_WRITER(uint64, uint64_t)
MSG_WRITER(bool,   bool)
/* Registers the fallback scalar writer (upb_msg_set<type>) for primitive field
 * "f" on handlers "h".  The writer stores incoming values at byte "offset"
 * from the message pointer and, when hasbit > 0, sets bit number "hasbit".
 *
 * The offset/hasbit pair lives in a heap-allocated upb_msg_handlerdata that is
 * registered with "h" for cleanup via upb_gfree.
 *
 * Returns false if allocation fails, if the cleanup cannot be registered (the
 * data is freed immediately in that case rather than leaked), if the field's
 * type has no scalar writer, or if the underlying upb_handlers_set*() call
 * fails. */
bool upb_msg_setscalarhandler(upb_handlers *h, const upb_fielddef *f,
                              size_t offset, int32_t hasbit) {
  upb_handlerattr attr = UPB_HANDLERATTR_INIT;
  bool ok = false;

  upb_msg_handlerdata *d = upb_gmalloc(sizeof(*d));
  if (!d) return false;
  d->offset = offset;
  d->hasbit = hasbit;

  attr.handler_data = d;
  attr.alwaysok = true;

  /* If the handlers object does not take ownership of "d", nobody else will
   * free it, so release it here. */
  if (!upb_handlers_addcleanup(h, d, upb_gfree)) {
    upb_gfree(d);
    return false;
  }

#define TYPE(u, l) \
  case UPB_TYPE_##u: \
    ok = upb_handlers_set##l(h, f, upb_msg_set##l, &attr); break;

  switch (upb_fielddef_type(f)) {
    TYPE(INT64,  int64);
    TYPE(INT32,  int32);
    TYPE(ENUM,   int32);  /* Enums share the int32 writer. */
    TYPE(UINT64, uint64);
    TYPE(UINT32, uint32);
    TYPE(DOUBLE, double);
    TYPE(FLOAT,  float);
    TYPE(BOOL,   bool);
    default: UPB_ASSERT(false); break;
  }
#undef TYPE

  return ok;
}
/* Inspects the handler registered at selector "s" of "h".  If it is one of the
 * fallback scalar writers (upb_msg_set<type>), stores the matching field type
 * in *type and the writer's offset and hasbit in *offset / *hasbit, then
 * returns true.  Otherwise returns false and leaves all outputs untouched.
 *
 * The type is recovered from the writer function alone, so a field registered
 * through the int32 writer is always reported as UPB_TYPE_INT32. */
bool upb_msg_getscalarhandlerdata(const upb_handlers *h,
                                  upb_selector_t s,
                                  upb_fieldtype_t *type,
                                  size_t *offset,
                                  int32_t *hasbit) {
  const void *bound_data;
  const upb_msg_handlerdata *writer_data;
  upb_fieldtype_t detected;
  upb_func *handler = upb_handlers_gethandler(h, s, &bound_data);

  /* Identify the writer by comparing function pointers. */
  if ((upb_int64_handlerfunc*)handler == upb_msg_setint64) {
    detected = UPB_TYPE_INT64;
  } else if ((upb_int32_handlerfunc*)handler == upb_msg_setint32) {
    detected = UPB_TYPE_INT32;
  } else if ((upb_uint64_handlerfunc*)handler == upb_msg_setuint64) {
    detected = UPB_TYPE_UINT64;
  } else if ((upb_uint32_handlerfunc*)handler == upb_msg_setuint32) {
    detected = UPB_TYPE_UINT32;
  } else if ((upb_double_handlerfunc*)handler == upb_msg_setdouble) {
    detected = UPB_TYPE_DOUBLE;
  } else if ((upb_float_handlerfunc*)handler == upb_msg_setfloat) {
    detected = UPB_TYPE_FLOAT;
  } else if ((upb_bool_handlerfunc*)handler == upb_msg_setbool) {
    detected = UPB_TYPE_BOOL;
  } else {
    return false;
  }

  writer_data = bound_data;
  *type = detected;
  *offset = writer_data->offset;
  *hasbit = writer_data->hasbit;
  return true;
}

@ -0,0 +1,732 @@
/*
** upb::Handlers (upb_handlers)
**
** A upb_handlers is like a virtual table for a upb_msgdef. Each field of the
** message can have associated functions that will be called when we are
** parsing or visiting a stream of data. This is similar to how handlers work
** in SAX (the Simple API for XML).
**
** The handlers have no idea where the data is coming from, so a single set of
** handlers could be used with two completely different data sources (for
** example, a parser and a visitor over in-memory objects). This decoupling is
** the most important feature of upb, because it allows parsers and serializers
** to be highly reusable.
**
** This is a mixed C/C++ interface that offers a full API to both languages.
** See the top-level README for more information.
*/
#ifndef UPB_HANDLERS_H
#define UPB_HANDLERS_H
#include "upb/def.h"
#include "upb/table.int.h"
#include "upb/port_def.inc"
#ifdef __cplusplus
namespace upb {
class HandlersPtr;
class HandlerCache;
template <class T> class Handler;
template <class T> struct CanonicalType;
} /* namespace upb */
#endif
/* The maximum depth that the handler graph can have. This is a resource limit
* for the C stack since we sometimes need to recursively traverse the graph.
* Cycles are ok; the traversal will stop when it detects a cycle, but we must
* hit the cycle before the maximum depth is reached.
*
* If having a single static limit is too inflexible, we can add another variant
* of Handlers::Freeze that allows specifying this as a parameter. */
#define UPB_MAX_HANDLER_DEPTH 64
/* All the different types of handlers that can be registered.
* Only needed for the advanced functions in upb::Handlers.
*
* The first seven enumerators are the per-value handlers for primitive fields;
* the remaining ones come in start/end pairs for strings, submessages and
* sequences (repeated fields), plus STRING for the string data itself. */
typedef enum {
UPB_HANDLER_INT32,
UPB_HANDLER_INT64,
UPB_HANDLER_UINT32,
UPB_HANDLER_UINT64,
UPB_HANDLER_FLOAT,
UPB_HANDLER_DOUBLE,
UPB_HANDLER_BOOL,
UPB_HANDLER_STARTSTR,
UPB_HANDLER_STRING,
UPB_HANDLER_ENDSTR,
UPB_HANDLER_STARTSUBMSG,
UPB_HANDLER_ENDSUBMSG,
UPB_HANDLER_STARTSEQ,
UPB_HANDLER_ENDSEQ
} upb_handlertype_t;
/* Number of enumerators in upb_handlertype_t (one past the last one). */
#define UPB_HANDLER_MAX (UPB_HANDLER_ENDSEQ+1)
/* Value a Start* handler returns in place of a closure to interrupt
 * processing. */
#define UPB_BREAK NULL
/* A convenient definition for when no closure is needed. */
extern char _upb_noclosure;
/* Parenthesized so the address-of expression stays intact whatever operators
 * surround the macro at the point of use (e.g. UPB_NO_CLOSURE[0]). */
#define UPB_NO_CLOSURE (&_upb_noclosure)
/* A selector refers to a specific field handler in the Handlers object
* (for example: the STARTSUBMSG handler for field "field15"). */
typedef int32_t upb_selector_t;
/* Static selectors for upb::Handlers. These occupy the first
* UPB_STATIC_SELECTOR_COUNT slots; upb_handlers_getselector() adds that count
* to the field index to form a field's STARTSUBMSG selector. */
#define UPB_STARTMSG_SELECTOR 0
#define UPB_ENDMSG_SELECTOR 1
#define UPB_UNKNOWN_SELECTOR 2
#define UPB_STATIC_SELECTOR_COUNT 3 /* Warning: also in upb/def.c. */
/* Static selectors for upb::BytesHandler. These index the three-entry table
* inside upb_byteshandler. */
#define UPB_STARTSTR_SELECTOR 0
#define UPB_STRING_SELECTOR 1
#define UPB_ENDSTR_SELECTOR 2
#ifdef __cplusplus
/* Returns the address of a function-local static that belongs to this
* particular instantiation of the template. upb_bufhandle uses the returned
* pointer as a tag identifying the type T of its attached object. */
template<class T> const void *UniquePtrForType() {
static const char ch = 0;
return &ch;
}
#endif
/* upb_handlers ************************************************************/
/* Handler attributes, to be registered with the handler itself. */
typedef struct {
/* Opaque data bound to the handler; upb_handlers_gethandler() reports it
* through its handler_data out-parameter. */
const void *handler_data;
/* NOTE(review): presumably type tags describing the closure the handler
* expects and (for Start* handlers) the closure it returns -- confirm against
* the implementation, which is not part of this header. */
const void *closure_type;
const void *return_closure_type;
/* Set to true by upb_msg_setscalarhandler(), whose writers always return
* true. NOTE(review): presumably lets callers skip checking the handler's
* result -- confirm. */
bool alwaysok;
} upb_handlerattr;
/* Initializer with no data, no closure types and alwaysok == false. */
#define UPB_HANDLERATTR_INIT {NULL, NULL, NULL, false}
/* Bufhandle, data passed along with a buffer to indicate its provenance. */
typedef struct {
/* The beginning of the buffer. This may be different than the pointer
* passed to a StringBuf handler because the handler may receive data
* that is from the middle or end of a larger buffer. */
const char *buf;
/* The offset within the attached object where this buffer begins. Only
* meaningful if there is an attached object. */
size_t objofs;
/* The attached object (if any) and a pointer representing its type. */
const void *obj;
const void *objtype;
#ifdef __cplusplus
/* Attaches "_obj" and tags it with the type marker for T. */
template <class T>
void SetAttachedObject(const T* _obj) {
obj = _obj;
objtype = UniquePtrForType<T>();
}
/* Returns the attached object as a const T*, or NULL when the stored type
* marker is not the one for T. */
template <class T>
const T *GetAttachedObject() const {
return objtype == UniquePtrForType<T>() ? static_cast<const T *>(obj)
: NULL;
}
#endif
} upb_bufhandle;
/* Initializer for a handle with no buffer and no attached object. */
#define UPB_BUFHANDLE_INIT {NULL, 0, NULL, NULL}
/* Handler function typedefs.
*
* In every handler, "c" is the closure for the current frame and "hd" is the
* handler data that was bound when the handler was registered. */
/* Cleanup function, as registered through upb_handlers_addcleanup(). */
typedef void upb_handlerfree(void *d);
/* Receives "n" bytes of unknown-field data starting at "buf". */
typedef bool upb_unknown_handlerfunc(void *c, const void *hd, const char *buf,
size_t n);
typedef bool upb_startmsg_handlerfunc(void *c, const void*);
typedef bool upb_endmsg_handlerfunc(void *c, const void *, upb_status *status);
/* Start-of-field handlers return the closure for the inner frame, or
* UPB_BREAK to interrupt processing. */
typedef void* upb_startfield_handlerfunc(void *c, const void *hd);
typedef bool upb_endfield_handlerfunc(void *c, const void *hd);
/* Value handlers: one per primitive C type. */
typedef bool upb_int32_handlerfunc(void *c, const void *hd, int32_t val);
typedef bool upb_int64_handlerfunc(void *c, const void *hd, int64_t val);
typedef bool upb_uint32_handlerfunc(void *c, const void *hd, uint32_t val);
typedef bool upb_uint64_handlerfunc(void *c, const void *hd, uint64_t val);
typedef bool upb_float_handlerfunc(void *c, const void *hd, float val);
typedef bool upb_double_handlerfunc(void *c, const void *hd, double val);
typedef bool upb_bool_handlerfunc(void *c, const void *hd, bool val);
/* String handlers: start-of-string (returns the string's closure) and
* per-buffer data (returns the number of bytes consumed). */
typedef void *upb_startstr_handlerfunc(void *c, const void *hd,
size_t size_hint);
typedef size_t upb_string_handlerfunc(void *c, const void *hd, const char *buf,
size_t n, const upb_bufhandle* handle);
/* The handlers object itself is opaque to users of this header. */
struct upb_handlers;
typedef struct upb_handlers upb_handlers;
#ifdef __cplusplus
extern "C" {
#endif
/* Mutating accessors.
*
* NOTE(review): the implementations are outside this header. The C++ wrappers
* in upb::HandlersPtr document the field setters as returning false when the
* handler fails to register (e.g. "f" does not belong to this message or has
* the wrong kind for the handler). */
const upb_status *upb_handlers_status(upb_handlers *h);
void upb_handlers_clearerr(upb_handlers *h);
const upb_msgdef *upb_handlers_msgdef(const upb_handlers *h);
/* Registers "hfree" to be called on "p" when the handlers are freed. */
bool upb_handlers_addcleanup(upb_handlers *h, void *p, upb_handlerfree *hfree);
/* Message-level handlers (not tied to a field). */
bool upb_handlers_setunknown(upb_handlers *h, upb_unknown_handlerfunc *func,
const upb_handlerattr *attr);
bool upb_handlers_setstartmsg(upb_handlers *h, upb_startmsg_handlerfunc *func,
const upb_handlerattr *attr);
bool upb_handlers_setendmsg(upb_handlers *h, upb_endmsg_handlerfunc *func,
const upb_handlerattr *attr);
/* Value handlers for primitive fields, one per handler type. */
bool upb_handlers_setint32(upb_handlers *h, const upb_fielddef *f,
upb_int32_handlerfunc *func,
const upb_handlerattr *attr);
bool upb_handlers_setint64(upb_handlers *h, const upb_fielddef *f,
upb_int64_handlerfunc *func,
const upb_handlerattr *attr);
bool upb_handlers_setuint32(upb_handlers *h, const upb_fielddef *f,
upb_uint32_handlerfunc *func,
const upb_handlerattr *attr);
bool upb_handlers_setuint64(upb_handlers *h, const upb_fielddef *f,
upb_uint64_handlerfunc *func,
const upb_handlerattr *attr);
bool upb_handlers_setfloat(upb_handlers *h, const upb_fielddef *f,
upb_float_handlerfunc *func,
const upb_handlerattr *attr);
bool upb_handlers_setdouble(upb_handlers *h, const upb_fielddef *f,
upb_double_handlerfunc *func,
const upb_handlerattr *attr);
bool upb_handlers_setbool(upb_handlers *h, const upb_fielddef *f,
upb_bool_handlerfunc *func,
const upb_handlerattr *attr);
/* String field handlers: start, data, end. */
bool upb_handlers_setstartstr(upb_handlers *h, const upb_fielddef *f,
upb_startstr_handlerfunc *func,
const upb_handlerattr *attr);
bool upb_handlers_setstring(upb_handlers *h, const upb_fielddef *f,
upb_string_handlerfunc *func,
const upb_handlerattr *attr);
bool upb_handlers_setendstr(upb_handlers *h, const upb_fielddef *f,
upb_endfield_handlerfunc *func,
const upb_handlerattr *attr);
/* Sequence (repeated field) and submessage start/end handlers. */
bool upb_handlers_setstartseq(upb_handlers *h, const upb_fielddef *f,
upb_startfield_handlerfunc *func,
const upb_handlerattr *attr);
bool upb_handlers_setstartsubmsg(upb_handlers *h, const upb_fielddef *f,
upb_startfield_handlerfunc *func,
const upb_handlerattr *attr);
bool upb_handlers_setendsubmsg(upb_handlers *h, const upb_fielddef *f,
upb_endfield_handlerfunc *func,
const upb_handlerattr *attr);
bool upb_handlers_setendseq(upb_handlers *h, const upb_fielddef *f,
upb_endfield_handlerfunc *func,
const upb_handlerattr *attr);
/* Read-only accessors. */
/* Subhandlers for a submessage field, looked up by field or by selector. */
const upb_handlers *upb_handlers_getsubhandlers(const upb_handlers *h,
const upb_fielddef *f);
const upb_handlers *upb_handlers_getsubhandlers_sel(const upb_handlers *h,
upb_selector_t sel);
/* Returns the handler function stored at selector "s" and reports its bound
* handler data through *handler_data. */
upb_func *upb_handlers_gethandler(const upb_handlers *h, upb_selector_t s,
const void **handler_data);
bool upb_handlers_getattr(const upb_handlers *h, upb_selector_t s,
upb_handlerattr *attr);
/* "Static" methods */
/* Maps a primitive field's type to the matching UPB_HANDLER_* value. */
upb_handlertype_t upb_handlers_getprimitivehandlertype(const upb_fielddef *f);
/* Computes the selector for handlers of "type" on field "f"; returns false if
* that handler type does not apply to the field. */
bool upb_handlers_getselector(const upb_fielddef *f, upb_handlertype_t type,
upb_selector_t *s);
/* Given a start selector, returns the selector immediately after it.
 * NOTE(review): upb_handlers_getselector() places ENDSEQ directly after
 * STARTSEQ, so this holds for sequences; for strings the slot after STARTSTR
 * is assigned to ENDSTR only when the field's STRING selector is elsewhere,
 * and STARTSUBMSG/ENDSUBMSG are not laid out adjacently -- confirm intended
 * use with callers. */
UPB_INLINE upb_selector_t upb_handlers_getendselector(upb_selector_t start) {
  const upb_selector_t following = start + 1;
  return following;
}
#ifdef __cplusplus
} /* extern "C" */
namespace upb {
/* C++ name for the C handlers type; both refer to the same struct. */
typedef upb_handlers Handlers;
}
/* Convenience macros for creating a Handler object that is wrapped with a
* type-safe wrapper function that converts the "void*" parameters/returns
* of the underlying C API into nice C++ function.
*
* Sample usage:
* void OnValue1(MyClosure* c, const MyHandlerData* d, int32_t val) {
* // do stuff ...
* }
*
* // Handler that doesn't need any data bound to it.
* void OnValue2(MyClosure* c, int32_t val) {
* // do stuff ...
* }
*
* // Handler that returns bool so it can return failure if necessary.
* bool OnValue3(MyClosure* c, int32_t val) {
* // do stuff ...
* return ok;
* }
*
* // Member function handler.
* class MyClosure {
* public:
* void OnValue(int32_t val) {
* // do stuff ...
* }
* };
*
* // Takes ownership of the MyHandlerData.
* handlers->SetInt32Handler(f1, UpbBind(OnValue1, new MyHandlerData(...)));
* handlers->SetInt32Handler(f2, UpbMakeHandler(OnValue2));
* handlers->SetInt32Handler(f1, UpbMakeHandler(OnValue3));
* handlers->SetInt32Handler(f2, UpbMakeHandler(&MyClosure::OnValue));
*/
/* In C++11, the "template" disambiguator can appear even outside templates,
* so all calls can safely use this pair of macros. */
#define UpbMakeHandler(f) upb::MatchFunc(f).template GetFunc<f>()
/* We have to be careful to only evaluate "d" once. */
#define UpbBind(f, d) upb::MatchFunc(f).template GetFunc<f>((d))
/* Handler: a struct that contains the (handler, data, deleter) tuple that is
* used to register all handlers. Users can Make() these directly but it's
* more convenient to use the UpbMakeHandler/UpbBind macros above. */
template <class T> class upb::Handler {
public:
/* The underlying, handler function signature that upb uses internally. */
typedef T FuncPtr;
/* Intentionally implicit. */
template <class F> Handler(F func);
/* A Handler is expected to have been registered by the time it is destroyed;
* the assertion catches handlers that were built but never used. */
~Handler() { UPB_ASSERT(registered_); }
/* Called by the HandlersPtr::Set*Handler methods before registration.
* NOTE(review): defined outside this chunk; presumably hands cleanup_data_ /
* cleanup_func_ to "h" and marks the handler registered -- confirm. */
void AddCleanup(upb_handlers* h) const;
FuncPtr handler() const { return handler_; }
const upb_handlerattr& attr() const { return attr_; }
private:
/* Non-copyable. */
Handler(const Handler&) = delete;
Handler& operator=(const Handler&) = delete;
FuncPtr handler_;
/* Mutable because AddCleanup() is const. */
mutable upb_handlerattr attr_;
mutable bool registered_;
void *cleanup_data_;
upb_handlerfree *cleanup_func_;
};
/* A upb::Handlers object represents the set of handlers associated with a
* message in the graph of messages. You can think of it as a big virtual
* table with functions corresponding to all the events that can fire while
* parsing or visiting a message of a specific type.
*
* Any handlers that are not set behave as if they had successfully consumed
* the value. Any unset Start* handlers will propagate their closure to the
* inner frame.
*
* The easiest way to create the *Handler objects needed by the Set* methods is
* with the UpbBind() and UpbMakeHandler() macros; see above. */
class upb::HandlersPtr {
public:
/* Non-owning wrapper around an existing upb_handlers. */
HandlersPtr(upb_handlers* ptr) : ptr_(ptr) {}
upb_handlers* ptr() const { return ptr_; }
typedef upb_selector_t Selector;
typedef upb_handlertype_t Type;
/* Handler wrapper types; the function signatures mirror the C
* upb_*_handlerfunc typedefs. */
typedef Handler<void *(*)(void *, const void *)> StartFieldHandler;
typedef Handler<bool (*)(void *, const void *)> EndFieldHandler;
typedef Handler<bool (*)(void *, const void *)> StartMessageHandler;
typedef Handler<bool (*)(void *, const void *, upb_status *)>
EndMessageHandler;
typedef Handler<void *(*)(void *, const void *, size_t)> StartStringHandler;
typedef Handler<size_t (*)(void *, const void *, const char *, size_t,
const upb_bufhandle *)>
StringHandler;
template <class T> struct ValueHandler {
typedef Handler<bool(*)(void *, const void *, T)> H;
};
typedef ValueHandler<int32_t>::H Int32Handler;
typedef ValueHandler<int64_t>::H Int64Handler;
typedef ValueHandler<uint32_t>::H UInt32Handler;
typedef ValueHandler<uint64_t>::H UInt64Handler;
typedef ValueHandler<float>::H FloatHandler;
typedef ValueHandler<double>::H DoubleHandler;
typedef ValueHandler<bool>::H BoolHandler;
/* Any function pointer can be converted to this and converted back to its
* correct type. */
typedef void GenericFunction();
typedef void HandlersCallback(const void *closure, upb_handlers *h);
/* Returns the msgdef associated with this handlers object. */
MessageDefPtr message_def() const {
return MessageDefPtr(upb_handlers_msgdef(ptr()));
}
/* Adds the given pointer and function to the list of cleanup functions that
* will be run when these handlers are freed. If this pointer has previously
* been registered, the function returns false and does nothing. */
bool AddCleanup(void *ptr, upb_handlerfree *cleanup) {
return upb_handlers_addcleanup(ptr_, ptr, cleanup);
}
/* Sets the startmsg handler for the message, which is defined as follows:
*
* bool startmsg(MyType* closure) {
* // Called when the message begins. Returns true if processing should
* // continue.
* return true;
* }
*/
bool SetStartMessageHandler(const StartMessageHandler &h) {
h.AddCleanup(ptr());
return upb_handlers_setstartmsg(ptr(), h.handler(), &h.attr());
}
/* Sets the endmsg handler for the message, which is defined as follows:
*
* bool endmsg(MyType* closure, upb_status *status) {
* // Called when processing of this message ends, whether in success or
* // failure. "status" indicates the final status of processing, and
* // can also be modified in-place to update the final status.
* }
*/
bool SetEndMessageHandler(const EndMessageHandler& h) {
h.AddCleanup(ptr());
return upb_handlers_setendmsg(ptr(), h.handler(), &h.attr());
}
/* Sets the value handler for the given field, which is defined as follows
* (this is for an int32 field; other field types will pass their native
* C/C++ type for "val"):
*
* bool OnValue(MyClosure* c, const MyHandlerData* d, int32_t val) {
* // Called when the field's value is encountered. "d" contains
* // whatever data was bound to this field when it was registered.
* // Returns true if processing should continue.
* return true;
* }
*
* handlers->SetInt32Handler(f, UpbBind(OnValue, new MyHandlerData(...)));
*
* The value type must exactly match f->type().
* For example, a handler that takes an int32_t parameter may only be used for
* fields of type UPB_TYPE_INT32 and UPB_TYPE_ENUM.
*
* Returns false if the handler failed to register; in this case the cleanup
* handler (if any) will be called immediately.
*/
bool SetInt32Handler(FieldDefPtr f, const Int32Handler &h) {
h.AddCleanup(ptr());
return upb_handlers_setint32(ptr(), f.ptr(), h.handler(), &h.attr());
}
bool SetInt64Handler (FieldDefPtr f, const Int64Handler& h) {
h.AddCleanup(ptr());
return upb_handlers_setint64(ptr(), f.ptr(), h.handler(), &h.attr());
}
bool SetUInt32Handler(FieldDefPtr f, const UInt32Handler& h) {
h.AddCleanup(ptr());
return upb_handlers_setuint32(ptr(), f.ptr(), h.handler(), &h.attr());
}
bool SetUInt64Handler(FieldDefPtr f, const UInt64Handler& h) {
h.AddCleanup(ptr());
return upb_handlers_setuint64(ptr(), f.ptr(), h.handler(), &h.attr());
}
bool SetFloatHandler (FieldDefPtr f, const FloatHandler& h) {
h.AddCleanup(ptr());
return upb_handlers_setfloat(ptr(), f.ptr(), h.handler(), &h.attr());
}
bool SetDoubleHandler(FieldDefPtr f, const DoubleHandler& h) {
h.AddCleanup(ptr());
return upb_handlers_setdouble(ptr(), f.ptr(), h.handler(), &h.attr());
}
bool SetBoolHandler(FieldDefPtr f, const BoolHandler &h) {
h.AddCleanup(ptr());
return upb_handlers_setbool(ptr(), f.ptr(), h.handler(), &h.attr());
}
/* Like the previous, but templated on the type of the value (i.e. int32).
* This is mostly useful to call from other templates. To call this you must
* specify the template parameter explicitly, i.e.:
* h->SetValueHandler<T>(f, UpbBind(MyHandler<T>, MyData)); */
template <class T>
bool SetValueHandler(
FieldDefPtr f,
const typename ValueHandler<typename CanonicalType<T>::Type>::H &handler);
/* Sets handlers for a string field, which are defined as follows:
*
* MySubClosure* startstr(MyClosure* c, const MyHandlerData* d,
* size_t size_hint) {
* // Called when a string value begins. The return value indicates the
* // closure for the string. "size_hint" indicates the size of the
* // string if it is known, however if the string is length-delimited
* // and the end-of-string is not available size_hint will be zero.
* // This case is indistinguishable from the case where the size is
* // known to be zero.
* //
* // TODO(haberman): is it important to distinguish these cases?
* // If we had ssize_t as a type we could make -1 "unknown", but
* // ssize_t is POSIX (not ANSI) and therefore less portable.
* // In practice I suspect it won't be important to distinguish.
* return closure;
* }
*
* size_t str(MyClosure* closure, const MyHandlerData* d,
* const char *str, size_t len) {
* // Called for each buffer of string data; the multiple physical buffers
* // are all part of the same logical string. The return value indicates
* // how many bytes were consumed. If this number is less than "len",
* // this will also indicate that processing should be halted for now,
* // like returning false or UPB_BREAK from any other callback. If
* // number is greater than "len", the excess bytes will be skipped over
* // and not passed to the callback.
* return len;
* }
*
* bool endstr(MyClosure* c, const MyHandlerData* d) {
* // Called when a string value ends. Return value indicates whether
* // processing should continue.
* return true;
* }
*/
bool SetStartStringHandler(FieldDefPtr f, const StartStringHandler &h) {
h.AddCleanup(ptr());
return upb_handlers_setstartstr(ptr(), f.ptr(), h.handler(), &h.attr());
}
bool SetStringHandler(FieldDefPtr f, const StringHandler& h) {
h.AddCleanup(ptr());
return upb_handlers_setstring(ptr(), f.ptr(), h.handler(), &h.attr());
}
bool SetEndStringHandler(FieldDefPtr f, const EndFieldHandler& h) {
h.AddCleanup(ptr());
return upb_handlers_setendstr(ptr(), f.ptr(), h.handler(), &h.attr());
}
/* Sets the startseq handler, which is defined as follows:
*
* MySubClosure *startseq(MyClosure* c, const MyHandlerData* d) {
* // Called when a sequence (repeated field) begins. The returned
* // pointer indicates the closure for the sequence (or UPB_BREAK
* // to interrupt processing).
* return closure;
* }
*
* h->SetStartSequenceHandler(f, UpbBind(startseq, new MyHandlerData(...)));
*
* Returns "false" if "f" does not belong to this message or is not a
* repeated field.
*/
bool SetStartSequenceHandler(FieldDefPtr f, const StartFieldHandler &h) {
h.AddCleanup(ptr());
return upb_handlers_setstartseq(ptr(), f.ptr(), h.handler(), &h.attr());
}
/* Sets the startsubmsg handler for the given field, which is defined as
* follows:
*
* MySubClosure* startsubmsg(MyClosure* c, const MyHandlerData* d) {
* // Called when a submessage begins. The returned pointer indicates the
* // closure for the submessage (or UPB_BREAK to interrupt processing).
* return closure;
* }
*
* h->SetStartSubMessageHandler(f, UpbBind(startsubmsg,
* new MyHandlerData(...)));
*
* Returns "false" if "f" does not belong to this message or is not a
* submessage/group field.
*/
bool SetStartSubMessageHandler(FieldDefPtr f, const StartFieldHandler& h) {
h.AddCleanup(ptr());
return upb_handlers_setstartsubmsg(ptr(), f.ptr(), h.handler(), &h.attr());
}
/* Sets the endsubmsg handler for the given field, which is defined as
* follows:
*
* bool endsubmsg(MyClosure* c, const MyHandlerData* d) {
* // Called when a submessage ends. Returns true to continue processing.
* return true;
* }
*
* Returns "false" if "f" does not belong to this message or is not a
* submessage/group field.
*/
bool SetEndSubMessageHandler(FieldDefPtr f, const EndFieldHandler &h) {
h.AddCleanup(ptr());
return upb_handlers_setendsubmsg(ptr(), f.ptr(), h.handler(), &h.attr());
}
/* Sets the endseq handler for the given field, which is defined as
* follows:
*
* bool endseq(MyClosure* c, const MyHandlerData* d) {
* // Called when a sequence ends. Returns true to continue processing.
* return true;
* }
*
* Returns "false" if "f" does not belong to this message or is not a
* repeated field.
*/
bool SetEndSequenceHandler(FieldDefPtr f, const EndFieldHandler &h) {
h.AddCleanup(ptr());
return upb_handlers_setendseq(ptr(), f.ptr(), h.handler(), &h.attr());
}
private:
/* The wrapped handlers object; not owned by this wrapper. */
upb_handlers* ptr_;
};
#endif /* __cplusplus */
/* upb_handlercache ***********************************************************/
/* A upb_handlercache lazily builds and caches upb_handlers. You pass it a
* function (with optional closure) that can build handlers for a given
* message on-demand, and the cache maintains a map of msgdef->handlers. */
#ifdef __cplusplus
extern "C" {
#endif
struct upb_handlercache;
typedef struct upb_handlercache upb_handlercache;
/* Builder callback: invoked with the cache's closure and each newly created
* handlers object so it can register handlers on it. */
typedef void upb_handlers_callback(const void *closure, upb_handlers *h);
/* Creates a cache; returns NULL on allocation failure. */
upb_handlercache *upb_handlercache_new(upb_handlers_callback *callback,
const void *closure);
void upb_handlercache_free(upb_handlercache *cache);
/* Returns the handlers for "md", building them (and the handlers of its
* submessage fields) on first use. Returns NULL on failure. */
const upb_handlers *upb_handlercache_get(upb_handlercache *cache,
const upb_msgdef *md);
/* Registers a cleanup function for "p" on the cache. */
bool upb_handlercache_addcleanup(upb_handlercache *h, void *p,
upb_handlerfree *hfree);
#ifdef __cplusplus
} /* extern "C" */
/* Owning C++ wrapper: holds the upb_handlercache in a unique_ptr whose
* deleter is upb_handlercache_free. Movable, not copyable. */
class upb::HandlerCache {
public:
/* Creates a new cache. NOTE(review): upb_handlercache_new() can return NULL,
* in which case ptr() is null and Get() must not be called. */
HandlerCache(upb_handlers_callback *callback, const void *closure)
: ptr_(upb_handlercache_new(callback, closure), upb_handlercache_free) {}
HandlerCache(HandlerCache&&) = default;
HandlerCache& operator=(HandlerCache&&) = default;
/* Takes ownership of an existing cache. */
HandlerCache(upb_handlercache* c) : ptr_(c, upb_handlercache_free) {}
upb_handlercache* ptr() { return ptr_.get(); }
const upb_handlers *Get(MessageDefPtr md) {
return upb_handlercache_get(ptr_.get(), md.ptr());
}
private:
std::unique_ptr<upb_handlercache, decltype(&upb_handlercache_free)> ptr_;
};
#endif /* __cplusplus */
/* upb_byteshandler ***********************************************************/
/* One entry of a handler table: the handler function plus its attributes. */
typedef struct {
/* The handler, stored as a generic function pointer; callers cast it back
* to the handler's real type. */
upb_func *func;
/* It is wasteful to include the entire attributes here:
*
* * Some of the information is redundant (like storing the closure type
* separately for each handler that must match).
* * Some of the info is only needed prior to freeze() (like closure types).
* * alignment padding wastes a lot of space for alwaysok_.
*
* If/when the size and locality of handlers is an issue, we can optimize this
* not to store the entire attr like this. We do not expose the table's
* layout to allow this optimization in the future. */
upb_handlerattr attr;
} upb_handlers_tabent;
/* Initializer for an entry with no handler and default attributes. */
#define UPB_TABENT_INIT {NULL, UPB_HANDLERATTR_INIT}
/* Handlers for a stream of bytes: a fixed table of three entries indexed by
* UPB_STARTSTR_SELECTOR, UPB_STRING_SELECTOR and UPB_ENDSTR_SELECTOR. */
typedef struct {
upb_handlers_tabent table[3];
} upb_byteshandler;
/* Initializer that leaves all three entries empty. */
#define UPB_BYTESHANDLER_INIT \
{ \
{ UPB_TABENT_INIT, UPB_TABENT_INIT, UPB_TABENT_INIT } \
}
/* Resets "handler" to the state described by UPB_BYTESHANDLER_INIT, i.e. with
 * all three table entries empty. */
UPB_INLINE void upb_byteshandler_init(upb_byteshandler *handler) {
  upb_byteshandler empty = UPB_BYTESHANDLER_INIT;

  *handler = empty;
}
#ifdef __cplusplus
extern "C" {
#endif
/* Caller must ensure that "d" outlives the handlers. */
/* Each setter stores "func" and its handler data "d" in the corresponding
* table entry of "h"; the implementations in this file always return true. */
bool upb_byteshandler_setstartstr(upb_byteshandler *h,
upb_startstr_handlerfunc *func, void *d);
bool upb_byteshandler_setstring(upb_byteshandler *h,
upb_string_handlerfunc *func, void *d);
bool upb_byteshandler_setendstr(upb_byteshandler *h,
upb_endfield_handlerfunc *func, void *d);
#ifdef __cplusplus
} /* extern "C" */
namespace upb {
/* C++ name for the C bytes-handler type. */
typedef upb_byteshandler BytesHandler;
}
#endif
/** Message handlers ******************************************************************/
#ifdef __cplusplus
extern "C" {
#endif
/* These are the handlers used internally by upb_msgfactory_getmergehandlers().
* They write scalar data to a known offset from the message pointer.
*
* These would be trivial for anyone to implement themselves, but it's better
* to use these because some JITs will recognize and specialize these instead
* of actually calling the function. */
/* Sets a handler for the given primitive field that will write the data at the
* given offset. If hasbit > 0, also sets a hasbit at the given bit offset
* (addressing each byte low to high).
*
* Returns false if the handler could not be registered. */
bool upb_msg_setscalarhandler(upb_handlers *h,
const upb_fielddef *f,
size_t offset,
int32_t hasbit);
/* If the handler at selector "s" is one of the scalar writers installed by
* upb_msg_setscalarhandler(), returns true and sets *type, *offset and
* *hasbit. Otherwise returns false and leaves the outputs untouched. */
bool upb_msg_getscalarhandlerdata(const upb_handlers *h,
upb_selector_t s,
upb_fieldtype_t *type,
size_t *offset,
int32_t *hasbit);
#ifdef __cplusplus
} /* extern "C" */
#endif
#include "upb/port_undef.inc"
#include "upb/handlers-inl.h"
#endif /* UPB_HANDLERS_H */

@ -0,0 +1,140 @@
/*
** upb::json::Parser (upb_json_parser)
**
** Parses JSON according to a specific schema.
** Support for parsing arbitrary JSON (schema-less) will be added later.
*/
#ifndef UPB_JSON_PARSER_H_
#define UPB_JSON_PARSER_H_
#include "upb/sink.h"
#ifdef __cplusplus
namespace upb {
namespace json {
class CodeCache;
class ParserPtr;
class ParserMethodPtr;
} /* namespace json */
} /* namespace upb */
#endif
/* upb_json_parsermethod ******************************************************/
/* Opaque parser method; obtained from upb_json_codecache_get(). */
struct upb_json_parsermethod;
typedef struct upb_json_parsermethod upb_json_parsermethod;
#ifdef __cplusplus
extern "C" {
#endif
/* Returns the bytes handler associated with method "m". */
const upb_byteshandler* upb_json_parsermethod_inputhandler(
const upb_json_parsermethod* m);
#ifdef __cplusplus
} /* extern "C" */
/* Non-owning C++ wrapper around a const upb_json_parsermethod pointer. */
class upb::json::ParserMethodPtr {
public:
ParserMethodPtr() : ptr_(nullptr) {}
ParserMethodPtr(const upb_json_parsermethod* ptr) : ptr_(ptr) {}
const upb_json_parsermethod* ptr() const { return ptr_; }
/* Forwards to upb_json_parsermethod_inputhandler(). */
const BytesHandler* input_handler() const {
return upb_json_parsermethod_inputhandler(ptr());
}
private:
const upb_json_parsermethod* ptr_;
};
#endif /* __cplusplus */
/* upb_json_parser ************************************************************/
/* Preallocation hint: parser won't allocate more bytes than this when first
* constructed. This hint may be an overestimate for some build configurations.
* But if the parser library is upgraded without recompiling the application,
* it may be an underestimate. */
#define UPB_JSON_PARSER_SIZE 5712
/* Opaque parser instance. */
struct upb_json_parser;
typedef struct upb_json_parser upb_json_parser;
#ifdef __cplusplus
extern "C" {
#endif
/* Creates a parser for method "m" that pushes its results to "output".
* NOTE(review): the parser is presumably allocated from arena "a" and reports
* errors through "status"; "symtab" may be NULL according to the C++ wrapper
* below -- confirm against the implementation. */
upb_json_parser* upb_json_parser_create(upb_arena* a,
const upb_json_parsermethod* m,
const upb_symtab* symtab,
upb_sink output,
upb_status *status,
bool ignore_json_unknown);
/* Returns the bytes sink that JSON input should be written to. */
upb_bytessink upb_json_parser_input(upb_json_parser* p);
#ifdef __cplusplus
} /* extern "C" */
/* Parses an incoming BytesStream, pushing the results to the destination
* sink. */
class upb::json::ParserPtr {
public:
ParserPtr(upb_json_parser* ptr) : ptr_(ptr) {}
/* Wraps upb_json_parser_create(). "symtab" may be null; "arena" and
* "status" are dereferenced unconditionally and must not be null. */
static ParserPtr Create(Arena* arena, ParserMethodPtr method,
SymbolTable* symtab, Sink output, Status* status,
bool ignore_json_unknown) {
upb_symtab* symtab_ptr = symtab ? symtab->ptr() : nullptr;
return ParserPtr(upb_json_parser_create(
arena->ptr(), method.ptr(), symtab_ptr, output.sink(), status->ptr(),
ignore_json_unknown));
}
BytesSink input() { return upb_json_parser_input(ptr_); }
private:
upb_json_parser* ptr_;
};
#endif /* __cplusplus */
/* upb_json_codecache *********************************************************/
/* Lazily builds and caches decoder methods that will push data to the given
* handlers. The upb_symtab object(s) must outlive this object. */
struct upb_json_codecache;
typedef struct upb_json_codecache upb_json_codecache;
#ifdef __cplusplus
extern "C" {
#endif
upb_json_codecache *upb_json_codecache_new(void);
void upb_json_codecache_free(upb_json_codecache *cache);
const upb_json_parsermethod* upb_json_codecache_get(upb_json_codecache* cache,
const upb_msgdef* md);
#ifdef __cplusplus
} /* extern "C" */
/* C++ owner of a upb_json_codecache: the cache is created in the constructor
 * and released with upb_json_codecache_free() by the unique_ptr deleter. */
class upb::json::CodeCache {
public:
CodeCache() : ptr_(upb_json_codecache_new(), upb_json_codecache_free) {}
/* Returns the ParserMethodPtr that upb_json_codecache_get() yields for |md|.
 * If a suitable method already exists, it will be returned from the cache. */
ParserMethodPtr Get(MessageDefPtr md) {
return upb_json_codecache_get(ptr_.get(), md.ptr());
}
private:
std::unique_ptr<upb_json_codecache, decltype(&upb_json_codecache_free)> ptr_;
};
#endif
#endif /* UPB_JSON_PARSER_H_ */

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

@ -0,0 +1,72 @@
/*
** upb::json::Printer
**
** Handlers that emit JSON according to a specific protobuf schema.
*/
#ifndef UPB_JSON_TYPED_PRINTER_H_
#define UPB_JSON_TYPED_PRINTER_H_
#include "upb/sink.h"
#ifdef __cplusplus
namespace upb {
namespace json {
class PrinterPtr;
} /* namespace json */
} /* namespace upb */
#endif
/* upb_json_printer ***********************************************************/
#define UPB_JSON_PRINTER_SIZE 192
struct upb_json_printer;
typedef struct upb_json_printer upb_json_printer;
#ifdef __cplusplus
extern "C" {
#endif
/* Native C API. */
upb_json_printer *upb_json_printer_create(upb_arena *a, const upb_handlers *h,
upb_bytessink output);
upb_sink upb_json_printer_input(upb_json_printer *p);
const upb_handlers *upb_json_printer_newhandlers(const upb_msgdef *md,
bool preserve_fieldnames,
const void *owner);
/* Lazily builds and caches handlers that will push encoded data to a bytessink.
* Any msgdef objects used with this object must outlive it. */
upb_handlercache *upb_json_printer_newcache(bool preserve_proto_fieldnames);
#ifdef __cplusplus
} /* extern "C" */
/* Prints an incoming stream of data to a BytesSink in JSON format. This is a
 * C++ handle around a upb_json_printer pointer; the class declares no
 * destructor, so it never frees the printer itself. */
class upb::json::PrinterPtr {
public:
/* Wraps an existing printer pointer. */
PrinterPtr(upb_json_printer* ptr) : ptr_(ptr) {}
/* Creates a printer through upb_json_printer_create(). |arena| is
 * dereferenced unconditionally and therefore must be non-null. */
static PrinterPtr Create(Arena *arena, const upb::Handlers *handlers,
BytesSink output) {
return PrinterPtr(
upb_json_printer_create(arena->ptr(), handlers, output.sink()));
}
/* The input to the printer. */
Sink input() { return upb_json_printer_input(ptr_); }
/* Mirrors the C-level UPB_JSON_PRINTER_SIZE constant. */
static const size_t kSize = UPB_JSON_PRINTER_SIZE;
/* Forwards to upb_json_printer_newcache(). */
static HandlerCache NewCache(bool preserve_proto_fieldnames) {
return upb_json_printer_newcache(preserve_proto_fieldnames);
}
private:
upb_json_printer* ptr_;
};
#endif /* __cplusplus */
#endif /* UPB_JSON_TYPED_PRINTER_H_ */

@ -0,0 +1,399 @@
#include "upb/legacy_msg_reflection.h"
#include <string.h>
#include "upb/table.int.h"
#include "upb/msg.h"
#include "upb/port_def.inc"
/* Reports whether |type| may be used as a map key: bool, the 32- and 64-bit
 * integer types, and string qualify; everything else does not. */
bool upb_fieldtype_mapkeyok(upb_fieldtype_t type) {
  switch (type) {
    case UPB_TYPE_BOOL:
    case UPB_TYPE_INT32:
    case UPB_TYPE_UINT32:
    case UPB_TYPE_INT64:
    case UPB_TYPE_UINT64:
    case UPB_TYPE_STRING:
      return true;
    default:
      return false;
  }
}
#define PTR_AT(msg, ofs, type) (type*)((char*)msg + ofs)
#define VOIDPTR_AT(msg, ofs) PTR_AT(msg, ofs, void)
#define ENCODE_MAX_NESTING 64
#define CHECK_TRUE(x) if (!(x)) { return false; }
/** upb_msgval ****************************************************************/
/* These functions will generate real memcpy() calls on ARM sadly, because
* the compiler assumes they might not be aligned. */
/* Copies |size| bytes located |ofs| bytes past |p| into a upb_msgval and
 * returns it. Bytes of the union beyond |size| are left unset. */
static upb_msgval upb_msgval_read(const void *p, size_t ofs,
                                  uint8_t size) {
  upb_msgval result;
  const char *src = (const char*)p + ofs;
  memcpy(&result, src, size);
  return result;
}
/* Copies the first |size| bytes of |val| to the address |ofs| bytes past
 * |p|. */
static void upb_msgval_write(void *p, size_t ofs, upb_msgval val,
                             uint8_t size) {
  char *dst = (char*)p + ofs;
  memcpy(dst, &val, size);
}
/* Returns the number of bytes used to store a single value of |type|. */
static size_t upb_msgval_sizeof(upb_fieldtype_t type) {
  switch (type) {
    case UPB_TYPE_BOOL:
      return 1;
    case UPB_TYPE_FLOAT:
    case UPB_TYPE_INT32:
    case UPB_TYPE_UINT32:
    case UPB_TYPE_ENUM:
      return 4;
    case UPB_TYPE_DOUBLE:
    case UPB_TYPE_INT64:
    case UPB_TYPE_UINT64:
      return 8;
    case UPB_TYPE_STRING:
    case UPB_TYPE_BYTES:
      return sizeof(upb_strview);
    case UPB_TYPE_MESSAGE:
      return sizeof(void*);
  }
  UPB_UNREACHABLE();
}
static uint8_t upb_msg_fieldsize(const upb_msglayout_field *field) {
if (field->label == UPB_LABEL_REPEATED) {
return sizeof(void*);
} else {
return upb_msgval_sizeof(upb_desctype_to_fieldtype[field->descriptortype]);
}
}
/* TODO(haberman): this is broken right now because upb_msgval can contain
* a char* / size_t pair, which is too big for a upb_value. To fix this
* we'll probably need to dynamically allocate a upb_msgval and store a
* pointer to that in the tables for extensions/maps. */
/* Placeholder conversion: ignores |val| and returns an all-zero upb_value. */
static upb_value upb_toval(upb_msgval val) {
  upb_value zeroed;
  UPB_UNUSED(val);
  memset(&zeroed, 0, sizeof(zeroed));  /* XXX */
  return zeroed;
}
/* Placeholder conversion: ignores |val| and returns an all-zero upb_msgval. */
static upb_msgval upb_msgval_fromval(upb_value val) {
  upb_msgval zeroed;
  UPB_UNUSED(val);
  memset(&zeroed, 0, sizeof(zeroed));  /* XXX */
  return zeroed;
}
/* Maps a field type to the table value type used to store it. Strings, bytes
 * and messages are all stored as const pointers; enums as int32. */
static upb_ctype_t upb_fieldtotabtype(upb_fieldtype_t type) {
  switch (type) {
    case UPB_TYPE_BOOL:
      return UPB_CTYPE_BOOL;
    case UPB_TYPE_FLOAT:
      return UPB_CTYPE_FLOAT;
    case UPB_TYPE_DOUBLE:
      return UPB_CTYPE_DOUBLE;
    case UPB_TYPE_INT32:
    case UPB_TYPE_ENUM:
      return UPB_CTYPE_INT32;
    case UPB_TYPE_UINT32:
      return UPB_CTYPE_UINT32;
    case UPB_TYPE_INT64:
      return UPB_CTYPE_INT64;
    case UPB_TYPE_UINT64:
      return UPB_CTYPE_UINT64;
    case UPB_TYPE_STRING:
    case UPB_TYPE_BYTES:
    case UPB_TYPE_MESSAGE:
      return UPB_CTYPE_CONSTPTR;
    default:
      UPB_ASSERT(false);
      return 0;
  }
}
/** upb_msg *******************************************************************/
/* If we always read/write as a consistent type to each address, this shouldn't
* violate aliasing.
*/
#define DEREF(msg, ofs, type) *PTR_AT(msg, ofs, type)
/* Asserts that |field_index| is within the layout's field array and returns a
 * pointer to that entry. */
static const upb_msglayout_field *upb_msg_checkfield(int field_index,
                                                     const upb_msglayout *l) {
  UPB_ASSERT(field_index >= 0);
  UPB_ASSERT(field_index < l->field_count);
  return l->fields + field_index;
}
/* A negative presence value marks a field that lives in a oneof. */
static bool upb_msg_inoneof(const upb_msglayout_field *field) {
  const bool in_oneof = field->presence < 0;
  return in_oneof;
}
/* Returns a pointer to the oneof case word for the given oneof field. The
 * case lives at byte offset ~presence inside the message. */
static uint32_t *upb_msg_oneofcase(const upb_msg *msg, int field_index,
                                   const upb_msglayout *l) {
  const upb_msglayout_field *field = upb_msg_checkfield(field_index, l);
  int case_ofs;
  UPB_ASSERT(upb_msg_inoneof(field));
  case_ofs = ~field->presence;
  return PTR_AT(msg, case_ofs, uint32_t);
}
/* Reports whether the field at |field_index| is present in |msg|.
 *
 * May only be called for fields that track presence (presence != 0):
 *  - for oneof members, the field is present when the oneof case word holds
 *    this field's number;
 *  - for all other fields, the field is present when its hasbit is set. The
 *    presence value is used directly as the bit index from the start of the
 *    message. */
bool upb_msg_has(const upb_msg *msg,
                 int field_index,
                 const upb_msglayout *l) {
  const upb_msglayout_field *field = upb_msg_checkfield(field_index, l);

  UPB_ASSERT(field->presence);

  if (upb_msg_inoneof(field)) {
    /* Oneofs are set when the oneof number is set to this field. */
    return *upb_msg_oneofcase(msg, field_index, l) == field->number;
  } else {
    /* Other fields are set when their hasbit is set. The bit must be tested
     * with '&'; OR-ing the mask in would report every field as present. */
    uint32_t hasbit = field->presence;
    return (DEREF(msg, hasbit / 8, char) & (1 << (hasbit % 8))) != 0;
  }
}
/* Reads the raw storage of the field at |field_index| out of |msg|. */
upb_msgval upb_msg_get(const upb_msg *msg, int field_index,
                       const upb_msglayout *l) {
  const upb_msglayout_field *f = upb_msg_checkfield(field_index, l);
  int nbytes = upb_msg_fieldsize(f);
  return upb_msgval_read(msg, f->offset, nbytes);
}
/* Overwrites the raw storage of the field at |field_index| in |msg| with
 * |val|. Hasbits and oneof cases are not touched here. */
void upb_msg_set(upb_msg *msg, int field_index, upb_msgval val,
                 const upb_msglayout *l) {
  const upb_msglayout_field *f = upb_msg_checkfield(field_index, l);
  int nbytes = upb_msg_fieldsize(f);
  upb_msgval_write(msg, f->offset, val, nbytes);
}
/** upb_array *****************************************************************/
#define DEREF_ARR(arr, i, type) ((type*)arr->data)[i]
/* Returns the number of elements currently stored in |arr|. */
size_t upb_array_size(const upb_array *arr) {
  const size_t count = arr->len;
  return count;
}
/* Returns element |i| of |arr|, interpreting the storage as elements of
 * |type|. |i| must be less than the array length. */
upb_msgval upb_array_get(const upb_array *arr, upb_fieldtype_t type, size_t i) {
  size_t width = upb_msgval_sizeof(type);
  size_t byte_ofs;
  UPB_ASSERT(i < arr->len);
  byte_ofs = i * width;
  return upb_msgval_read(arr->data, byte_ofs, width);
}
/* Stores |val| at index |i|. |i| may equal the current length, in which case
 * the array grows by one element (doubling capacity, minimum 8, when full).
 * Returns false if growing the storage fails; the array is then unchanged. */
bool upb_array_set(upb_array *arr, upb_fieldtype_t type, size_t i,
                   upb_msgval val, upb_arena *arena) {
  size_t width = upb_msgval_sizeof(type);
  bool appending;

  UPB_ASSERT(i <= arr->len);
  appending = (i == arr->len);

  if (appending && i == arr->size) {
    /* Out of capacity: reallocate before extending. */
    size_t grown_cap = UPB_MAX(arr->size * 2, 8);
    size_t grown_bytes = grown_cap * width;
    size_t current_bytes = arr->size * width;
    upb_alloc *alloc = upb_arena_alloc(arena);
    upb_msgval *grown =
        upb_realloc(alloc, arr->data, current_bytes, grown_bytes);
    if (!grown) {
      return false;
    }
    arr->data = grown;
    arr->size = grown_cap;
  }

  if (appending) {
    arr->len = i + 1;
  }

  upb_msgval_write(arr->data, i * width, val, width);
  return true;
}
/** upb_map *******************************************************************/
/* Map storage: every key is converted to a byte string (see upb_map_tokey)
 * and stored in a string-keyed table. */
struct upb_map {
upb_fieldtype_t key_type; /* Field type of the keys. */
upb_fieldtype_t val_type; /* Field type of the values. */
/* We may want to optimize this to use inttable where possible, for greater
 * efficiency and lower memory footprint. */
upb_strtable strtab;
upb_arena *arena; /* Arena whose allocator is used for table updates. */
};
/* Produces the byte-string form of |key| used for table lookups. String keys
 * point at the string data; integer and bool keys point at the bytes of the
 * key value itself. Any other type is not a valid map key. */
static void upb_map_tokey(upb_fieldtype_t type, upb_msgval *key,
                          const char **out_key, size_t *out_len) {
  switch (type) {
    case UPB_TYPE_BOOL:
    case UPB_TYPE_INT32:
    case UPB_TYPE_UINT32:
    case UPB_TYPE_INT64:
    case UPB_TYPE_UINT64:
      /* Point to the key itself.  XXX: big-endian. */
      *out_len = upb_msgval_sizeof(type);
      *out_key = (const char*)key;
      return;
    case UPB_TYPE_STRING:
      /* Point to string data of the input key. */
      *out_len = key->str.size;
      *out_key = key->str.data;
      return;
    default:
      break;  /* Cannot be a map key. */
  }
  UPB_UNREACHABLE();
}
/* Inverse of upb_map_tokey(): rebuilds a upb_msgval key of |type| from the
 * stored key bytes. */
static upb_msgval upb_map_fromkey(upb_fieldtype_t type, const char *key,
                                  size_t len) {
  switch (type) {
    case UPB_TYPE_BOOL:
    case UPB_TYPE_INT32:
    case UPB_TYPE_UINT32:
    case UPB_TYPE_INT64:
    case UPB_TYPE_UINT64:
      return upb_msgval_read(key, 0, upb_msgval_sizeof(type));
    case UPB_TYPE_STRING:
      return upb_msgval_makestr(key, len);
    default:
      break;  /* Cannot be a map key. */
  }
  UPB_UNREACHABLE();
}
/* Allocates an empty map with the given key and value types from arena |a|.
 * Returns NULL if the map or its table cannot be allocated. |ktype| must be
 * a valid map key type. */
upb_map *upb_map_new(upb_fieldtype_t ktype, upb_fieldtype_t vtype,
                     upb_arena *a) {
  upb_ctype_t value_ctype = upb_fieldtotabtype(vtype);
  upb_alloc *alloc = upb_arena_alloc(a);
  upb_map *result = upb_malloc(alloc, sizeof(*result));

  if (result == NULL) {
    return NULL;
  }

  UPB_ASSERT(upb_fieldtype_mapkeyok(ktype));

  result->arena = a;
  result->key_type = ktype;
  result->val_type = vtype;

  return upb_strtable_init2(&result->strtab, value_ctype, alloc) ? result
                                                                 : NULL;
}
size_t upb_map_size(const upb_map *map) {
return upb_strtable_count(&map->strtab);
}
/* Returns the key type the map was created with. */
upb_fieldtype_t upb_map_keytype(const upb_map *map) {
  upb_fieldtype_t ktype = map->key_type;
  return ktype;
}
/* Returns the value type the map was created with. */
upb_fieldtype_t upb_map_valuetype(const upb_map *map) {
  upb_fieldtype_t vtype = map->val_type;
  return vtype;
}
/* Looks up |key|. On a hit, copies the stored table value into |*val| and
 * returns true; on a miss returns false and leaves |*val| untouched. */
bool upb_map_get(const upb_map *map, upb_msgval key, upb_msgval *val) {
  upb_value found;
  const char *kdata;
  size_t klen;

  upb_map_tokey(map->key_type, &key, &kdata, &klen);

  if (!upb_strtable_lookup2(&map->strtab, kdata, klen, &found)) {
    return false;
  }

  memcpy(val, &found, sizeof(found));
  return true;
}
/* Sets or overwrites the entry for |key|.
 *
 * If an entry for |key| already exists it is removed first and, when
 * |removed| is non-NULL, the previously stored table value is copied into
 * |*removed| (using the same raw copy as upb_map_get()). |*removed| is left
 * untouched when no entry was replaced.
 *
 * Returns the result of inserting the new entry into the table. */
bool upb_map_set(upb_map *map, upb_msgval key, upb_msgval val,
                 upb_msgval *removed) {
  const char *key_str;
  size_t key_len;
  upb_value tabval = upb_toval(val);
  upb_value removedtabval;
  upb_alloc *a = upb_arena_alloc(map->arena);

  upb_map_tokey(map->key_type, &key, &key_str, &key_len);

  /* TODO(haberman): add overwrite operation to minimize number of lookups. */
  if (upb_strtable_lookup2(&map->strtab, key_str, key_len, NULL)) {
    upb_strtable_remove3(&map->strtab, key_str, key_len, &removedtabval, a);
    /* Write through the caller's pointer, not into the pointer variable
     * itself, and only when the caller asked for the old value. */
    if (removed) {
      memcpy(removed, &removedtabval, sizeof(removedtabval));
    }
  }

  return upb_strtable_insert3(&map->strtab, key_str, key_len, tabval, a);
}
/* Removes the entry for |key| and returns the table's removal result. */
bool upb_map_del(upb_map *map, upb_msgval key) {
  upb_alloc *alloc = upb_arena_alloc(map->arena);
  const char *kdata;
  size_t klen;

  upb_map_tokey(map->key_type, &key, &kdata, &klen);
  return upb_strtable_remove3(&map->strtab, kdata, klen, NULL, alloc);
}
/** upb_mapiter ***************************************************************/
/* Map iterator state: a string-table iterator plus the key type needed to
 * convert stored key bytes back into a upb_msgval (see upb_mapiter_key). */
struct upb_mapiter {
upb_strtable_iter iter;
upb_fieldtype_t key_type; /* Copied from the map in upb_mapiter_begin(). */
};
/* Returns the number of bytes a caller must provide to hold a upb_mapiter. */
size_t upb_mapiter_sizeof(void) {
  const size_t bytes = sizeof(upb_mapiter);
  return bytes;
}
/* Initializes |i| to iterate over |map| from its first entry. */
void upb_mapiter_begin(upb_mapiter *i, const upb_map *map) {
  const upb_strtable *table = &map->strtab;
  upb_strtable_begin(&i->iter, table);
  i->key_type = map->key_type;
}
/* Allocates an iterator from |a| and positions it at the start of |t|.
 * Returns NULL if the allocation fails. */
upb_mapiter *upb_mapiter_new(const upb_map *t, upb_alloc *a) {
  upb_mapiter *iter = upb_malloc(a, upb_mapiter_sizeof());
  if (iter != NULL) {
    upb_mapiter_begin(iter, t);
  }
  return iter;
}
/* Returns the iterator's memory to allocator |a|. */
void upb_mapiter_free(upb_mapiter *i, upb_alloc *a) {
  void *mem = i;
  upb_free(a, mem);
}
/* Advances the iterator to the next table entry. */
void upb_mapiter_next(upb_mapiter *i) {
  upb_strtable_iter *inner = &i->iter;
  upb_strtable_next(inner);
}
bool upb_mapiter_done(const upb_mapiter *i) {
return upb_strtable_done(&i->iter);
}
upb_msgval upb_mapiter_key(const upb_mapiter *i) {
return upb_map_fromkey(i->key_type, upb_strtable_iter_key(&i->iter),
upb_strtable_iter_keylength(&i->iter));
}
upb_msgval upb_mapiter_value(const upb_mapiter *i) {
return upb_msgval_fromval(upb_strtable_iter_value(&i->iter));
}
/* Marks the underlying table iterator as done. */
void upb_mapiter_setdone(upb_mapiter *i) {
  upb_strtable_iter *inner = &i->iter;
  upb_strtable_iter_setdone(inner);
}
bool upb_mapiter_isequal(const upb_mapiter *i1, const upb_mapiter *i2) {
return upb_strtable_iter_isequal(&i1->iter, &i2->iter);
}

@ -0,0 +1,191 @@
#ifndef UPB_LEGACY_MSG_REFLECTION_H_
#define UPB_LEGACY_MSG_REFLECTION_H_
#include "upb/upb.h"
#include "upb/msg.h"
#include "upb/port_def.inc"
struct upb_map;
typedef struct upb_map upb_map;
struct upb_mapiter;
typedef struct upb_mapiter upb_mapiter;
/** upb_msgval ****************************************************************/
/* A union representing all possible protobuf values.  Used for generic get/set
 * operations. Every member shares the same storage; the caller must know
 * from the field type which member is valid. */
typedef union {
bool b;
float flt;
double dbl;
int32_t i32;
int64_t i64;
uint32_t u32;
uint64_t u64;
const upb_map* map;
const upb_msg* msg;
const upb_array* arr;
const void* ptr;
upb_strview str;
} upb_msgval;
/* For each (name, membername, ctype) triple this macro defines three inline
 * helpers:
 *   upb_msgval_get<name>(v)     returns v.<membername>
 *   upb_msgval_set<name>(&v, x) stores x into v-><membername>
 *   upb_msgval_<name>(x)        returns a upb_msgval with only <membername>
 *                               assigned
 * The macro is undefined again after the instantiations below. */
#define ACCESSORS(name, membername, ctype) \
UPB_INLINE ctype upb_msgval_get ## name(upb_msgval v) { \
return v.membername; \
} \
UPB_INLINE void upb_msgval_set ## name(upb_msgval *v, ctype cval) { \
v->membername = cval; \
} \
UPB_INLINE upb_msgval upb_msgval_ ## name(ctype v) { \
upb_msgval ret; \
ret.membername = v; \
return ret; \
}
ACCESSORS(bool, b, bool)
ACCESSORS(float, flt, float)
ACCESSORS(double, dbl, double)
ACCESSORS(int32, i32, int32_t)
ACCESSORS(int64, i64, int64_t)
ACCESSORS(uint32, u32, uint32_t)
ACCESSORS(uint64, u64, uint64_t)
ACCESSORS(map, map, const upb_map*)
ACCESSORS(msg, msg, const upb_msg*)
ACCESSORS(ptr, ptr, const void*)
ACCESSORS(arr, arr, const upb_array*)
ACCESSORS(str, str, upb_strview)
#undef ACCESSORS
/* Builds a string-valued upb_msgval that views |size| bytes at |data|. */
UPB_INLINE upb_msgval upb_msgval_makestr(const char *data, size_t size) {
  upb_strview view = upb_strview_make(data, size);
  return upb_msgval_str(view);
}
/** upb_msg *******************************************************************/
/* A upb_msg represents a protobuf message. It always corresponds to a specific
* upb_msglayout, which describes how it is laid out in memory. */
/* Read-only message API. Can be safely called by anyone. */
/* Returns the value associated with this field:
* - for scalar fields (including strings), the value directly.
* - return upb_msg*, or upb_map* for msg/map.
* If the field is unset for these field types, returns NULL.
*
* TODO(haberman): should we let users store cached array/map/msg
* pointers here for fields that are unset? Could be useful for the
* strongly-owned submessage model (ie. generated C API that doesn't use
* arenas).
*/
upb_msgval upb_msg_get(const upb_msg *msg,
int field_index,
const upb_msglayout *l);
/* May only be called for fields where upb_fielddef_haspresence(f) == true. */
bool upb_msg_has(const upb_msg *msg,
int field_index,
const upb_msglayout *l);
/* Mutable message API. May only be called by the owner of the message who
* knows its ownership scheme and how to keep it consistent. */
/* Sets the given field to the given value. Does not perform any memory
* management: if you overwrite a pointer to a msg/array/map/string without
* cleaning it up (or using an arena) it will leak.
*/
void upb_msg_set(upb_msg *msg,
int field_index,
upb_msgval val,
const upb_msglayout *l);
/* For a primitive field, set it back to its default. For repeated, string, and
* submessage fields set it back to NULL. This could involve releasing some
* internal memory (for example, from an extension dictionary), but it is not
* recursive in any way and will not recover any memory that may be used by
* arrays/maps/strings/msgs that this field may have pointed to.
*/
bool upb_msg_clearfield(upb_msg *msg,
int field_index,
const upb_msglayout *l);
/* TODO(haberman): copyfrom()/mergefrom()? */
/** upb_array *****************************************************************/
/* A upb_array stores data for a repeated field. The memory management
* semantics are the same as upb_msg. A upb_array allocates dynamic
* memory internally for the array elements. */
upb_fieldtype_t upb_array_type(const upb_array *arr);
/* Read-only interface. Safe for anyone to call. */
size_t upb_array_size(const upb_array *arr);
upb_msgval upb_array_get(const upb_array *arr, upb_fieldtype_t type, size_t i);
/* Write interface. May only be called by the message's owner who can enforce
* its memory management invariants. */
bool upb_array_set(upb_array *arr, upb_fieldtype_t type, size_t i,
upb_msgval val, upb_arena *arena);
/** upb_map *******************************************************************/
/* A upb_map stores data for a map field. The memory management semantics are
* the same as upb_msg, with one notable exception. upb_map will internally
* store a copy of all string keys, but *not* any string values or submessages.
* So you must ensure that any string or message values outlive the map, and you
* must delete them manually when they are no longer required. */
upb_map *upb_map_new(upb_fieldtype_t ktype, upb_fieldtype_t vtype,
upb_arena *a);
/* Read-only interface. Safe for anyone to call. */
size_t upb_map_size(const upb_map *map);
upb_fieldtype_t upb_map_keytype(const upb_map *map);
upb_fieldtype_t upb_map_valuetype(const upb_map *map);
bool upb_map_get(const upb_map *map, upb_msgval key, upb_msgval *val);
/* Write interface. May only be called by the message's owner who can enforce
* its memory management invariants. */
/* Sets or overwrites an entry in the map. Return value indicates whether
* the operation succeeded or failed with OOM, and also whether an existing
* key was replaced or not. */
bool upb_map_set(upb_map *map,
upb_msgval key, upb_msgval val,
upb_msgval *valremoved);
/* Deletes an entry in the map. Returns true if the key was present. */
bool upb_map_del(upb_map *map, upb_msgval key);
/** upb_mapiter ***************************************************************/
/* For iterating over a map. Map iterators are invalidated by mutations to the
* map, but an invalidated iterator will never return junk or crash the process.
* An invalidated iterator may return entries that were already returned though,
* and if you keep invalidating the iterator during iteration, the program may
* enter an infinite loop. */
size_t upb_mapiter_sizeof(void);
void upb_mapiter_begin(upb_mapiter *i, const upb_map *t);
upb_mapiter *upb_mapiter_new(const upb_map *t, upb_alloc *a);
void upb_mapiter_free(upb_mapiter *i, upb_alloc *a);
void upb_mapiter_next(upb_mapiter *i);
bool upb_mapiter_done(const upb_mapiter *i);
upb_msgval upb_mapiter_key(const upb_mapiter *i);
upb_msgval upb_mapiter_value(const upb_mapiter *i);
void upb_mapiter_setdone(upb_mapiter *i);
bool upb_mapiter_isequal(const upb_mapiter *i1, const upb_mapiter *i2);
#include "upb/port_undef.inc"
#endif /* UPB_LEGACY_MSG_REFLECTION_H_ */

@ -0,0 +1,111 @@
#include "upb/msg.h"
#include "upb/table.int.h"
#include "upb/port_def.inc"
#define VOIDPTR_AT(msg, ofs) (void*)((char*)msg + (int)ofs)
/* Internal members of a upb_msg.  We can change this without breaking binary
 * compatibility.  We put these before the user's data.  The user's upb_msg*
 * points after the upb_msg_internal. */

/* Used when a message is not extendable. */
typedef struct {
  char *unknown;
  size_t unknown_len;
  size_t unknown_size;
} upb_msg_internal;

/* Used when a message is extendable. */
typedef struct {
  upb_inttable *extdict;
  upb_msg_internal base;
} upb_msg_internal_withext;

/* Returns the number of bytes of internal state stored in front of the user's
 * message data. Extendable messages carry the larger
 * upb_msg_internal_withext prefix (the accessors below step back by exactly
 * that size), so the prefix must grow, not shrink, when |l->extendable| is
 * set. */
static int upb_msg_internalsize(const upb_msglayout *l) {
  return l->extendable ? (int)sizeof(upb_msg_internal_withext)
                       : (int)sizeof(upb_msg_internal);
}
/* Total allocation size for a message: user data plus the internal prefix. */
static size_t upb_msg_sizeof(const upb_msglayout *l) {
  const int prefix = upb_msg_internalsize(l);
  return l->size + prefix;
}
/* Returns the upb_msg_internal stored immediately before |msg|. */
static upb_msg_internal *upb_msg_getinternal(upb_msg *msg) {
  const int back = -(int)sizeof(upb_msg_internal);
  return VOIDPTR_AT(msg, back);
}
/* Const flavour of upb_msg_getinternal(). */
static const upb_msg_internal *upb_msg_getinternal_const(const upb_msg *msg) {
  const int back = -(int)sizeof(upb_msg_internal);
  return VOIDPTR_AT(msg, back);
}
/* Returns the extended internal prefix stored before |msg|. Only valid for
 * layouts marked extendable. */
static upb_msg_internal_withext *upb_msg_getinternalwithext(
    upb_msg *msg, const upb_msglayout *l) {
  const int back = -(int)sizeof(upb_msg_internal_withext);
  UPB_ASSERT(l->extendable);
  return VOIDPTR_AT(msg, back);
}
/* Allocates a message for layout |l| from arena |a|. The user-visible part is
 * zero-filled and the internal prefix is reset (no unknown fields, no
 * extension dictionary). Returns NULL if the allocation fails. */
upb_msg *upb_msg_new(const upb_msglayout *l, upb_arena *a) {
  void *block = upb_malloc(upb_arena_alloc(a), upb_msg_sizeof(l));
  upb_msg_internal *internal;
  upb_msg *msg;

  if (block == NULL) {
    return NULL;
  }

  /* The user's pointer starts right after the internal prefix. */
  msg = VOIDPTR_AT(block, upb_msg_internalsize(l));

  /* Initialize normal members. */
  memset(msg, 0, l->size);

  /* Initialize internal members. */
  internal = upb_msg_getinternal(msg);
  internal->unknown_size = 0;
  internal->unknown_len = 0;
  internal->unknown = NULL;

  if (l->extendable) {
    upb_msg_getinternalwithext(msg, l)->extdict = NULL;
  }

  return msg;
}
/* Allocates an empty array (no storage, zero length and capacity) from arena
 * |a|. Returns NULL if the allocation fails. */
upb_array *upb_array_new(upb_arena *a) {
  upb_array *arr = upb_arena_malloc(a, sizeof(*arr));
  if (arr != NULL) {
    arr->size = 0;
    arr->len = 0;
    arr->data = NULL;
  }
  return arr;
}
/* Appends |len| bytes at |data| to the message's unknown-field buffer,
 * growing the buffer from |arena| when it is too small.
 *
 * The function cannot report errors. If growing the buffer fails, the
 * existing buffer is kept intact and the new bytes are dropped, rather than
 * overwriting the buffer pointer with NULL and copying through it. */
void upb_msg_addunknown(upb_msg *msg, const char *data, size_t len,
                        upb_arena *arena) {
  upb_msg_internal *in = upb_msg_getinternal(msg);

  if (len == 0) {
    /* Nothing to append; also avoids memcpy() on a NULL buffer. */
    return;
  }

  if (len > in->unknown_size - in->unknown_len) {
    upb_alloc *alloc = upb_arena_alloc(arena);
    size_t need = in->unknown_size + len;
    size_t newsize = UPB_MAX(in->unknown_size * 2, need);
    char *grown = upb_realloc(alloc, in->unknown, in->unknown_size, newsize);
    if (!grown) {
      return;
    }
    in->unknown = grown;
    in->unknown_size = newsize;
  }

  memcpy(in->unknown + in->unknown_len, data, len);
  in->unknown_len += len;
}
/* Returns the message's unknown-field buffer and stores its used length in
 * |*len|. The pointer is NULL when nothing has been added. */
const char *upb_msg_getunknown(const upb_msg *msg, size_t *len) {
  const upb_msg_internal *internal = upb_msg_getinternal_const(msg);
  const char *buf = internal->unknown;
  *len = internal->unknown_len;
  return buf;
}
#undef VOIDPTR_AT

@ -0,0 +1,69 @@
/*
** Data structures for message tables, used for parsing and serialization.
** These are much lighter-weight than full reflection, but they do not
** have enough information to convert to text format, JSON, etc.
**
** The definitions in this file are internal to upb.
**/
#ifndef UPB_MSG_H_
#define UPB_MSG_H_
#include <stdint.h>
#include <string.h>
#include "upb/upb.h"
#ifdef __cplusplus
extern "C" {
#endif
typedef void upb_msg;
/** upb_msglayout *************************************************************/
/* upb_msglayout represents the memory layout of a given upb_msgdef. The
* members are public so generated code can initialize them, but users MUST NOT
* read or write any of its members. */
/* Describes how a single field is stored inside a message. */
typedef struct {
uint32_t number; /* Field number; also the value stored in a oneof case. */
uint16_t offset; /* Byte offset of the field's storage in the message. */
/* 0: the field has no presence information.
 * >0: bit index of the field's hasbit, counted from the start of the message.
 * <0: the field is in a oneof; ~presence is the byte offset of the case. */
int16_t presence;
uint16_t submsg_index; /* undefined if descriptortype != MESSAGE or GROUP. */
uint8_t descriptortype;
uint8_t label;
} upb_msglayout_field;
/* Memory layout of one message type. */
typedef struct upb_msglayout {
/* Layouts of sub-messages, indexed by upb_msglayout_field.submsg_index. */
const struct upb_msglayout *const* submsgs;
/* Array of field_count field descriptions. */
const upb_msglayout_field *fields;
/* Must be aligned to sizeof(void*).  Doesn't include internal members like
 * unknown fields, extension dict, pointer to msglayout, etc. */
uint16_t size;
uint16_t field_count;
bool extendable;
} upb_msglayout;
/** Message internal representation *******************************************/
/* Our internal representation for repeated fields. The element type is not
 * stored here; callers pass it to the accessors. */
typedef struct {
void *data; /* Each element is element_size. */
size_t len; /* Measured in elements. */
size_t size; /* Measured in elements. */
} upb_array;
/* Allocates a new message for layout |l| from arena |a|, or returns NULL if
 * the allocation fails. */
upb_msg *upb_msg_new(const upb_msglayout *l, upb_arena *a);
void upb_msg_addunknown(upb_msg *msg, const char *data, size_t len,
upb_arena *arena);
const char *upb_msg_getunknown(const upb_msg *msg, size_t *len);
upb_array *upb_array_new(upb_arena *a);
#ifdef __cplusplus
} /* extern "C" */
#endif
#endif /* UPB_MSG_H_ */

@ -0,0 +1,248 @@
#include "upb/msgfactory.h"
#include "upb/port_def.inc"
/* Returns true iff |val| is a power of two. Zero is not a power of two; it
 * is rejected explicitly because the bit trick alone would accept it. */
static bool is_power_of_two(size_t val) {
  return val != 0 && (val & (val - 1)) == 0;
}
/* Rounds |val| up to the next multiple of |align|, which must be a power of
 * two. */
static size_t align_up(size_t val, size_t align) {
  size_t mask;
  UPB_ASSERT(is_power_of_two(align));
  mask = align - 1;
  return (val + mask) & ~mask;
}
/* Integer division of |n| by |d|, rounding the quotient up. */
static size_t div_round_up(size_t n, size_t d) {
  const size_t biased = n + (d - 1);
  return biased / d;
}
/* Returns the number of bytes used to store a single value of |type|. */
static size_t upb_msgval_sizeof2(upb_fieldtype_t type) {
  switch (type) {
    case UPB_TYPE_BOOL:
      return 1;
    case UPB_TYPE_FLOAT:
    case UPB_TYPE_INT32:
    case UPB_TYPE_UINT32:
    case UPB_TYPE_ENUM:
      return 4;
    case UPB_TYPE_DOUBLE:
    case UPB_TYPE_INT64:
    case UPB_TYPE_UINT64:
      return 8;
    case UPB_TYPE_STRING:
    case UPB_TYPE_BYTES:
      return sizeof(upb_strview);
    case UPB_TYPE_MESSAGE:
      return sizeof(void*);
  }
  UPB_UNREACHABLE();
}
/* Returns the storage size of field |f| inside a message: sequence fields
 * are stored as a pointer, all others by the size of their value type. */
static uint8_t upb_msg_fielddefsize(const upb_fielddef *f) {
  if (!upb_fielddef_isseq(f)) {
    return upb_msgval_sizeof2(upb_fielddef_type(f));
  }
  return sizeof(void*);
}
/** upb_msglayout *************************************************************/
/* Releases a layout together with the field and sub-message arrays that
 * upb_msglayout_init() allocated for it. |l| must have been zeroed or
 * initialized by upb_msglayout_init(); NULL is accepted and ignored. */
static void upb_msglayout_free(upb_msglayout *l) {
  if (!l) {
    return;
  }
  /* The arrays are stored through const-qualified pointers; cast the
   * qualifier away to hand them back to the allocator. */
  upb_gfree((void*)l->fields);
  upb_gfree((void*)l->submsgs);
  upb_gfree(l);
}
/* Reserves |size| bytes at the end of the layout, aligned to |size|, and
 * returns the offset of the reserved region. */
static size_t upb_msglayout_place(upb_msglayout *l, size_t size) {
  size_t offset;
  l->size = align_up(l->size, size);
  offset = l->size;
  l->size = offset + size;
  return offset;
}
/* Computes the memory layout of message |m| into |l|.
 *
 * |l| is zeroed first, then the field and sub-message arrays are allocated
 * and filled in. Sub-message layouts are obtained from |factory|. Returns
 * false (leaving |l| zeroed and owning nothing) if an allocation fails.
 *
 * Hasbits are numbered from 1: a presence value of 0 means "no presence
 * information", so bit 0 is never handed out. */
static bool upb_msglayout_init(const upb_msgdef *m,
                               upb_msglayout *l,
                               upb_msgfactory *factory) {
  upb_msg_field_iter it;
  upb_msg_oneof_iter oit;
  size_t hasbit;
  size_t submsg_count = 0;
  const upb_msglayout **submsgs;
  upb_msglayout_field *fields;

  /* Count sub-message fields so the submsgs array can be sized. */
  for (upb_msg_field_begin(&it, m);
       !upb_msg_field_done(&it);
       upb_msg_field_next(&it)) {
    const upb_fielddef* f = upb_msg_iter_field(&it);
    if (upb_fielddef_issubmsg(f)) {
      submsg_count++;
    }
  }

  memset(l, 0, sizeof(*l));

  fields = upb_gmalloc(upb_msgdef_numfields(m) * sizeof(*fields));
  submsgs = upb_gmalloc(submsg_count * sizeof(*submsgs));

  if ((!fields && upb_msgdef_numfields(m)) ||
      (!submsgs && submsg_count)) {
    /* OOM. */
    upb_gfree(fields);
    upb_gfree(submsgs);
    return false;
  }

  l->field_count = upb_msgdef_numfields(m);
  l->fields = fields;
  l->submsgs = submsgs;

  /* Allocate data offsets in three stages:
   *
   * 1. hasbits.
   * 2. regular fields.
   * 3. oneof fields.
   *
   * OPT: There is a lot of room for optimization here to minimize the size.
   */

  /* Allocate hasbits and set basic field attributes. */
  submsg_count = 0;
  for (upb_msg_field_begin(&it, m), hasbit = 0;
       !upb_msg_field_done(&it);
       upb_msg_field_next(&it)) {
    const upb_fielddef* f = upb_msg_iter_field(&it);
    upb_msglayout_field *field = &fields[upb_fielddef_index(f)];

    field->number = upb_fielddef_number(f);
    field->descriptortype = upb_fielddef_descriptortype(f);
    field->label = upb_fielddef_label(f);

    if (upb_fielddef_issubmsg(f)) {
      const upb_msglayout *sub_layout =
          upb_msgfactory_getlayout(factory, upb_fielddef_msgsubdef(f));
      field->submsg_index = submsg_count++;
      submsgs[field->submsg_index] = sub_layout;
    }

    if (upb_fielddef_haspresence(f) && !upb_fielddef_containingoneof(f)) {
      /* Pre-increment so the first hasbit is 1: presence == 0 is reserved
       * for fields without presence. */
      field->presence = (++hasbit);
    } else {
      field->presence = 0;
    }
  }

  /* Account for space used by hasbits. Bits 1..hasbit are in use and bit 0
   * is reserved, so hasbit + 1 bits have to fit. */
  l->size = hasbit ? div_round_up(hasbit + 1, 8) : 0;

  /* Allocate non-oneof fields. */
  for (upb_msg_field_begin(&it, m); !upb_msg_field_done(&it);
       upb_msg_field_next(&it)) {
    const upb_fielddef* f = upb_msg_iter_field(&it);
    size_t field_size = upb_msg_fielddefsize(f);
    size_t index = upb_fielddef_index(f);

    if (upb_fielddef_containingoneof(f)) {
      /* Oneofs are handled separately below. */
      continue;
    }

    fields[index].offset = upb_msglayout_place(l, field_size);
  }

  /* Allocate oneof fields.  Each oneof field consists of a uint32 for the case
   * and space for the actual data. */
  for (upb_msg_oneof_begin(&oit, m); !upb_msg_oneof_done(&oit);
       upb_msg_oneof_next(&oit)) {
    const upb_oneofdef* o = upb_msg_iter_oneof(&oit);
    upb_oneof_iter fit;

    size_t case_size = sizeof(uint32_t);  /* Could potentially optimize this. */
    size_t field_size = 0;
    uint32_t case_offset;
    uint32_t data_offset;

    /* Calculate field size: the max of all field sizes. */
    for (upb_oneof_begin(&fit, o);
         !upb_oneof_done(&fit);
         upb_oneof_next(&fit)) {
      const upb_fielddef* f = upb_oneof_iter_field(&fit);
      field_size = UPB_MAX(field_size, upb_msg_fielddefsize(f));
    }

    /* Align and allocate case offset. */
    case_offset = upb_msglayout_place(l, case_size);
    data_offset = upb_msglayout_place(l, field_size);

    /* All members share the data slot; the complemented case offset is
     * stored as a negative presence value. */
    for (upb_oneof_begin(&fit, o);
         !upb_oneof_done(&fit);
         upb_oneof_next(&fit)) {
      const upb_fielddef* f = upb_oneof_iter_field(&fit);
      fields[upb_fielddef_index(f)].offset = data_offset;
      fields[upb_fielddef_index(f)].presence = ~case_offset;
    }
  }

  /* Size of the entire structure should be a multiple of its greatest
   * alignment.  TODO: track overall alignment for real? */
  l->size = align_up(l->size, 8);

  return true;
}
/** upb_msgfactory ************************************************************/
/* Factory state: the symbol table the messages come from, plus a table of
 * layouts keyed by upb_msgdef pointer (see upb_msgfactory_getlayout). */
struct upb_msgfactory {
/* NOTE(review): this comment used to read "We own a ref", but
 * upb_msgfactory_free() never releases the symtab -- confirm the intended
 * ownership. */
const upb_symtab *symtab;
upb_inttable layouts;
};
/* Creates a factory for messages defined in |symtab|. Returns NULL if the
 * factory or its layout table cannot be allocated. */
upb_msgfactory *upb_msgfactory_new(const upb_symtab *symtab) {
  upb_msgfactory *ret = upb_gmalloc(sizeof(*ret));
  if (!ret) {
    return NULL;
  }
  ret->symtab = symtab;
  if (!upb_inttable_init(&ret->layouts, UPB_CTYPE_PTR)) {
    /* Do not hand out a factory whose table is unusable. */
    upb_gfree(ret);
    return NULL;
  }
  return ret;
}
/* Destroys the factory: frees every cached layout, then the layout table and
 * the factory object itself. */
void upb_msgfactory_free(upb_msgfactory *f) {
  upb_inttable_iter cursor;

  upb_inttable_begin(&cursor, &f->layouts);
  while (!upb_inttable_done(&cursor)) {
    upb_msglayout *layout = upb_value_getptr(upb_inttable_iter_value(&cursor));
    upb_msglayout_free(layout);
    upb_inttable_next(&cursor);
  }

  upb_inttable_uninit(&f->layouts);
  upb_gfree(f);
}
/* Returns the symbol table the factory was created with. */
const upb_symtab *upb_msgfactory_symtab(const upb_msgfactory *f) {
  const upb_symtab *symtab = f->symtab;
  return symtab;
}
/* Returns the cached layout for |m|, building and caching it on first use.
 *
 * |m| must belong to the factory's symtab and must not be a map entry.
 * Returns NULL if the layout cannot be allocated or initialized; in that
 * case nothing is left behind in the cache, so no freed layout is ever
 * returned or freed a second time by upb_msgfactory_free(). */
const upb_msglayout *upb_msgfactory_getlayout(upb_msgfactory *f,
                                              const upb_msgdef *m) {
  upb_value v;
  UPB_ASSERT(upb_symtab_lookupmsg(f->symtab, upb_msgdef_fullname(m)) == m);
  UPB_ASSERT(!upb_msgdef_mapentry(m));

  if (upb_inttable_lookupptr(&f->layouts, m, &v)) {
    UPB_ASSERT(upb_value_getptr(v));
    return upb_value_getptr(v);
  } else {
    upb_msglayout *l = upb_gmalloc(sizeof(*l));
    if (!l) {
      return NULL;
    }

    /* In case of circular dependency, layout has to be inserted first. */
    if (!upb_inttable_insertptr(&f->layouts, m, upb_value_ptr(l))) {
      /* |l| is still uninitialized here, so release only the raw block. */
      upb_gfree(l);
      return NULL;
    }

    if (!upb_msglayout_init(m, l, f)) {
      /* Init fails before it recurses into other layouts, so only this
       * cache entry refers to |l|; drop it before freeing. */
      upb_inttable_removeptr(&f->layouts, m, &v);
      upb_msglayout_free(l);
      return NULL;
    }

    return l;
  }
}

@ -0,0 +1,48 @@
#include "upb/def.h"
#include "upb/msg.h"
#ifndef UPB_MSGFACTORY_H_
#define UPB_MSGFACTORY_H_
/** upb_msgfactory ************************************************************/
struct upb_msgfactory;
typedef struct upb_msgfactory upb_msgfactory;
#ifdef __cplusplus
extern "C" {
#endif
/* A upb_msgfactory contains a cache of upb_msglayout, upb_handlers, and
* upb_visitorplan objects. These are the objects necessary to represent,
* populate, and visit upb_msg objects.
*
* These caches are all populated by upb_msgdef, and lazily created on demand.
*/
/* Creates and destroys a msgfactory, respectively. The messages for this
* msgfactory must come from |symtab| (which should outlive the msgfactory). */
upb_msgfactory *upb_msgfactory_new(const upb_symtab *symtab);
void upb_msgfactory_free(upb_msgfactory *f);
const upb_symtab *upb_msgfactory_symtab(const upb_msgfactory *f);
/* The functions to get cached objects, lazily creating them on demand. These
* all require:
*
* - m is in upb_msgfactory_symtab(f)
* - upb_msgdef_mapentry(m) == false (since map messages can't have layouts).
*
* The returned objects will live for as long as the msgfactory does.
*
* TODO(haberman): consider making this thread-safe and take a const
* upb_msgfactory. */
const upb_msglayout *upb_msgfactory_getlayout(upb_msgfactory *f,
const upb_msgdef *m);
#ifdef __cplusplus
} /* extern "C" */
#endif
#endif /* UPB_MSGFACTORY_H_ */

@ -0,0 +1,919 @@
/*
** protobuf decoder bytecode compiler
**
** Code to compile a upb::Handlers into bytecode for decoding a protobuf
** according to that specific schema and destination handlers.
**
** Bytecode definition is in decoder.int.h.
*/
#include <stdarg.h>
#include "upb/pb/decoder.int.h"
#include "upb/pb/varint.int.h"
#ifdef UPB_DUMP_BYTECODE
#include <stdio.h>
#endif
#include "upb/port_def.inc"
#define MAXLABEL 5
#define EMPTYLABEL -1
/* upb_pbdecodermethod ********************************************************/
/* Destroys a decoder method: uninitializes its dispatch table, then releases
 * the method object itself with upb_gfree(). */
static void freemethod(upb_pbdecodermethod *method) {
  upb_inttable *dispatch = &method->dispatch;

  upb_inttable_uninit(dispatch);
  upb_gfree(method);
}
/* Allocates a new decoder method that belongs to |group| and pushes its
 * output to |dest_handlers|.
 *
 * The returned method has an initialized input bytes-handler and an empty
 * dispatch table (uint64 values).  code_base is left unset; it is assigned
 * when the method is compiled.  The caller releases the method with
 * freemethod(). */
static upb_pbdecodermethod *newmethod(const upb_handlers *dest_handlers,
                                      mgroup *group) {
  upb_pbdecodermethod *ret = upb_gmalloc(sizeof(*ret));
  upb_byteshandler_init(&ret->input_handler_);
  ret->group = group;
  ret->dest_handlers_ = dest_handlers;
  /* Nothing in this file generates native code, so the method is bytecode.
   * Without this store, upb_pbdecodermethod_isnative() would read an
   * uninitialized field. */
  ret->is_native_ = false;
  upb_inttable_init(&ret->dispatch, UPB_CTYPE_UINT64);
  return ret;
}
/* Returns the destination handlers that |m| was created with. */
const upb_handlers *upb_pbdecodermethod_desthandlers(
    const upb_pbdecodermethod *m) {
  const upb_handlers *dest = m->dest_handlers_;
  return dest;
}
/* Returns a pointer to the bytes-handler embedded in |m|; the pointer refers
 * to storage inside the method object itself. */
const upb_byteshandler *upb_pbdecodermethod_inputhandler(
    const upb_pbdecodermethod *m) {
  const upb_byteshandler *input = &m->input_handler_;
  return input;
}
/* Reports the value of the method's is_native_ flag. */
bool upb_pbdecodermethod_isnative(const upb_pbdecodermethod *m) {
  bool native = m->is_native_;
  return native;
}
/* mgroup *********************************************************************/
/* Destroys a method group: frees every method stored in its table, then the
 * table, the bytecode buffer and finally the group object. */
static void freegroup(mgroup *g) {
  upb_inttable_iter it;

  upb_inttable_begin(&it, &g->methods);
  while (!upb_inttable_done(&it)) {
    upb_pbdecodermethod *m = upb_value_getptr(upb_inttable_iter_value(&it));
    freemethod(m);
    upb_inttable_next(&it);
  }

  upb_inttable_uninit(&g->methods);
  upb_gfree(g->bytecode);
  upb_gfree(g);
}
/* Allocates an empty method group: a pointer-valued method table and no
 * bytecode buffer yet (both bytecode pointers are NULL). */
mgroup *newgroup(void) {
  mgroup *group = upb_gmalloc(sizeof(*group));

  upb_inttable_init(&group->methods, UPB_CTYPE_PTR);
  group->bytecode_end = NULL;
  group->bytecode = NULL;
  return group;
}
/* bytecode compiler **********************************************************/
/* Data used only at compilation time. */
typedef struct {
/* Method group whose bytecode buffer is being written. */
mgroup *group;
/* Position in group->bytecode where the next word will be written. */
uint32_t *pc;
/* Per label: bytecode offset of the most recent instruction holding an
 * unresolved forward reference (head of a linked list threaded through the
 * instructions' offset fields), or EMPTYLABEL if there is none. */
int fwd_labels[MAXLABEL];
/* Per label: bytecode offset at which the label was last defined, or
 * EMPTYLABEL if it has not been defined yet. */
int back_labels[MAXLABEL];
/* For fields marked "lazy", parse them lazily or eagerly? */
bool lazy;
} compiler;
/* Allocates compiler state for |group|.  |lazy| is stored as-is and every
 * forward and backward label slot starts out as EMPTYLABEL.  The pc field is
 * not set here. */
static compiler *newcompiler(mgroup *group, bool lazy) {
  compiler *comp = upb_gmalloc(sizeof(*comp));
  int slot = 0;

  comp->group = group;
  comp->lazy = lazy;
  while (slot < MAXLABEL) {
    comp->fwd_labels[slot] = EMPTYLABEL;
    comp->back_labels[slot] = EMPTYLABEL;
    slot++;
  }
  return comp;
}
/* Releases compiler state created by newcompiler().  The method group it
 * points at is not touched. */
static void freecompiler(compiler *c) {
  compiler *state = c;
  upb_gfree(state);
}
/* Number of 32-bit bytecode words occupied by one pointer. */
const size_t ptr_words = sizeof(void*) / sizeof(uint32_t);
/* Returns the length, in 32-bit words, of the instruction whose first word is
 * |instr|.  Only three opcodes carry extra words; everything else is a single
 * word. */
static int instruction_len(uint32_t instr) {
  const int op = getop(instr);

  if (op == OP_SETDISPATCH) return 1 + ptr_words; /* opcode + pointer */
  if (op == OP_TAGN) return 3;
  if (op == OP_SETBIGGROUPNUM) return 2;
  return 1;
}
/* Tells whether the opcode in |instruction| stores its jump offset in the
 * wide field (everything above the low 8 opcode bits) rather than in the
 * narrow 8-bit field.  Must only be called for opcodes that carry an offset;
 * any other opcode trips an assertion. */
bool op_has_longofs(int32_t instruction) {
  const int op = getop(instruction);

  if (op == OP_CALL || op == OP_BRANCH || op == OP_CHECKDELIM) {
    return true;
  }

  /* The "tag" instructions only have 8 bits available for the jump target,
   * but that is ok because these opcodes only require short jumps. */
  if (op == OP_TAG1 || op == OP_TAG2 || op == OP_TAGN) {
    return false;
  }

  UPB_ASSERT(false);
  return false;
}
/* Extracts the signed jump offset stored in |instruction|: the wide field
 * for long-offset opcodes, otherwise the sign-extended byte at bits 8..15. */
static int32_t getofs(uint32_t instruction) {
  return op_has_longofs(instruction) ? ((int32_t)instruction) >> 8
                                     : (int8_t)(instruction >> 8);
}
/* Stores jump offset |ofs| into |*instruction|.  Long-offset opcodes are
 * rebuilt as opcode | (ofs << 8); for the others only bits 8..15 are
 * replaced and the remaining bits are kept. */
static void setofs(uint32_t *instruction, int32_t ofs) {
  const uint32_t old = *instruction;

  if (op_has_longofs(old)) {
    *instruction = getop(old) | ((uint32_t)ofs << 8);
  } else {
    *instruction = (old & ~0xff00) | ((ofs & 0xff) << 8);
  }

  /* Reading the offset back catches values that did not fit the field. */
  UPB_ASSERT(getofs(*instruction) == ofs);
}
/* Returns the current PC as a word offset from the start of the group's
 * bytecode buffer. */
static uint32_t pcofs(compiler *c) {
  const uint32_t *base = c->group->bytecode;
  return c->pc - base;
}
/* Binds local label |label| to the current PC.
 *
 * Every instruction that referenced the label before it was defined is
 * patched to jump here, and the position is recorded so that later backward
 * references can be resolved. */
static void label(compiler *c, unsigned int label) {
  uint32_t *ref = NULL;
  int head;

  UPB_ASSERT(label < MAXLABEL);

  head = c->fwd_labels[label];
  if (head != EMPTYLABEL) {
    ref = c->group->bytecode + head;
  }

  /* Until patched, each pending instruction's offset field holds the link to
   * the next pending instruction (0 ends the list), so read it before it is
   * overwritten with the real jump distance. */
  while (ref != NULL) {
    int next = getofs(*ref);
    setofs(ref, c->pc - ref - instruction_len(*ref));
    ref = (next != 0) ? ref + next : NULL;
  }

  c->fwd_labels[label] = EMPTYLABEL;
  c->back_labels[label] = pcofs(c);
}
/* Produces the offset to store in an instruction that refers to |label|.
 *
 * A negative |label| is a backward reference to label -|label| and yields
 * the real distance.  A positive |label| is a forward reference: the label is
 * not defined yet, so the value returned is a link to the previous pending
 * reference (0 if none) and the current instruction becomes the new head of
 * that label's pending list.  label() later replaces the links with real
 * distances.  LABEL_DISPATCH needs no resolution and always yields 0. */
static int32_t labelref(compiler *c, int label) {
  UPB_ASSERT(label < MAXLABEL);

  if (label == LABEL_DISPATCH) {
    /* No resolving required. */
    return 0;
  }

  if (label < 0) {
    /* Backward local label.  Relative to the next instruction. */
    uint32_t next_pc = (c->pc + 1) - c->group->bytecode;
    return c->back_labels[-label] - next_pc;
  }

  {
    /* Forward local label: prepend to (possibly-empty) linked list. */
    int *head = &c->fwd_labels[label];
    int32_t link = 0;
    if (*head != EMPTYLABEL) {
      link = *head - pcofs(c);
    }
    *head = pcofs(c);
    return link;
  }
}
/* Appends one 32-bit word to the bytecode.  When the buffer is full it is
 * reallocated to twice its size (at least 64 words) and the PC is re-based
 * onto the new buffer. */
static void put32(compiler *c, uint32_t v) {
  mgroup *group = c->group;

  if (c->pc == group->bytecode_end) {
    int pc_words = pcofs(c);
    size_t have = group->bytecode_end - group->bytecode;
    size_t want = UPB_MAX(have * 2, 64);
    /* TODO(haberman): handle OOM. */
    group->bytecode = upb_grealloc(group->bytecode, have * sizeof(uint32_t),
                                   want * sizeof(uint32_t));
    group->bytecode_end = group->bytecode + want;
    c->pc = group->bytecode + pc_words;
  }

  *c->pc = v;
  c->pc++;
}
/* Emits one instruction with opcode |op| at the current PC.  The variadic
 * arguments depend on the opcode:
 *
 *   OP_SETDISPATCH            (void*)            pointer stored in following words
 *   no-operand opcodes        (none read)
 *   parse/selector opcodes    (upb_selector_t)   stored above the low 8 bits
 *   OP_SETBIGGROUPNUM         (int)              stored in a second word
 *   OP_CALL                   (upb_pbdecodermethod*)
 *   OP_CHECKDELIM, OP_BRANCH  (int label)
 *   OP_TAG1, OP_TAG2, OP_TAGN (int label, uint64_t tag)
 *
 * The switch has no default case, so an opcode not listed here emits
 * nothing. */
static void putop(compiler *c, int op, ...) {
va_list ap;
va_start(ap, op);
switch (op) {
case OP_SETDISPATCH: {
/* Opcode word, then the pointer: low 32 bits first, followed by the high
 * 32 bits when pointers are wider than 32 bits. */
uintptr_t ptr = (uintptr_t)va_arg(ap, void*);
put32(c, OP_SETDISPATCH);
put32(c, ptr);
if (sizeof(uintptr_t) > sizeof(uint32_t))
put32(c, (uint64_t)ptr >> 32);
break;
}
/* Opcodes that are emitted as a bare opcode word. */
case OP_STARTMSG:
case OP_ENDMSG:
case OP_PUSHLENDELIM:
case OP_POP:
case OP_SETDELIM:
case OP_HALT:
case OP_RET:
case OP_DISPATCH:
put32(c, op);
break;
/* Opcodes that carry one upb_selector_t-typed argument above the opcode
 * byte. */
case OP_PARSE_DOUBLE:
case OP_PARSE_FLOAT:
case OP_PARSE_INT64:
case OP_PARSE_UINT64:
case OP_PARSE_INT32:
case OP_PARSE_FIXED64:
case OP_PARSE_FIXED32:
case OP_PARSE_BOOL:
case OP_PARSE_UINT32:
case OP_PARSE_SFIXED32:
case OP_PARSE_SFIXED64:
case OP_PARSE_SINT32:
case OP_PARSE_SINT64:
case OP_STARTSEQ:
case OP_ENDSEQ:
case OP_STARTSUBMSG:
case OP_ENDSUBMSG:
case OP_STARTSTR:
case OP_STRING:
case OP_ENDSTR:
case OP_PUSHTAGDELIM:
put32(c, op | va_arg(ap, upb_selector_t) << 8);
break;
case OP_SETBIGGROUPNUM:
put32(c, op);
put32(c, va_arg(ap, int));
break;
case OP_CALL: {
/* The offset is measured from the word following this instruction to the
 * callee's recorded code_base.ofs. */
const upb_pbdecodermethod *method = va_arg(ap, upb_pbdecodermethod *);
put32(c, op | (method->code_base.ofs - (pcofs(c) + 1)) << 8);
break;
}
case OP_CHECKDELIM:
case OP_BRANCH: {
uint32_t instruction = op;
int label = va_arg(ap, int);
setofs(&instruction, labelref(c, label));
put32(c, instruction);
break;
}
case OP_TAG1:
case OP_TAG2: {
/* The encoded tag is stored starting at bit 16 and must fit in 16 bits. */
int label = va_arg(ap, int);
uint64_t tag = va_arg(ap, uint64_t);
uint32_t instruction = op | (tag << 16);
UPB_ASSERT(tag <= 0xffff);
setofs(&instruction, labelref(c, label));
put32(c, instruction);
break;
}
case OP_TAGN: {
/* The first word holds upb_value_size(tag) starting at bit 16; the tag
 * itself follows as two words, low half first. */
int label = va_arg(ap, int);
uint64_t tag = va_arg(ap, uint64_t);
uint32_t instruction = op | (upb_value_size(tag) << 16);
setofs(&instruction, labelref(c, label));
put32(c, instruction);
put32(c, tag);
put32(c, tag >> 32);
break;
}
}
va_end(ap);
}
#if defined(UPB_DUMP_BYTECODE)
/* Maps an opcode value to a printable name, or "<unknown op>" when the value
 * matches none of the listed opcodes.  Only compiled when UPB_DUMP_BYTECODE
 * is defined.
 *
 * Each OP(x) below expands to a `case OP_x:` that returns the stringified
 * expansion of OP_x; T(x) is shorthand for OP(PARSE_x).  OP and T are
 * #undef'd at the end of the function; QUOTE, EXPAND_AND_QUOTE and OPNAME are
 * left defined. */
const char *upb_pbdecoder_getopname(unsigned int op) {
#define QUOTE(x) #x
#define EXPAND_AND_QUOTE(x) QUOTE(x)
#define OPNAME(x) OP_##x
#define OP(x) case OPNAME(x): return EXPAND_AND_QUOTE(OPNAME(x));
#define T(x) OP(PARSE_##x)
/* Keep in sync with list in decoder.int.h. */
switch ((opcode)op) {
T(DOUBLE) T(FLOAT) T(INT64) T(UINT64) T(INT32) T(FIXED64) T(FIXED32)
T(BOOL) T(UINT32) T(SFIXED32) T(SFIXED64) T(SINT32) T(SINT64)
OP(STARTMSG) OP(ENDMSG) OP(STARTSEQ) OP(ENDSEQ) OP(STARTSUBMSG)
OP(ENDSUBMSG) OP(STARTSTR) OP(STRING) OP(ENDSTR) OP(CALL) OP(RET)
OP(PUSHLENDELIM) OP(PUSHTAGDELIM) OP(SETDELIM) OP(CHECKDELIM)
OP(BRANCH) OP(TAG1) OP(TAG2) OP(TAGN) OP(SETDISPATCH) OP(POP)
OP(SETBIGGROUPNUM) OP(DISPATCH) OP(HALT)
}
return "<unknown op>";
#undef OP
#undef T
}
#endif
#ifdef UPB_DUMP_BYTECODE
/* Writes a human-readable listing of the bytecode in [p, end) to |f|, one
 * instruction per line: address, word offset from the start, opcode name and
 * any operands.  Only compiled when UPB_DUMP_BYTECODE is defined. */
static void dumpbc(uint32_t *p, uint32_t *end, FILE *f) {
uint32_t *begin = p;
while (p < end) {
fprintf(f, "%p %8tx", p, p - begin);
uint32_t instr = *p++;
uint8_t op = getop(instr);
fprintf(f, " %s", upb_pbdecoder_getopname(op));
switch ((opcode)op) {
case OP_SETDISPATCH: {
/* The operand words hold a pointer to a method's dispatch table; step
 * back from it to the enclosing method to print the message name. */
const upb_inttable *dispatch;
memcpy(&dispatch, p, sizeof(void*));
p += ptr_words;
const upb_pbdecodermethod *method =
(void *)((char *)dispatch -
offsetof(upb_pbdecodermethod, dispatch));
fprintf(f, " %s", upb_msgdef_fullname(
upb_handlers_msgdef(method->dest_handlers_)));
break;
}
/* No operands to print. */
case OP_DISPATCH:
case OP_STARTMSG:
case OP_ENDMSG:
case OP_PUSHLENDELIM:
case OP_POP:
case OP_SETDELIM:
case OP_HALT:
case OP_RET:
break;
/* Single argument stored above the opcode byte. */
case OP_PARSE_DOUBLE:
case OP_PARSE_FLOAT:
case OP_PARSE_INT64:
case OP_PARSE_UINT64:
case OP_PARSE_INT32:
case OP_PARSE_FIXED64:
case OP_PARSE_FIXED32:
case OP_PARSE_BOOL:
case OP_PARSE_UINT32:
case OP_PARSE_SFIXED32:
case OP_PARSE_SFIXED64:
case OP_PARSE_SINT32:
case OP_PARSE_SINT64:
case OP_STARTSEQ:
case OP_ENDSEQ:
case OP_STARTSUBMSG:
case OP_ENDSUBMSG:
case OP_STARTSTR:
case OP_STRING:
case OP_ENDSTR:
case OP_PUSHTAGDELIM:
fprintf(f, " %d", instr >> 8);
break;
case OP_SETBIGGROUPNUM:
fprintf(f, " %d", *p++);
break;
/* Jump target printed as a word offset from the start of the dump; p
 * already points past the instruction word here. */
case OP_CHECKDELIM:
case OP_CALL:
case OP_BRANCH:
fprintf(f, " =>0x%tx", p + getofs(instr) - begin);
break;
case OP_TAG1:
case OP_TAG2: {
fprintf(f, " tag:0x%x", instr >> 16);
if (getofs(instr)) {
fprintf(f, " =>0x%tx", p + getofs(instr) - begin);
}
break;
}
case OP_TAGN: {
/* The tag occupies the two words after the instruction, low half first. */
uint64_t tag = *p++;
tag |= (uint64_t)*p++ << 32;
fprintf(f, " tag:0x%llx", (long long)tag);
fprintf(f, " n:%d", instr >> 16);
if (getofs(instr)) {
fprintf(f, " =>0x%tx", p + getofs(instr) - begin);
}
break;
}
}
fputs("\n", f);
}
}
#endif
/* Builds the tag for field |f| with the given wire type -- (field number << 3)
 * | wire_type -- and returns the result of upb_vencode32() on it. */
static uint64_t get_encoded_tag(const upb_fielddef *f, int wire_type) {
  uint32_t fieldnum = upb_fielddef_number(f);
  uint32_t raw_tag = (fieldnum << 3) | wire_type;
  uint64_t encoded = upb_vencode32(raw_tag);

  /* No tag should be greater than 5 bytes. */
  UPB_ASSERT(encoded <= 0xffffffffff);
  return encoded;
}
/* Emits a tag-check instruction for field |f| and |wire_type| whose jump
 * label is |dest|.  OP_TAG1 or OP_TAG2 is chosen when upb_value_size() of the
 * encoded tag is 1 or 2; anything else uses OP_TAGN. */
static void putchecktag(compiler *c, const upb_fielddef *f,
                        int wire_type, int dest) {
  uint64_t tag = get_encoded_tag(f, wire_type);
  const int size = upb_value_size(tag);
  int op;

  if (size == 1) {
    op = OP_TAG1;
  } else if (size == 2) {
    op = OP_TAG2;
  } else {
    op = OP_TAGN;
  }
  putop(c, op, dest, tag);
}
/* Looks up the selector for handler |type| on field |f|.  The lookup is
 * asserted to succeed. */
static upb_selector_t getsel(const upb_fielddef *f, upb_handlertype_t type) {
  upb_selector_t sel;
  bool found = upb_handlers_getselector(f, type, &sel);

  UPB_ASSERT(found);
  return sel;
}
/* Rebuilds a primary dispatch-table entry so that it also records |new_wt2|
 * as its alternate wire type.  The offset and primary wire type are carried
 * over unchanged.  Used when a secondary entry is being added for a field
 * that already has a primary one. */
static uint64_t repack(uint64_t dispatch, int new_wt2) {
  uint64_t target_ofs;
  uint8_t primary_wt;
  uint8_t prev_wt2;

  upb_pbdecoder_unpackdispatch(dispatch, &target_ofs, &primary_wt, &prev_wt2);
  /* wt2 should not be set yet. */
  UPB_ASSERT(prev_wt2 == NO_WIRE_TYPE);
  return upb_pbdecoder_packdispatch(target_ofs, primary_wt, new_wt2);
}
/* Records the current bytecode position as the place to jump to when
 * |method|'s message sees field |f| with |wire_type|.
 *
 * The first registration for a field number creates its primary entry.  A
 * second registration repacks the primary entry with |wire_type| as its
 * alternate wire type and stores the new offset under
 * field number + UPB_MAX_FIELDNUMBER. */
static void dispatchtarget(compiler *c, upb_pbdecodermethod *method,
                           const upb_fielddef *f, int wire_type) {
  /* Offset is relative to msg base. */
  uint64_t ofs = pcofs(c) - method->code_base.ofs;
  uint32_t fn = upb_fielddef_number(f);
  upb_inttable *table = &method->dispatch;
  upb_value prev;

  if (!upb_inttable_remove(table, fn, &prev)) {
    uint64_t primary =
        upb_pbdecoder_packdispatch(ofs, wire_type, NO_WIRE_TYPE);
    upb_inttable_insert(table, fn, upb_value_uint64(primary));
    return;
  }

  {
    /* TODO: prioritize based on packed setting in .proto file. */
    uint64_t repacked = repack(upb_value_getuint64(prev), wire_type);
    upb_inttable_insert(table, fn, upb_value_uint64(repacked));
    upb_inttable_insert(table, fn + UPB_MAX_FIELDNUMBER,
                        upb_value_uint64(ofs));
  }
}
/* Emits the "push" instruction that opens a submessage for field |f|.
 * MESSAGE-typed fields get OP_PUSHLENDELIM.  Other fields get
 * OP_PUSHTAGDELIM carrying the field number, except that numbers of 1 << 24
 * or more are emitted as OP_PUSHTAGDELIM 0 followed by OP_SETBIGGROUPNUM. */
static void putpush(compiler *c, const upb_fielddef *f) {
  uint32_t fn;

  if (upb_fielddef_descriptortype(f) == UPB_DESCRIPTOR_TYPE_MESSAGE) {
    putop(c, OP_PUSHLENDELIM);
    return;
  }

  fn = upb_fielddef_number(f);
  if (fn < (1 << 24)) {
    putop(c, OP_PUSHTAGDELIM, fn);
    return;
  }

  putop(c, OP_PUSHTAGDELIM, 0);
  putop(c, OP_SETBIGGROUPNUM, fn);
}
/* Returns the decoder method registered in the compiler's group for the
 * sub-handlers of field |f|, or NULL when the group has no such method. */
static upb_pbdecodermethod *find_submethod(const compiler *c,
                                           const upb_pbdecodermethod *method,
                                           const upb_fielddef *f) {
  upb_value found;
  const upb_handlers *sub =
      upb_handlers_getsubhandlers(method->dest_handlers_, f);

  if (!upb_inttable_lookupptr(&c->group->methods, sub, &found)) {
    return NULL;
  }
  return upb_value_getptr(found);
}
/* Emits |op| with selector |sel|, but only when |h| has a handler registered
 * for that selector; otherwise nothing is emitted. */
static void putsel(compiler *c, opcode op, upb_selector_t sel,
                   const upb_handlers *h) {
  if (!upb_handlers_gethandler(h, sel, NULL)) {
    return;
  }
  putop(c, op, sel);
}
/* Emits |op| for the (field, handler type) pair only if |h| actually has a
 * callback registered for it. */
static void maybeput(compiler *c, opcode op, const upb_handlers *h,
                     const upb_fielddef *f, upb_handlertype_t type) {
  upb_selector_t sel = getsel(f, type);
  putsel(c, op, sel, h);
}
/* True when field |f| is marked lazy and |h| has at least one of the
 * STARTSTR, STRING or ENDSTR handlers registered for it. */
static bool haslazyhandlers(const upb_handlers *h, const upb_fielddef *f) {
  if (!upb_fielddef_lazy(f)) {
    return false;
  }
  if (upb_handlers_gethandler(h, getsel(f, UPB_HANDLER_STARTSTR), NULL)) {
    return true;
  }
  if (upb_handlers_gethandler(h, getsel(f, UPB_HANDLER_STRING), NULL)) {
    return true;
  }
  if (upb_handlers_gethandler(h, getsel(f, UPB_HANDLER_ENDSTR), NULL)) {
    return true;
  }
  return false;
}
/* bytecode compiler code generation ******************************************/
/* Symbolic names for our local labels. */
#define LABEL_LOOPSTART 1 /* Top of a repeated field loop. */
#define LABEL_LOOPBREAK 2 /* To jump out of a repeated loop */
#define LABEL_FIELD 3 /* Jump backward to find the most recent field. */
#define LABEL_ENDMSG 4 /* To reach the OP_ENDMSG instr for this msg. */
/* Generates bytecode to parse a single non-lazy message field.
 *
 * Nothing is emitted when the group has no decoder method for the field's
 * sub-handlers.  Otherwise the field is decoded by pushing a delimiter,
 * calling the sub-message's method with OP_CALL and popping again.  The wire
 * type is DELIMITED for MESSAGE-typed fields and START_GROUP for the rest.
 * Repeated fields wrap that sequence in a STARTSEQ/ENDSEQ loop that continues
 * while the next tag matches this field. */
static void generate_msgfield(compiler *c, const upb_fielddef *f,
upb_pbdecodermethod *method) {
const upb_handlers *h = upb_pbdecodermethod_desthandlers(method);
const upb_pbdecodermethod *sub_m = find_submethod(c, method, f);
int wire_type;
if (!sub_m) {
/* Don't emit any code for this field at all; it will be parsed as an
* unknown field.
*
* TODO(haberman): we should change this to parse it as a string field
* instead. It will probably be faster, but more importantly, once we
* start vending unknown fields, a field shouldn't be treated as unknown
* just because it doesn't have subhandlers registered. */
return;
}
label(c, LABEL_FIELD);
wire_type =
(upb_fielddef_descriptortype(f) == UPB_DESCRIPTOR_TYPE_MESSAGE)
? UPB_WIRE_TYPE_DELIMITED
: UPB_WIRE_TYPE_START_GROUP;
if (upb_fielddef_isseq(f)) {
putop(c, OP_CHECKDELIM, LABEL_ENDMSG);
putchecktag(c, f, wire_type, LABEL_DISPATCH);
/* The dispatch table entry points just past the tag check. */
dispatchtarget(c, method, f, wire_type);
putop(c, OP_PUSHTAGDELIM, 0);
putop(c, OP_STARTSEQ, getsel(f, UPB_HANDLER_STARTSEQ));
label(c, LABEL_LOOPSTART);
putpush(c, f);
putop(c, OP_STARTSUBMSG, getsel(f, UPB_HANDLER_STARTSUBMSG));
putop(c, OP_CALL, sub_m);
putop(c, OP_POP);
maybeput(c, OP_ENDSUBMSG, h, f, UPB_HANDLER_ENDSUBMSG);
if (wire_type == UPB_WIRE_TYPE_DELIMITED) {
putop(c, OP_SETDELIM);
}
/* Leave the loop at the delimiter or when the next tag is not this field;
 * otherwise go round again. */
putop(c, OP_CHECKDELIM, LABEL_LOOPBREAK);
putchecktag(c, f, wire_type, LABEL_LOOPBREAK);
putop(c, OP_BRANCH, -LABEL_LOOPSTART);
label(c, LABEL_LOOPBREAK);
putop(c, OP_POP);
maybeput(c, OP_ENDSEQ, h, f, UPB_HANDLER_ENDSEQ);
} else {
putop(c, OP_CHECKDELIM, LABEL_ENDMSG);
putchecktag(c, f, wire_type, LABEL_DISPATCH);
dispatchtarget(c, method, f, wire_type);
putpush(c, f);
putop(c, OP_STARTSUBMSG, getsel(f, UPB_HANDLER_STARTSUBMSG));
putop(c, OP_CALL, sub_m);
putop(c, OP_POP);
maybeput(c, OP_ENDSUBMSG, h, f, UPB_HANDLER_ENDSUBMSG);
if (wire_type == UPB_WIRE_TYPE_DELIMITED) {
putop(c, OP_SETDELIM);
}
}
}
/* Generates bytecode to parse a single string or lazy submessage field.
 *
 * The field is always matched with the DELIMITED wire type and delivered
 * through the STARTSTR / STRING / ENDSTR selectors.  Repeated fields wrap
 * that sequence in a STARTSEQ/ENDSEQ loop that continues while the next tag
 * matches this field. */
static void generate_delimfield(compiler *c, const upb_fielddef *f,
upb_pbdecodermethod *method) {
const upb_handlers *h = upb_pbdecodermethod_desthandlers(method);
label(c, LABEL_FIELD);
if (upb_fielddef_isseq(f)) {
putop(c, OP_CHECKDELIM, LABEL_ENDMSG);
putchecktag(c, f, UPB_WIRE_TYPE_DELIMITED, LABEL_DISPATCH);
/* The dispatch table entry points just past the tag check. */
dispatchtarget(c, method, f, UPB_WIRE_TYPE_DELIMITED);
putop(c, OP_PUSHTAGDELIM, 0);
putop(c, OP_STARTSEQ, getsel(f, UPB_HANDLER_STARTSEQ));
label(c, LABEL_LOOPSTART);
putop(c, OP_PUSHLENDELIM);
putop(c, OP_STARTSTR, getsel(f, UPB_HANDLER_STARTSTR));
/* Need to emit even if no handler to skip past the string. */
putop(c, OP_STRING, getsel(f, UPB_HANDLER_STRING));
maybeput(c, OP_ENDSTR, h, f, UPB_HANDLER_ENDSTR);
putop(c, OP_POP);
putop(c, OP_SETDELIM);
/* Leave the loop at the delimiter or when the next tag is not this field;
 * otherwise go round again. */
putop(c, OP_CHECKDELIM, LABEL_LOOPBREAK);
putchecktag(c, f, UPB_WIRE_TYPE_DELIMITED, LABEL_LOOPBREAK);
putop(c, OP_BRANCH, -LABEL_LOOPSTART);
label(c, LABEL_LOOPBREAK);
putop(c, OP_POP);
maybeput(c, OP_ENDSEQ, h, f, UPB_HANDLER_ENDSEQ);
} else {
putop(c, OP_CHECKDELIM, LABEL_ENDMSG);
putchecktag(c, f, UPB_WIRE_TYPE_DELIMITED, LABEL_DISPATCH);
dispatchtarget(c, method, f, UPB_WIRE_TYPE_DELIMITED);
putop(c, OP_PUSHLENDELIM);
putop(c, OP_STARTSTR, getsel(f, UPB_HANDLER_STARTSTR));
putop(c, OP_STRING, getsel(f, UPB_HANDLER_STRING));
maybeput(c, OP_ENDSTR, h, f, UPB_HANDLER_ENDSTR);
putop(c, OP_POP);
putop(c, OP_SETDELIM);
}
}
/* Generates bytecode to parse a single primitive field.
 *
 * The parse opcode is the field's descriptor type cast to an opcode (ENUM is
 * first mapped to INT32).  Repeated fields get two entry points that share
 * one tail: a packed loop registered under the DELIMITED wire type, and a
 * non-packed loop registered under the field's native wire type. */
static void generate_primitivefield(compiler *c, const upb_fielddef *f,
upb_pbdecodermethod *method) {
const upb_handlers *h = upb_pbdecodermethod_desthandlers(method);
upb_descriptortype_t descriptor_type = upb_fielddef_descriptortype(f);
opcode parse_type;
upb_selector_t sel;
int wire_type;
label(c, LABEL_FIELD);
/* From a decoding perspective, ENUM is the same as INT32. */
if (descriptor_type == UPB_DESCRIPTOR_TYPE_ENUM)
descriptor_type = UPB_DESCRIPTOR_TYPE_INT32;
parse_type = (opcode)descriptor_type;
/* TODO(haberman): generate packed or non-packed first depending on "packed"
* setting in the fielddef. This will favor (in speed) whichever was
* specified. */
UPB_ASSERT((int)parse_type >= 0 && parse_type <= OP_MAX);
sel = getsel(f, upb_handlers_getprimitivehandlertype(f));
/* The native wire type is looked up from the field's original descriptor
 * type, not the ENUM->INT32 adjusted local above. */
wire_type = upb_pb_native_wire_types[upb_fielddef_descriptortype(f)];
if (upb_fielddef_isseq(f)) {
putop(c, OP_CHECKDELIM, LABEL_ENDMSG);
putchecktag(c, f, UPB_WIRE_TYPE_DELIMITED, LABEL_DISPATCH);
dispatchtarget(c, method, f, UPB_WIRE_TYPE_DELIMITED);
putop(c, OP_PUSHLENDELIM);
putop(c, OP_STARTSEQ, getsel(f, UPB_HANDLER_STARTSEQ)); /* Packed */
label(c, LABEL_LOOPSTART);
putop(c, parse_type, sel);
putop(c, OP_CHECKDELIM, LABEL_LOOPBREAK);
putop(c, OP_BRANCH, -LABEL_LOOPSTART);
/* Second entry point for the same field.  LABEL_LOOPSTART is redefined
 * below, so the packed loop's backward branch above keeps its target. */
dispatchtarget(c, method, f, wire_type);
putop(c, OP_PUSHTAGDELIM, 0);
putop(c, OP_STARTSEQ, getsel(f, UPB_HANDLER_STARTSEQ)); /* Non-packed */
label(c, LABEL_LOOPSTART);
putop(c, parse_type, sel);
putop(c, OP_CHECKDELIM, LABEL_LOOPBREAK);
putchecktag(c, f, wire_type, LABEL_LOOPBREAK);
putop(c, OP_BRANCH, -LABEL_LOOPSTART);
label(c, LABEL_LOOPBREAK);
putop(c, OP_POP); /* Packed and non-packed join. */
maybeput(c, OP_ENDSEQ, h, f, UPB_HANDLER_ENDSEQ);
putop(c, OP_SETDELIM); /* Could remove for non-packed by dup ENDSEQ. */
} else {
putop(c, OP_CHECKDELIM, LABEL_ENDMSG);
putchecktag(c, f, wire_type, LABEL_DISPATCH);
dispatchtarget(c, method, f, wire_type);
putop(c, parse_type, sel);
}
}
/* Adds bytecode for parsing the given message to the compiler's method
 * group, while adding all dispatch targets to this message's dispatch table.
 *
 * The method's dispatch table is emptied first, and its code_base.ofs is set
 * to the current PC.  The emitted code is: OP_SETDISPATCH, an optional
 * OP_STARTMSG, one stanza per field, a branch back to the most recent field
 * label, then the end-of-message sequence (optional OP_ENDMSG, OP_RET). */
static void compile_method(compiler *c, upb_pbdecodermethod *method) {
const upb_handlers *h;
const upb_msgdef *md;
uint32_t* start_pc;
upb_msg_field_iter i;
upb_value val;
UPB_ASSERT(method);
/* Clear all entries in the dispatch table. */
upb_inttable_uninit(&method->dispatch);
upb_inttable_init(&method->dispatch, UPB_CTYPE_UINT64);
h = upb_pbdecodermethod_desthandlers(method);
md = upb_handlers_msgdef(h);
method->code_base.ofs = pcofs(c);
putop(c, OP_SETDISPATCH, &method->dispatch);
putsel(c, OP_STARTMSG, UPB_STARTMSG_SELECTOR, h);
label(c, LABEL_FIELD);
/* Remember where field code starts so an empty body can be detected. */
start_pc = c->pc;
for(upb_msg_field_begin(&i, md);
!upb_msg_field_done(&i);
upb_msg_field_next(&i)) {
const upb_fielddef *f = upb_msg_iter_field(&i);
upb_fieldtype_t type = upb_fielddef_type(f);
/* Message fields use the submessage path unless lazy parsing is enabled
 * and the field has lazy (string-style) handlers; those, along with
 * strings and bytes, use the delimited path. */
if (type == UPB_TYPE_MESSAGE && !(haslazyhandlers(h, f) && c->lazy)) {
generate_msgfield(c, f, method);
} else if (type == UPB_TYPE_STRING || type == UPB_TYPE_BYTES ||
type == UPB_TYPE_MESSAGE) {
generate_delimfield(c, f, method);
} else {
generate_primitivefield(c, f, method);
}
}
/* If there were no fields, or if no handlers were defined, we need to
* generate a non-empty loop body so that we can at least dispatch for unknown
* fields and check for the end of the message. */
if (c->pc == start_pc) {
/* Check for end-of-message. */
putop(c, OP_CHECKDELIM, LABEL_ENDMSG);
/* Unconditionally dispatch. */
putop(c, OP_DISPATCH, 0);
}
/* For now we just loop back to the last field of the message (or if none,
* the DISPATCH opcode for the message). */
putop(c, OP_BRANCH, -LABEL_FIELD);
/* Insert both a label and a dispatch table entry for this end-of-msg. */
label(c, LABEL_ENDMSG);
val = upb_value_uint64(pcofs(c) - method->code_base.ofs);
upb_inttable_insert(&method->dispatch, DISPATCH_ENDMSG, val);
putsel(c, OP_ENDMSG, UPB_ENDMSG_SELECTOR, h);
putop(c, OP_RET);
upb_inttable_compact(&method->dispatch);
}
/* Populates the compiler group's method table with a new
 * upb_pbdecodermethod for "h" and, recursively, for every sub-handlers object
 * reachable from "h" through its message-typed fields.
 *
 * Handlers that already have a method in the table are skipped, which also
 * terminates the recursion for self-referential message types. */
static void find_methods(compiler *c, const upb_handlers *h) {
  upb_value existing;
  upb_msg_field_iter it;
  const upb_msgdef *md;
  upb_pbdecodermethod *method;

  if (upb_inttable_lookupptr(&c->group->methods, h, &existing)) {
    return;
  }

  /* Register before recursing so cycles find the entry above. */
  method = newmethod(h, c->group);
  upb_inttable_insertptr(&c->group->methods, h, upb_value_ptr(method));

  /* Find submethods. */
  md = upb_handlers_msgdef(h);
  for (upb_msg_field_begin(&it, md); !upb_msg_field_done(&it);
       upb_msg_field_next(&it)) {
    const upb_fielddef *f = upb_msg_iter_field(&it);
    const upb_handlers *sub_h;

    if (upb_fielddef_type(f) != UPB_TYPE_MESSAGE) {
      continue;
    }
    /* We only generate a decoder method for submessages with handlers.
     * Others will be parsed as unknown fields. */
    sub_h = upb_handlers_getsubhandlers(h, f);
    if (sub_h == NULL) {
      continue;
    }
    find_methods(c, sub_h);
  }
}
/* (Re-)compiles every method in the compiler's group, in table iteration
 * order.  The PC is rewound to the start of the bytecode buffer first, so any
 * previously generated bytecode is overwritten. */
static void compile_methods(compiler *c) {
  upb_inttable_iter it;

  /* Start over at the beginning of the bytecode. */
  c->pc = c->group->bytecode;

  upb_inttable_begin(&it, &c->group->methods);
  while (!upb_inttable_done(&it)) {
    upb_pbdecodermethod *m = upb_value_getptr(upb_inttable_iter_value(&it));
    compile_method(c, m);
    upb_inttable_next(&it);
  }
}
/* Finalizes every method in |g| once the bytecode is complete: converts the
 * method's code_base from a word offset into an absolute pointer into the
 * bytecode, and installs the start/string/end callbacks on its input
 * handler. */
static void set_bytecode_handlers(mgroup *g) {
  upb_inttable_iter it;

  for (upb_inttable_begin(&it, &g->methods); !upb_inttable_done(&it);
       upb_inttable_next(&it)) {
    upb_pbdecodermethod *method =
        upb_value_getptr(upb_inttable_iter_value(&it));
    upb_byteshandler *input = &method->input_handler_;

    method->code_base.ptr = g->bytecode + method->code_base.ofs;
    upb_byteshandler_setstartstr(input, upb_pbdecoder_startbc,
                                 method->code_base.ptr);
    upb_byteshandler_setstring(input, upb_pbdecoder_decode, g);
    upb_byteshandler_setendstr(input, upb_pbdecoder_end, method);
  }
}
/* Builds a method group containing bytecode for |dest| and for every
 * sub-handlers object reachable from it.  |lazy| selects whether fields with
 * lazy handlers are parsed lazily.  The returned group is released with
 * freegroup().
 *
 * When UPB_DUMP_BYTECODE is defined, a textual listing is written to stderr
 * and /tmp/upb-bytecode, and the raw bytecode to /tmp/upb-bytecode.bin.
 *
 * TODO(haberman): allow this to be constructed for an arbitrary set of dest
 * handlers and other mgroups (but verify we have a transitive closure). */
const mgroup *mgroup_new(const upb_handlers *dest, bool lazy) {
  mgroup *g;
  compiler *c;

  g = newgroup();
  c = newcompiler(g, lazy);
  find_methods(c, dest);

  /* We compile in two passes:
   * 1. all messages are assigned relative offsets from the beginning of the
   *    bytecode (saved in method->code_base).
   * 2. forward OP_CALL instructions can be correctly linked since message
   *    offsets have been previously assigned.
   *
   * Could avoid the second pass by linking OP_CALL instructions somehow. */
  compile_methods(c);
  compile_methods(c);
  g->bytecode_end = c->pc;
  freecompiler(c);

#ifdef UPB_DUMP_BYTECODE
  {
    FILE *f = fopen("/tmp/upb-bytecode", "w");
    UPB_ASSERT(f);
    dumpbc(g->bytecode, g->bytecode_end, stderr);
    dumpbc(g->bytecode, g->bytecode_end, f);
    fclose(f);

    f = fopen("/tmp/upb-bytecode.bin", "wb");
    UPB_ASSERT(f);
    /* bytecode_end - bytecode counts 32-bit words, so the element size must
     * be the word size; an element size of 1 would write only a quarter of
     * the bytecode. */
    fwrite(g->bytecode, sizeof(*g->bytecode), g->bytecode_end - g->bytecode,
           f);
    fclose(f);
  }
#endif

  set_bytecode_handlers(g);
  return g;
}
/* upb_pbcodecache ************************************************************/
/* Creates an empty code cache whose decoder methods will push data to
 * handlers obtained from |dest|.  Lazy parsing starts disabled.
 *
 * Returns NULL if any allocation fails; in that case everything allocated so
 * far is released again.  A non-NULL result is destroyed with
 * upb_pbcodecache_free(). */
upb_pbcodecache *upb_pbcodecache_new(upb_handlercache *dest) {
  upb_pbcodecache *c = upb_gmalloc(sizeof(*c));
  if (!c) return NULL;

  c->dest = dest;
  c->lazy = false;

  c->arena = upb_arena_new();
  if (!c->arena) {
    upb_gfree(c);
    return NULL;
  }

  if (!upb_inttable_init(&c->groups, UPB_CTYPE_CONSTPTR)) {
    /* Don't leak the arena and the cache object on failure. */
    upb_arena_free(c->arena);
    upb_gfree(c);
    return NULL;
  }

  return c;
}
/* Destroys a code cache: frees every cached method group, then the group
 * table, the arena and the cache object itself. */
void upb_pbcodecache_free(upb_pbcodecache *c) {
  upb_inttable_iter it;

  upb_inttable_begin(&it, &c->groups);
  while (!upb_inttable_done(&it)) {
    upb_value entry = upb_inttable_iter_value(&it);
    /* Groups are stored as const pointers; cast away const to free them. */
    freegroup((void*)upb_value_getconstptr(entry));
    upb_inttable_next(&it);
  }

  upb_inttable_uninit(&c->groups);
  upb_arena_free(c->arena);
  upb_gfree(c);
}
/* Selects whether fields with lazy handlers are parsed lazily.  Must be
 * called before any decoder method has been generated by this cache (the
 * group table is asserted to be empty).
 *
 * This is the name declared in upb/pb/decoder.h and used by
 * upb::pb::CodeCache::set_lazy(). */
void upb_pbcodecache_setlazy(upb_pbcodecache *c, bool lazy) {
  UPB_ASSERT(upb_inttable_count(&c->groups) == 0);
  c->lazy = lazy;
}

/* Older spelling of upb_pbcodecache_setlazy(), kept so that existing callers
 * continue to link. */
void upb_pbdecodermethodopts_setlazy(upb_pbcodecache *c, bool lazy) {
  upb_pbcodecache_setlazy(c, lazy);
}
/* Returns the decoder method for message |md|.  The method group for |md| is
 * built and cached on first use; later calls reuse the cached group.  The
 * method for |md|'s own handlers is then looked up inside that group. */
const upb_pbdecodermethod *upb_pbcodecache_get(upb_pbcodecache *c,
                                               const upb_msgdef *md) {
  upb_value v;
  bool ok;
  const mgroup *g;
  const upb_handlers *h = upb_handlercache_get(c->dest, md);

  if (!upb_inttable_lookupptr(&c->groups, md, &v)) {
    g = mgroup_new(h, c->lazy);
    ok = upb_inttable_insertptr(&c->groups, md, upb_value_constptr(g));
    UPB_ASSERT(ok);
  } else {
    g = upb_value_getconstptr(v);
  }

  ok = upb_inttable_lookupptr(&g->methods, h, &v);
  UPB_ASSERT(ok);
  return upb_value_getptr(v);
}

File diff suppressed because it is too large Load Diff

@ -0,0 +1,240 @@
/*
** upb::pb::Decoder
**
** A high performance, streaming, resumable decoder for the binary protobuf
** format.
**
** This interface works the same regardless of what decoder backend is being
** used. A client of this class does not need to know whether decoding is using
** a JITted decoder (DynASM, LLVM, etc) or an interpreted decoder. By default,
** it will always use the fastest available decoder. However, you can call
** set_allow_jit(false) to disable any JIT decoder that might be available.
** This is primarily useful for testing purposes.
*/
#ifndef UPB_DECODER_H_
#define UPB_DECODER_H_
#include "upb/sink.h"
#ifdef __cplusplus
namespace upb {
namespace pb {
class CodeCache;
class DecoderPtr;
class DecoderMethodPtr;
class DecoderMethodOptions;
} /* namespace pb */
} /* namespace upb */
#endif
/* The maximum number of bytes we are required to buffer internally between
* calls to the decoder. The value is 14: a 5 byte unknown tag plus ten-byte
* varint, less one because we are buffering an incomplete value.
*
* Should only be used by unit tests. */
#define UPB_DECODER_MAX_RESIDUAL_BYTES 14
/* upb_pbdecodermethod ********************************************************/
struct upb_pbdecodermethod;
typedef struct upb_pbdecodermethod upb_pbdecodermethod;
#ifdef __cplusplus
extern "C" {
#endif
const upb_handlers *upb_pbdecodermethod_desthandlers(
const upb_pbdecodermethod *m);
const upb_byteshandler *upb_pbdecodermethod_inputhandler(
const upb_pbdecodermethod *m);
bool upb_pbdecodermethod_isnative(const upb_pbdecodermethod *m);
#ifdef __cplusplus
} /* extern "C" */
/* Represents the code to parse a protobuf according to a destination
 * Handlers.
 *
 * This is a non-owning wrapper around a const upb_pbdecodermethod pointer; a
 * default-constructed instance wraps nullptr. */
class upb::pb::DecoderMethodPtr {
 public:
  DecoderMethodPtr() : ptr_(nullptr) {}
  DecoderMethodPtr(const upb_pbdecodermethod* ptr) : ptr_(ptr) {}

  /* The wrapped C pointer.  Const-qualified so that it can be called on the
   * `const DecoderMethodPtr` values returned by DecoderPtr::method() and
   * CodeCache::Get(). */
  const upb_pbdecodermethod* ptr() const { return ptr_; }

  /* The destination handlers that are statically bound to this method.
   * This method is only capable of outputting to a sink that uses these
   * handlers. */
  const Handlers *dest_handlers() const {
    return upb_pbdecodermethod_desthandlers(ptr_);
  }

  /* The input handlers for this decoder method. */
  const BytesHandler* input_handler() const {
    return upb_pbdecodermethod_inputhandler(ptr_);
  }

  /* Whether this method is native. */
  bool is_native() const {
    return upb_pbdecodermethod_isnative(ptr_);
  }

 private:
  const upb_pbdecodermethod* ptr_;
};
#endif
/* upb_pbdecoder **************************************************************/
/* Preallocation hint: decoder won't allocate more bytes than this when first
* constructed. This hint may be an overestimate for some build configurations.
* But if the decoder library is upgraded without recompiling the application,
* it may be an underestimate. */
#define UPB_PB_DECODER_SIZE 4416
struct upb_pbdecoder;
typedef struct upb_pbdecoder upb_pbdecoder;
#ifdef __cplusplus
extern "C" {
#endif
upb_pbdecoder *upb_pbdecoder_create(upb_arena *arena,
const upb_pbdecodermethod *method,
upb_sink output, upb_status *status);
const upb_pbdecodermethod *upb_pbdecoder_method(const upb_pbdecoder *d);
upb_bytessink upb_pbdecoder_input(upb_pbdecoder *d);
uint64_t upb_pbdecoder_bytesparsed(const upb_pbdecoder *d);
size_t upb_pbdecoder_maxnesting(const upb_pbdecoder *d);
bool upb_pbdecoder_setmaxnesting(upb_pbdecoder *d, size_t max);
void upb_pbdecoder_reset(upb_pbdecoder *d);
#ifdef __cplusplus
} /* extern "C" */
/* A Decoder receives binary protobuf data on its input sink and pushes the
 * decoded data to its output sink.
 *
 * This is a non-owning wrapper around a upb_pbdecoder pointer; a
 * default-constructed instance wraps nullptr. */
class upb::pb::DecoderPtr {
 public:
  DecoderPtr() : ptr_(nullptr) {}
  DecoderPtr(upb_pbdecoder* ptr) : ptr_(ptr) {}

  upb_pbdecoder* ptr() { return ptr_; }

  /* Constructs a decoder instance for the given method, which must outlive this
   * decoder.  Any errors during parsing will be set on the given status, which
   * must also outlive this decoder.
   *
   * The sink must match the given method. */
  static DecoderPtr Create(Arena *arena, DecoderMethodPtr method,
                           upb::Sink output, Status *status) {
    return DecoderPtr(upb_pbdecoder_create(arena->ptr(), method.ptr(),
                                           output.sink(), status->ptr()));
  }

  /* Returns the DecoderMethod this decoder is parsing from. */
  const DecoderMethodPtr method() const {
    return DecoderMethodPtr(upb_pbdecoder_method(ptr_));
  }

  /* The sink on which this decoder receives input. */
  BytesSink input() { return BytesSink(upb_pbdecoder_input(ptr())); }

  /* Returns number of bytes successfully parsed.
   *
   * This can be useful for determining the stream position where an error
   * occurred.
   *
   * This value may not be up-to-date when called from inside a parsing
   * callback. */
  uint64_t BytesParsed() { return upb_pbdecoder_bytesparsed(ptr()); }

  /* Gets/sets the parsing nesting limit.  If the total number of nested
   * submessages and repeated fields hits this limit, parsing will fail.  This
   * is a resource limit that controls the amount of memory used by the parsing
   * stack.
   *
   * Setting the limit will fail if the parser is currently suspended at a depth
   * greater than this, or if memory allocation of the stack fails. */
  size_t max_nesting() { return upb_pbdecoder_maxnesting(ptr()); }

  /* Forwards to the C setter and returns its success flag.  (Calling the
   * getter here would ignore |max| and report the current limit as a bool.) */
  bool set_max_nesting(size_t max) {
    return upb_pbdecoder_setmaxnesting(ptr(), max);
  }

  void Reset() { upb_pbdecoder_reset(ptr()); }

  static const size_t kSize = UPB_PB_DECODER_SIZE;

 private:
  upb_pbdecoder *ptr_;
};
#endif /* __cplusplus */
/* upb_pbcodecache ************************************************************/
/* Lazily builds and caches decoder methods that will push data to the given
* handlers. The destination handlercache must outlive this object. */
struct upb_pbcodecache;
typedef struct upb_pbcodecache upb_pbcodecache;
#ifdef __cplusplus
extern "C" {
#endif
upb_pbcodecache *upb_pbcodecache_new(upb_handlercache *dest);
void upb_pbcodecache_free(upb_pbcodecache *c);
bool upb_pbcodecache_allowjit(const upb_pbcodecache *c);
void upb_pbcodecache_setallowjit(upb_pbcodecache *c, bool allow);
void upb_pbcodecache_setlazy(upb_pbcodecache *c, bool lazy);
const upb_pbdecodermethod *upb_pbcodecache_get(upb_pbcodecache *c,
const upb_msgdef *md);
#ifdef __cplusplus
} /* extern "C" */
/* Owning C++ wrapper around a upb_pbcodecache, which caches protobuf
 * processing code -- bytecode for the interpreted decoder or machine code for
 * the JIT.  The underlying cache is released with upb_pbcodecache_free() when
 * this object is destroyed.  Movable, not copyable.
 *
 * This class is not thread-safe. */
class upb::pb::CodeCache {
 public:
  CodeCache(upb::HandlerCache *dest)
      : ptr_(upb_pbcodecache_new(dest->ptr()), upb_pbcodecache_free) {}

  CodeCache(CodeCache&&) = default;
  CodeCache& operator=(CodeCache&&) = default;

  /* Access to the wrapped C object. */
  upb_pbcodecache* ptr() { return ptr_.get(); }
  const upb_pbcodecache* ptr() const { return ptr_.get(); }

  /* Whether the cache is allowed to generate machine code.  Defaults to true.
   * There is no real reason to turn it off except for testing or if you are
   * having a specific problem with the JIT.
   *
   * Note that allow_jit = true does not *guarantee* that the code will be JIT
   * compiled.  If this platform is not supported or the JIT was not compiled
   * in, the code may still be interpreted. */
  bool allow_jit() const { return upb_pbcodecache_allowjit(ptr()); }

  /* This may only be called when the object is first constructed, and prior to
   * any code generation. */
  void set_allow_jit(bool allow) {
    upb_pbcodecache_setallowjit(ptr(), allow);
  }

  /* Should the decoder push submessages to lazy handlers for fields that have
   * them?  The caller should set this iff the lazy handlers expect data that is
   * in protobuf binary format and the caller wishes to lazy parse it. */
  void set_lazy(bool lazy) {
    upb_pbcodecache_setlazy(ptr(), lazy);
  }

  /* Returns a DecoderMethod that can push data to the handlers for the given
   * message definition.  If a suitable method already exists, it will be
   * returned from the cache. */
  const DecoderMethodPtr Get(MessageDefPtr md) {
    return DecoderMethodPtr(upb_pbcodecache_get(ptr(), md.ptr()));
  }

 private:
  std::unique_ptr<upb_pbcodecache, decltype(&upb_pbcodecache_free)> ptr_;
};
#endif /* __cplusplus */
#endif /* UPB_DECODER_H_ */

@ -0,0 +1,288 @@
/*
** Internal-only definitions for the decoder.
*/
#ifndef UPB_DECODER_INT_H_
#define UPB_DECODER_INT_H_
#include "upb/def.h"
#include "upb/handlers.h"
#include "upb/pb/decoder.h"
#include "upb/sink.h"
#include "upb/table.int.h"
#include "upb/port_def.inc"
/* Opcode definitions. The canonical meaning of each opcode is its
 * implementation in the interpreter (the JIT is written to match this).
 *
 * All instructions have the opcode in the low byte.
 * Instruction format for most instructions is:
 *
 * +-------------------+--------+
 * | arg (24) | op (8) |
 * +-------------------+--------+
 *
 * Exceptions are indicated below. A few opcodes are multi-word. */
typedef enum {
/* Opcodes 1-8, 13, 15-18 parse their respective descriptor types.
 * Arg for all of these is the upb selector for this field.
 *
 * The T() helper pins each OP_PARSE_<type> enumerator to the numeric value
 * of UPB_DESCRIPTOR_TYPE_<type>, so the explicitly numbered opcodes below
 * must not collide with those values. */
#define T(type) OP_PARSE_ ## type = UPB_DESCRIPTOR_TYPE_ ## type
T(DOUBLE), T(FLOAT), T(INT64), T(UINT64), T(INT32), T(FIXED64), T(FIXED32),
T(BOOL), T(UINT32), T(SFIXED32), T(SFIXED64), T(SINT32), T(SINT64),
#undef T
OP_STARTMSG = 9, /* No arg. */
OP_ENDMSG = 10, /* No arg. */
OP_STARTSEQ = 11,
OP_ENDSEQ = 12,
OP_STARTSUBMSG = 14,
OP_ENDSUBMSG = 19,
OP_STARTSTR = 20,
OP_STRING = 21,
OP_ENDSTR = 22,
OP_PUSHTAGDELIM = 23, /* No arg. */
OP_PUSHLENDELIM = 24, /* No arg. */
OP_POP = 25, /* No arg. */
OP_SETDELIM = 26, /* No arg. */
OP_SETBIGGROUPNUM = 27, /* two words:
* | unused (24) | opc (8) |
* | groupnum (32) | */
OP_CHECKDELIM = 28,
OP_CALL = 29,
OP_RET = 30,
OP_BRANCH = 31,
/* Different opcodes depending on how many bytes expected. */
OP_TAG1 = 32, /* | match tag (16) | jump target (8) | opc (8) | */
OP_TAG2 = 33, /* | match tag (16) | jump target (8) | opc (8) | */
OP_TAGN = 34, /* three words: */
/* | unused (16) | jump target(8) | opc (8) | */
/* | match tag 1 (32) | */
/* | match tag 2 (32) | */
OP_SETDISPATCH = 35, /* N words: */
/* | unused (24) | opc | */
/* | upb_inttable* (32 or 64) | */
OP_DISPATCH = 36, /* No arg. */
OP_HALT = 37 /* No arg. */
} opcode;
#define OP_MAX OP_HALT
/* Extracts the opcode, which every instruction stores in its low byte. */
UPB_INLINE opcode getop(uint32_t instr) {
  const uint32_t low_byte = instr & 0xff;
  return (opcode)low_byte;
}
/* State behind the public upb_pbcodecache handle. */
struct upb_pbcodecache {
/* NOTE(review): presumably the arena that backs this cache's allocations --
 * confirm against the implementation. */
upb_arena *arena;
/* Destination handler cache that decoder methods push data to. */
upb_handlercache *dest;
/* Settings controlled through the setallowjit / setlazy accessors. */
bool allow_jit;
bool lazy;
/* Map of upb_msgdef -> mgroup. */
upb_inttable groups;
};
/* Method group; represents a set of decoder methods that had their code
 * emitted together. Immutable once created. */
typedef struct {
/* Maps upb_msgdef/upb_handlers -> upb_pbdecodermethod. Owned by us.
 *
 * Ideally this would be on pbcodecache (if we were actually caching code).
 * Right now we don't actually cache anything, which is wasteful. */
upb_inttable methods;
/* The bytecode for our methods, if any exists. Owned by us.
 * NOTE(review): bytecode_end presumably points one past the last
 * instruction word -- confirm against the compiler. */
uint32_t *bytecode;
uint32_t *bytecode_end;
} mgroup;
/* The maximum that any submessages can be nested. Matches proto2's limit.
* This specifies the size of the decoder's statically-sized array and therefore
* setting it high will cause the upb::pb::Decoder object to be larger.
*
* If necessary we can add a runtime-settable property to Decoder that allow
* this to be larger than the compile-time setting, but this would add
* complexity, particularly since we would have to decide how/if to give users
* the ability to set a custom memory allocation function. */
#define UPB_DECODER_MAX_NESTING 64
/* Internal-only struct used by the decoder: one entry of the decoder's
 * frame stack. */
typedef struct {
/* Space optimization note: we store two pointers here that the JIT
 * doesn't need at all; the upb_handlers* inside the sink and
 * the dispatch table pointer. We can optimize so that the JIT uses
 * smaller stack frames than the interpreter. The only thing we need
 * to guarantee is that the fallback routines can find end_ofs. */
upb_sink sink;
/* The absolute stream offset of the end-of-frame delimiter.
 * Non-delimited frames (groups and non-packed repeated fields) reuse the
 * delimiter of their parent, even though the frame may not end there.
 *
 * NOTE: the JIT stores a slightly different value here for non-top frames.
 * It stores the value relative to the end of the enclosed message. But the
 * top frame is still stored the same way, which is important for ensuring
 * that calls from the JIT into C work correctly. */
uint64_t end_ofs;
/* NOTE(review): undocumented in this header; presumably the bytecode base
 * address associated with this frame -- confirm. */
const uint32_t *base;
/* 0 indicates a length-delimited field.
 * A positive number indicates a known group.
 * A negative number indicates an unknown group. */
int32_t groupnum;
upb_inttable *dispatch; /* Not used by the JIT. */
} upb_pbdecoder_frame;
/* A compiled decoder method: the code entry point plus the dispatch table
 * for one destination handlers object. */
struct upb_pbdecodermethod {
/* While compiling, the base is relative in "ofs", after compiling it is
 * absolute in "ptr". */
union {
uint32_t ofs; /* PC offset of method. */
void *ptr; /* Pointer to bytecode or machine code for this method. */
} code_base;
/* The decoder method group to which this method belongs. */
const mgroup *group;
/* Whether this method is native code or bytecode. */
bool is_native_;
/* The handler one calls to invoke this method. */
upb_byteshandler input_handler_;
/* The destination handlers this method is bound to. We own a ref. */
const upb_handlers *dest_handlers_;
/* Dispatch table -- used by both bytecode decoder and JIT when encountering a
 * field number that wasn't the one we were expecting to see. The layout of
 * each entry is described next to upb_pbdecoder_packdispatch() later in this
 * header. */
upb_inttable dispatch;
};
/* Full decoder state behind the public upb_pbdecoder handle. */
struct upb_pbdecoder {
upb_arena *arena;
/* Our input sink. */
upb_bytessink input_;
/* The decoder method we are parsing with (owned). */
const upb_pbdecodermethod *method_;
/* NOTE(review): call_len, pc and last are undocumented here; presumably the
 * current depth of "callstack" and the current / previous bytecode
 * positions -- confirm against decoder.c. */
size_t call_len;
const uint32_t *pc, *last;
/* Current input buffer and its stream offset. */
const char *buf, *ptr, *end, *checkpoint;
/* End of the delimited region, relative to ptr, NULL if not in this buf. */
const char *delim_end;
/* End of the delimited region, relative to ptr, end if not in this buf. */
const char *data_end;
/* Overall stream offset of "buf." */
uint64_t bufstart_ofs;
/* Buffer for residual bytes not parsed from the previous buffer. */
char residual[UPB_DECODER_MAX_RESIDUAL_BYTES];
char *residual_end;
/* Bytes of data that should be discarded from the input before we start
 * parsing again. We set this when we internally determine that we can
 * safely skip the next N bytes, but this region extends past the current
 * user buffer. */
size_t skip;
/* Stores the user buffer passed to our decode function. */
const char *buf_param;
size_t size_param;
const upb_bufhandle *handle;
/* Our internal stack: "stack" is the base, "top" the current frame and
 * "limit" the bound; "stack_size" is its capacity.
 * NOTE(review): whether "limit" is inclusive or one-past-the-end is not
 * visible in this header -- confirm. */
upb_pbdecoder_frame *stack, *top, *limit;
const uint32_t **callstack;
size_t stack_size;
upb_status *status;
};
/* Decoder entry points; used as handlers. */
void *upb_pbdecoder_startbc(void *closure, const void *pc, size_t size_hint);
size_t upb_pbdecoder_decode(void *closure, const void *hd, const char *buf,
size_t size, const upb_bufhandle *handle);
bool upb_pbdecoder_end(void *closure, const void *handler_data);
/* Decoder-internal functions that the JIT calls to handle fallback paths. */
int32_t upb_pbdecoder_resume(upb_pbdecoder *d, void *p, const char *buf,
size_t size, const upb_bufhandle *handle);
size_t upb_pbdecoder_suspend(upb_pbdecoder *d);
int32_t upb_pbdecoder_skipunknown(upb_pbdecoder *d, int32_t fieldnum,
uint8_t wire_type);
int32_t upb_pbdecoder_checktag_slow(upb_pbdecoder *d, uint64_t expected);
int32_t upb_pbdecoder_decode_varint_slow(upb_pbdecoder *d, uint64_t *u64);
int32_t upb_pbdecoder_decode_f32(upb_pbdecoder *d, uint32_t *u32);
int32_t upb_pbdecoder_decode_f64(upb_pbdecoder *d, uint64_t *u64);
void upb_pbdecoder_seterr(upb_pbdecoder *d, const char *msg);
/* Error messages that are shared between the bytecode and JIT decoders. */
extern const char *kPbDecoderStackOverflow;
extern const char *kPbDecoderSubmessageTooLong;
/* Access to decoderplan members needed by the decoder. */
const char *upb_pbdecoder_getopname(unsigned int op);
/* A special label that means "do field dispatch for this message and branch to
* wherever that takes you." */
#define LABEL_DISPATCH 0
/* A special slot in the dispatch table that stores the epilogue (ENDMSG and/or
* RET) for branching to when we find an appropriate ENDGROUP tag. */
#define DISPATCH_ENDMSG 0
/* It's important to use this invalid wire type instead of 0 (which is a valid
* wire type). */
#define NO_WIRE_TYPE 0xff
/* Dispatch table entries are packed into one 64-bit value per field number:
 *
 *   [field number] -> [ 48-bit offset ][ 8-bit wt2 ][ 8-bit wt1 ]
 *
 * A tag whose wire type equals wt1 jumps to the 48-bit offset. A tag whose
 * wire type equals wt2 is resolved with a second lookup at
 * (UPB_MAX_FIELDNUMBER + fieldnum).
 *
 * Two wire types are stored because a primitive repeated field is valid in
 * both its packed and non-packed encodings. Keying the table on
 * fieldnum+wiretype would also work, but the table would be 8x sparser.
 *
 * Keeping both wire types in the primary value lets the decoder rule out the
 * second wire type without a separate lookup (that case is less common than
 * an unknown field).
 *
 * This function builds such an entry from its three components. */
UPB_INLINE uint64_t upb_pbdecoder_packdispatch(uint64_t ofs, uint8_t wt1,
                                               uint8_t wt2) {
  uint64_t entry = ofs << 16;
  entry |= (uint64_t)wt2 << 8;
  entry |= (uint64_t)wt1;
  return entry;
}
/* Splits a packed dispatch table entry back into its components: the low
 * byte is wt1, the next byte is wt2, and the remaining upper bits are the
 * offset. Inverse of upb_pbdecoder_packdispatch(). */
UPB_INLINE void upb_pbdecoder_unpackdispatch(uint64_t dispatch, uint64_t *ofs,
                                             uint8_t *wt1, uint8_t *wt2) {
  const uint64_t above_wt1 = dispatch >> 8;
  *wt1 = (uint8_t)(dispatch & 0xff);
  *wt2 = (uint8_t)(above_wt1 & 0xff);
  *ofs = above_wt1 >> 8;
}
/* All of the functions in decoder.c that return int32_t return values according
* to the following scheme:
* 1. negative values indicate a return code from the following list.
* 2. positive values indicate that error or end of buffer was hit, and
* that the decode function should immediately return the given value
* (the decoder state has already been suspended and is ready to be
* resumed). */
#define DECODE_OK -1
#define DECODE_MISMATCH -2 /* Used only from checktag_slow(). */
#define DECODE_ENDGROUP -3 /* Used only from checkunknown(). */
#define CHECK_RETURN(x) { int32_t ret = x; if (ret >= 0) return ret; }
#include "upb/port_undef.inc"
#endif /* UPB_DECODER_INT_H_ */

@ -0,0 +1,570 @@
/*
** upb::Encoder
**
** Since we are implementing pure handlers (ie. without any out-of-band access
** to pre-computed lengths), we have to buffer all submessages before we can
** emit even their first byte.
**
** Not knowing the size of submessages also means we can't write a perfect
** zero-copy implementation, even with buffering. Lengths are stored as
** varints, which means that we don't know how many bytes to reserve for the
** length until we know what the length is.
**
** This leaves us with three main choices:
**
** 1. buffer all submessage data in a temporary buffer, then copy it exactly
** once into the output buffer.
**
** 2. attempt to buffer data directly into the output buffer, estimating how
** many bytes each length will take. When our guesses are wrong, use
** memmove() to grow or shrink the allotted space.
**
** 3. buffer directly into the output buffer, allocating a max length
** ahead-of-time for each submessage length. If we overallocated, we waste
** space, but no memcpy() or memmove() is required. This approach requires
** defining a maximum size for submessages and rejecting submessages that
** exceed that size.
**
** (2) and (3) have the potential to have better performance, but they are more
** complicated and subtle to implement:
**
** (3) requires making an arbitrary choice of the maximum message size; it
** wastes space when submessages are shorter than this and fails
** completely when they are longer. This makes it more finicky and
** requires configuration based on the input. It also makes it impossible
** to perfectly match the output of reference encoders that always use the
** optimal amount of space for each length.
**
** (2) requires guessing the size upfront, and if multiple lengths are
** guessed wrong the minimum required number of memmove() operations may
** be complicated to compute correctly. Implemented properly, it may have
** a useful amortized or average cost, but more investigation is required
** to determine this and what the optimal algorithm is to achieve it.
**
** (1) makes you always pay for exactly one copy, but its implementation is
** the simplest and its performance is predictable.
**
** So for now, we implement (1) only. If we wish to optimize later, we should
** be able to do it without affecting users.
**
** The strategy is to buffer the segments of data that do *not* depend on
** unknown lengths in one buffer, and keep a separate buffer of segment pointers
** and lengths. When the top-level submessage ends, we can go beginning to end,
** alternating the writing of lengths with memcpy() of the rest of the data.
** At the top level though, no buffering is required.
*/
#include "upb/pb/encoder.h"
#include "upb/pb/varint.int.h"
#include "upb/port_def.inc"
/* The output buffer is divided into segments; a segment is a string of data
* that is "ready to go" -- it does not need any varint lengths inserted into
* the middle. The seams between segments are where varints will be inserted
* once they are known.
*
* We also use the concept of a "run", which is a range of encoded bytes that
* occur at a single submessage level. Every segment contains one or more runs.
*
* A segment can span messages. Consider:
*
* .--Submessage lengths---------.
* | | |
* | V V
* V | |--------------- | |-----------------
* Submessages: | |-----------------------------------------------
* Top-level msg: ------------------------------------------------------------
*
* Segments: ----- ------------------- -----------------
* Runs: *---- *--------------*--- *----------------
* (* marks the start)
*
* Note that the top-level message is not in any segment because it does not
* have any length preceding it.
*
* A segment is only interrupted when another length needs to be inserted. So
* observe how the second segment spans both the inner submessage and part of
* the next enclosing message. */
/* Bookkeeping for one buffered segment: the submessage length that must be
 * written as a varint immediately before it, and how many buffered bytes the
 * segment itself covers. */
typedef struct {
uint32_t msglen; /* The length to varint-encode before this segment. */
uint32_t seglen; /* Length of the segment. */
} upb_pb_encoder_segment;
/* Complete encoder state behind the public upb_pb_encoder handle. */
struct upb_pb_encoder {
/* Arena that backs the encoder and all of its growable buffers. */
upb_arena *arena;
/* Our input and output. */
upb_sink input_;
upb_bytessink output_;
/* The "subclosure" -- used as the inner closure as part of the bytessink
 * protocol. */
void *subc;
/* The output buffer and limit, and our current write position. "buf" is
 * allocated from the arena with an initial size and is reallocated from the
 * arena if we need to grow beyond that size. */
char *buf, *ptr, *limit;
/* The beginning of the current run, or undefined if we are at the top
 * level. */
char *runbegin;
/* The list of segments we are accumulating. */
upb_pb_encoder_segment *segbuf, *segptr, *seglimit;
/* The stack of enclosing submessages. Each entry is the index into "segbuf"
 * of the segment where this submessage's length is being accumulated.
 * "top" is NULL while we are at the top level (not buffering). */
int *stack, *top, *stacklimit;
/* Depth of startmsg/endmsg calls. */
int depth;
};
/* low-level buffering ********************************************************/
/* Low-level functions for interacting with the output buffer. */
/* Pushes |len| bytes starting at |buf| to the output bytessink and asserts
 * that the sink accepted all of them.
 *
 * TODO(haberman): handle pushback */
static void putbuf(upb_pb_encoder *e, const char *buf, size_t len) {
  size_t accepted = upb_bytessink_putbuf(e->output_, e->subc, buf, len, NULL);
  UPB_ASSERT(accepted == len);
}
/* Returns the segment that accumulates the length of the innermost open
 * delimited region; the top stack entry is that segment's index in segbuf. */
static upb_pb_encoder_segment *top(upb_pb_encoder *e) {
  int seg_index = *e->top;
  return e->segbuf + seg_index;
}
/* Call to ensure that at least "bytes" bytes are available for writing at
 * e->ptr. The buffer grows by repeated doubling and is reallocated from the
 * encoder's arena.
 *
 * Returns false if the required size would overflow size_t or if the bytes
 * could not be allocated; in that case none of the encoder's buffer pointers
 * are modified. */
static bool reserve(upb_pb_encoder *e, size_t bytes) {
  const size_t size_max = (size_t)-1;
  size_t used = (size_t)(e->ptr - e->buf);
  size_t old_size = (size_t)(e->limit - e->buf);
  size_t run_ofs = 0;
  size_t needed;
  size_t new_size;
  char *new_buf;

  if (old_size - used >= bytes) {
    return true; /* Enough room already. */
  }

  if (bytes > size_max - used) {
    return false; /* used + bytes would overflow. */
  }
  needed = used + bytes;

  /* Start from at least 1 so that doubling always makes progress. */
  new_size = old_size ? old_size : 1;
  while (new_size < needed) {
    if (new_size > size_max / 2) {
      return false; /* Doubling would overflow. */
    }
    new_size *= 2;
  }

  /* runbegin is only meaningful while we are buffering (e->top != NULL), so
   * only then is it read. The offset is captured before the realloc because
   * the old buffer address must not be used afterwards. */
  if (e->top) {
    run_ofs = (size_t)(e->runbegin - e->buf);
  }

  new_buf = upb_arena_realloc(e->arena, e->buf, old_size, new_size);
  if (new_buf == NULL) {
    return false;
  }

  e->buf = new_buf;
  e->ptr = new_buf + used;
  e->limit = new_buf + new_size;
  if (e->top) {
    e->runbegin = new_buf + run_ofs;
  }
  return true;
}
/* Call when "bytes" bytes have been written at e->ptr to move the write
 * position past them. The caller *must* have previously called reserve()
 * with at least this many bytes. */
static void encoder_advance(upb_pb_encoder *e, size_t bytes) {
  UPB_ASSERT(bytes <= (size_t)(e->limit - e->ptr));
  e->ptr = e->ptr + bytes;
}
/* Call when all of the bytes for a handler have been written. At the top
 * level the accumulated bytes are flushed to the output and the buffer is
 * rewound; inside a delimited region nothing is flushed. As written, this
 * always returns true. */
static bool commit(upb_pb_encoder *e) {
  if (e->top) {
    /* Inside a delimited region: keep buffering. */
    return true;
  }

  /* We aren't inside a delimited region. Flush our accumulated bytes to
   * the output.
   *
   * TODO(haberman): in the future we may want to delay flushing for
   * efficiency reasons. */
  putbuf(e, e->buf, e->ptr - e->buf);
  e->ptr = e->buf;
  return true;
}
/* Appends |len| bytes from |data| to the buffer, taking care of the
 * reserve/advance protocol. Returns false if space could not be reserved. */
static bool encode_bytes(upb_pb_encoder *e, const void *data, size_t len) {
  if (reserve(e, len)) {
    memcpy(e->ptr, data, len);
    encoder_advance(e, len);
    return true;
  }
  return false;
}
/* Closes the current run: the number of bytes written since runbegin is
 * added both to the current segment's length and to the length of the
 * innermost open message, and a new run is started at the write position. */
static void accumulate(upb_pb_encoder *e) {
  upb_pb_encoder_segment *seg = e->segptr;
  upb_pb_encoder_segment *msg = top(e);
  size_t added;
  UPB_ASSERT(e->runbegin <= e->ptr);
  added = e->ptr - e->runbegin;
  seg->seglen += added;
  msg->msglen += added;
  e->runbegin = e->ptr;
}
/* Call to indicate the start of delimited region for which the full length is
 * not yet known. All data will be buffered until the length is known.
 * Delimited regions may be nested; their lengths will all be tracked properly.
 *
 * Returns false if the submessage stack is exhausted or the segment buffer
 * cannot be grown. Note that on those failure paths e->top (and possibly
 * e->segptr) have already been advanced. */
static bool start_delim(upb_pb_encoder *e) {
if (e->top) {
/* We are already buffering, advance to the next segment and push it on the
 * stack. */
accumulate(e);
if (++e->top == e->stacklimit) {
/* TODO(haberman): grow stack? */
return false;
}
if (++e->segptr == e->seglimit) {
/* Grow segment buffer: double its byte size, then rebase segptr and
 * seglimit onto the new allocation. */
size_t old_size =
(e->seglimit - e->segbuf) * sizeof(upb_pb_encoder_segment);
size_t new_size = old_size * 2;
upb_pb_encoder_segment *new_buf =
upb_arena_realloc(e->arena, e->segbuf, old_size, new_size);
if (new_buf == NULL) {
return false;
}
e->segptr = new_buf + (e->segptr - e->segbuf);
e->seglimit = new_buf + (new_size / sizeof(upb_pb_encoder_segment));
e->segbuf = new_buf;
}
} else {
/* We were previously at the top level, start buffering. */
e->segptr = e->segbuf;
e->top = e->stack;
e->runbegin = e->ptr;
}
/* Record the new segment's index on the stack and start it empty. */
*e->top = e->segptr - e->segbuf;
e->segptr->seglen = 0;
e->segptr->msglen = 0;
return true;
}
/* Call to indicate the end of a delimited region, whose length is now known.
 *
 * If the region is nested inside another one, its length (plus the size of
 * its own length varint) is added to the enclosing region and buffering
 * continues. If it is the outermost region, every buffered segment is
 * emitted, each preceded by its varint-encoded length, and the encoder
 * returns to the unbuffered top-level state. As written, always returns
 * true. */
static bool end_delim(upb_pb_encoder *e) {
  size_t closed_len;
  accumulate(e);
  closed_len = top(e)->msglen;

  if (e->top != e->stack) {
    /* Need to keep buffering; propagate length info into the enclosing
     * submessage. */
    --e->top;
    top(e)->msglen += closed_len + upb_varint_size(closed_len);
  } else {
    /* All lengths are now available, emit all buffered data. */
    char lenbuf[UPB_PB_VARINT_MAX_LEN];
    const char *data = e->buf;
    upb_pb_encoder_segment *seg = e->segbuf;
    while (seg <= e->segptr) {
      size_t lenbytes = upb_vencode64(seg->msglen, lenbuf);
      putbuf(e, lenbuf, lenbytes);
      putbuf(e, data, seg->seglen);
      data += seg->seglen;
      seg++;
    }
    e->ptr = e->buf;
    e->top = NULL;
  }
  return true;
}
/* tag_t **********************************************************************/
/* A precomputed (pre-encoded) tag and length: "bytes" is the number of valid
 * bytes in "tag", which holds the varint encoding of the field's tag. */
typedef struct {
uint8_t bytes;
char tag[7];
} tag_t;
/* Allocates a pre-encoded tag for field |f| with wire type |wt|, stores it as
 * the handler data of |attr|, and registers it with |h| so that it is freed
 * with upb_gfree when the handlers are cleaned up.
 *
 * NOTE(review): the result of upb_gmalloc is not checked before use. */
static void new_tag(upb_handlers *h, const upb_fielddef *f, upb_wiretype_t wt,
                    upb_handlerattr *attr) {
  uint32_t fieldnum = upb_fielddef_number(f);
  tag_t *t = upb_gmalloc(sizeof(tag_t));
  /* A tag is (field number << 3) | wire type, varint-encoded. */
  uint32_t key = (fieldnum << 3) | wt;
  t->bytes = upb_vencode64(key, t->tag);
  attr->handler_data = t;
  upb_handlers_addcleanup(h, t, upb_gfree);
}
/* Appends the pre-encoded bytes of |tag| to the buffer. */
static bool encode_tag(upb_pb_encoder *e, const tag_t *tag) {
  const char *encoded = tag->tag;
  size_t len = tag->bytes;
  return encode_bytes(e, encoded, len);
}
/* encoding of wire types *****************************************************/
/* Appends the 8 bytes of |val| exactly as they are laid out in memory. */
static bool encode_fixed64(upb_pb_encoder *e, uint64_t val) {
  /* TODO(haberman): byte-swap for big endian. */
  const void *raw = &val;
  return encode_bytes(e, raw, sizeof(val));
}
/* Appends the 4 bytes of |val| exactly as they are laid out in memory. */
static bool encode_fixed32(upb_pb_encoder *e, uint32_t val) {
  /* TODO(haberman): byte-swap for big endian. */
  const void *raw = &val;
  return encode_bytes(e, raw, sizeof(val));
}
/* Appends |val| as a varint. Space for the longest possible varint is
 * reserved up front; only the bytes actually produced are committed. */
static bool encode_varint(upb_pb_encoder *e, uint64_t val) {
  if (reserve(e, UPB_PB_VARINT_MAX_LEN)) {
    encoder_advance(e, upb_vencode64(val, e->ptr));
    return true;
  }
  return false;
}
/* Returns the object representation of |d| as a 64-bit integer. memcpy is
 * used so that the bits are copied rather than the value converted. */
static uint64_t dbl2uint64(double d) {
  uint64_t bits;
  memcpy(&bits, &d, sizeof(bits));
  return bits;
}
/* Returns the object representation of |d| as a 32-bit integer. memcpy is
 * used so that the bits are copied rather than the value converted. */
static uint32_t flt2uint32(float d) {
  uint32_t bits;
  memcpy(&bits, &d, sizeof(bits));
  return bits;
}
/* encoding of proto types ****************************************************/
/* Start-of-message handler. Tracks message nesting depth and starts the
 * output bytessink when the outermost message begins. */
static bool startmsg(void *c, const void *hd) {
  upb_pb_encoder *enc = c;
  const bool outermost = (enc->depth == 0);
  UPB_UNUSED(hd);
  enc->depth++;
  if (outermost) {
    upb_bytessink_start(enc->output_, 0, &enc->subc);
  }
  return true;
}
/* End-of-message handler. Tracks message nesting depth and ends the output
 * bytessink when the outermost message finishes. */
static bool endmsg(void *c, const void *hd, upb_status *status) {
  upb_pb_encoder *enc = c;
  UPB_UNUSED(hd);
  UPB_UNUSED(status);
  enc->depth -= 1;
  if (enc->depth == 0) {
    upb_bytessink_end(enc->output_);
  }
  return true;
}
/* Start handler for length-delimited fields: writes the tag from |hd|,
 * commits it, then opens a delimited region. Returns the closure on success
 * and UPB_BREAK if any step fails. */
static void *encode_startdelimfield(void *c, const void *hd) {
  if (!encode_tag(c, hd)) return UPB_BREAK;
  if (!commit(c)) return UPB_BREAK;
  if (!start_delim(c)) return UPB_BREAK;
  return c;
}
/* Unknown-field handler: copies the bytes through unchanged, then commits. */
static bool encode_unknown(void *c, const void *hd, const char *buf,
                           size_t len) {
  UPB_UNUSED(hd);
  if (!encode_bytes(c, buf, len)) {
    return false;
  }
  return commit(c);
}
/* End handler for length-delimited fields: closes the delimited region. */
static bool encode_enddelimfield(void *c, const void *hd) {
  bool closed;
  UPB_UNUSED(hd);
  closed = end_delim(c);
  return closed;
}
/* Start-group handler: writes the tag from |hd| and commits. Groups are
 * not length-prefixed here, so no delimited region is opened. Returns the
 * closure on success and UPB_BREAK on failure. */
static void *encode_startgroup(void *c, const void *hd) {
  if (!encode_tag(c, hd)) return UPB_BREAK;
  if (!commit(c)) return UPB_BREAK;
  return c;
}
/* End-group handler: writes the tag from |hd| and commits. */
static bool encode_endgroup(void *c, const void *hd) {
  if (!encode_tag(c, hd)) {
    return false;
  }
  return commit(c);
}
/* Start-string handler: a string is encoded like any other delimited field;
 * the size hint is ignored. */
static void *encode_startstr(void *c, const void *hd, size_t size_hint) {
  void *subclosure;
  UPB_UNUSED(size_hint);
  subclosure = encode_startdelimfield(c, hd);
  return subclosure;
}
/* String-data handler: appends the chunk to the buffer. Returns the number
 * of bytes consumed: |len| on success, 0 if the bytes could not be
 * buffered. */
static size_t encode_strbuf(void *c, const void *hd, const char *buf,
                            size_t len, const upb_bufhandle *h) {
  UPB_UNUSED(hd);
  UPB_UNUSED(h);
  if (!encode_bytes(c, buf, len)) {
    return 0;
  }
  return len;
}
/* Generates the two value handlers for each scalar proto type:
 *
 *   encode_scalar_<type>: writes the pre-encoded tag passed as handler data,
 *                         then the value, then commits.
 *   encode_packed_<type>: writes only the value; the tag and length are
 *                         written by the start/end handlers of the enclosing
 *                         packed sequence.
 *
 * "convert" is applied as (convert)(val), so it may name either a cast
 * target type or a conversion function. "encode" is the wire-level writer
 * that receives the converted value. */
#define T(type, ctype, convert, encode) \
  static bool encode_scalar_##type(void *e, const void *hd, ctype val) { \
    return encode_tag(e, hd) && encode(e, (convert)(val)) && commit(e); \
  } \
  static bool encode_packed_##type(void *e, const void *hd, ctype val) { \
    UPB_UNUSED(hd); \
    return encode(e, (convert)(val)); \
  }
T(double, double, dbl2uint64, encode_fixed64)
T(float, float, flt2uint32, encode_fixed32)
T(int64, int64_t, uint64_t, encode_varint)
/* Widened through int64_t so that negative values are sign-extended. */
T(int32, int32_t, int64_t, encode_varint)
T(fixed64, uint64_t, uint64_t, encode_fixed64)
T(fixed32, uint32_t, uint32_t, encode_fixed32)
T(bool, bool, bool, encode_varint)
T(uint32, uint32_t, uint32_t, encode_varint)
T(uint64, uint64_t, uint64_t, encode_varint)
/* Enums share the int32 wire encoding, so they are widened through int64_t
 * as well: a negative enum value must be sign-extended to 64 bits rather
 * than truncated to its low 32 bits. */
T(enum, int32_t, int64_t, encode_varint)
T(sfixed32, int32_t, uint32_t, encode_fixed32)
T(sfixed64, int64_t, uint64_t, encode_fixed64)
T(sint32, int32_t, upb_zzenc_32, encode_varint)
T(sint64, int64_t, upb_zzenc_64, encode_varint)
#undef T
/* code to build the handlers *************************************************/
#include <stdio.h>
/* Handler-cache callback that populates |h| with the encoder's handlers for
 * the message type of |h|: message-level start/end/unknown handlers, plus,
 * for every field, value handlers bound to that field's pre-encoded tag.
 * |closure| is unused. */
static void newhandlers_callback(const void *closure, upb_handlers *h) {
const upb_msgdef *m;
upb_msg_field_iter i;
UPB_UNUSED(closure);
upb_handlers_setstartmsg(h, startmsg, NULL);
upb_handlers_setendmsg(h, endmsg, NULL);
upb_handlers_setunknown(h, encode_unknown, NULL);
m = upb_handlers_msgdef(h);
for(upb_msg_field_begin(&i, m);
!upb_msg_field_done(&i);
upb_msg_field_next(&i)) {
const upb_fielddef *f = upb_msg_iter_field(&i);
/* Only repeated primitive fields marked packed use the packed encoding. */
bool packed = upb_fielddef_isseq(f) && upb_fielddef_isprimitive(f) &&
upb_fielddef_packed(f);
upb_handlerattr attr = UPB_HANDLERATTR_INIT;
/* A packed field is written as a single length-delimited blob; otherwise
 * the wire type is looked up from the field's descriptor type. */
upb_wiretype_t wt =
packed ? UPB_WIRE_TYPE_DELIMITED
: upb_pb_native_wire_types[upb_fielddef_descriptortype(f)];
/* Pre-encode the tag for this field. */
new_tag(h, f, wt, &attr);
if (packed) {
/* The sequence handlers emit the tag and track the length of the blob. */
upb_handlers_setstartseq(h, f, encode_startdelimfield, &attr);
upb_handlers_setendseq(h, f, encode_enddelimfield, &attr);
}
/* T(upper, lower, upbtype) expands to the switch case for descriptor type
 * UPB_DESCRIPTOR_TYPE_<upper>: it registers encode_packed_<lower> or
 * encode_scalar_<lower> through upb_handlers_set<upbtype>(). */
#define T(upper, lower, upbtype) \
case UPB_DESCRIPTOR_TYPE_##upper: \
if (packed) { \
upb_handlers_set##upbtype(h, f, encode_packed_##lower, &attr); \
} else { \
upb_handlers_set##upbtype(h, f, encode_scalar_##lower, &attr); \
} \
break;
switch (upb_fielddef_descriptortype(f)) {
T(DOUBLE, double, double);
T(FLOAT, float, float);
T(INT64, int64, int64);
T(INT32, int32, int32);
T(FIXED64, fixed64, uint64);
T(FIXED32, fixed32, uint32);
T(BOOL, bool, bool);
T(UINT32, uint32, uint32);
T(UINT64, uint64, uint64);
T(ENUM, enum, int32);
T(SFIXED32, sfixed32, int32);
T(SFIXED64, sfixed64, int64);
T(SINT32, sint32, int32);
T(SINT64, sint64, int64);
case UPB_DESCRIPTOR_TYPE_STRING:
case UPB_DESCRIPTOR_TYPE_BYTES:
/* Strings and bytes are buffered as a delimited region. */
upb_handlers_setstartstr(h, f, encode_startstr, &attr);
upb_handlers_setendstr(h, f, encode_enddelimfield, &attr);
upb_handlers_setstring(h, f, encode_strbuf, &attr);
break;
case UPB_DESCRIPTOR_TYPE_MESSAGE:
/* Submessages are length-delimited as well. */
upb_handlers_setstartsubmsg(h, f, encode_startdelimfield, &attr);
upb_handlers_setendsubmsg(h, f, encode_enddelimfield, &attr);
break;
case UPB_DESCRIPTOR_TYPE_GROUP: {
/* Endgroup takes a different tag (wire_type = END_GROUP). */
upb_handlerattr attr2 = UPB_HANDLERATTR_INIT;
new_tag(h, f, UPB_WIRE_TYPE_END_GROUP, &attr2);
upb_handlers_setstartsubmsg(h, f, encode_startgroup, &attr);
upb_handlers_setendsubmsg(h, f, encode_endgroup, &attr2);
break;
}
}
#undef T
}
}
/* Returns the encoder to its idle state: no message open, no delimited
 * region being buffered, and no current segment. */
void upb_pb_encoder_reset(upb_pb_encoder *e) {
  e->depth = 0;
  e->top = NULL;
  e->segptr = NULL;
}
/* public API *****************************************************************/
/* Creates a handler cache that builds encoder handlers on demand through
 * newhandlers_callback; no closure is passed to the callback. */
upb_handlercache *upb_pb_encoder_newcache(void) {
  upb_handlercache *cache = upb_handlercache_new(newhandlers_callback, NULL);
  return cache;
}
/* Creates an encoder that receives data through handlers |h| and writes the
 * encoded bytes to |output|. The encoder, its output buffer, its segment
 * buffer and its submessage stack are all allocated from |arena|.
 *
 * Returns NULL if any of those allocations fails. */
upb_pb_encoder *upb_pb_encoder_create(upb_arena *arena, const upb_handlers *h,
                                      upb_bytessink output) {
  const size_t initial_bufsize = 256;
  const size_t initial_segbufsize = 16;
  /* TODO(haberman): make this configurable. */
  const size_t stack_size = 64;
#ifndef NDEBUG
  const size_t size_before = upb_arena_bytesallocated(arena);
#endif
  upb_pb_encoder *e = upb_arena_malloc(arena, sizeof(upb_pb_encoder));
  if (!e) return NULL;

  e->buf = upb_arena_malloc(arena, initial_bufsize);
  e->segbuf = upb_arena_malloc(arena, initial_segbufsize * sizeof(*e->segbuf));
  e->stack = upb_arena_malloc(arena, stack_size * sizeof(*e->stack));
  if (!e->buf || !e->segbuf || !e->stack) {
    return NULL;
  }

  e->limit = e->buf + initial_bufsize;
  e->seglimit = e->segbuf + initial_segbufsize;
  e->stacklimit = e->stack + stack_size;

  upb_pb_encoder_reset(e);
  upb_sink_reset(&e->input_, h, e);

  e->arena = arena;
  e->output_ = output;
  e->subc = output.closure;
  e->ptr = e->buf;
  /* runbegin only carries meaning while a delimited region is being
   * buffered, but it is rebased whenever the output buffer grows. Give it a
   * defined value so that it is never read while indeterminate. */
  e->runbegin = e->buf;

  /* If this fails, increase the value in encoder.h. */
  UPB_ASSERT_DEBUGVAR(upb_arena_bytesallocated(arena) - size_before <=
                      UPB_PB_ENCODER_SIZE);
  return e;
}
/* Returns the sink through which the encoder receives its input. */
upb_sink upb_pb_encoder_input(upb_pb_encoder *e) {
  upb_sink input = e->input_;
  return input;
}

@ -0,0 +1,83 @@
/*
** upb::pb::Encoder (upb_pb_encoder)
**
** Implements a set of upb_handlers that write protobuf data to the binary wire
** format.
**
** This encoder implementation does not have any access to any out-of-band or
** precomputed lengths for submessages, so it must buffer submessages internally
** before it can emit the first byte.
*/
#ifndef UPB_ENCODER_H_
#define UPB_ENCODER_H_
#include "upb/sink.h"
#ifdef __cplusplus
namespace upb {
namespace pb {
class EncoderPtr;
} /* namespace pb */
} /* namespace upb */
#endif
#define UPB_PBENCODER_MAX_NESTING 100
/* upb_pb_encoder *************************************************************/
/* Preallocation hint: encoder won't allocate more bytes than this when first
* constructed. This hint may be an overestimate for some build configurations.
* But if the encoder library is upgraded without recompiling the application,
* it may be an underestimate. */
#define UPB_PB_ENCODER_SIZE 784
struct upb_pb_encoder;
typedef struct upb_pb_encoder upb_pb_encoder;
#ifdef __cplusplus
extern "C" {
#endif
/* Returns the sink through which the encoder receives its input. */
upb_sink upb_pb_encoder_input(upb_pb_encoder *p);
/* Creates an encoder allocated from arena |a| that is driven by handlers |h|
 * and writes encoded bytes to |output|. */
upb_pb_encoder* upb_pb_encoder_create(upb_arena* a, const upb_handlers* h,
upb_bytessink output);
/* Lazily builds and caches handlers that will push encoded data to a bytessink.
 * Any msgdef objects used with this object must outlive it. */
upb_handlercache *upb_pb_encoder_newcache(void);
#ifdef __cplusplus
} /* extern "C" { */
/* Thin C++ wrapper around a upb_pb_encoder pointer. It only stores the
 * pointer; it does not free the encoder. */
class upb::pb::EncoderPtr {
public:
EncoderPtr(upb_pb_encoder* ptr) : ptr_(ptr) {}
/* The wrapped C encoder. */
upb_pb_encoder* ptr() { return ptr_; }
/* Creates a new encoder allocated from the given arena. The Handlers must
 * have come from the HandlerCache returned by NewCache() below. */
static EncoderPtr Create(Arena* arena, const Handlers* handlers,
BytesSink output) {
return EncoderPtr(
upb_pb_encoder_create(arena->ptr(), handlers, output.sink()));
}
/* The input to the encoder. */
upb::Sink input() { return upb_pb_encoder_input(ptr()); }
/* Creates a new handler cache that lazily builds encoder handlers. */
static HandlerCache NewCache() {
return HandlerCache(upb_pb_encoder_newcache());
}
/* Preallocation hint; see UPB_PB_ENCODER_SIZE. */
static const size_t kSize = UPB_PB_ENCODER_SIZE;
private:
upb_pb_encoder* ptr_;
};
#endif /* __cplusplus */
#endif /* UPB_ENCODER_H_ */

@ -0,0 +1,36 @@
#!/usr/bin/ruby
# Writes a gdb command script to stdout. For every text ("T") symbol in
# /tmp/upb-jit-code.so whose name contains "X.", the script sets a silent
# breakpoint that prints a trace line and continues.
puts "set width 0
set height 0
set verbose off\n\n"
# The block form of popen closes the pipe to nm once all lines are read.
IO.popen("nm -S /tmp/upb-jit-code.so") { |nm|
  nm.each_line { |line|
    # Input lines look like this:
    # 000000000000575a T X.0x10.OP_CHECKDELIM
    #
    # For each one we want to emit a command that looks like:
    # b X.0x10.OP_CHECKDELIM
    # commands
    # silent
    # printf "buf_ofs=%d data_rem=%d delim_rem=%d X.0x10.OP_CHECKDELIM\n", $rbx - (long)((upb_pbdecoder*)($r15))->buf, $r12 - $rbx, $rbp - $rbx
    # continue
    # end
    parts = line.split
    # "nm -S" adds a size column for symbols that have a size, so the type and
    # name are taken from the end of the line instead of fixed positions.
    type = parts[-2]
    sym = parts[-1]
    next if type != "T"
    next if sym !~ /X\./
    if sym =~ /OP_/ then
      printcmd = "printf \"buf_ofs=%d data_rem=%d delim_rem=%d #{sym}\\n\", $rbx - (long)((upb_pbdecoder*)($r15))->buf, $r12 - $rbx, $rbp - $rbx"
    elsif sym =~ /enterjit/ then
      printcmd = "printf \"#{sym} bytes=%d\\n\", $rcx"
    else
      printcmd = "printf \"#{sym}\\n\""
    end
    puts "b #{sym}
commands
silent
#{printcmd}
continue
end\n\n"
  }
}

Some files were not shown because too many files have changed in this diff Show More

Loading…
Cancel
Save