Merge branch 'master' into optdefload

4 years ago · 15e20636c3
parent 4f901b6430 901744a97e
commit 15e20636c3
53 changed files with 3037 additions and 1215 deletions
--- a/652
+++ b/652
@ -1,13 +1,12 @@
 load(
    "//bazel:build_defs.bzl",
-    "generated_file_staleness_test",
-    "licenses",  # copybara:strip_for_google3
-    "make_shell_script",
-    "upb_amalgamation",
+    "UPB_DEFAULT_COPTS",
+    "upb_amalgamation",  # copybara:strip_for_google3
 )
 load(
    "//bazel:upb_proto_library.bzl",
    "upb_proto_library",
+    "upb_proto_library_copts",
    "upb_proto_reflection_library",
 )

@ -16,10 +15,7 @@ load(
    "@rules_proto//proto:defs.bzl",
    "proto_library",
 )
-load(
-    "//:upb/bindings/lua/lua_proto_library.bzl",
-    "lua_proto_library",
-)
+
 # copybara:strip_end

 licenses(["notice"])  # BSD (Google-authored w/ possible external contributions)
@ -29,23 +25,6 @@ exports_files([
    "build_defs",
 ])

-CPPOPTS = [
-    # copybara:strip_for_google3_begin
-    "-Wextra",
-    # "-Wshorten-64-to-32",  # not in GCC (and my Kokoro images doesn't have Clang)
-    "-Werror",
-    "-Wno-long-long",
-    # copybara:strip_end
-]
-
-COPTS = CPPOPTS + [
-    # copybara:strip_for_google3_begin
-    "-pedantic",
-    "-Werror=pedantic",
-    "-Wstrict-prototypes",
-    # copybara:strip_end
-]
-
 config_setting(
    name = "darwin",
    values = {"cpu": "darwin"},
@ -57,9 +36,10 @@ config_setting(
    constraint_values = ["@bazel_tools//platforms:windows"],
 )

-config_setting(
-    name = "fuzz",
-    values = {"define": "fuzz=true"},
+upb_proto_library_copts(
+    name = "upb_proto_library_copts__for_generated_code_only_do_not_use",
+    copts = UPB_DEFAULT_COPTS,
+    visibility = ["//visibility:public"],
 )

 # Public C/C++ libraries #######################################################
@ -69,10 +49,12 @@ cc_library(
    srcs = [
        "upb/port.c",
    ],
+    copts = UPB_DEFAULT_COPTS,
    textual_hdrs = [
        "upb/port_def.inc",
        "upb/port_undef.inc",
    ],
+    visibility = ["//tests:__pkg__"],
 )

 cc_library(
@ -94,10 +76,7 @@ cc_library(
        "upb/upb.h",
        "upb/upb.hpp",
    ],
-    copts = select({
-        ":windows": [],
-        "//conditions:default": COPTS,
-    }),
+    copts = UPB_DEFAULT_COPTS,
    visibility = ["//visibility:public"],
    deps = [":port"],
 )
@ -115,10 +94,7 @@ cc_library(
        "upb/port_def.inc",
        "upb/port_undef.inc",
    ],
-    copts = select({
-        ":windows": [],
-        "//conditions:default": COPTS,
-    }),
+    copts = UPB_DEFAULT_COPTS,
    visibility = ["//visibility:public"],
    deps = [
        ":table",
@ -150,10 +126,7 @@ cc_library(
        "upb/def.hpp",
        "upb/reflection.h",
    ],
-    copts = select({
-        ":windows": [],
-        "//conditions:default": COPTS,
-    }),
+    copts = UPB_DEFAULT_COPTS,
    visibility = ["//visibility:public"],
    deps = [
        ":descriptor_upb_proto",
@ -171,10 +144,7 @@ cc_library(
    hdrs = [
        "upb/text_encode.h",
    ],
-    copts = select({
-        ":windows": [],
-        "//conditions:default": COPTS,
-    }),
+    copts = UPB_DEFAULT_COPTS,
    visibility = ["//visibility:public"],
    deps = [
        ":port",
@ -192,10 +162,8 @@ cc_library(
        "upb/json_decode.h",
        "upb/json_encode.h",
    ],
-    copts = select({
-        ":windows": [],
-        "//conditions:default": COPTS,
-    }),
+    copts = UPB_DEFAULT_COPTS,
+    visibility = ["//tests:__pkg__"],
    deps = [
        ":port",
        ":reflection",
@ -208,6 +176,7 @@ cc_library(
 cc_library(
    name = "table",
    hdrs = ["upb/table.int.h"],
+    visibility = ["//tests:__pkg__"],
    deps = [
        ":port",
        ":upb",
@ -227,10 +196,8 @@ cc_library(
        "upb/handlers.h",
        "upb/sink.h",
    ],
-    copts = select({
-        ":windows": [],
-        "//conditions:default": COPTS,
-    }),
+    copts = UPB_DEFAULT_COPTS,
+    visibility = ["//tests:__pkg__"],
    deps = [
        ":port",
        ":reflection",
@ -255,10 +222,8 @@ cc_library(
        "upb/pb/encoder.h",
        "upb/pb/textprinter.h",
    ],
-    copts = select({
-        ":windows": [],
-        "//conditions:default": COPTS,
-    }),
+    copts = UPB_DEFAULT_COPTS,
+    visibility = ["//tests:__pkg__"],
    deps = [
        ":descriptor_upb_proto",
        ":handlers",
@ -280,401 +245,23 @@ cc_library(
        "upb/json/parser.h",
        "upb/json/printer.h",
    ],
-    copts = select({
-        ":windows": [],
-        "//conditions:default": COPTS,
-    }),
-    deps = [
-        ":upb",
-        ":upb_pb",
-    ],
-)
-# copybara:strip_end
-
-cc_library(
-    name = "upb_cc_bindings",
-    hdrs = [
-        "upb/bindings/stdc++/string.h",
-    ],
-    deps = [
-        ":descriptor_upb_proto",
-        ":handlers",
-        ":port",
-        ":upb",
-    ],
-)
-
-# upb compiler #################################################################
-
-cc_library(
-    name = "upbc_generator",
-    srcs = [
-        "upbc/generator.cc",
-        "upbc/message_layout.cc",
-        "upbc/message_layout.h",
-    ],
-    hdrs = ["upbc/generator.h"],
-    copts = select({
-        ":windows": [],
-        "//conditions:default": CPPOPTS,
-    }),
-    deps = [
-        "@com_google_absl//absl/base:core_headers",
-        "@com_google_absl//absl/container:flat_hash_map",
-        "@com_google_absl//absl/strings",
-        "@com_google_protobuf//:protobuf",
-        "@com_google_protobuf//:protoc_lib",
-    ],
-)
-
-cc_binary(
-    name = "protoc-gen-upb",
-    srcs = ["upbc/main.cc"],
-    copts = select({
-        ":windows": [],
-        "//conditions:default": CPPOPTS,
-    }),
-    visibility = ["//visibility:public"],
-    deps = [
-        ":upbc_generator",
-        "@com_google_protobuf//:protoc_lib",
-    ],
-)
-
-# We strip the tests and remaining rules from google3 until the upb_proto_library()
-# and upb_proto_reflection_library() rules are fixed.
-
-# C/C++ tests ##################################################################
-
-upb_proto_reflection_library(
-    name = "descriptor_upbreflection",
-    deps = ["@com_google_protobuf//:descriptor_proto"],
-)
-
-cc_binary(
-    name = "benchmark",
-    testonly = 1,
-    srcs = ["tests/benchmark.cc"],
-    deps = [
-        ":descriptor_upb_proto",
-        ":descriptor_upbreflection",
-        "@com_github_google_benchmark//:benchmark_main",
-        "@com_google_protobuf//:protobuf",
-    ],
-)
-
-cc_library(
-    name = "upb_test",
-    testonly = 1,
-    srcs = [
-        "tests/testmain.cc",
-    ],
-    hdrs = [
-        "tests/test_util.h",
-        "tests/upb_test.h",
-    ],
-    copts = select({
-        ":windows": [],
-        "//conditions:default": CPPOPTS,
-    }),
-    deps = [
-        ":handlers",
-        ":port",
-        ":upb",
-    ],
-)
-
-cc_test(
-    name = "test_varint",
-    srcs = [
-        "tests/pb/test_varint.c",
-        "upb/pb/varint.int.h",
-    ],
-    copts = select({
-        ":windows": [],
-        "//conditions:default": COPTS,
-    }),
-    deps = [
-        ":port",
-        ":upb",
-        ":upb_pb",
-        ":upb_test",
-    ],
-)
-
-proto_library(
-    name = "test_proto",
-    testonly = 1,
-    srcs = ["tests/test.proto"],
-)
-
-upb_proto_library(
-    name = "test_upb_proto",
-    testonly = 1,
-    deps = [":test_proto"],
-)
-
-cc_test(
-    name = "test_generated_code",
-    srcs = ["tests/test_generated_code.c"],
+    copts = UPB_DEFAULT_COPTS,
+    visibility = ["//tests:__pkg__"],
    deps = [
-        ":test_messages_proto3_proto_upb",
-        ":empty_upbdefs_proto",
-        ":test_upb_proto",
-        ":upb_test",
-    ],
-)
-
-proto_library(
-    name = "empty_proto",
-    srcs = ["tests/empty.proto"],
-)
-
-upb_proto_reflection_library(
-    name = "empty_upbdefs_proto",
-    testonly = 1,
-    deps = [":empty_proto"],
-)
-
-upb_proto_library(
-    name = "test_messages_proto3_proto_upb",
-    testonly = 1,
-    deps = ["@com_google_protobuf//:test_messages_proto3_proto"],
-)
-
-proto_library(
-    name = "test_decoder_proto",
-    srcs = [
-        "tests/pb/test_decoder.proto",
-    ],
-)
-
-upb_proto_reflection_library(
-    name = "test_decoder_upb_proto",
-    deps = [":test_decoder_proto"],
-)
-
-cc_test(
-    name = "test_decoder",
-    srcs = [
-        "tests/pb/test_decoder.cc",
-        "upb/pb/varint.int.h",
-    ],
-    copts = select({
-        ":windows": [],
-        "//conditions:default": CPPOPTS,
-    }),
-    deps = [
-        ":handlers",
-        ":port",
-        ":test_decoder_upb_proto",
        ":upb",
        ":upb_pb",
-        ":upb_test",
    ],
 )

-proto_library(
-    name = "test_cpp_proto",
-    srcs = [
-        "tests/test_cpp.proto",
-    ],
-)
-
-upb_proto_reflection_library(
-    name = "test_cpp_upb_proto",
-    deps = ["test_cpp_proto"],
-)
-
-cc_test(
-    name = "test_cpp",
-    srcs = ["tests/test_cpp.cc"],
-    copts = select({
-        ":windows": [],
-        "//conditions:default": CPPOPTS,
-    }),
-    deps = [
-        ":handlers",
-        ":port",
-        ":reflection",
-        ":test_cpp_upb_proto",
-        ":upb",
-        ":upb_pb",
-        ":upb_test",
-    ],
-)
-
-cc_test(
-    name = "test_table",
-    srcs = ["tests/test_table.cc"],
-    copts = select({
-        ":windows": [],
-        "//conditions:default": CPPOPTS,
-    }),
-    deps = [
-        ":port",
-        ":table",
-        ":upb",
-        ":upb_test",
-    ],
-)
-
-# OSS-Fuzz test
-cc_binary(
-    name = "file_descriptor_parsenew_fuzzer",
-    testonly = 1,
-    srcs = ["tests/file_descriptor_parsenew_fuzzer.cc"],
-    copts = select({
-        ":windows": [],
-        "//conditions:default": CPPOPTS,
-    }) + select({
-        "//conditions:default": [],
-        ":fuzz": ["-fsanitize=fuzzer,address"],
-    }),
-    defines = select({
-        "//conditions:default": [],
-        ":fuzz": ["HAVE_FUZZER"],
-    }),
-    deps = [
-        ":descriptor_upb_proto",
-        ":upb",
-    ],
-)
-
-# copybara:strip_for_google3_begin
-cc_test(
-    name = "test_encoder",
-    srcs = ["tests/pb/test_encoder.cc"],
-    copts = select({
-        ":windows": [],
-        "//conditions:default": CPPOPTS,
-    }),
-    deps = [
-        ":descriptor_upb_proto",
-        ":descriptor_upbreflection",
-        ":upb",
-        ":upb_cc_bindings",
-        ":upb_pb",
-        ":upb_test",
-    ],
-)
-
-proto_library(
-    name = "test_json_enum_from_separate",
-    srcs = ["tests/json/enum_from_separate_file.proto"],
-    deps = [":test_json_proto"],
-)
-
-proto_library(
-    name = "test_json_proto",
-    srcs = ["tests/json/test.proto"],
-)
-
-upb_proto_reflection_library(
-    name = "test_json_upb_proto_reflection",
-    deps = ["test_json_proto"],
-)
-
-upb_proto_library(
-    name = "test_json_enum_from_separate_upb_proto",
-    deps = [":test_json_enum_from_separate"],
-)
-
-upb_proto_library(
-    name = "test_json_upb_proto",
-    deps = [":test_json_proto"],
-)
-
-cc_test(
-    name = "test_json",
-    srcs = [
-        "tests/json/test_json.cc",
-    ],
-    copts = select({
-        ":windows": [],
-        "//conditions:default": CPPOPTS,
-    }),
-    deps = [
-        ":test_json_upb_proto",
-        ":test_json_upb_proto_reflection",
-        ":upb_json",
-        ":upb_test",
-    ],
-)
-# copybara:strip_end
-
-upb_proto_library(
-    name = "conformance_proto_upb",
-    testonly = 1,
-    deps = ["@com_google_protobuf//:conformance_proto"],
-)
-
-upb_proto_reflection_library(
-    name = "conformance_proto_upbdefs",
-    testonly = 1,
-    deps = ["@com_google_protobuf//:conformance_proto"],
-)
-
-upb_proto_reflection_library(
-    name = "test_messages_proto2_upbdefs",
-    testonly = 1,
-    deps = ["@com_google_protobuf//:test_messages_proto2_proto"],
-)
-
-upb_proto_reflection_library(
-    name = "test_messages_proto3_upbdefs",
-    testonly = 1,
-    deps = ["@com_google_protobuf//:test_messages_proto3_proto"],
-)
-
-cc_binary(
-    name = "conformance_upb",
-    testonly = 1,
-    srcs = [
-        "tests/conformance_upb.c",
-    ],
-    data = [
-        "tests/conformance_upb_failures.txt",
-    ],
-    copts = select({
-        ":windows": [],
-        "//conditions:default": COPTS,
-    }) + ["-Ibazel-out/k8-fastbuild/bin"],
-    deps = [
-        ":port",
-        ":conformance_proto_upb",
-        ":conformance_proto_upbdefs",
-        ":json",
-        ":reflection",
-        ":test_messages_proto2_upbdefs",
-        ":test_messages_proto3_upbdefs",
-        ":textformat",
-        ":upb",
-    ],
-)
-
-make_shell_script(
-    name = "gen_test_conformance_upb",
-    out = "test_conformance_upb.sh",
-    contents = "external/com_google_protobuf/conformance_test_runner " +
-               " --enforce_recommended " +
-               " --failure_list ./tests/conformance_upb_failures.txt" +
-               " ./conformance_upb",
-)
-
-sh_test(
-    name = "test_conformance_upb",
-    srcs = ["test_conformance_upb.sh"],
-    data = [
-        "tests/conformance_upb_failures.txt",
-        ":conformance_upb",
-        "@com_google_protobuf//:conformance_test_runner",
-    ],
-    deps = ["@bazel_tools//tools/bash/runfiles"],
+genrule(
+    name = "generate_json_ragel",
+    srcs = ["//:upb/json/parser.rl"],
+    outs = ["upb/json/parser.c"],
+    cmd = "$(location @ragel//:ragelc) -C -o upb/json/parser.c $< && mv upb/json/parser.c $@",
+    tools = ["@ragel//:ragelc"],
+    visibility = ["//cmake:__pkg__"],
 )

-# copybara:strip_for_google3_begin
-
 # Amalgamation #################################################################

 py_binary(
@ -704,15 +291,11 @@ cc_library(
    name = "amalgamation",
    srcs = ["upb.c"],
    hdrs = ["upb.h"],
-    copts = select({
-        ":windows": [],
-        "//conditions:default": COPTS,
-    }),
+    copts = UPB_DEFAULT_COPTS,
 )

 upb_amalgamation(
    name = "gen_php_amalgamation",
-    prefix = "php-",
    outs = [
        "php-upb.c",
        "php-upb.h",
@ -726,21 +309,18 @@ upb_amalgamation(
        ":port",
        ":json",
    ],
+    prefix = "php-",
 )

 cc_library(
    name = "php_amalgamation",
    srcs = ["php-upb.c"],
    hdrs = ["php-upb.h"],
-    copts = select({
-        ":windows": [],
-        "//conditions:default": COPTS,
-    }),
+    copts = UPB_DEFAULT_COPTS,
 )

 upb_amalgamation(
    name = "gen_ruby_amalgamation",
-    prefix = "ruby-",
    outs = [
        "ruby-upb.c",
        "ruby-upb.h",
@ -753,102 +333,37 @@ upb_amalgamation(
        ":port",
        ":json",
    ],
+    prefix = "ruby-",
 )

 cc_library(
    name = "ruby_amalgamation",
    srcs = ["ruby-upb.c"],
    hdrs = ["ruby-upb.h"],
-    copts = select({
-        ":windows": [],
-        "//conditions:default": COPTS,
-    }),
+    copts = UPB_DEFAULT_COPTS,
 )

-# Lua ##########################################################################
-
-cc_library(
-    name = "lupb",
-    srcs = [
-        "upb/bindings/lua/def.c",
-        "upb/bindings/lua/msg.c",
-        "upb/bindings/lua/upb.c",
-    ],
-    hdrs = [
-        "upb/bindings/lua/upb.h",
-    ],
-    deps = [
-        ":reflection",
-        ":textformat",
-        ":upb",
-        "@lua//:liblua",
+exports_files(
+    [
+        "upb/json/parser.rl",
+        "BUILD",
+        "WORKSPACE",
    ],
+    visibility = ["//cmake:__pkg__"],
 )

-cc_test(
-    name = "test_lua",
-    srcs = ["tests/bindings/lua/main.c"],
-    data = [
-        "tests/bindings/lua/test_upb.lua",
+exports_files(
+    [
        "third_party/lunit/console.lua",
        "third_party/lunit/lunit.lua",
-        "upb/bindings/lua/upb.lua",
-        ":descriptor_proto_lua",
-        ":test_messages_proto3_proto_lua",
-        ":test_messages_proto2_proto_lua",
-        ":test_proto_lua",
-        "@com_google_protobuf//:conformance_proto",
-        "@com_google_protobuf//:descriptor_proto",
-    ],
-    linkstatic = 1,
-    deps = [
-        ":lupb",
-        "@lua//:liblua",
-    ],
-)
-
-cc_binary(
-    name = "protoc-gen-lua",
-    srcs = ["upb/bindings/lua/upbc.cc"],
-    copts = select({
-        ":windows": [],
-        "//conditions:default": CPPOPTS,
-    }),
-    visibility = ["//visibility:public"],
-    deps = [
-        "@com_google_absl//absl/strings",
-        "@com_google_protobuf//:protoc_lib",
    ],
+    visibility = ["//tests/bindings/lua:__pkg__"],
 )

-lua_proto_library(
-    name = "test_proto_lua",
-    testonly = 1,
-    deps = [":test_proto"],
-)
-
-lua_proto_library(
-    name = "descriptor_proto_lua",
-    deps = ["@com_google_protobuf//:descriptor_proto"],
-)
-
-lua_proto_library(
-    name = "test_messages_proto3_proto_lua",
-    testonly = 1,
-    deps = ["@com_google_protobuf//:test_messages_proto3_proto"],
-)
-
-lua_proto_library(
-    name = "test_messages_proto2_proto_lua",
-    testonly = 1,
-    deps = ["@com_google_protobuf//:test_messages_proto2_proto"],
-)
-
-# Test the CMake build #########################################################
-
 filegroup(
    name = "cmake_files",
    srcs = glob([
+        "upb/json/parser.c",
        "CMakeLists.txt",
        "generated_for_cmake/**/*",
        "google/**/*",
@ -857,82 +372,7 @@ filegroup(
        "tests/**/*",
        "third_party/**/*",
    ]),
-)
-
-make_shell_script(
-    name = "gen_run_cmake_build",
-    out = "run_cmake_build.sh",
-    contents = "find . && mkdir build && cd build && cmake .. && make -j8 && make test",
-)
-
-sh_test(
-    name = "cmake_build",
-    srcs = ["run_cmake_build.sh"],
-    data = [":cmake_files"],
-    deps = ["@bazel_tools//tools/bash/runfiles"],
-)
-
-# Generated files ##############################################################
-
-exports_files(["tools/staleness_test.py"])
-
-py_library(
-    name = "staleness_test_lib",
-    testonly = 1,
-    srcs = ["tools/staleness_test_lib.py"],
-)
-
-py_binary(
-    name = "make_cmakelists",
-    srcs = ["tools/make_cmakelists.py"],
-)
-
-genrule(
-    name = "gen_cmakelists",
-    srcs = [
-        "BUILD",
-        "WORKSPACE",
-        ":cmake_files",
-    ],
-    outs = ["generated-in/CMakeLists.txt"],
-    cmd = "$(location :make_cmakelists) $@",
-    tools = [":make_cmakelists"],
-)
-
-genrule(
-    name = "generate_json_ragel",
-    srcs = ["upb/json/parser.rl"],
-    outs = ["upb/json/parser.c"],
-    cmd = "$(location @ragel//:ragelc) -C -o upb/json/parser.c $< && mv upb/json/parser.c $@",
-    tools = ["@ragel//:ragelc"],
-)
-
-genrule(
-    name = "copy_json_ragel",
-    srcs = ["upb/json/parser.c"],
-    outs = ["generated-in/generated_for_cmake/upb/json/parser.c"],
-    cmd = "cp $< $@",
-)
-
-genrule(
-    name = "copy_protos",
-    srcs = [":descriptor_upb_proto"],
-    outs = [
-        "generated-in/generated_for_cmake/google/protobuf/descriptor.upb.c",
-        "generated-in/generated_for_cmake/google/protobuf/descriptor.upb.h",
-    ],
-    cmd = "cp $(SRCS) $(@D)/generated-in/generated_for_cmake/google/protobuf",
-)
-
-generated_file_staleness_test(
-    name = "test_generated_files",
-    outs = [
-        "CMakeLists.txt",
-        "generated_for_cmake/google/protobuf/descriptor.upb.c",
-        "generated_for_cmake/google/protobuf/descriptor.upb.h",
-        "generated_for_cmake/upb/json/parser.c",
-    ],
-    generated_pattern = "generated-in/%s",
+    visibility = ["//cmake:__pkg__"],
 )

 # copybara:strip_end
--- a/bazel/build_defs.bzl
+++ b/bazel/build_defs.bzl
@ -2,6 +2,30 @@

 load(":upb_proto_library.bzl", "GeneratedSrcsInfo")

+UPB_DEFAULT_CPPOPTS = select({
+    "//:windows": [],
+    "//conditions:default": [
+        # copybara:strip_for_google3_begin
+        "-Wextra",
+        # "-Wshorten-64-to-32",  # not in GCC (and my Kokoro images don't have Clang)
+        "-Werror",
+        "-Wno-long-long",
+        # copybara:strip_end
+    ],
+})
+
+UPB_DEFAULT_COPTS = select({
+    "//:windows": [],
+    "//conditions:default": [
+        # copybara:strip_for_google3_begin
+        "-std=c99",
+        "-pedantic",
+        "-Werror=pedantic",
+        "-Wstrict-prototypes",
+        # copybara:strip_end
+    ],
+})
+
 def _librule(name):
    return name + "_lib"

@ -58,50 +82,6 @@ def make_shell_script(name, contents, out):
        cmd = "(cat <<'HEREDOC'\n%s\nHEREDOC\n) > $@" % contents,
    )

-def generated_file_staleness_test(name, outs, generated_pattern):
-    """Tests that checked-in file(s) match the contents of generated file(s).
-
-    The resulting test will verify that all output files exist and have the
-    correct contents.  If the test fails, it can be invoked with --fix to
-    bring the checked-in files up to date.
-
-    Args:
-      name: Name of the rule.
-      outs: the checked-in files that are copied from generated files.
-      generated_pattern: the pattern for transforming each "out" file into a
-        generated file.  For example, if generated_pattern="generated/%s" then
-        a file foo.txt will look for generated file generated/foo.txt.
-    """
-
-    script_name = name + ".py"
-    script_src = "//:tools/staleness_test.py"
-
-    # Filter out non-existing rules so Blaze doesn't error out before we even
-    # run the test.
-    existing_outs = native.glob(include = outs)
-
-    # The file list contains a few extra bits of information at the end.
-    # These get unpacked by the Config class in staleness_test_lib.py.
-    file_list = outs + [generated_pattern, native.package_name() or ".", name]
-
-    native.genrule(
-        name = name + "_makescript",
-        outs = [script_name],
-        srcs = [script_src],
-        testonly = 1,
-        cmd = "cat $(location " + script_src + ") > $@; " +
-              "sed -i.bak -e 's|INSERT_FILE_LIST_HERE|" + "\\\n  ".join(file_list) + "|' $@",
-    )
-
-    native.py_test(
-        name = name,
-        srcs = [script_name],
-        data = existing_outs + [generated_pattern % file for file in outs],
-        deps = [
-            "//:staleness_test_lib",
-        ],
-    )
-
 # upb_amalgamation() rule, with file_list aspect.

 SrcList = provider(
@ -156,7 +136,3 @@ upb_amalgamation = rule(
    },
    implementation = _upb_amalgamation,
 )
-
-def licenses(*args):
-    # No-op (for Google-internal usage).
-    pass
--- a/bazel/lua.BUILD
+++ b/bazel/lua.BUILD
@ -4,7 +4,6 @@ package(

 cc_library(
    name = "liblua_headers",
-    defines = ["LUA_USE_LINUX"],
    hdrs = [
        "src/lauxlib.h",
        "src/lua.h",
@ -12,6 +11,7 @@ cc_library(
        "src/luaconf.h",
        "src/lualib.h",
    ],
+    defines = ["LUA_USE_LINUX"],
    includes = ["src"],
 )

@ -72,7 +72,6 @@ cc_library(
        "src/lzio.c",
        "src/lzio.h",
    ],
-    defines = ["LUA_USE_LINUX"],
    hdrs = [
        "src/lauxlib.h",
        "src/lua.h",
@ -80,6 +79,7 @@ cc_library(
        "src/luaconf.h",
        "src/lualib.h",
    ],
+    defines = ["LUA_USE_LINUX"],
    includes = ["src"],
    linkopts = [
        "-lm",
@ -92,11 +92,11 @@ cc_binary(
    srcs = [
        "src/lua.c",
    ],
-    deps = [
-        ":liblua",
-    ],
    linkopts = [
        "-lreadline",
        "-rdynamic",
    ],
+    deps = [
+        ":liblua",
+    ],
 )
--- a/bazel/ragel.BUILD
+++ b/bazel/ragel.BUILD
@ -1,4 +1,3 @@
-
 package(
    default_visibility = ["//visibility:public"],
 )
@ -158,7 +157,10 @@ cc_binary(
        "aapl/avlimelkey.h",
        "aapl/avltree.h",
    ],
-    includes = ["ragel", "aapl"],
+    includes = [
+        "aapl",
+        "ragel",
+    ],
 )

 config_h_contents = """
--- a/bazel/upb_proto_library.bzl
+++ b/bazel/upb_proto_library.bzl
@ -18,6 +18,7 @@ def _get_real_short_path(file):
    if short_path.startswith("../"):
        second_slash = short_path.index("/", 3)
        short_path = short_path[second_slash + 1:]
+
    # Sometimes it has another few prefixes like:
    #   _virtual_imports/any_proto/google/protobuf/any.proto
    # We want just google/protobuf/any.proto.
@ -51,7 +52,7 @@ def _filter_none(elems):
            out.append(elem)
    return out

-def _cc_library_func(ctx, name, hdrs, srcs, dep_ccinfos):
+def _cc_library_func(ctx, name, hdrs, srcs, copts, dep_ccinfos):
    """Like cc_library(), but callable from rules.

    Args:
@ -87,6 +88,7 @@ def _cc_library_func(ctx, name, hdrs, srcs, dep_ccinfos):
        name = name,
        srcs = srcs,
        public_hdrs = hdrs,
+        user_compile_flags = copts,
        compilation_contexts = compilation_contexts,
        **blaze_only_args
    )
@ -105,6 +107,22 @@ def _cc_library_func(ctx, name, hdrs, srcs, dep_ccinfos):
        linking_context = linking_context,
    )

+# Dummy rule to expose select() copts to aspects  ##############################
+
+_UpbProtoLibraryCopts = provider(
+    fields = {
+        "copts": "copts for upb_proto_library()",
+    },
+)
+
+def upb_proto_library_copts_impl(ctx):
+    return _UpbProtoLibraryCopts(copts = ctx.attr.copts)
+
+upb_proto_library_copts = rule(
+    implementation = upb_proto_library_copts_impl,
+    attrs = {"copts": attr.string_list(default = [])},
+)
+
 # upb_proto_library / upb_proto_reflection_library shared code #################

 GeneratedSrcsInfo = provider(
@ -198,6 +216,7 @@ def _upb_proto_aspect_impl(target, ctx, cc_provider, file_provider):
        name = ctx.rule.attr.name + ctx.attr._ext,
        hdrs = files.hdrs,
        srcs = files.srcs,
+        copts = ctx.attr._copts[_UpbProtoLibraryCopts].copts,
        dep_ccinfos = dep_ccinfos,
    )
    return [cc_provider(cc_info = cc_info), file_provider(srcs = files)]
@ -221,10 +240,13 @@ def _maybe_add(d):

 _upb_proto_library_aspect = aspect(
    attrs = _maybe_add({
+        "_copts": attr.label(
+            default = "//:upb_proto_library_copts__for_generated_code_only_do_not_use",
+        ),
        "_upbc": attr.label(
            executable = True,
            cfg = "host",
-            default = "//:protoc-gen-upb",
+            default = "//upbc:protoc-gen-upb",
        ),
        "_protoc": attr.label(
            executable = True,
@ -266,10 +288,13 @@ upb_proto_library = rule(

 _upb_proto_reflection_library_aspect = aspect(
    attrs = _maybe_add({
+        "_copts": attr.label(
+            default = "//:upb_proto_library_copts__for_generated_code_only_do_not_use",
+        ),
        "_upbc": attr.label(
            executable = True,
            cfg = "host",
-            default = "//:protoc-gen-upb",
+            default = "//upbc:protoc-gen-upb",
        ),
        "_protoc": attr.label(
            executable = True,
--- a/bazel/workspace_deps.bzl
+++ b/bazel/workspace_deps.bzl
@ -1,4 +1,3 @@
-
 load("@bazel_tools//tools/build_defs/repo:http.bzl", "http_archive")
 load("@bazel_tools//tools/build_defs/repo:git.bzl", "git_repository")
 load("@bazel_tools//tools/build_defs/repo:utils.bzl", "maybe")
@ -9,7 +8,7 @@ def upb_deps():
        name = "com_google_absl",
        commit = "df3ea785d8c30a9503321a3d35ee7d35808f190d",  # LTS 2020-02-25
        remote = "https://github.com/abseil/abseil-cpp.git",
-        shallow_since = "1583355457 -0500"
+        shallow_since = "1583355457 -0500",
    )

    maybe(
--- a/benchmarks/BUILD
+++ b/benchmarks/BUILD
@ -0,0 +1,53 @@
+load(
+    "//bazel:upb_proto_library.bzl",
+    "upb_proto_library",
+    "upb_proto_reflection_library",
+)
+
+licenses(["notice"])
+
+proto_library(
+    name = "benchmark_descriptor_proto",
+    srcs = ["descriptor.proto"],
+)
+
+upb_proto_library(
+    name = "benchmark_descriptor_upb_proto",
+    deps = [":benchmark_descriptor_proto"],
+)
+
+upb_proto_reflection_library(
+    name = "benchmark_descriptor_upb_proto_reflection",
+    deps = [":benchmark_descriptor_proto"],
+)
+
+cc_proto_library(
+    name = "benchmark_descriptor_cc_proto",
+    deps = [":benchmark_descriptor_proto"],
+)
+
+proto_library(
+    name = "benchmark_descriptor_sv_proto",
+    srcs = ["descriptor_sv.proto"],
+)
+
+cc_proto_library(
+    name = "benchmark_descriptor_sv_cc_proto",
+    deps = [":benchmark_descriptor_sv_proto"],
+)
+
+cc_binary(
+    name = "benchmark",
+    testonly = 1,
+    srcs = ["benchmark.cc"],
+    deps = [
+        ":benchmark_descriptor_cc_proto",
+        ":benchmark_descriptor_sv_cc_proto",
+        ":benchmark_descriptor_upb_proto",
+        ":benchmark_descriptor_upb_proto_reflection",
+        "//:descriptor_upb_proto",
+        "//:reflection",
+        "@com_github_google_benchmark//:benchmark_main",
+        "@com_google_protobuf//:protobuf",
+    ],
+)
--- a/benchmarks/benchmark.cc
+++ b/benchmarks/benchmark.cc
@ -1,12 +1,20 @@

-#include <string.h>
 #include <benchmark/benchmark.h>
+#include <string.h>
+
+// For benchmarks of parsing speed.
+#include "benchmarks/descriptor.pb.h"
+#include "benchmarks/descriptor.upb.h"
+#include "benchmarks/descriptor.upbdefs.h"
+#include "benchmarks/descriptor_sv.pb.h"
+
+// For benchmarks of building descriptors.
 #include "google/protobuf/descriptor.upb.h"
-#include "google/protobuf/descriptor.upbdefs.h"
 #include "google/protobuf/descriptor.pb.h"
+
 #include "upb/def.hpp"

-upb_strview descriptor = google_protobuf_descriptor_proto_upbdefinit.descriptor;
+upb_strview descriptor = benchmarks_descriptor_proto_upbdefinit.descriptor;
 namespace protobuf = ::google::protobuf;

 /* A buffer big enough to parse descriptor.proto without going to heap. */
@ -93,12 +101,12 @@ static void BM_LoadDescriptor_Proto2(benchmark::State& state) {
 }
 BENCHMARK(BM_LoadDescriptor_Proto2);

-static void BM_ParseDescriptor_Upb_LargeInitialBlock(benchmark::State& state) {
+static void BM_Parse_Upb_FileDesc_WithArena(benchmark::State& state) {
  size_t bytes = 0;
  for (auto _ : state) {
-    upb_arena* arena = upb_arena_init(buf, sizeof(buf), NULL);
-    google_protobuf_FileDescriptorProto* set =
-        google_protobuf_FileDescriptorProto_parse(descriptor.data,
+    upb_arena* arena = upb_arena_new();
+    upb_benchmark_FileDescriptorProto* set =
+        upb_benchmark_FileDescriptorProto_parse(descriptor.data,
                                                descriptor.size, arena);
    if (!set) {
      printf("Failed to parse.\n");
@ -109,14 +117,14 @@ static void BM_ParseDescriptor_Upb_LargeInitialBlock(benchmark::State& state) {
  }
  state.SetBytesProcessed(state.iterations() * descriptor.size);
 }
-BENCHMARK(BM_ParseDescriptor_Upb_LargeInitialBlock);
+BENCHMARK(BM_Parse_Upb_FileDesc_WithArena);

-static void BM_ParseDescriptor_Upb(benchmark::State& state) {
+static void BM_Parse_Upb_FileDesc_WithInitialBlock(benchmark::State& state) {
  size_t bytes = 0;
  for (auto _ : state) {
-    upb_arena* arena = upb_arena_new();
-    google_protobuf_FileDescriptorProto* set =
-        google_protobuf_FileDescriptorProto_parse(descriptor.data,
+    upb_arena* arena = upb_arena_init(buf, sizeof(buf), NULL);
+    upb_benchmark_FileDescriptorProto* set =
+        upb_benchmark_FileDescriptorProto_parse(descriptor.data,
                                                descriptor.size, arena);
    if (!set) {
      printf("Failed to parse.\n");
@ -127,55 +135,60 @@ static void BM_ParseDescriptor_Upb(benchmark::State& state) {
  }
  state.SetBytesProcessed(state.iterations() * descriptor.size);
 }
-BENCHMARK(BM_ParseDescriptor_Upb);
-
-static void BM_ParseDescriptor_Proto2_NoArena(benchmark::State& state) {
-  size_t bytes = 0;
-  for (auto _ : state) {
-    protobuf::FileDescriptorProto proto;
-    protobuf::StringPiece input(descriptor.data,descriptor.size);
-    bool ok = proto.ParseFrom<protobuf::MessageLite::kMergePartial>(input);
-    if (!ok) {
-      printf("Failed to parse.\n");
-      exit(1);
-    }
-    bytes += descriptor.size;
+BENCHMARK(BM_Parse_Upb_FileDesc_WithInitialBlock);
+
+template <class P>
+struct NoArena {
+ public:
+  P* GetProto() { return &proto_; }
+
+ private:
+  P proto_;
+};
+
+template <class P>
+struct WithArena {
+ public:
+  P* GetProto() { return protobuf::Arena::CreateMessage<P>(&arena_); }
+
+ private:
+  protobuf::Arena arena_;
+};
+
+template <class P>
+struct WithInitialBlock {
+ public:
+  WithInitialBlock() : arena_(GetOptions()) {}
+  P* GetProto() { return protobuf::Arena::CreateMessage<P>(&arena_); }
+
+ private:
+  protobuf::ArenaOptions GetOptions() {
+    protobuf::ArenaOptions opts;
+    opts.initial_block = buf;
+    opts.initial_block_size = sizeof(buf);
+    return opts;
  }
-  state.SetBytesProcessed(state.iterations() * descriptor.size);
-}
-BENCHMARK(BM_ParseDescriptor_Proto2_NoArena);

-static void BM_ParseDescriptor_Proto2_Arena(benchmark::State& state) {
-  size_t bytes = 0;
-  for (auto _ : state) {
-    protobuf::Arena arena;
-    protobuf::StringPiece input(descriptor.data,descriptor.size);
-    auto proto = protobuf::Arena::CreateMessage<protobuf::FileDescriptorProto>(
-        &arena);
-    bool ok = proto->ParseFrom<protobuf::MessageLite::kMergePartial>(input);
+  protobuf::Arena arena_;
+};

-    if (!ok) {
-      printf("Failed to parse.\n");
-      exit(1);
-    }
-    bytes += descriptor.size;
-  }
-  state.SetBytesProcessed(state.iterations() * descriptor.size);
-}
-BENCHMARK(BM_ParseDescriptor_Proto2_Arena);
+using FileDesc = ::upb_benchmark::FileDescriptorProto;
+using FileDescSV = ::upb_benchmark::sv::FileDescriptorProto;

-static void BM_ParseDescriptor_Proto2_Arena_LargeInitialBlock(benchmark::State& state) {
+const protobuf::MessageLite::ParseFlags kMergePartial =
+    protobuf::MessageLite::ParseFlags::kMergePartial;
+const protobuf::MessageLite::ParseFlags kAliasStrings =
+    protobuf::MessageLite::ParseFlags::kMergePartialWithAliasing;
+
+template <class P, template <class> class Factory,
+          protobuf::MessageLite::ParseFlags kParseFlags = kMergePartial>
+void BM_Parse_Proto2(benchmark::State& state) {
  size_t bytes = 0;
-  protobuf::ArenaOptions opts;
-  opts.initial_block = buf;
-  opts.initial_block_size = sizeof(buf);
  for (auto _ : state) {
-    protobuf::Arena arena(opts);
+    Factory<P> proto_factory;
+    auto proto = proto_factory.GetProto();
    protobuf::StringPiece input(descriptor.data,descriptor.size);
-    auto proto = protobuf::Arena::CreateMessage<protobuf::FileDescriptorProto>(
-        &arena);
-    bool ok = proto->ParseFrom<protobuf::MessageLite::kMergePartial>(input);
-
+    bool ok = proto->template ParseFrom<kParseFlags>(input);
    if (!ok) {
      printf("Failed to parse.\n");
      exit(1);
@ -184,11 +197,19 @@ static void BM_ParseDescriptor_Proto2_Arena_LargeInitialBlock(benchmark::State&
  }
  state.SetBytesProcessed(state.iterations() * descriptor.size);
 }
-BENCHMARK(BM_ParseDescriptor_Proto2_Arena_LargeInitialBlock);
+BENCHMARK_TEMPLATE(BM_Parse_Proto2, FileDesc, NoArena);
+BENCHMARK_TEMPLATE(BM_Parse_Proto2, FileDesc, WithArena);
+BENCHMARK_TEMPLATE(BM_Parse_Proto2, FileDesc, WithInitialBlock);
+//BENCHMARK_TEMPLATE(BM_Parse_Proto2, FileDescSV, NoArena);
+//BENCHMARK_TEMPLATE(BM_Parse_Proto2, FileDescSV, WithArena);
+BENCHMARK_TEMPLATE(BM_Parse_Proto2, FileDescSV, WithInitialBlock);
+//BENCHMARK_TEMPLATE(BM_Parse_Proto2, FileDescSV, NoArena, kAliasStrings);
+//BENCHMARK_TEMPLATE(BM_Parse_Proto2, FileDescSV, WithArena, kAliasStrings);
+BENCHMARK_TEMPLATE(BM_Parse_Proto2, FileDescSV, WithInitialBlock, kAliasStrings);

 static void BM_SerializeDescriptor_Proto2(benchmark::State& state) {
  size_t bytes = 0;
-  protobuf::FileDescriptorProto proto;
+  upb_benchmark::FileDescriptorProto proto;
  proto.ParseFromArray(descriptor.data, descriptor.size);
  for (auto _ : state) {
    proto.SerializePartialToArray(buf, sizeof(buf));
@ -201,9 +222,9 @@ BENCHMARK(BM_SerializeDescriptor_Proto2);
 static void BM_SerializeDescriptor_Upb(benchmark::State& state) {
  int64_t total = 0;
  upb_arena* arena = upb_arena_new();
-  google_protobuf_FileDescriptorProto* set =
-      google_protobuf_FileDescriptorProto_parse(descriptor.data,
-                                                descriptor.size, arena);
+  upb_benchmark_FileDescriptorProto* set =
+      upb_benchmark_FileDescriptorProto_parse(descriptor.data, descriptor.size,
+                                              arena);
  if (!set) {
    printf("Failed to parse.\n");
    exit(1);
@ -211,7 +232,8 @@ static void BM_SerializeDescriptor_Upb(benchmark::State& state) {
  for (auto _ : state) {
    upb_arena* enc_arena = upb_arena_init(buf, sizeof(buf), NULL);
    size_t size;
-    char *data = google_protobuf_FileDescriptorProto_serialize(set, enc_arena, &size);
+    char* data =
+        upb_benchmark_FileDescriptorProto_serialize(set, enc_arena, &size);
    if (!data) {
      printf("Failed to serialize.\n");
      exit(1);
--- a/benchmarks/compare.py
+++ b/benchmarks/compare.py
@ -30,12 +30,12 @@ def Run(cmd):
 def Benchmark(outbase, bench_cpu=True, runs=12):
  tmpfile = "/tmp/bench-output.json"
  Run("rm -rf {}".format(tmpfile))
-  Run("CC=clang bazel test :all")
+  Run("CC=clang bazel test ...")

  if bench_cpu:
-    Run("CC=clang bazel build -c opt --copt=-march=native :benchmark")
+    Run("CC=clang bazel build -c opt --copt=-march=native benchmarks:benchmark")

-    Run("./bazel-bin/benchmark --benchmark_out_format=json --benchmark_out={} --benchmark_repetitions={}".format(tmpfile, runs))
+    Run("./bazel-bin/benchmarks/benchmark --benchmark_out_format=json --benchmark_out={} --benchmark_repetitions={}".format(tmpfile, runs))
    with open(tmpfile) as f:
      bench_json = json.load(f)

@ -48,8 +48,8 @@ def Benchmark(outbase, bench_cpu=True, runs=12):
        values = (name, run["iterations"], run["cpu_time"])
        print("{} {} {} ns/op".format(*values), file=f)

-  Run("CC=clang bazel build -c opt --copt=-g :conformance_upb")
-  Run("cp -f bazel-bin/conformance_upb {}.bin".format(outbase))
+  Run("CC=clang bazel build -c opt --copt=-g tests:conformance_upb")
+  Run("cp -f bazel-bin/tests/conformance_upb {}.bin".format(outbase))


 baseline = "master"
--- a/benchmarks/descriptor.proto
+++ b/benchmarks/descriptor.proto
@ -0,0 +1,909 @@
+// Protocol Buffers - Google's data interchange format
+// Copyright 2008 Google Inc.  All rights reserved.
+// https://developers.google.com/protocol-buffers/
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+//     * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+//     * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following disclaimer
+// in the documentation and/or other materials provided with the
+// distribution.
+//     * Neither the name of Google Inc. nor the names of its
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+// Author: kenton@google.com (Kenton Varda)
+//  Based on original Protocol Buffers design by
+//  Sanjay Ghemawat, Jeff Dean, and others.
+//
+// The messages in this file describe the definitions found in .proto files.
+// A valid .proto file can be translated directly to a FileDescriptorProto
+// without any other information (e.g. without reading its imports).
+
+
+syntax = "proto2";
+
+package upb_benchmark;
+
+option go_package = "google.golang.org/protobuf/types/descriptorpb";
+option java_package = "com.google.protobuf";
+option java_outer_classname = "DescriptorProtos";
+option csharp_namespace = "Google.Protobuf.Reflection";
+option objc_class_prefix = "GPB";
+option cc_enable_arenas = true;
+
+// descriptor.proto must be optimized for speed because reflection-based
+// algorithms don't work during bootstrapping.
+option optimize_for = SPEED;
+
+// The protocol compiler can output a FileDescriptorSet containing the .proto
+// files it parses.
+message FileDescriptorSet {
+  repeated FileDescriptorProto file = 1;
+}
+
+// Describes a complete .proto file.
+message FileDescriptorProto {
+  optional string name = 1;     // file name, relative to root of source tree
+  optional string package = 2;  // e.g. "foo", "foo.bar", etc.
+
+  // Names of files imported by this file.
+  repeated string dependency = 3;
+  // Indexes of the public imported files in the dependency list above.
+  repeated int32 public_dependency = 10;
+  // Indexes of the weak imported files in the dependency list.
+  // For Google-internal migration only. Do not use.
+  repeated int32 weak_dependency = 11;
+
+  // All top-level definitions in this file.
+  repeated DescriptorProto message_type = 4;
+  repeated EnumDescriptorProto enum_type = 5;
+  repeated ServiceDescriptorProto service = 6;
+  repeated FieldDescriptorProto extension = 7;
+
+  optional FileOptions options = 8;
+
+  // This field contains optional information about the original source code.
+  // You may safely remove this entire field without harming runtime
+  // functionality of the descriptors -- the information is needed only by
+  // development tools.
+  optional SourceCodeInfo source_code_info = 9;
+
+  // The syntax of the proto file.
+  // The supported values are "proto2" and "proto3".
+  optional string syntax = 12;
+}
+
+// Describes a message type.
+message DescriptorProto {
+  optional string name = 1;
+
+  repeated FieldDescriptorProto field = 2;
+  repeated FieldDescriptorProto extension = 6;
+
+  repeated DescriptorProto nested_type = 3;
+  repeated EnumDescriptorProto enum_type = 4;
+
+  message ExtensionRange {
+    optional int32 start = 1;  // Inclusive.
+    optional int32 end = 2;    // Exclusive.
+
+    optional ExtensionRangeOptions options = 3;
+  }
+  repeated ExtensionRange extension_range = 5;
+
+  repeated OneofDescriptorProto oneof_decl = 8;
+
+  optional MessageOptions options = 7;
+
+  // Range of reserved tag numbers. Reserved tag numbers may not be used by
+  // fields or extension ranges in the same message. Reserved ranges may
+  // not overlap.
+  message ReservedRange {
+    optional int32 start = 1;  // Inclusive.
+    optional int32 end = 2;    // Exclusive.
+  }
+  repeated ReservedRange reserved_range = 9;
+  // Reserved field names, which may not be used by fields in the same message.
+  // A given name may only be reserved once.
+  repeated string reserved_name = 10;
+}
+
+message ExtensionRangeOptions {
+  // The parser stores options it doesn't recognize here. See below.
+  repeated UninterpretedOption uninterpreted_option = 999;
+
+
+  // Clients can define custom options in extensions of this message. See below.
+  extensions 1000 to max;
+}
+
+// Describes a field within a message.
+message FieldDescriptorProto {
+  enum Type {
+    // 0 is reserved for errors.
+    // Order is weird for historical reasons.
+    TYPE_DOUBLE = 1;
+    TYPE_FLOAT = 2;
+    // Not ZigZag encoded.  Negative numbers take 10 bytes.  Use TYPE_SINT64 if
+    // negative values are likely.
+    TYPE_INT64 = 3;
+    TYPE_UINT64 = 4;
+    // Not ZigZag encoded.  Negative numbers take 10 bytes.  Use TYPE_SINT32 if
+    // negative values are likely.
+    TYPE_INT32 = 5;
+    TYPE_FIXED64 = 6;
+    TYPE_FIXED32 = 7;
+    TYPE_BOOL = 8;
+    TYPE_STRING = 9;
+    // Tag-delimited aggregate.
+    // Group type is deprecated and not supported in proto3. However, Proto3
+    // implementations should still be able to parse the group wire format and
+    // treat group fields as unknown fields.
+    TYPE_GROUP = 10;
+    TYPE_MESSAGE = 11;  // Length-delimited aggregate.
+
+    // New in version 2.
+    TYPE_BYTES = 12;
+    TYPE_UINT32 = 13;
+    TYPE_ENUM = 14;
+    TYPE_SFIXED32 = 15;
+    TYPE_SFIXED64 = 16;
+    TYPE_SINT32 = 17;  // Uses ZigZag encoding.
+    TYPE_SINT64 = 18;  // Uses ZigZag encoding.
+  }
+
+  enum Label {
+    // 0 is reserved for errors
+    LABEL_OPTIONAL = 1;
+    LABEL_REQUIRED = 2;
+    LABEL_REPEATED = 3;
+  }
+
+  optional string name = 1;
+  optional int32 number = 3;
+  optional Label label = 4;
+
+  // If type_name is set, this need not be set.  If both this and type_name
+  // are set, this must be one of TYPE_ENUM, TYPE_MESSAGE or TYPE_GROUP.
+  optional Type type = 5;
+
+  // For message and enum types, this is the name of the type.  If the name
+  // starts with a '.', it is fully-qualified.  Otherwise, C++-like scoping
+  // rules are used to find the type (i.e. first the nested types within this
+  // message are searched, then within the parent, on up to the root
+  // namespace).
+  optional string type_name = 6;
+
+  // For extensions, this is the name of the type being extended.  It is
+  // resolved in the same manner as type_name.
+  optional string extendee = 2;
+
+  // For numeric types, contains the original text representation of the value.
+  // For booleans, "true" or "false".
+  // For strings, contains the default text contents (not escaped in any way).
+  // For bytes, contains the C escaped value.  All bytes >= 128 are escaped.
+  // TODO(kenton):  Base-64 encode?
+  optional string default_value = 7;
+
+  // If set, gives the index of a oneof in the containing type's oneof_decl
+  // list.  This field is a member of that oneof.
+  optional int32 oneof_index = 9;
+
+  // JSON name of this field. The value is set by protocol compiler. If the
+  // user has set a "json_name" option on this field, that option's value
+  // will be used. Otherwise, it's deduced from the field's name by converting
+  // it to camelCase.
+  optional string json_name = 10;
+
+  optional FieldOptions options = 8;
+
+  // If true, this is a proto3 "optional". When a proto3 field is optional, it
+  // tracks presence regardless of field type.
+  //
+  // When proto3_optional is true, this field must belong to a oneof to
+  // signal to old proto3 clients that presence is tracked for this field. This
+  // oneof is known as a "synthetic" oneof, and this field must be its sole
+  // member (each proto3 optional field gets its own synthetic oneof). Synthetic
+  // oneofs exist in the descriptor only, and do not generate any API. Synthetic
+  // oneofs must be ordered after all "real" oneofs.
+  //
+  // For message fields, proto3_optional doesn't create any semantic change,
+  // since non-repeated message fields always track presence. However it still
+  // indicates the semantic detail of whether the user wrote "optional" or not.
+  // This can be useful for round-tripping the .proto file. For consistency we
+  // give message fields a synthetic oneof also, even though it is not required
+  // to track presence. This is especially important because the parser can't
+  // tell if a field is a message or an enum, so it must always create a
+  // synthetic oneof.
+  //
+  // Proto2 optional fields do not set this flag, because they already indicate
+  // optional with `LABEL_OPTIONAL`.
+  optional bool proto3_optional = 17;
+}
+
+// Describes a oneof.
+message OneofDescriptorProto {
+  optional string name = 1;
+  optional OneofOptions options = 2;
+}
+
+// Describes an enum type.
+message EnumDescriptorProto {
+  optional string name = 1;
+
+  repeated EnumValueDescriptorProto value = 2;
+
+  optional EnumOptions options = 3;
+
+  // Range of reserved numeric values. Reserved values may not be used by
+  // entries in the same enum. Reserved ranges may not overlap.
+  //
+  // Note that this is distinct from DescriptorProto.ReservedRange in that it
+  // is inclusive such that it can appropriately represent the entire int32
+  // domain.
+  message EnumReservedRange {
+    optional int32 start = 1;  // Inclusive.
+    optional int32 end = 2;    // Inclusive.
+  }
+
+  // Range of reserved numeric values. Reserved numeric values may not be used
+  // by enum values in the same enum declaration. Reserved ranges may not
+  // overlap.
+  repeated EnumReservedRange reserved_range = 4;
+
+  // Reserved enum value names, which may not be reused. A given name may only
+  // be reserved once.
+  repeated string reserved_name = 5;
+}
+
+// Describes a value within an enum.
+message EnumValueDescriptorProto {
+  optional string name = 1;
+  optional int32 number = 2;
+
+  optional EnumValueOptions options = 3;
+}
+
+// Describes a service.
+message ServiceDescriptorProto {
+  optional string name = 1;
+  repeated MethodDescriptorProto method = 2;
+
+  optional ServiceOptions options = 3;
+}
+
+// Describes a method of a service.
+message MethodDescriptorProto {
+  optional string name = 1;
+
+  // Input and output type names.  These are resolved in the same way as
+  // FieldDescriptorProto.type_name, but must refer to a message type.
+  optional string input_type = 2;
+  optional string output_type = 3;
+
+  optional MethodOptions options = 4;
+
+  // Identifies if client streams multiple client messages
+  optional bool client_streaming = 5 [default = false];
+  // Identifies if server streams multiple server messages
+  optional bool server_streaming = 6 [default = false];
+}
+
+
+// ===================================================================
+// Options
+
+// Each of the definitions above may have "options" attached.  These are
+// just annotations which may cause code to be generated slightly differently
+// or may contain hints for code that manipulates protocol messages.
+//
+// Clients may define custom options as extensions of the *Options messages.
+// These extensions may not yet be known at parsing time, so the parser cannot
+// store the values in them.  Instead it stores them in a field in the *Options
+// message called uninterpreted_option. This field must have the same name
+// across all *Options messages. We then use this field to populate the
+// extensions when we build a descriptor, at which point all protos have been
+// parsed and so all extensions are known.
+//
+// Extension numbers for custom options may be chosen as follows:
+// * For options which will only be used within a single application or
+//   organization, or for experimental options, use field numbers 50000
+//   through 99999.  It is up to you to ensure that you do not use the
+//   same number for multiple options.
+// * For options which will be published and used publicly by multiple
+//   independent entities, e-mail protobuf-global-extension-registry@google.com
+//   to reserve extension numbers. Simply provide your project name (e.g.
+//   Objective-C plugin) and your project website (if available) -- there's no
+//   need to explain how you intend to use them. Usually you only need one
+//   extension number. You can declare multiple options with only one extension
+//   number by putting them in a sub-message. See the Custom Options section of
+//   the docs for examples:
+//   https://developers.google.com/protocol-buffers/docs/proto#options
+//   If this turns out to be popular, a web service will be set up
+//   to automatically assign option numbers.
+
+message FileOptions {
+
+  // Sets the Java package where classes generated from this .proto will be
+  // placed.  By default, the proto package is used, but this is often
+  // inappropriate because proto packages do not normally start with backwards
+  // domain names.
+  optional string java_package = 1;
+
+
+  // If set, all the classes from the .proto file are wrapped in a single
+  // outer class with the given name.  This applies to both Proto1
+  // (equivalent to the old "--one_java_file" option) and Proto2 (where
+  // a .proto always translates to a single class, but you may want to
+  // explicitly choose the class name).
+  optional string java_outer_classname = 8;
+
+  // If set true, then the Java code generator will generate a separate .java
+  // file for each top-level message, enum, and service defined in the .proto
+  // file.  Thus, these types will *not* be nested inside the outer class
+  // named by java_outer_classname.  However, the outer class will still be
+  // generated to contain the file's getDescriptor() method as well as any
+  // top-level extensions defined in the file.
+  optional bool java_multiple_files = 10 [default = false];
+
+  // This option does nothing.
+  optional bool java_generate_equals_and_hash = 20 [deprecated=true];
+
+  // If set true, then the Java2 code generator will generate code that
+  // throws an exception whenever an attempt is made to assign a non-UTF-8
+  // byte sequence to a string field.
+  // Message reflection will do the same.
+  // However, an extension field still accepts non-UTF-8 byte sequences.
+  // This option has no effect when used with the lite runtime.
+  optional bool java_string_check_utf8 = 27 [default = false];
+
+
+  // Generated classes can be optimized for speed or code size.
+  enum OptimizeMode {
+    SPEED = 1;         // Generate complete code for parsing, serialization,
+                       // etc.
+    CODE_SIZE = 2;     // Use ReflectionOps to implement these methods.
+    LITE_RUNTIME = 3;  // Generate code using MessageLite and the lite runtime.
+  }
+  optional OptimizeMode optimize_for = 9 [default = SPEED];
+
+  // Sets the Go package where structs generated from this .proto will be
+  // placed. If omitted, the Go package will be derived from the following:
+  //   - The basename of the package import path, if provided.
+  //   - Otherwise, the package statement in the .proto file, if present.
+  //   - Otherwise, the basename of the .proto file, without extension.
+  optional string go_package = 11;
+
+
+
+
+  // Should generic services be generated in each language?  "Generic" services
+  // are not specific to any particular RPC system.  They are generated by the
+  // main code generators in each language (without additional plugins).
+  // Generic services were the only kind of service generation supported by
+  // early versions of google.protobuf.
+  //
+  // Generic services are now considered deprecated in favor of using plugins
+  // that generate code specific to your particular RPC system.  Therefore,
+  // these default to false.  Old code which depends on generic services should
+  // explicitly set them to true.
+  optional bool cc_generic_services = 16 [default = false];
+  optional bool java_generic_services = 17 [default = false];
+  optional bool py_generic_services = 18 [default = false];
+  optional bool php_generic_services = 42 [default = false];
+
+  // Is this file deprecated?
+  // Depending on the target platform, this can emit Deprecated annotations
+  // for everything in the file, or it will be completely ignored; in the very
+  // least, this is a formalization for deprecating files.
+  optional bool deprecated = 23 [default = false];
+
+  // Enables the use of arenas for the proto messages in this file. This applies
+  // only to generated classes for C++.
+  optional bool cc_enable_arenas = 31 [default = true];
+
+
+  // Sets the objective c class prefix which is prepended to all objective c
+  // generated classes from this .proto. There is no default.
+  optional string objc_class_prefix = 36;
+
+  // Namespace for generated classes; defaults to the package.
+  optional string csharp_namespace = 37;
+
+  // By default Swift generators will take the proto package and CamelCase it
+  // replacing '.' with underscore and use that to prefix the types/symbols
+  // defined. When this option is provided, they will use this value instead
+  // to prefix the types/symbols defined.
+  optional string swift_prefix = 39;
+
+  // Sets the php class prefix which is prepended to all php generated classes
+  // from this .proto. Default is empty.
+  optional string php_class_prefix = 40;
+
+  // Use this option to change the namespace of php generated classes. Default
+  // is empty. When this option is empty, the package name will be used for
+  // determining the namespace.
+  optional string php_namespace = 41;
+
+  // Use this option to change the namespace of php generated metadata classes.
+  // Default is empty. When this option is empty, the proto file name will be
+  // used for determining the namespace.
+  optional string php_metadata_namespace = 44;
+
+  // Use this option to change the package of ruby generated classes. Default
+  // is empty. When this option is not set, the package name will be used for
+  // determining the ruby package.
+  optional string ruby_package = 45;
+
+
+  // The parser stores options it doesn't recognize here.
+  // See the documentation for the "Options" section above.
+  repeated UninterpretedOption uninterpreted_option = 999;
+
+  // Clients can define custom options in extensions of this message.
+  // See the documentation for the "Options" section above.
+  extensions 1000 to max;
+
+  reserved 38;
+}
+
+message MessageOptions {
+  // Set true to use the old proto1 MessageSet wire format for extensions.
+  // This is provided for backwards-compatibility with the MessageSet wire
+  // format.  You should not use this for any other reason:  It's less
+  // efficient, has fewer features, and is more complicated.
+  //
+  // The message must be defined exactly as follows:
+  //   message Foo {
+  //     option message_set_wire_format = true;
+  //     extensions 4 to max;
+  //   }
+  // Note that the message cannot have any defined fields; MessageSets only
+  // have extensions.
+  //
+  // All extensions of your type must be singular messages; e.g. they cannot
+  // be int32s, enums, or repeated messages.
+  //
+  // Because this is an option, the above two restrictions are not enforced by
+  // the protocol compiler.
+  optional bool message_set_wire_format = 1 [default = false];
+
+  // Disables the generation of the standard "descriptor()" accessor, which can
+  // conflict with a field of the same name.  This is meant to make migration
+  // from proto1 easier; new code should avoid fields named "descriptor".
+  optional bool no_standard_descriptor_accessor = 2 [default = false];
+
+  // Is this message deprecated?
+  // Depending on the target platform, this can emit Deprecated annotations
+  // for the message, or it will be completely ignored; in the very least,
+  // this is a formalization for deprecating messages.
+  optional bool deprecated = 3 [default = false];
+
+  // Whether the message is an automatically generated map entry type for the
+  // maps field.
+  //
+  // For maps fields:
+  //     map<KeyType, ValueType> map_field = 1;
+  // The parsed descriptor looks like:
+  //     message MapFieldEntry {
+  //         option map_entry = true;
+  //         optional KeyType key = 1;
+  //         optional ValueType value = 2;
+  //     }
+  //     repeated MapFieldEntry map_field = 1;
+  //
+  // Implementations may choose not to generate the map_entry=true message, but
+  // use a native map in the target language to hold the keys and values.
+  // The reflection APIs in such implementations still need to work as
+  // if the field is a repeated message field.
+  //
+  // NOTE: Do not set the option in .proto files. Always use the maps syntax
+  // instead. The option should only be implicitly set by the proto compiler
+  // parser.
+  optional bool map_entry = 7;
+
+  reserved 8;  // javalite_serializable
+  reserved 9;  // javanano_as_lite
+
+
+  // The parser stores options it doesn't recognize here. See above.
+  repeated UninterpretedOption uninterpreted_option = 999;
+
+  // Clients can define custom options in extensions of this message. See above.
+  extensions 1000 to max;
+}
+
+message FieldOptions {
+  // The ctype option instructs the C++ code generator to use a different
+  // representation of the field than it normally would.  See the specific
+  // options below.  This option is not yet implemented in the open source
+  // release -- sorry, we'll try to include it in a future version!
+  optional CType ctype = 1 [default = STRING];
+  enum CType {
+    // Default mode.
+    STRING = 0;
+
+    CORD = 1;
+
+    STRING_PIECE = 2;
+  }
+  // The packed option can be enabled for repeated primitive fields to enable
+  // a more efficient representation on the wire. Rather than repeatedly
+  // writing the tag and type for each element, the entire array is encoded as
+  // a single length-delimited blob. In proto3, only explicitly setting it to
+  // false will avoid using packed encoding.
+  optional bool packed = 2;
+
+  // The jstype option determines the JavaScript type used for values of the
+  // field.  The option is permitted only for 64 bit integral and fixed types
+  // (int64, uint64, sint64, fixed64, sfixed64).  A field with jstype JS_STRING
+  // is represented as a JavaScript string, which avoids the loss of precision
+  // that can happen when a large value is converted to a floating-point number.
+  // Specifying JS_NUMBER for the jstype causes the generated JavaScript code to
+  // use the JavaScript "number" type.  The behavior of the default option
+  // JS_NORMAL is implementation dependent.
+  //
+  // This option is an enum to permit additional types to be added, e.g.
+  // goog.math.Integer.
+  optional JSType jstype = 6 [default = JS_NORMAL];
+  enum JSType {
+    // Use the default type.
+    JS_NORMAL = 0;
+
+    // Use JavaScript strings.
+    JS_STRING = 1;
+
+    // Use JavaScript numbers.
+    JS_NUMBER = 2;
+  }
+
+  // Should this field be parsed lazily?  Lazy applies only to message-type
+  // fields.  It means that when the outer message is initially parsed, the
+  // inner message's contents will not be parsed but instead stored in encoded
+  // form.  The inner message will actually be parsed when it is first accessed.
+  //
+  // This is only a hint.  Implementations are free to choose whether to use
+  // eager or lazy parsing regardless of the value of this option.  However,
+  // setting this option true suggests that the protocol author believes that
+  // using lazy parsing on this field is worth the additional bookkeeping
+  // overhead typically needed to implement it.
+  //
+  // This option does not affect the public interface of any generated code;
+  // all method signatures remain the same.  Furthermore, thread-safety of the
+  // interface is not affected by this option; const methods remain safe to
+  // call from multiple threads concurrently, while non-const methods continue
+  // to require exclusive access.
+  //
+  //
+  // Note that implementations may choose not to check required fields within
+  // a lazy sub-message.  That is, calling IsInitialized() on the outer message
+  // may return true even if the inner message has missing required fields.
+  // This is necessary because otherwise the inner message would have to be
+  // parsed in order to perform the check, defeating the purpose of lazy
+  // parsing.  An implementation which chooses not to check required fields
+  // must be consistent about it.  That is, for any particular sub-message, the
+  // implementation must either *always* check its required fields, or *never*
+  // check its required fields, regardless of whether or not the message has
+  // been parsed.
+  optional bool lazy = 5 [default = false];
+
+  // Is this field deprecated?
+  // Depending on the target platform, this can emit Deprecated annotations
+  // for accessors, or it will be completely ignored; at the very least, this
+  // is a formalization for deprecating fields.
+  optional bool deprecated = 3 [default = false];
+
+  // For Google-internal migration only. Do not use.
+  optional bool weak = 10 [default = false];
+
+
+  // The parser stores options it doesn't recognize here. See above.
+  repeated UninterpretedOption uninterpreted_option = 999;
+
+  // Clients can define custom options in extensions of this message. See above.
+  extensions 1000 to max;
+
+  reserved 4;  // removed jtype
+}
+
+// Options for a oneof.  No built-in options are declared here: the message
+// only carries uninterpreted options and an extension range for custom ones.
+message OneofOptions {
+  // The parser stores options it doesn't recognize here. See above.
+  repeated UninterpretedOption uninterpreted_option = 999;
+
+  // Clients can define custom options in extensions of this message. See above.
+  extensions 1000 to max;
+}
+
+// Options that apply to an enum type as a whole.
+message EnumOptions {
+
+  // Set this option to true to allow mapping different tag names to the same
+  // value.
+  optional bool allow_alias = 2;
+
+  // Is this enum deprecated?
+  // Depending on the target platform, this can emit Deprecated annotations
+  // for the enum, or it will be completely ignored; at the very least, this
+  // is a formalization for deprecating enums.
+  optional bool deprecated = 3 [default = false];
+
+  reserved 5;  // javanano_as_lite
+
+  // The parser stores options it doesn't recognize here. See above.
+  repeated UninterpretedOption uninterpreted_option = 999;
+
+  // Clients can define custom options in extensions of this message. See above.
+  extensions 1000 to max;
+}
+
+// Options that apply to a single value of an enum.
+message EnumValueOptions {
+  // Is this enum value deprecated?
+  // Depending on the target platform, this can emit Deprecated annotations
+  // for the enum value, or it will be completely ignored; at the very least,
+  // this is a formalization for deprecating enum values.
+  optional bool deprecated = 1 [default = false];
+
+  // The parser stores options it doesn't recognize here. See above.
+  repeated UninterpretedOption uninterpreted_option = 999;
+
+  // Clients can define custom options in extensions of this message. See above.
+  extensions 1000 to max;
+}
+
+// Options that apply to a service definition.
+message ServiceOptions {
+
+  // Note:  Field numbers 1 through 32 are reserved for Google's internal RPC
+  //   framework.  We apologize for hoarding these numbers to ourselves, but
+  //   we were already using them long before we decided to release Protocol
+  //   Buffers.
+
+  // Is this service deprecated?
+  // Depending on the target platform, this can emit Deprecated annotations
+  // for the service, or it will be completely ignored; at the very least,
+  // this is a formalization for deprecating services.
+  optional bool deprecated = 33 [default = false];
+
+  // The parser stores options it doesn't recognize here. See above.
+  repeated UninterpretedOption uninterpreted_option = 999;
+
+  // Clients can define custom options in extensions of this message. See above.
+  extensions 1000 to max;
+}
+
+// Options that apply to a single method of a service.
+message MethodOptions {
+
+  // Note:  Field numbers 1 through 32 are reserved for Google's internal RPC
+  //   framework.  We apologize for hoarding these numbers to ourselves, but
+  //   we were already using them long before we decided to release Protocol
+  //   Buffers.
+
+  // Is this method deprecated?
+  // Depending on the target platform, this can emit Deprecated annotations
+  // for the method, or it will be completely ignored; at the very least,
+  // this is a formalization for deprecating methods.
+  optional bool deprecated = 33 [default = false];
+
+  // Is this method side-effect-free (or safe in HTTP parlance), or idempotent,
+  // or neither? HTTP based RPC implementation may choose GET verb for safe
+  // methods, and PUT verb for idempotent methods instead of the default POST.
+  enum IdempotencyLevel {
+    IDEMPOTENCY_UNKNOWN = 0;
+    NO_SIDE_EFFECTS = 1;  // implies idempotent
+    IDEMPOTENT = 2;       // idempotent, but may have side effects
+  }
+  // The idempotency level of this method; IDEMPOTENCY_UNKNOWN when unset.
+  optional IdempotencyLevel idempotency_level = 34
+      [default = IDEMPOTENCY_UNKNOWN];
+
+  // The parser stores options it doesn't recognize here. See above.
+  repeated UninterpretedOption uninterpreted_option = 999;
+
+  // Clients can define custom options in extensions of this message. See above.
+  extensions 1000 to max;
+}
+
+
+// A message representing an option the parser does not recognize. This only
+// appears in options protos created by the compiler::Parser class.
+// DescriptorPool resolves these when building Descriptor objects. Therefore,
+// options protos in descriptor objects (e.g. returned by Descriptor::options(),
+// or produced by Descriptor::CopyTo()) will never have UninterpretedOptions
+// in them.
+message UninterpretedOption {
+  // The name of the uninterpreted option.  Each string represents a segment in
+  // a dot-separated name.  is_extension is true iff a segment represents an
+  // extension (denoted with parentheses in options specs in .proto files).
+  // E.g., { ["foo", false], ["bar.baz", true], ["qux", false] } represents
+  // "foo.(bar.baz).qux".
+  message NamePart {
+    optional string name_part = 1;
+    optional bool is_extension = 2;
+  }
+  repeated NamePart name = 2;
+
+  // The value of the uninterpreted option, in whatever type the tokenizer
+  // identified it as during parsing. Exactly one of these should be set.
+  optional string identifier_value = 3;
+  optional uint64 positive_int_value = 4;
+  optional int64 negative_int_value = 5;
+  optional double double_value = 6;
+  optional bytes string_value = 7;
+  optional string aggregate_value = 8;
+}
+
+// ===================================================================
+// Optional source code info
+
+// Encapsulates information about the original source file from which a
+// FileDescriptorProto was generated.
+message SourceCodeInfo {
+  // A Location identifies a piece of source code in a .proto file which
+  // corresponds to a particular definition.  This information is intended
+  // to be useful to IDEs, code indexers, documentation generators, and similar
+  // tools.
+  //
+  // For example, say we have a file like:
+  //   message Foo {
+  //     optional string foo = 1;
+  //   }
+  // Let's look at just the field definition:
+  //   optional string foo = 1;
+  //   ^       ^^     ^^  ^  ^^^
+  //   a       bc     de  f  ghi
+  // We have the following locations:
+  //   span   path               represents
+  //   [a,i)  [ 4, 0, 2, 0 ]     The whole field definition.
+  //   [a,b)  [ 4, 0, 2, 0, 4 ]  The label (optional).
+  //   [c,d)  [ 4, 0, 2, 0, 5 ]  The type (string).
+  //   [e,f)  [ 4, 0, 2, 0, 1 ]  The name (foo).
+  //   [g,h)  [ 4, 0, 2, 0, 3 ]  The number (1).
+  //
+  // Notes:
+  // - A location may refer to a repeated field itself (i.e. not to any
+  //   particular index within it).  This is used whenever a set of elements is
+  //   logically enclosed in a single code segment.  For example, an entire
+  //   extend block (possibly containing multiple extension definitions) will
+  //   have an outer location whose path refers to the "extensions" repeated
+  //   field without an index.
+  // - Multiple locations may have the same path.  This happens when a single
+  //   logical declaration is spread out across multiple places.  The most
+  //   obvious example is the "extend" block again -- there may be multiple
+  //   extend blocks in the same scope, each of which will have the same path.
+  // - A location's span is not always a subset of its parent's span.  For
+  //   example, the "extendee" of an extension declaration appears at the
+  //   beginning of the "extend" block and is shared by all extensions within
+  //   the block.
+  // - Just because a location's span is a subset of some other location's span
+  //   does not mean that it is a descendant.  For example, a "group" defines
+  //   both a type and a field in a single declaration.  Thus, the locations
+  //   corresponding to the type and field and their components will overlap.
+  // - Code which tries to interpret locations should probably be designed to
+  //   ignore those that it doesn't understand, as more types of locations could
+  //   be recorded in the future.
+  repeated Location location = 1;
+  message Location {
+    // Identifies which part of the FileDescriptorProto was defined at this
+    // location.
+    //
+    // Each element is a field number or an index.  They form a path from
+    // the root FileDescriptorProto to the place where the definition occurs.
+    // For example, this path:
+    //   [ 4, 3, 2, 7, 1 ]
+    // refers to:
+    //   file.message_type(3)  // 4, 3
+    //       .field(7)         // 2, 7
+    //       .name()           // 1
+    // This is because FileDescriptorProto.message_type has field number 4:
+    //   repeated DescriptorProto message_type = 4;
+    // and DescriptorProto.field has field number 2:
+    //   repeated FieldDescriptorProto field = 2;
+    // and FieldDescriptorProto.name has field number 1:
+    //   optional string name = 1;
+    //
+    // Thus, the above path gives the location of a field name.  If we removed
+    // the last element:
+    //   [ 4, 3, 2, 7 ]
+    // this path refers to the whole field declaration (from the beginning
+    // of the label to the terminating semicolon).
+    repeated int32 path = 1 [packed = true];
+
+    // Always has exactly three or four elements: start line, start column,
+    // end line (optional, otherwise assumed same as start line), end column.
+    // These are packed into a single field for efficiency.  Note that line
+    // and column numbers are zero-based -- typically you will want to add
+    // 1 to each before displaying to a user.
+    repeated int32 span = 2 [packed = true];
+
+    // If this SourceCodeInfo represents a complete declaration, these are any
+    // comments appearing before and after the declaration which appear to be
+    // attached to the declaration.
+    //
+    // A series of line comments appearing on consecutive lines, with no other
+    // tokens appearing on those lines, will be treated as a single comment.
+    //
+    // leading_detached_comments will keep paragraphs of comments that appear
+    // before (but not connected to) the current element. Each paragraph,
+    // separated by empty lines, will be one comment element in the repeated
+    // field.
+    //
+    // Only the comment content is provided; comment markers (e.g. //) are
+    // stripped out.  For block comments, leading whitespace and an asterisk
+    // will be stripped from the beginning of each line other than the first.
+    // Newlines are included in the output.
+    //
+    // Examples:
+    //
+    //   optional int32 foo = 1;  // Comment attached to foo.
+    //   // Comment attached to bar.
+    //   optional int32 bar = 2;
+    //
+    //   optional string baz = 3;
+    //   // Comment attached to baz.
+    //   // Another line attached to baz.
+    //
+    //   // Comment attached to qux.
+    //   //
+    //   // Another line attached to qux.
+    //   optional double qux = 4;
+    //
+    //   // Detached comment for corge. This is not leading or trailing comments
+    //   // to qux or corge because there are blank lines separating it from
+    //   // both.
+    //
+    //   // Detached comment for corge paragraph 2.
+    //
+    //   optional string corge = 5;
+    //   /* Block comment attached
+    //    * to corge.  Leading asterisks
+    //    * will be removed. */
+    //   /* Block comment attached to
+    //    * grault. */
+    //   optional int32 grault = 6;
+    //
+    //   // ignored detached comments.
+    optional string leading_comments = 3;
+    optional string trailing_comments = 4;
+    repeated string leading_detached_comments = 6;
+  }
+}
+
+// Describes the relationship between generated code and its original source
+// file. A GeneratedCodeInfo message is associated with only one generated
+// source file, but may contain references to different source .proto files.
+message GeneratedCodeInfo {
+  // An Annotation connects some span of text in generated code to an element
+  // of its generating .proto file.
+  repeated Annotation annotation = 1;
+  message Annotation {
+    // Identifies the element in the original source .proto file. This field
+    // is formatted the same as SourceCodeInfo.Location.path.
+    repeated int32 path = 1 [packed = true];
+
+    // Identifies the filesystem path to the original source .proto.
+    optional string source_file = 2;
+
+    // Identifies the starting offset in bytes in the generated code
+    // that relates to the identified object.
+    optional int32 begin = 3;
+
+    // Identifies the ending offset in bytes in the generated code that
+    // relates to the identified object. The end offset should be one past
+    // the last relevant byte (so the length of the text = end - begin).
+    optional int32 end = 4;
+  }
+}
--- a/benchmarks/descriptor_sv.proto
+++ b/benchmarks/descriptor_sv.proto
@ -0,0 +1,894 @@
+// Protocol Buffers - Google's data interchange format
+// Copyright 2008 Google Inc.  All rights reserved.
+// https://developers.google.com/protocol-buffers/
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+//     * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+//     * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following disclaimer
+// in the documentation and/or other materials provided with the
+// distribution.
+//     * Neither the name of Google Inc. nor the names of its
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+// Author: kenton@google.com (Kenton Varda)
+//  Based on original Protocol Buffers design by
+//  Sanjay Ghemawat, Jeff Dean, and others.
+//
+// The messages in this file describe the definitions found in .proto files.
+// A valid .proto file can be translated directly to a FileDescriptorProto
+// without any other information (e.g. without reading its imports).
+
+syntax = "proto2";
+
+// NOTE(review): this copy is declared in its own package rather than
+// google.protobuf, presumably so it can coexist with the canonical
+// descriptor.proto in one build -- confirm.
+package upb_benchmark.sv;
+
+// NOTE(review): these language-specific options look like they were carried
+// over from the canonical descriptor.proto; verify they are still wanted for
+// a benchmark-only copy.
+option go_package = "google.golang.org/protobuf/types/descriptorpb";
+option java_package = "com.google.protobuf";
+option java_outer_classname = "DescriptorProtos";
+option csharp_namespace = "Google.Protobuf.Reflection";
+option objc_class_prefix = "GPB";
+option cc_enable_arenas = true;
+
+// descriptor.proto must be optimized for speed because reflection-based
+// algorithms don't work during bootstrapping.
+option optimize_for = SPEED;
+
+// The protocol compiler can output a FileDescriptorSet containing the .proto
+// files it parses.
+message FileDescriptorSet {
+  // One FileDescriptorProto per .proto file in the set.
+  repeated FileDescriptorProto file = 1;
+}
+
+// Describes a complete .proto file.
+message FileDescriptorProto {
+  optional string name = 1
+      [ctype = STRING_PIECE];  // file name, relative to root of source tree
+  optional string package = 2
+      [ctype = STRING_PIECE];  // e.g. "foo", "foo.bar", etc.
+
+  // Names of files imported by this file.
+  repeated string dependency = 3 [ctype = STRING_PIECE];
+  // Indexes of the public imported files in the dependency list above.
+  repeated int32 public_dependency = 10;
+  // Indexes of the weak imported files in the dependency list above.
+  // For Google-internal migration only. Do not use.
+  repeated int32 weak_dependency = 11;
+
+  // All top-level definitions in this file.
+  repeated DescriptorProto message_type = 4;
+  repeated EnumDescriptorProto enum_type = 5;
+  repeated ServiceDescriptorProto service = 6;
+  repeated FieldDescriptorProto extension = 7;
+
+  // Options set for this file (see FileOptions).
+  optional FileOptions options = 8;
+
+  // This field contains optional information about the original source code.
+  // You may safely remove this entire field without harming runtime
+  // functionality of the descriptors -- the information is needed only by
+  // development tools.
+  optional SourceCodeInfo source_code_info = 9;
+
+  // The syntax of the proto file.
+  // The supported values are "proto2" and "proto3".
+  optional string syntax = 12 [ctype = STRING_PIECE];
+}
+
+// Describes a message type.
+message DescriptorProto {
+  optional string name = 1 [ctype = STRING_PIECE];
+
+  repeated FieldDescriptorProto field = 2;
+  repeated FieldDescriptorProto extension = 6;
+
+  repeated DescriptorProto nested_type = 3;
+  repeated EnumDescriptorProto enum_type = 4;
+
+  // A half-open range [start, end), plus optional options for that range.
+  message ExtensionRange {
+    optional int32 start = 1;  // Inclusive.
+    optional int32 end = 2;    // Exclusive.
+
+    optional ExtensionRangeOptions options = 3;
+  }
+  repeated ExtensionRange extension_range = 5;
+
+  repeated OneofDescriptorProto oneof_decl = 8;
+
+  // Options set for this message (see MessageOptions).
+  optional MessageOptions options = 7;
+
+  // Range of reserved tag numbers. Reserved tag numbers may not be used by
+  // fields or extension ranges in the same message. Reserved ranges may
+  // not overlap.
+  message ReservedRange {
+    optional int32 start = 1;  // Inclusive.
+    optional int32 end = 2;    // Exclusive.
+  }
+  repeated ReservedRange reserved_range = 9;
+  // Reserved field names, which may not be used by fields in the same message.
+  // A given name may only be reserved once.
+  repeated string reserved_name = 10 [ctype = STRING_PIECE];
+}
+
+// Options for a DescriptorProto.ExtensionRange (referenced by its `options`
+// field).  Only uninterpreted and custom options are carried here.
+message ExtensionRangeOptions {
+  // The parser stores options it doesn't recognize here. See above.
+  repeated UninterpretedOption uninterpreted_option = 999;
+
+  // Clients can define custom options in extensions of this message. See above.
+  extensions 1000 to max;
+}
+
+// Describes a field within a message.
+message FieldDescriptorProto {
+  enum Type {
+    // 0 is reserved for errors.
+    // Order is weird for historical reasons.
+    TYPE_DOUBLE = 1;
+    TYPE_FLOAT = 2;
+    // Not ZigZag encoded.  Negative numbers take 10 bytes.  Use TYPE_SINT64 if
+    // negative values are likely.
+    TYPE_INT64 = 3;
+    TYPE_UINT64 = 4;
+    // Not ZigZag encoded.  Negative numbers take 10 bytes.  Use TYPE_SINT32 if
+    // negative values are likely.
+    TYPE_INT32 = 5;
+    TYPE_FIXED64 = 6;
+    TYPE_FIXED32 = 7;
+    TYPE_BOOL = 8;
+    TYPE_STRING = 9;
+    // Tag-delimited aggregate.
+    // Group type is deprecated and not supported in proto3. However, Proto3
+    // implementations should still be able to parse the group wire format and
+    // treat group fields as unknown fields.
+    TYPE_GROUP = 10;
+    TYPE_MESSAGE = 11;  // Length-delimited aggregate.
+
+    // New in version 2.
+    TYPE_BYTES = 12;
+    TYPE_UINT32 = 13;
+    TYPE_ENUM = 14;
+    TYPE_SFIXED32 = 15;
+    TYPE_SFIXED64 = 16;
+    TYPE_SINT32 = 17;  // Uses ZigZag encoding.
+    TYPE_SINT64 = 18;  // Uses ZigZag encoding.
+  }
+
+  enum Label {
+    // 0 is reserved for errors
+    LABEL_OPTIONAL = 1;
+    LABEL_REQUIRED = 2;
+    LABEL_REPEATED = 3;
+  }
+
+  optional string name = 1 [ctype = STRING_PIECE];
+  optional int32 number = 3;
+  optional Label label = 4;
+
+  // If type_name is set, this need not be set.  If both this and type_name
+  // are set, this must be one of TYPE_ENUM, TYPE_MESSAGE or TYPE_GROUP.
+  optional Type type = 5;
+
+  // For message and enum types, this is the name of the type.  If the name
+  // starts with a '.', it is fully-qualified.  Otherwise, C++-like scoping
+  // rules are used to find the type (i.e. first the nested types within this
+  // message are searched, then within the parent, on up to the root
+  // namespace).
+  optional string type_name = 6 [ctype = STRING_PIECE];
+
+  // For extensions, this is the name of the type being extended.  It is
+  // resolved in the same manner as type_name.
+  optional string extendee = 2 [ctype = STRING_PIECE];
+
+  // For numeric types, contains the original text representation of the value.
+  // For booleans, "true" or "false".
+  // For strings, contains the default text contents (not escaped in any way).
+  // For bytes, contains the C escaped value.  All bytes >= 128 are escaped.
+  // TODO(kenton):  Base-64 encode?
+  optional string default_value = 7 [ctype = STRING_PIECE];
+
+  // If set, gives the index of a oneof in the containing type's oneof_decl
+  // list.  This field is a member of that oneof.
+  optional int32 oneof_index = 9;
+
+  // JSON name of this field. The value is set by protocol compiler. If the
+  // user has set a "json_name" option on this field, that option's value
+  // will be used. Otherwise, it's deduced from the field's name by converting
+  // it to camelCase.
+  optional string json_name = 10 [ctype = STRING_PIECE];
+
+  // Options set for this field (see FieldOptions).
+  optional FieldOptions options = 8;
+
+  // If true, this is a proto3 "optional". When a proto3 field is optional, it
+  // tracks presence regardless of field type.
+  //
+  // When proto3_optional is true, this field must belong to a oneof to
+  // signal to old proto3 clients that presence is tracked for this field. This
+  // oneof is known as a "synthetic" oneof, and this field must be its sole
+  // member (each proto3 optional field gets its own synthetic oneof). Synthetic
+  // oneofs exist in the descriptor only, and do not generate any API. Synthetic
+  // oneofs must be ordered after all "real" oneofs.
+  //
+  // For message fields, proto3_optional doesn't create any semantic change,
+  // since non-repeated message fields always track presence. However it still
+  // indicates the semantic detail of whether the user wrote "optional" or not.
+  // This can be useful for round-tripping the .proto file. For consistency we
+  // give message fields a synthetic oneof also, even though it is not required
+  // to track presence. This is especially important because the parser can't
+  // tell if a field is a message or an enum, so it must always create a
+  // synthetic oneof.
+  //
+  // Proto2 optional fields do not set this flag, because they already indicate
+  // optional with `LABEL_OPTIONAL`.
+  optional bool proto3_optional = 17;
+}
+
+// Describes a oneof.  Member fields refer to their oneof through
+// FieldDescriptorProto.oneof_index, an index into the containing type's
+// oneof_decl list.
+message OneofDescriptorProto {
+  optional string name = 1 [ctype = STRING_PIECE];
+  // Options set for this oneof (see OneofOptions).
+  optional OneofOptions options = 2;
+}
+
+// Describes an enum type.
+message EnumDescriptorProto {
+  optional string name = 1 [ctype = STRING_PIECE];
+
+  // The values of this enum (see EnumValueDescriptorProto).
+  repeated EnumValueDescriptorProto value = 2;
+
+  // Options set for this enum (see EnumOptions).
+  optional EnumOptions options = 3;
+
+  // Range of reserved numeric values. Reserved values may not be used by
+  // entries in the same enum. Reserved ranges may not overlap.
+  //
+  // Note that this is distinct from DescriptorProto.ReservedRange in that it
+  // is inclusive such that it can appropriately represent the entire int32
+  // domain.
+  message EnumReservedRange {
+    optional int32 start = 1;  // Inclusive.
+    optional int32 end = 2;    // Inclusive.
+  }
+
+  // Range of reserved numeric values. Reserved numeric values may not be used
+  // by enum values in the same enum declaration. Reserved ranges may not
+  // overlap.
+  repeated EnumReservedRange reserved_range = 4;
+
+  // Reserved enum value names, which may not be reused. A given name may only
+  // be reserved once.
+  repeated string reserved_name = 5 [ctype = STRING_PIECE];
+}
+
+// Describes a value within an enum.
+message EnumValueDescriptorProto {
+  optional string name = 1 [ctype = STRING_PIECE];
+  optional int32 number = 2;
+
+  // Options set for this enum value (see EnumValueOptions).
+  optional EnumValueOptions options = 3;
+}
+
+// Describes a service.
+message ServiceDescriptorProto {
+  optional string name = 1 [ctype = STRING_PIECE];
+  // The methods of this service (see MethodDescriptorProto).
+  repeated MethodDescriptorProto method = 2;
+
+  // Options set for this service (see ServiceOptions).
+  optional ServiceOptions options = 3;
+}
+
+// Describes a method of a service.
+message MethodDescriptorProto {
+  optional string name = 1 [ctype = STRING_PIECE];
+
+  // Input and output type names.  These are resolved in the same way as
+  // FieldDescriptorProto.type_name, but must refer to a message type.
+  optional string input_type = 2 [ctype = STRING_PIECE];
+  optional string output_type = 3 [ctype = STRING_PIECE];
+
+  // Options set for this method (see MethodOptions).
+  optional MethodOptions options = 4;
+
+  // Identifies whether the client streams multiple client messages.
+  optional bool client_streaming = 5 [default = false];
+  // Identifies whether the server streams multiple server messages.
+  optional bool server_streaming = 6 [default = false];
+}
+
+// ===================================================================
+// Options
+
+// Each of the definitions above may have "options" attached.  These are
+// just annotations which may cause code to be generated slightly differently
+// or may contain hints for code that manipulates protocol messages.
+//
+// Clients may define custom options as extensions of the *Options messages.
+// These extensions may not yet be known at parsing time, so the parser cannot
+// store the values in them.  Instead it stores them in a field in the *Options
+// message called uninterpreted_option. This field must have the same name
+// across all *Options messages. We then use this field to populate the
+// extensions when we build a descriptor, at which point all protos have been
+// parsed and so all extensions are known.
+//
+// Extension numbers for custom options may be chosen as follows:
+// * For options which will only be used within a single application or
+//   organization, or for experimental options, use field numbers 50000
+//   through 99999.  It is up to you to ensure that you do not use the
+//   same number for multiple options.
+// * For options which will be published and used publicly by multiple
+//   independent entities, e-mail protobuf-global-extension-registry@google.com
+//   to reserve extension numbers. Simply provide your project name (e.g.
+//   Objective-C plugin) and your project website (if available) -- there's no
+//   need to explain how you intend to use them. Usually you only need one
+//   extension number. You can declare multiple options with only one extension
+//   number by putting them in a sub-message. See the Custom Options section of
+//   the docs for examples:
+//   https://developers.google.com/protocol-buffers/docs/proto#options
+//   If this turns out to be popular, a web service will be set up
+//   to automatically assign option numbers.
+
+message FileOptions {
+  // Sets the Java package where classes generated from this .proto will be
+  // placed.  By default, the proto package is used, but this is often
+  // inappropriate because proto packages do not normally start with backwards
+  // domain names.
+  optional string java_package = 1 [ctype = STRING_PIECE];
+
+  // If set, all the classes from the .proto file are wrapped in a single
+  // outer class with the given name.  This applies to both Proto1
+  // (equivalent to the old "--one_java_file" option) and Proto2 (where
+  // a .proto always translates to a single class, but you may want to
+  // explicitly choose the class name).
+  optional string java_outer_classname = 8 [ctype = STRING_PIECE];
+
+  // If set true, then the Java code generator will generate a separate .java
+  // file for each top-level message, enum, and service defined in the .proto
+  // file.  Thus, these types will *not* be nested inside the outer class
+  // named by java_outer_classname.  However, the outer class will still be
+  // generated to contain the file's getDescriptor() method as well as any
+  // top-level extensions defined in the file.
+  optional bool java_multiple_files = 10 [default = false];
+
+  // This option does nothing.
+  optional bool java_generate_equals_and_hash = 20 [deprecated = true];
+
+  // If set true, then the Java2 code generator will generate code that
+  // throws an exception whenever an attempt is made to assign a non-UTF-8
+  // byte sequence to a string field.
+  // Message reflection will do the same.
+  // However, an extension field still accepts non-UTF-8 byte sequences.
+  // This option has no effect on when used with the lite runtime.
+  optional bool java_string_check_utf8 = 27 [default = false];
+
+  // Generated classes can be optimized for speed or code size.
+  enum OptimizeMode {
+    SPEED = 1;         // Generate complete code for parsing, serialization,
+                       // etc.
+    CODE_SIZE = 2;     // Use ReflectionOps to implement these methods.
+    LITE_RUNTIME = 3;  // Generate code using MessageLite and the lite runtime.
+  }
+  optional OptimizeMode optimize_for = 9 [default = SPEED];
+
+  // Sets the Go package where structs generated from this .proto will be
+  // placed. If omitted, the Go package will be derived from the following:
+  //   - The basename of the package import path, if provided.
+  //   - Otherwise, the package statement in the .proto file, if present.
+  //   - Otherwise, the basename of the .proto file, without extension.
+  optional string go_package = 11 [ctype = STRING_PIECE];
+
+  // Should generic services be generated in each language?  "Generic" services
+  // are not specific to any particular RPC system.  They are generated by the
+  // main code generators in each language (without additional plugins).
+  // Generic services were the only kind of service generation supported by
+  // early versions of google.protobuf.
+  //
+  // Generic services are now considered deprecated in favor of using plugins
+  // that generate code specific to your particular RPC system.  Therefore,
+  // these default to false.  Old code which depends on generic services should
+  // explicitly set them to true.
+  optional bool cc_generic_services = 16 [default = false];
+  optional bool java_generic_services = 17 [default = false];
+  optional bool py_generic_services = 18 [default = false];
+  optional bool php_generic_services = 42 [default = false];
+
+  // Is this file deprecated?
+  // Depending on the target platform, this can emit Deprecated annotations
+  // for everything in the file, or it will be completely ignored; in the very
+  // least, this is a formalization for deprecating files.
+  optional bool deprecated = 23 [default = false];
+
+  // Enables the use of arenas for the proto messages in this file. This applies
+  // only to generated classes for C++.
+  optional bool cc_enable_arenas = 31 [default = true];
+
+  // Sets the objective c class prefix which is prepended to all objective c
+  // generated classes from this .proto. There is no default.
+  optional string objc_class_prefix = 36 [ctype = STRING_PIECE];
+
+  // Namespace for generated classes; defaults to the package.
+  optional string csharp_namespace = 37 [ctype = STRING_PIECE];
+
+  // By default Swift generators will take the proto package and CamelCase it
+  // replacing '.' with underscore and use that to prefix the types/symbols
+  // defined. When this option is provided, they will use this value instead
+  // to prefix the types/symbols defined.
+  optional string swift_prefix = 39 [ctype = STRING_PIECE];
+
+  // Sets the php class prefix which is prepended to all php generated classes
+  // from this .proto. Default is empty.
+  optional string php_class_prefix = 40 [ctype = STRING_PIECE];
+
+  // Use this option to change the namespace of php generated classes. Default
+  // is empty. When this option is empty, the package name will be used for
+  // determining the namespace.
+  optional string php_namespace = 41 [ctype = STRING_PIECE];
+
+  // Use this option to change the namespace of php generated metadata classes.
+  // Default is empty. When this option is empty, the proto file name will be
+  // used for determining the namespace.
+  optional string php_metadata_namespace = 44 [ctype = STRING_PIECE];
+
+  // Use this option to change the package of ruby generated classes. Default
+  // is empty. When this option is not set, the package name will be used for
+  // determining the ruby package.
+  optional string ruby_package = 45 [ctype = STRING_PIECE];
+
+  // The parser stores options it doesn't recognize here.
+  // See the documentation for the "Options" section above.
+  repeated UninterpretedOption uninterpreted_option = 999;
+
+  // Clients can define custom options in extensions of this message.
+  // See the documentation for the "Options" section above.
+  extensions 1000 to max;
+
+  reserved 38;
+}
+
+// Options that may be attached to a message definition.
+message MessageOptions {
+  // Set true to use the old proto1 MessageSet wire format for extensions.
+  // This is provided for backwards-compatibility with the MessageSet wire
+  // format.  You should not use this for any other reason:  It's less
+  // efficient, has fewer features, and is more complicated.
+  //
+  // The message must be defined exactly as follows:
+  //   message Foo {
+  //     option message_set_wire_format = true;
+  //     extensions 4 to max;
+  //   }
+  // Note that the message cannot have any defined fields; MessageSets only
+  // have extensions.
+  //
+  // All extensions of your type must be singular messages; e.g. they cannot
+  // be int32s, enums, or repeated messages.
+  //
+  // Because this is an option, the above two restrictions are not enforced by
+  // the protocol compiler.
+  optional bool message_set_wire_format = 1 [default = false];
+
+  // Disables the generation of the standard "descriptor()" accessor, which can
+  // conflict with a field of the same name.  This is meant to make migration
+  // from proto1 easier; new code should avoid fields named "descriptor".
+  optional bool no_standard_descriptor_accessor = 2 [default = false];
+
+  // Is this message deprecated?
+  // Depending on the target platform, this can emit Deprecated annotations
+  // for the message, or it will be completely ignored; at the very least,
+  // this is a formalization for deprecating messages.
+  optional bool deprecated = 3 [default = false];
+
+  // Whether the message is an automatically generated map entry type for a
+  // map field.
+  //
+  // For map fields:
+  //     map<KeyType, ValueType> map_field = 1;
+  // The parsed descriptor looks like:
+  //     message MapFieldEntry {
+  //         option map_entry = true;
+  //         optional KeyType key = 1;
+  //         optional ValueType value = 2;
+  //     }
+  //     repeated MapFieldEntry map_field = 1;
+  //
+  // Implementations may choose not to generate the map_entry=true message, but
+  // use a native map in the target language to hold the keys and values.
+  // The reflection APIs in such implementations still need to work as
+  // if the field is a repeated message field.
+  //
+  // NOTE: Do not set the option in .proto files. Always use the maps syntax
+  // instead. The option should only be implicitly set by the proto compiler
+  // parser.
+  optional bool map_entry = 7;
+
+  reserved 8;  // javalite_serializable
+  reserved 9;  // javanano_as_lite
+
+  // The parser stores options it doesn't recognize here. See above.
+  repeated UninterpretedOption uninterpreted_option = 999;
+
+  // Clients can define custom options in extensions of this message. See above.
+  extensions 1000 to max;
+}
+
+// Options that may be attached to a field definition.
+message FieldOptions {
+  // The ctype option instructs the C++ code generator to use a different
+  // representation of the field than it normally would.  See the specific
+  // options below.  This option is not yet implemented in the open source
+  // release -- sorry, we'll try to include it in a future version!
+  optional CType ctype = 1 [default = STRING];
+  enum CType {
+    // Default mode.
+    STRING = 0;
+
+    CORD = 1;
+
+    STRING_PIECE = 2;
+  }
+  // The packed option can be enabled for repeated primitive fields to enable
+  // a more efficient representation on the wire. Rather than repeatedly
+  // writing the tag and type for each element, the entire array is encoded as
+  // a single length-delimited blob. In proto3, only explicitly setting it to
+  // false will avoid using packed encoding.
+  optional bool packed = 2;
+
+  // The jstype option determines the JavaScript type used for values of the
+  // field.  The option is permitted only for 64 bit integral and fixed types
+  // (int64, uint64, sint64, fixed64, sfixed64).  A field with jstype JS_STRING
+  // is represented as a JavaScript string, which avoids loss of precision that
+  // can happen when a large value is converted to a JavaScript floating point
+  // number.  Specifying JS_NUMBER for the jstype causes the generated
+  // JavaScript code to use the JavaScript "number" type.  The behavior of the
+  // default option JS_NORMAL is implementation dependent.
+  //
+  // This option is an enum to permit additional types to be added, e.g.
+  // goog.math.Integer.
+  optional JSType jstype = 6 [default = JS_NORMAL];
+  enum JSType {
+    // Use the default type.
+    JS_NORMAL = 0;
+
+    // Use JavaScript strings.
+    JS_STRING = 1;
+
+    // Use JavaScript numbers.
+    JS_NUMBER = 2;
+  }
+
+  // Should this field be parsed lazily?  Lazy applies only to message-type
+  // fields.  It means that when the outer message is initially parsed, the
+  // inner message's contents will not be parsed but instead stored in encoded
+  // form.  The inner message will actually be parsed when it is first accessed.
+  //
+  // This is only a hint.  Implementations are free to choose whether to use
+  // eager or lazy parsing regardless of the value of this option.  However,
+  // setting this option true suggests that the protocol author believes that
+  // using lazy parsing on this field is worth the additional bookkeeping
+  // overhead typically needed to implement it.
+  //
+  // This option does not affect the public interface of any generated code;
+  // all method signatures remain the same.  Furthermore, thread-safety of the
+  // interface is not affected by this option; const methods remain safe to
+  // call from multiple threads concurrently, while non-const methods continue
+  // to require exclusive access.
+  //
+  // Note that implementations may choose not to check required fields within
+  // a lazy sub-message.  That is, calling IsInitialized() on the outer message
+  // may return true even if the inner message has missing required fields.
+  // This is necessary because otherwise the inner message would have to be
+  // parsed in order to perform the check, defeating the purpose of lazy
+  // parsing.  An implementation which chooses not to check required fields
+  // must be consistent about it.  That is, for any particular sub-message, the
+  // implementation must either *always* check its required fields, or *never*
+  // check its required fields, regardless of whether or not the message has
+  // been parsed.
+  optional bool lazy = 5 [default = false];
+
+  // Is this field deprecated?
+  // Depending on the target platform, this can emit Deprecated annotations
+  // for accessors, or it will be completely ignored; at the very least, this
+  // is a formalization for deprecating fields.
+  optional bool deprecated = 3 [default = false];
+
+  // For Google-internal migration only. Do not use.
+  optional bool weak = 10 [default = false];
+
+  // The parser stores options it doesn't recognize here. See above.
+  repeated UninterpretedOption uninterpreted_option = 999;
+
+  // Clients can define custom options in extensions of this message. See above.
+  extensions 1000 to max;
+
+  reserved 4;  // removed jtype
+}
+
+// Options that may be attached to a oneof declaration.  No built-in options
+// are defined here; the message only carries uninterpreted options and the
+// extension range for custom options.
+message OneofOptions {
+  // The parser stores options it doesn't recognize here. See above.
+  repeated UninterpretedOption uninterpreted_option = 999;
+
+  // Clients can define custom options in extensions of this message. See above.
+  extensions 1000 to max;
+}
+
+// Options that may be attached to an enum definition.
+message EnumOptions {
+  // Set this option to true to allow mapping different tag names to the same
+  // value.
+  optional bool allow_alias = 2;
+
+  // Is this enum deprecated?
+  // Depending on the target platform, this can emit Deprecated annotations
+  // for the enum, or it will be completely ignored; at the very least, this
+  // is a formalization for deprecating enums.
+  optional bool deprecated = 3 [default = false];
+
+  reserved 5;  // javanano_as_lite
+
+  // The parser stores options it doesn't recognize here. See above.
+  repeated UninterpretedOption uninterpreted_option = 999;
+
+  // Clients can define custom options in extensions of this message. See above.
+  extensions 1000 to max;
+}
+
+// Options that may be attached to an individual enum value.
+message EnumValueOptions {
+  // Is this enum value deprecated?
+  // Depending on the target platform, this can emit Deprecated annotations
+  // for the enum value, or it will be completely ignored; at the very least,
+  // this is a formalization for deprecating enum values.
+  optional bool deprecated = 1 [default = false];
+
+  // The parser stores options it doesn't recognize here. See above.
+  repeated UninterpretedOption uninterpreted_option = 999;
+
+  // Clients can define custom options in extensions of this message. See above.
+  extensions 1000 to max;
+}
+
+// Options that may be attached to a service definition.
+message ServiceOptions {
+  // Note:  Field numbers 1 through 32 are reserved for Google's internal RPC
+  //   framework.  We apologize for hoarding these numbers to ourselves, but
+  //   we were already using them long before we decided to release Protocol
+  //   Buffers.
+
+  // Is this service deprecated?
+  // Depending on the target platform, this can emit Deprecated annotations
+  // for the service, or it will be completely ignored; at the very least,
+  // this is a formalization for deprecating services.
+  optional bool deprecated = 33 [default = false];
+
+  // The parser stores options it doesn't recognize here. See above.
+  repeated UninterpretedOption uninterpreted_option = 999;
+
+  // Clients can define custom options in extensions of this message. See above.
+  extensions 1000 to max;
+}
+
+// Options that may be attached to an RPC method definition.
+message MethodOptions {
+  // Note:  Field numbers 1 through 32 are reserved for Google's internal RPC
+  //   framework.  We apologize for hoarding these numbers to ourselves, but
+  //   we were already using them long before we decided to release Protocol
+  //   Buffers.
+
+  // Is this method deprecated?
+  // Depending on the target platform, this can emit Deprecated annotations
+  // for the method, or it will be completely ignored; at the very least,
+  // this is a formalization for deprecating methods.
+  optional bool deprecated = 33 [default = false];
+
+  // Is this method side-effect-free (or safe in HTTP parlance), or idempotent,
+  // or neither? HTTP based RPC implementation may choose GET verb for safe
+  // methods, and PUT verb for idempotent methods instead of the default POST.
+  enum IdempotencyLevel {
+    IDEMPOTENCY_UNKNOWN = 0;
+    NO_SIDE_EFFECTS = 1;  // implies idempotent
+    IDEMPOTENT = 2;       // idempotent, but may have side effects
+  }
+  optional IdempotencyLevel idempotency_level = 34
+      [default = IDEMPOTENCY_UNKNOWN];
+
+  // The parser stores options it doesn't recognize here. See above.
+  repeated UninterpretedOption uninterpreted_option = 999;
+
+  // Clients can define custom options in extensions of this message. See above.
+  extensions 1000 to max;
+}
+
+// A message representing an option the parser does not recognize. This only
+// appears in options protos created by the compiler::Parser class.
+// DescriptorPool resolves these when building Descriptor objects. Therefore,
+// options protos in descriptor objects (e.g. returned by Descriptor::options(),
+// or produced by Descriptor::CopyTo()) will never have UninterpretedOptions
+// in them.
+message UninterpretedOption {
+  // The name of the uninterpreted option.  Each string represents a segment in
+  // a dot-separated name.  is_extension is true iff a segment represents an
+  // extension (denoted with parentheses in options specs in .proto files).
+  // E.g., { ["foo", false], ["bar.baz", true], ["qux", false] } represents
+  // "foo.(bar.baz).qux".
+  message NamePart {
+    optional string name_part = 1 [ctype = STRING_PIECE];
+    optional bool is_extension = 2;
+  }
+  repeated NamePart name = 2;
+
+  // The value of the uninterpreted option, in whatever type the tokenizer
+  // identified it as during parsing. Exactly one of these should be set.
+  optional string identifier_value = 3 [ctype = STRING_PIECE];
+  optional uint64 positive_int_value = 4;
+  optional int64 negative_int_value = 5;
+  optional double double_value = 6;
+  optional bytes string_value = 7;
+  optional string aggregate_value = 8 [ctype = STRING_PIECE];
+}
+
+// ===================================================================
+// Optional source code info
+
+// Encapsulates information about the original source file from which a
+// FileDescriptorProto was generated.
+message SourceCodeInfo {
+  // A Location identifies a piece of source code in a .proto file which
+  // corresponds to a particular definition.  This information is intended
+  // to be useful to IDEs, code indexers, documentation generators, and similar
+  // tools.
+  //
+  // For example, say we have a file like:
+  //   message Foo {
+  //     optional string foo = 1 [ctype = STRING_PIECE];
+  //   }
+  // Let's look at just the field definition:
+  //   optional string foo = 1 [ctype = STRING_PIECE];
+  //   ^       ^^     ^^  ^  ^^                       ^
+  //   a       bc     de  f  gh                       i
+  // We have the following locations:
+  //   span   path               represents
+  //   [a,i)  [ 4, 0, 2, 0 ]     The whole field definition.
+  //   [a,b)  [ 4, 0, 2, 0, 4 ]  The label (optional).
+  //   [c,d)  [ 4, 0, 2, 0, 5 ]  The type (string).
+  //   [e,f)  [ 4, 0, 2, 0, 1 ]  The name (foo).
+  //   [g,h)  [ 4, 0, 2, 0, 3 ]  The number (1).
+  //
+  // Notes:
+  // - A location may refer to a repeated field itself (i.e. not to any
+  //   particular index within it).  This is used whenever a set of elements is
+  //   logically enclosed in a single code segment.  For example, an entire
+  //   extend block (possibly containing multiple extension definitions) will
+  //   have an outer location whose path refers to the "extensions" repeated
+  //   field without an index.
+  // - Multiple locations may have the same path.  This happens when a single
+  //   logical declaration is spread out across multiple places.  The most
+  //   obvious example is the "extend" block again -- there may be multiple
+  //   extend blocks in the same scope, each of which will have the same path.
+  // - A location's span is not always a subset of its parent's span.  For
+  //   example, the "extendee" of an extension declaration appears at the
+  //   beginning of the "extend" block and is shared by all extensions within
+  //   the block.
+  // - Just because a location's span is a subset of some other location's span
+  //   does not mean that it is a descendant.  For example, a "group" defines
+  //   both a type and a field in a single declaration.  Thus, the locations
+  //   corresponding to the type and field and their components will overlap.
+  // - Code which tries to interpret locations should probably be designed to
+  //   ignore those that it doesn't understand, as more types of locations could
+  //   be recorded in the future.
+  repeated Location location = 1;
+  message Location {
+    // Identifies which part of the FileDescriptorProto was defined at this
+    // location.
+    //
+    // Each element is a field number or an index.  They form a path from
+    // the root FileDescriptorProto to the place where the definition occurs.
+    // For example, this path:
+    //   [ 4, 3, 2, 7, 1 ]
+    // refers to:
+    //   file.message_type(3)  // 4, 3
+    //       .field(7)         // 2, 7
+    //       .name()           // 1
+    // This is because FileDescriptorProto.message_type has field number 4:
+    //   repeated DescriptorProto message_type = 4;
+    // and DescriptorProto.field has field number 2:
+    //   repeated FieldDescriptorProto field = 2;
+    // and FieldDescriptorProto.name has field number 1:
+    //   optional string name = 1 [ctype = STRING_PIECE];
+    //
+    // Thus, the above path gives the location of a field name.  If we removed
+    // the last element:
+    //   [ 4, 3, 2, 7 ]
+    // this path refers to the whole field declaration (from the beginning
+    // of the label to the terminating semicolon).
+    repeated int32 path = 1 [packed = true];
+
+    // Always has exactly three or four elements: start line, start column,
+    // end line (optional, otherwise assumed same as start line), end column.
+    // These are packed into a single field for efficiency.  Note that line
+    // and column numbers are zero-based -- typically you will want to add
+    // 1 to each before displaying to a user.
+    repeated int32 span = 2 [packed = true];
+
+    // If this SourceCodeInfo represents a complete declaration, these are any
+    // comments appearing before and after the declaration which appear to be
+    // attached to the declaration.
+    //
+    // A series of line comments appearing on consecutive lines, with no other
+    // tokens appearing on those lines, will be treated as a single comment.
+    //
+    // leading_detached_comments will keep paragraphs of comments that appear
+    // before (but not connected to) the current element. Each paragraph,
+    // separated by empty lines, will be one comment element in the repeated
+    // field.
+    //
+    // Only the comment content is provided; comment markers (e.g. //) are
+    // stripped out.  For block comments, leading whitespace and an asterisk
+    // will be stripped from the beginning of each line other than the first.
+    // Newlines are included in the output.
+    //
+    // Examples:
+    //
+    //   optional int32 foo = 1;  // Comment attached to foo.
+    //   // Comment attached to bar.
+    //   optional int32 bar = 2;
+    //
+    //   optional string baz = 3 [ctype = STRING_PIECE];
+    //   // Comment attached to baz.
+    //   // Another line attached to baz.
+    //
+    //   // Comment attached to qux.
+    //   //
+    //   // Another line attached to qux.
+    //   optional double qux = 4;
+    //
+    //   // Detached comment for corge. This is not leading or trailing comments
+    //   // to qux or corge because there are blank lines separating it from
+    //   // both.
+    //
+    //   // Detached comment for corge paragraph 2.
+    //
+    //   optional string corge = 5 [ctype = STRING_PIECE];
+    //   /* Block comment attached
+    //    * to corge.  Leading asterisks
+    //    * will be removed. */
+    //   /* Block comment attached to
+    //    * grault. */
+    //   optional int32 grault = 6;
+    //
+    //   // ignored detached comments.
+    optional string leading_comments = 3 [ctype = STRING_PIECE];
+    optional string trailing_comments = 4 [ctype = STRING_PIECE];
+    repeated string leading_detached_comments = 6 [ctype = STRING_PIECE];
+  }
+}
+
+// Describes the relationship between generated code and its original source
+// file. A GeneratedCodeInfo message is associated with only one generated
+// source file, but may contain references to different source .proto files.
+message GeneratedCodeInfo {
+  // An Annotation connects some span of text in generated code to an element
+  // of its generating .proto file.
+  repeated Annotation annotation = 1;
+  message Annotation {
+    // Identifies the element in the original source .proto file. This field
+    // is formatted the same as SourceCodeInfo.Location.path.
+    repeated int32 path = 1 [packed = true];
+
+    // Identifies the filesystem path to the original source .proto.
+    optional string source_file = 2 [ctype = STRING_PIECE];
+
+    // Identifies the starting offset in bytes in the generated code
+    // that relates to the identified object.
+    optional int32 begin = 3;
+
+    // Identifies the ending offset in bytes in the generated code that
+    // relates to the identified object. The end offset should be one past
+    // the last relevant byte (so the length of the text = end - begin).
+    optional int32 end = 4;
+  }
+}
--- a/cmake/BUILD
+++ b/cmake/BUILD
@ -0,0 +1,89 @@
+load(
+    ":build_defs.bzl",
+    "generated_file_staleness_test",
+)
+load(
+    "//bazel:build_defs.bzl",
+    "make_shell_script",
+)
+
+licenses(["notice"])
+
+exports_files(["staleness_test.py"])
+
+# Support library for the staleness tests; the py_test declared by
+# generated_file_staleness_test() in build_defs.bzl depends on it.
+py_library(
+    name = "staleness_test_lib",
+    testonly = 1,
+    srcs = ["staleness_test_lib.py"],
+)
+
+# Tool run by the gen_cmakelists genrule to produce CMakeLists.txt.
+py_binary(
+    name = "make_cmakelists",
+    srcs = ["make_cmakelists.py"],
+)
+
+# Generates generated-in/CMakeLists.txt by running make_cmakelists with the
+# output path as its only argument.  The srcs are declared as inputs but are
+# not passed on the command line.
+# NOTE(review): presumably the tool reads BUILD/WORKSPACE relative to its
+# working directory -- confirm against make_cmakelists.py.
+genrule(
+    name = "gen_cmakelists",
+    srcs = [
+        "//:BUILD",
+        "//:WORKSPACE",
+        "//:cmake_files",
+        ":cmake_files",
+    ],
+    outs = ["generated-in/CMakeLists.txt"],
+    cmd = "$(location :make_cmakelists) $@",
+    tools = [":make_cmakelists"],
+)
+
+# Copies //:upb/json/parser.c under generated-in/ so that test_generated_files
+# can compare it with the checked-in upb/json/parser.c in this package.
+genrule(
+    name = "copy_json_ragel",
+    srcs = ["//:upb/json/parser.c"],
+    outs = ["generated-in/upb/json/parser.c"],
+    cmd = "cp $< $@",
+)
+
+# Copies the files provided by //:descriptor_upb_proto into
+# generated-in/google/protobuf for comparison by test_generated_files.
+# NOTE(review): the destination relies on $(@D) resolving to this package's
+# output directory when a genrule has multiple outs -- confirm against the
+# Bazel genrule documentation before changing the outs list.
+genrule(
+    name = "copy_protos",
+    srcs = ["//:descriptor_upb_proto"],
+    outs = [
+        "generated-in/google/protobuf/descriptor.upb.c",
+        "generated-in/google/protobuf/descriptor.upb.h",
+    ],
+    cmd = "cp $(SRCS) $(@D)/generated-in/google/protobuf",
+)
+
+# Verifies that each checked-in file in outs matches its generated counterpart
+# at generated-in/<file>, i.e. the outputs of the genrules above.
+generated_file_staleness_test(
+    name = "test_generated_files",
+    outs = [
+        "CMakeLists.txt",
+        "google/protobuf/descriptor.upb.c",
+        "google/protobuf/descriptor.upb.h",
+        "upb/json/parser.c",
+    ],
+    generated_pattern = "generated-in/%s",
+)
+
+# Test the CMake build #########################################################
+
+# Every file in this package; used as an input to gen_cmakelists and as
+# runtime data for the cmake_build test.
+filegroup(
+    name = "cmake_files",
+    srcs = glob([
+        "**/*",
+    ]),
+)
+
+# Declares run_cmake_build.sh.  Its commands list the working tree (find .),
+# then configure the cmake/ directory from a fresh build/ directory, build
+# with make, and run `make test`.
+# NOTE(review): make_shell_script comes from //bazel:build_defs.bzl and
+# presumably writes `contents` into `out` -- confirm there.
+make_shell_script(
+    name = "gen_run_cmake_build",
+    out = "run_cmake_build.sh",
+    contents = "find . && mkdir build && cd build && cmake ../cmake && make -j8 && make test",
+)
+
+# Runs run_cmake_build.sh as a test.  The data attribute supplies both this
+# package's files and the root package's //:cmake_files group.
+sh_test(
+    name = "cmake_build",
+    srcs = ["run_cmake_build.sh"],
+    data = [
+        ":cmake_files",
+        "//:cmake_files",
+    ],
+    deps = ["@bazel_tools//tools/bash/runfiles"],
+)
--- a/cmake/CMakeLists.txt
+++ b/cmake/CMakeLists.txt
@ -12,6 +12,7 @@ cmake_minimum_required (VERSION 3.0)
 cmake_policy(SET CMP0048 NEW)

 project(upb)
+set(CMAKE_C_STANDARD 99)


 # Prevent CMake from setting -rdynamic on Linux (!!).
@ -48,8 +49,8 @@ if(UPB_ENABLE_UBSAN)
  set(CMAKE_SHARED_LINKER_FLAGS "${CMAKE_SHARED_LINKER_FLAGS} -fsanitize=address")
 endif()

-include_directories(.)
-include_directories(generated_for_cmake)
+include_directories(..)
+include_directories(../cmake)
 include_directories(${CMAKE_CURRENT_BINARY_DIR})

 if(APPLE)
@ -61,21 +62,21 @@ endif()
 enable_testing()

 add_library(port
-  upb/port.c)
+  ../upb/port.c)
 add_library(upb
-  upb/decode.c
-  upb/encode.c
-  upb/msg.c
-  upb/msg.h
-  upb/table.c
-  upb/table.int.h
-  upb/upb.c
-  upb/upb.int.h
-  third_party/wyhash/wyhash.h
-  upb/decode.h
-  upb/encode.h
-  upb/upb.h
-  upb/upb.hpp)
+  ../upb/decode.c
+  ../upb/encode.c
+  ../upb/msg.c
+  ../upb/msg.h
+  ../upb/table.c
+  ../upb/table.int.h
+  ../upb/upb.c
+  ../upb/upb.int.h
+  ../third_party/wyhash/wyhash.h
+  ../upb/decode.h
+  ../upb/encode.h
+  ../upb/upb.h
+  ../upb/upb.hpp)
 target_link_libraries(upb
  port)
 add_library(generated_code_support__only_for_generated_code_do_not_use__i_give_permission_to_break_me INTERFACE)
@ -83,28 +84,28 @@ target_link_libraries(generated_code_support__only_for_generated_code_do_not_use
  table
  upb)
 add_library(reflection
-  upb/def.c
-  upb/msg.h
-  upb/reflection.c
-  upb/def.h
-  upb/def.hpp
-  upb/reflection.h)
+  ../upb/def.c
+  ../upb/msg.h
+  ../upb/reflection.c
+  ../upb/def.h
+  ../upb/def.hpp
+  ../upb/reflection.h)
 target_link_libraries(reflection
  descriptor_upb_proto
  port
  table
  upb)
 add_library(textformat
-  upb/text_encode.c
-  upb/text_encode.h)
+  ../upb/text_encode.c
+  ../upb/text_encode.h)
 target_link_libraries(textformat
  port
  reflection)
 add_library(json
-  upb/json_decode.c
-  upb/json_encode.c
-  upb/json_decode.h
-  upb/json_encode.h)
+  ../upb/json_decode.c
+  ../upb/json_encode.c
+  ../upb/json_decode.h
+  ../upb/json_encode.h)
 target_link_libraries(json
  port
  reflection
@ -114,27 +115,27 @@ target_link_libraries(table INTERFACE
  port
  upb)
 add_library(handlers
-  upb/handlers.c
-  upb/handlers-inl.h
-  upb/sink.c
-  upb/handlers.h
-  upb/sink.h)
+  ../upb/handlers.c
+  ../upb/handlers-inl.h
+  ../upb/sink.c
+  ../upb/handlers.h
+  ../upb/sink.h)
 target_link_libraries(handlers
  port
  reflection
  table
  upb)
 add_library(upb_pb
-  upb/pb/compile_decoder.c
-  upb/pb/decoder.c
-  upb/pb/decoder.int.h
-  upb/pb/encoder.c
-  upb/pb/textprinter.c
-  upb/pb/varint.c
-  upb/pb/varint.int.h
-  upb/pb/decoder.h
-  upb/pb/encoder.h
-  upb/pb/textprinter.h)
+  ../upb/pb/compile_decoder.c
+  ../upb/pb/decoder.c
+  ../upb/pb/decoder.int.h
+  ../upb/pb/encoder.c
+  ../upb/pb/textprinter.c
+  ../upb/pb/varint.c
+  ../upb/pb/varint.int.h
+  ../upb/pb/decoder.h
+  ../upb/pb/encoder.h
+  ../upb/pb/textprinter.h)
 target_link_libraries(upb_pb
  descriptor_upb_proto
  handlers
@ -143,26 +144,12 @@ target_link_libraries(upb_pb
  table
  upb)
 add_library(upb_json
-  generated_for_cmake/upb/json/parser.c
-  upb/json/printer.c
-  upb/json/parser.h
-  upb/json/printer.h)
+  ../cmake/upb/json/parser.c
+  ../upb/json/printer.c
+  ../upb/json/parser.h
+  ../upb/json/printer.h)
 target_link_libraries(upb_json
  upb
  upb_pb)
-add_library(upb_cc_bindings INTERFACE)
-target_link_libraries(upb_cc_bindings INTERFACE
-  descriptor_upb_proto
-  handlers
-  port
-  upb)
-add_library(upb_test
-  tests/testmain.cc
-  tests/test_util.h
-  tests/upb_test.h)
-target_link_libraries(upb_test
-  handlers
-  port
-  upb)


--- a/cmake/README.md
+++ b/cmake/README.md
@ -0,0 +1,23 @@
+
+# upb CMake build (EXPERIMENTAL)
+
+upb's CMake support is experimental. The core library builds successfully
+under CMake, and this is verified by the Bazel tests in this directory.
+However, there is no support for building the upb compiler or for generating
+.upb.c/.upb.h files. This means upb's CMake support is incomplete at best,
+unless your application is intended to be purely reflective.
+
+If you find this CMake setup useful in its current state, please consider
+filing an issue so we know. If you have suggestions for how it could be
+more useful (and particularly if you can contribute some code for it)
+please feel free to file an issue for that too. Do keep in mind that upb
+does not currently provide any ABI stability, so we want to avoid providing
+a shared library.
+
+The CMakeLists.txt is generated from the Bazel BUILD files using the Python
+scripts in this directory. We want to avoid having two separate sources of
+truth that both need to be updated when a file is added or removed.
+
+This directory also contains some generated files that would be created
+on the fly during a Bazel build. These are automatically kept in sync by
+the Bazel test `//cmake:test_generated_files`.
--- a/cmake/build_defs.bzl
+++ b/cmake/build_defs.bzl
@ -0,0 +1,44 @@
+
+def generated_file_staleness_test(name, outs, generated_pattern):
+    """Tests that checked-in file(s) match the contents of generated file(s).
+
+    The resulting test will verify that all output files exist and have the
+    correct contents.  If the test fails, it can be invoked with --fix to
+    bring the checked-in files up to date.
+
+    Two targets are declared: a genrule named name + "_makescript" that
+    produces the test script, and a py_test named name that runs it.
+
+    Args:
+      name: Name of the rule.
+      outs: the checked-in files that are copied from generated files.
+      generated_pattern: the pattern for transforming each "out" file into a
+        generated file.  For example, if generated_pattern="generated/%s" then
+        a file foo.txt will look for generated file generated/foo.txt.
+    """
+
+    # The test script is a copy of staleness_test.py with its file-list
+    # placeholder filled in by the genrule below.
+    script_name = name + ".py"
+    script_src = ":staleness_test.py"
+
+    # Filter out non-existing rules so Blaze doesn't error out before we even
+    # run the test.
+    existing_outs = native.glob(include = outs)
+
+    # The file list contains a few extra bits of information at the end.
+    # These get unpacked by the Config class in staleness_test_lib.py.
+    file_list = outs + [generated_pattern, native.package_name() or ".", name]
+
+    native.genrule(
+        name = name + "_makescript",
+        outs = [script_name],
+        srcs = [script_src],
+        testonly = 1,
+        # Copy the template, then replace the INSERT_FILE_LIST_HERE placeholder
+        # with the file list.  Entries are joined with a backslash followed by
+        # a newline, so each entry ends up on its own line of the script.
+        # NOTE(review): -i.bak (rather than a bare -i) looks chosen for
+        # portability across sed implementations -- confirm before changing.
+        cmd = "cat $(location " + script_src + ") > $@; " +
+              "sed -i.bak -e 's|INSERT_FILE_LIST_HERE|" + "\\\n  ".join(file_list) + "|' $@",
+    )
+
+    native.py_test(
+        name = name,
+        srcs = [script_name],
+        # Runtime data: the checked-in files that currently exist plus the
+        # generated counterpart of every entry in outs.
+        data = existing_outs + [generated_pattern % file for file in outs],
+        deps = [
+            ":staleness_test_lib",
+        ],
+    )
--- a/generated_for_cmake/google/protobuf/descriptor.upb.c
+++ b/generated_for_cmake/google/protobuf/descriptor.upb.c
--- a/generated_for_cmake/google/protobuf/descriptor.upb.h
+++ b/generated_for_cmake/google/protobuf/descriptor.upb.h
--- a/cmake/make_cmakelists.py
+++ b/cmake/make_cmakelists.py
@ -46,9 +46,9 @@ class BuildFileFunctions(object):
    found_files = []
    for file in files:
        if os.path.isfile(file):
-            found_files.append(file)
-        elif os.path.isfile("generated_for_cmake/" + file):
-            found_files.append("generated_for_cmake/" + file)
+            found_files.append("../" + file)
+        elif os.path.isfile("cmake/" + file):
+            found_files.append("../cmake/" + file)
        else:
            print("Warning: no such file: " + file)

@ -117,6 +117,9 @@ class BuildFileFunctions(object):
  def proto_library(self, **kwargs):
    pass

+  def cc_proto_library(self, **kwargs):
+    pass
+
  def generated_file_staleness_test(self, **kwargs):
    pass

@ -126,6 +129,9 @@ class BuildFileFunctions(object):
  def upb_proto_library(self, **kwargs):
    pass

+  def upb_proto_library_copts(self, **kwargs):
+    pass
+
  def upb_proto_reflection_library(self, **kwargs):
    pass

@ -163,6 +169,7 @@ class WorkspaceFileFunctions(object):

  def workspace(self, **kwargs):
    self.converter.prelude += "project(%s)\n" % (kwargs["name"])
+    self.converter.prelude += "set(CMAKE_C_STANDARD 99)\n"

  def http_archive(self, **kwargs):
    pass
@ -239,8 +246,8 @@ class Converter(object):
      set(CMAKE_SHARED_LINKER_FLAGS "${CMAKE_SHARED_LINKER_FLAGS} -fsanitize=address")
    endif()

-    include_directories(.)
-    include_directories(generated_for_cmake)
+    include_directories(..)
+    include_directories(../cmake)
    include_directories(${CMAKE_CURRENT_BINARY_DIR})

    if(APPLE)
@ -260,6 +267,7 @@ converter = Converter()

 def GetDict(obj):
  ret = {}
+  ret["UPB_DEFAULT_COPTS"] = []  # HACK
  for k in dir(obj):
    if not k.startswith("_"):
      ret[k] = getattr(obj, k);
--- a/cmake/staleness_test.py
+++ b/cmake/staleness_test.py
@ -6,7 +6,7 @@ with the actual list of files before we actually run the script.

 from __future__ import absolute_import

-from tools import staleness_test_lib
+from cmake import staleness_test_lib
 import unittest
 import sys

--- a/cmake/staleness_test_lib.py
+++ b/cmake/staleness_test_lib.py
@ -7,6 +7,7 @@ generated_file_staleness_test() rules.
 from __future__ import absolute_import
 from __future__ import print_function

+import sys
 import os
 from shutil import copyfile

@ -47,13 +48,13 @@ def _GetFilePairs(config):

  ret = []

-  has_bazel_genfiles = os.path.exists("bazel-genfiles")
+  has_bazel_genfiles = os.path.exists("bazel-bin")

  for filename in config.file_list:
    target = os.path.join(config.package_name, filename)
    generated = os.path.join(config.package_name, config.pattern % filename)
    if has_bazel_genfiles:
-      generated = os.path.join("bazel-genfiles", generated)
+      generated = os.path.join("bazel-bin", generated)

    # Generated files should always exist.  Blaze should guarantee this before
    # we are run.
@ -61,6 +62,7 @@ def _GetFilePairs(config):
      print("Generated file '%s' does not exist." % generated)
      print("Please run this command to generate it:")
      print("  bazel build %s:%s" % (config.package_name, config.target_name))
+      sys.exit(1)
    ret.append(_FilePair(target, generated))

  return ret
@ -87,10 +89,9 @@ def _GetMissingAndStaleFiles(file_pairs):
      missing_files.append(pair)
      continue

-    generated = open(pair.generated).read()
-    target = open(pair.target).read()
-    if generated != target:
-      stale_files.append(pair)
+    with open(pair.generated) as g, open(pair.target) as t:
+      if g.read() != t.read():
+        stale_files.append(pair)

  return missing_files, stale_files

--- a/generated_for_cmake/upb/json/parser.c
+++ b/generated_for_cmake/upb/json/parser.c
@ -953,7 +953,7 @@ static bool parse_number_from_buffer(upb_json_parser *p, const char *buf,
  upb_fieldtype_t type = upb_fielddef_type(p->top->f);
  double val;
  double dummy;
-  double inf = UPB_INFINITY;
+  double inf = INFINITY;

  errno = 0;

--- a/examples/bazel/BUILD
+++ b/examples/bazel/BUILD
@ -1,6 +1,8 @@
 load("@rules_proto//proto:defs.bzl", "proto_library")
 load("@upb//bazel:upb_proto_library.bzl", "upb_proto_library")

+licenses(["notice"])
+
 proto_library(
    name = "foo_proto",
    srcs = ["foo.proto"],
@ -15,4 +17,5 @@ cc_binary(
    name = "test_binary",
    srcs = ["test_binary.c"],
    deps = [":foo_upbproto"],
+    copts = ["-std=c99"],
 )
--- a/examples/bazel/test_binary.c
+++ b/examples/bazel/test_binary.c
@ -1,7 +1,7 @@

 #include <time.h>

-#include "foo.upb.h"
+#include "examples/bazel/foo.upb.h"

 int main() {
  upb_arena *arena = upb_arena_new();
--- a/kokoro/ubuntu/build.sh
+++ b/kokoro/ubuntu/build.sh
@ -19,12 +19,12 @@ which bazel
 bazel version

 cd $(dirname $0)/../..
-bazel test --test_output=errors :all
+bazel test --test_output=errors ...

 if [[ $(uname) = "Linux" ]]; then
  # Verify the ASAN build.  Have to exclude test_conformance_upb as protobuf
  # currently leaks memory in the conformance test runner.
-  bazel test --copt=-fsanitize=address --linkopt=-fsanitize=address --test_output=errors :all
+  bazel test --copt=-fsanitize=address --linkopt=-fsanitize=address --test_output=errors ...

  # Verify the UBSan build. Have to exclude Lua as the version we are using
  # fails some UBSan tests.
@ -35,5 +35,5 @@ if [[ $(uname) = "Linux" ]]; then
 fi

 if which valgrind; then
-  bazel test --run_under='valgrind --leak-check=full --error-exitcode=1' :all -- -:test_conformance_upb -:cmake_build
+  bazel test --run_under='valgrind --leak-check=full --error-exitcode=1' ... -- -tests:test_conformance_upb -cmake:cmake_build
 fi
--- a/tests/BUILD
+++ b/tests/BUILD
@ -0,0 +1,283 @@
+load(
+    "//bazel:build_defs.bzl",
+    "UPB_DEFAULT_COPTS",
+    "UPB_DEFAULT_CPPOPTS",
+    "make_shell_script",
+)
+load(
+    "//bazel:upb_proto_library.bzl",
+    "upb_proto_library",
+    "upb_proto_reflection_library",
+)
+
+licenses(["notice"])
+
+config_setting(
+    name = "fuzz",  # Matches builds invoked with --define fuzz=true.
+    values = {"define": "fuzz=true"},
+)
+
+cc_library(
+    name = "upb_test",  # Shared harness: every cc_test in this package depends on it.
+    testonly = 1,
+    srcs = [
+        "testmain.cc",
+    ],
+    hdrs = [
+        "test_util.h",
+        "upb_test.h",
+    ],
+    copts = UPB_DEFAULT_CPPOPTS,
+    deps = [
+        "//:handlers",
+        "//:port",
+        "//:upb",
+    ],
+)
+
+proto_library(
+    name = "test_proto",
+    testonly = 1,
+    srcs = ["test.proto"],
+)
+
+upb_proto_library(
+    name = "test_upb_proto",
+    testonly = 1,
+    deps = [":test_proto"],
+)
+
+cc_test(
+    name = "test_generated_code",  # C test; the proto targets around it are its deps.
+    srcs = ["test_generated_code.c"],
+    copts = UPB_DEFAULT_COPTS,  # C flags; the .cc targets in this file use UPB_DEFAULT_CPPOPTS.
+    deps = [
+        ":empty_upbdefs_proto",
+        ":test_messages_proto3_proto_upb",
+        ":test_upb_proto",
+        ":upb_test",
+    ],
+)
+
+proto_library(
+    name = "empty_proto",
+    srcs = ["empty.proto"],
+)
+
+upb_proto_reflection_library(
+    name = "empty_upbdefs_proto",
+    testonly = 1,
+    deps = [":empty_proto"],
+)
+
+upb_proto_library(
+    name = "test_messages_proto3_proto_upb",  # Wraps a proto target owned by the protobuf repository.
+    testonly = 1,
+    deps = ["@com_google_protobuf//:test_messages_proto3_proto"],
+)
+
+proto_library(
+    name = "test_decoder_proto",
+    srcs = [
+        "pb/test_decoder.proto",
+    ],
+)
+
+upb_proto_reflection_library(
+    name = "test_decoder_upb_proto",  # Reflection library for pb/test_decoder.proto, used by test_decoder.
+    deps = [":test_decoder_proto"],
+)
+
+cc_test(
+    name = "test_decoder",  # C++ test built from pb/test_decoder.cc.
+    srcs = ["pb/test_decoder.cc"],
+    copts = UPB_DEFAULT_CPPOPTS,
+    deps = [
+        ":test_decoder_upb_proto",
+        ":upb_test",
+        "//:handlers",
+        "//:port",
+        "//:upb",
+        "//:upb_pb",
+    ],
+)
+
+proto_library(
+    name = "test_cpp_proto",
+    srcs = [
+        "test_cpp.proto",
+    ],
+)
+
+upb_proto_reflection_library(
+    name = "test_cpp_upb_proto",
+    deps = ["test_cpp_proto"],  # Bare name: resolves to :test_cpp_proto in this package.
+)
+
+cc_test(
+    name = "test_cpp",  # C++ test built from test_cpp.cc.
+    srcs = ["test_cpp.cc"],
+    copts = UPB_DEFAULT_CPPOPTS,
+    deps = [
+        ":test_cpp_upb_proto",
+        ":upb_test",
+        "//:handlers",
+        "//:port",
+        "//:reflection",
+        "//:upb",
+        "//:upb_pb",
+    ],
+)
+
+cc_test(
+    name = "test_table",  # C++ test built from test_table.cc; links //:table directly.
+    srcs = ["test_table.cc"],
+    copts = UPB_DEFAULT_CPPOPTS,
+    deps = [
+        ":upb_test",
+        "//:port",
+        "//:table",
+        "//:upb",
+    ],
+)
+
+# OSS-Fuzz test
+cc_binary(
+    name = "file_descriptor_parsenew_fuzzer",  # Plain binary by default; fuzz flags apply only under :fuzz.
+    testonly = 1,
+    srcs = ["file_descriptor_parsenew_fuzzer.cc"],
+    copts = UPB_DEFAULT_CPPOPTS + select({
+        "//conditions:default": [],
+        ":fuzz": ["-fsanitize=fuzzer,address"],  # Added only when --define fuzz=true is set.
+    }),
+    defines = select({
+        "//conditions:default": [],
+        ":fuzz": ["HAVE_FUZZER"],  # Macro defined only for fuzz builds.
+    }),
+    deps = [
+        "//:descriptor_upb_proto",
+        "//:upb",
+    ],
+)
+
+# copybara:strip_for_google3_begin
+cc_test(
+    name = "test_encoder",  # C++ test built from pb/test_encoder.cc.
+    srcs = ["pb/test_encoder.cc"],
+    copts = UPB_DEFAULT_CPPOPTS,
+    deps = [
+        ":upb_test",
+        "//:descriptor_upb_proto",
+        "//:descriptor_upb_proto_reflection",
+        "//:upb",
+        "//:upb_pb",
+    ],
+)
+
+proto_library(
+    name = "test_json_enum_from_separate",
+    srcs = ["json/enum_from_separate_file.proto"],
+    deps = [":test_json_proto"],
+)
+
+proto_library(
+    name = "test_json_proto",
+    srcs = ["json/test.proto"],
+)
+
+upb_proto_reflection_library(
+    name = "test_json_upb_proto_reflection",
+    deps = ["test_json_proto"],  # Bare name: resolves to :test_json_proto in this package.
+)
+
+upb_proto_library(
+    name = "test_json_enum_from_separate_upb_proto",  # Not a dep of any target in this file.
+    deps = [":test_json_enum_from_separate"],
+)
+
+upb_proto_library(
+    name = "test_json_upb_proto",
+    deps = [":test_json_proto"],
+)
+
+cc_test(
+    name = "test_json",  # C++ test built from json/test_json.cc.
+    srcs = [
+        "json/test_json.cc",
+    ],
+    copts = UPB_DEFAULT_CPPOPTS,
+    deps = [
+        ":test_json_upb_proto",
+        ":test_json_upb_proto_reflection",
+        ":upb_test",
+        "//:upb_json",
+    ],
+)
+# copybara:strip_end
+
+upb_proto_library(
+    name = "conformance_proto_upb",
+    testonly = 1,
+    deps = ["@com_google_protobuf//:conformance_proto"],
+)
+
+upb_proto_reflection_library(
+    name = "conformance_proto_upbdefs",
+    testonly = 1,
+    deps = ["@com_google_protobuf//:conformance_proto"],
+)
+
+upb_proto_reflection_library(
+    name = "test_messages_proto2_upbdefs",
+    testonly = 1,
+    deps = ["@com_google_protobuf//:test_messages_proto2_proto"],
+)
+
+upb_proto_reflection_library(
+    name = "test_messages_proto3_upbdefs",
+    testonly = 1,
+    deps = ["@com_google_protobuf//:test_messages_proto3_proto"],
+)
+
+cc_binary(
+    name = "conformance_upb",  # Binary that test_conformance_upb hands to the conformance runner.
+    testonly = 1,
+    srcs = [
+        "conformance_upb.c",
+    ],
+    copts = UPB_DEFAULT_COPTS,
+    data = [
+        "conformance_upb_failures.txt",
+    ],
+    deps = [
+        ":conformance_proto_upb",
+        ":conformance_proto_upbdefs",
+        ":test_messages_proto2_upbdefs",
+        ":test_messages_proto3_upbdefs",
+        "//:json",
+        "//:port",
+        "//:reflection",
+        "//:textformat",
+        "//:upb",
+    ],
+)
+
+make_shell_script(
+    name = "gen_test_conformance_upb",  # Writes test_conformance_upb.sh, the srcs of the sh_test below.
+    out = "test_conformance_upb.sh",
+    contents = "external/com_google_protobuf/conformance_test_runner " +
+               " --enforce_recommended " +
+               " --failure_list ./tests/conformance_upb_failures.txt" +
+               " ./tests/conformance_upb",
+)
+
+sh_test(
+    name = "test_conformance_upb",  # Runs the generated script.
+    srcs = ["test_conformance_upb.sh"],
+    data = [  # The three files the script's command line refers to.
+        "conformance_upb_failures.txt",
+        ":conformance_upb",
+        "@com_google_protobuf//:conformance_test_runner",
+    ],
+    deps = ["@bazel_tools//tools/bash/runfiles"],
+)
--- a/tests/bindings/lua/BUILD
+++ b/tests/bindings/lua/BUILD
@ -0,0 +1,62 @@
+load(
+    "//upb/bindings/lua:lua_proto_library.bzl",
+    "lua_proto_library",
+)
+load(
+    "//bazel:build_defs.bzl",
+    "UPB_DEFAULT_COPTS",
+)
+
+licenses(["notice"])
+
+cc_test(
+    name = "test_lua",  # C driver (main.c); the Lua script and generated protos are supplied via data.
+    srcs = ["main.c"],
+    data = [
+        "test_upb.lua",
+        ":descriptor_proto_lua",
+        ":test_messages_proto2_proto_lua",
+        ":test_messages_proto3_proto_lua",
+        ":test_proto_lua",
+        "//:third_party/lunit/console.lua",
+        "//:third_party/lunit/lunit.lua",
+        "//upb/bindings/lua:upb.lua",
+        "@com_google_protobuf//:conformance_proto",
+        "@com_google_protobuf//:descriptor_proto",
+    ],
+    copts = UPB_DEFAULT_COPTS,
+    linkstatic = 1,
+    deps = [
+        "//upb/bindings/lua:lupb",
+        "@lua//:liblua",
+    ],
+)
+
+proto_library(
+    name = "test_proto",
+    testonly = 1,
+    srcs = ["test.proto"],
+)
+
+lua_proto_library(
+    name = "test_proto_lua",  # Presumably the module test_upb.lua requires as "tests.bindings.lua.test_pb" - confirm.
+    testonly = 1,
+    deps = [":test_proto"],
+)
+
+lua_proto_library(
+    name = "descriptor_proto_lua",  # Unlike its siblings, not marked testonly.
+    deps = ["@com_google_protobuf//:descriptor_proto"],
+)
+
+lua_proto_library(
+    name = "test_messages_proto3_proto_lua",
+    testonly = 1,
+    deps = ["@com_google_protobuf//:test_messages_proto3_proto"],
+)
+
+lua_proto_library(
+    name = "test_messages_proto2_proto_lua",
+    testonly = 1,
+    deps = ["@com_google_protobuf//:test_messages_proto2_proto"],
+)
--- a/tests/bindings/lua/main.c
+++ b/tests/bindings/lua/main.c
@ -34,7 +34,7 @@ const char *init =
    "upb/bindings/lua/?.lua"
  "'";

-int main() {
+int main(int argc, char **argv) {
  int ret = 0;
  L = luaL_newstate();
  luaL_openlibs(L);
--- a/tests/bindings/lua/test.proto
+++ b/tests/bindings/lua/test.proto
@ -0,0 +1,8 @@
+
+syntax = "proto2";
+
+package upb_test;
+
+message MapTest {  // Single-field message holding a string-to-double map.
+  map<string, double> map_string_double = 1;
+}
--- a/tests/bindings/lua/test_upb.lua
+++ b/tests/bindings/lua/test_upb.lua
@ -1,7 +1,7 @@

 local upb = require "lupb"
 local lunit = require "lunit"
-local upb_test = require "tests.test_pb"
+local upb_test = require "tests.bindings.lua.test_pb"
 local test_messages_proto3 = require "google.protobuf.test_messages_proto3_pb"
 local test_messages_proto2 = require "google.protobuf.test_messages_proto2_pb"
 local descriptor = require "google.protobuf.descriptor_pb"
--- a/tests/pb/test_decoder.cc
+++ b/tests/pb/test_decoder.cc
@ -43,7 +43,6 @@
 #else  // AMALGAMATED
 #include "upb/handlers.h"
 #include "upb/pb/decoder.h"
-#include "upb/pb/varint.int.h"
 #include "upb/upb.h"
 #endif  // !AMALGAMATED

@ -103,7 +102,7 @@ using std::string;

 void vappendf(string* str, const char *format, va_list args) {
  va_list copy;
-  _upb_va_copy(copy, args);
+  va_copy(copy, args);

  int count = vsnprintf(NULL, 0, format, args);
  if (count >= 0)
@ -136,6 +135,29 @@ void PrintBinary(const string& str) {
  }
 }

+#define UPB_PB_VARINT_MAX_LEN 10  /* ceil(64 / 7): the most bytes upb_vencode64() can write. */
+
+static size_t upb_vencode64(uint64_t val, char *buf) {  /* Writes val as a varint into buf; returns the byte count. */
+  size_t i;
+  if (val == 0) { buf[0] = 0; return 1; }  /* The loop below would write nothing for zero. */
+  i = 0;
+  while (val) {
+    uint8_t byte = val & 0x7fU;  /* Low 7 bits go out first. */
+    val >>= 7;
+    if (val) byte |= 0x80U;  /* Continuation bit: more bytes follow. */
+    buf[i++] = byte;
+  }
+  return i;
+}
+
+static uint32_t upb_zzenc_32(int32_t n) {  /* ZigZag: 0, -1, 1, -2 map to 0, 1, 2, 3. */
+  return ((uint32_t)n << 1) ^ (n >> 31);  /* n >> 31 is all ones for negative n, assuming an arithmetic shift. */
+}
+
+static uint64_t upb_zzenc_64(int64_t n) {  /* 64-bit ZigZag: 0, -1, 1, -2 map to 0, 1, 2, 3. */
+  return ((uint64_t)n << 1) ^ (n >> 63);  /* n >> 63 is all ones for negative n, assuming an arithmetic shift. */
+}
+
 /* Routines for building arbitrary protos *************************************/

 const string empty;
--- a/tests/pb/test_encoder.cc
+++ b/tests/pb/test_encoder.cc
@ -5,12 +5,65 @@
 #include "google/protobuf/descriptor.upbdefs.h"
 #include "tests/test_util.h"
 #include "tests/upb_test.h"
-#include "upb/bindings/stdc++/string.h"
 #include "upb/pb/decoder.h"
 #include "upb/pb/encoder.h"
 #include "upb/port_def.inc"
 #include "upb/upb.hpp"

+template <class T>  // T is the output buffer type; it must provide clear() and append(const char*, size_t).
+class FillStringHandler {
+ public:
+  static void SetHandler(upb_byteshandler* handler) {  // Installs both callbacks below on handler.
+    upb_byteshandler_setstartstr(handler, &FillStringHandler::StartString,
+                                 NULL);
+    upb_byteshandler_setstring(handler, &FillStringHandler::StringBuf, NULL);
+  }
+
+ private:
+  // TODO(haberman): add UpbBind/UpbMakeHandler support to BytesHandler so these
+  // can be prettier callbacks.
+  static void* StartString(void *c, const void *hd, size_t size) {
+    UPB_UNUSED(hd);
+    UPB_UNUSED(size);
+
+    T* str = static_cast<T*>(c);  // c is the T* target.
+    str->clear();  // Discard anything accumulated earlier.
+    return c;
+  }
+
+  static size_t StringBuf(void* c, const void* hd, const char* buf, size_t n,
+                          const upb_bufhandle* h) {
+    UPB_UNUSED(hd);
+    UPB_UNUSED(h);
+
+    T* str = static_cast<T*>(c);
+    try {
+      str->append(buf, n);
+      return n;  // All n bytes were appended.
+    } catch (const std::exception&) {
+      return 0;  // Append threw: report that nothing was consumed.
+    }
+  }
+};
+
+class StringSink {  // Owns a bytes handler that fills *target and exposes it as a upb::BytesSink.
+ public:
+  template <class T>
+  explicit StringSink(T* target) {
+    // TODO(haberman): we need to avoid rebuilding a new handler every time,
+    // but with class globals disallowed for google3 C++ this is tricky.
+    upb_byteshandler_init(&handler_);
+    FillStringHandler<T>::SetHandler(&handler_);
+    input_.Reset(&handler_, target);  // The sink keeps pointers to handler_ and target.
+  }
+
+  upb::BytesSink input() { return input_; }  // Returns the sink by value.
+
+ private:
+  upb_byteshandler handler_;
+  upb::BytesSink input_;
+};
+
 void test_pb_roundtrip() {
  std::string input(
      google_protobuf_descriptor_proto_upbdefinit.descriptor.data,
@ -29,7 +82,7 @@ void test_pb_roundtrip() {
  const upb::pb::DecoderMethodPtr method = decoder_cache.Get(md);

  std::string output;
-  upb::StringSink string_sink(&output);
+  StringSink string_sink(&output);
  upb::pb::EncoderPtr encoder =
      upb::pb::EncoderPtr::Create(&arena, encoder_handlers, string_sink.input());
  upb::pb::DecoderPtr decoder =
--- a/tests/pb/test_varint.c
+++ b/tests/pb/test_varint.c
@ -1,126 +0,0 @@
-
-#include <stdio.h>
-#include "upb/pb/varint.int.h"
-#include "tests/upb_test.h"
-
-#include "upb/port_def.inc"
-
-/* Test that we can round-trip from int->varint->int. */
-static void test_varint_for_num(upb_decoderet (*decoder)(const char*),
-                                uint64_t num) {
-  char buf[16];
-  size_t bytes;
-  upb_decoderet r;
-
-  memset(buf, 0xff, sizeof(buf));
-  bytes = upb_vencode64(num, buf);
-
-  if (num <= UINT32_MAX) {
-    uint64_t encoded = upb_vencode32((uint32_t)num);
-    char buf2[16];
-    upb_decoderet r;
-
-    memset(buf2, 0, sizeof(buf2));
-    memcpy(&buf2, &encoded, 8);
-#ifdef UPB_BIG_ENDIAN
-    char swap[8];
-    swap[0] = buf2[7];
-    swap[1] = buf2[6];
-    swap[2] = buf2[5];
-    swap[3] = buf2[4];
-    swap[4] = buf2[3];
-    swap[5] = buf2[2];
-    swap[6] = buf2[1];
-    swap[7] = buf2[0];
-    buf2[0] = swap[0];
-    buf2[1] = swap[1];
-    buf2[2] = swap[2];
-    buf2[3] = swap[3];
-    buf2[4] = swap[4];
-    buf2[5] = swap[5];
-    buf2[6] = swap[6];
-    buf2[7] = swap[7];
-#endif    
-    r = decoder(buf2);
-    ASSERT(r.val == num);
-    ASSERT(r.p == buf2 + upb_value_size(encoded));
-    ASSERT(upb_zzenc_32(upb_zzdec_32((uint32_t)num)) == num);
-  }
-
-  r = decoder(buf);
-  ASSERT(r.val == num);
-  ASSERT(r.p == buf + bytes);
-  ASSERT(upb_zzenc_64(upb_zzdec_64(num)) == num);
-}
-
-/* Making up for the lack of 64-bit constants in C89. */
-static uint64_t make_u64(uint32_t high, uint32_t low) {
-  uint64_t ret = high;
-  ret = (ret << 32) | low;
-  return ret;
-}
-
-static void test_varint_decoder(upb_decoderet (*decoder)(const char*)) {
-#define TEST(bytes, expected_val) {\
-    size_t n = sizeof(bytes) - 1;  /* for NULL */ \
-    char buf[UPB_PB_VARINT_MAX_LEN]; \
-    upb_decoderet r; \
-    memset(buf, 0xff, sizeof(buf)); \
-    memcpy(buf, bytes, n); \
-    r = decoder(buf); \
-    ASSERT(r.val == expected_val); \
-    ASSERT(r.p == buf + n); \
-  }
-
-  uint64_t num;
-
-  char twelvebyte[16] = {-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 1, 1};
-  const char *twelvebyte_buf = twelvebyte;
-  /* A varint that terminates before hitting the end of the provided buffer,
-   * but in too many bytes (11 instead of 10). */
-  upb_decoderet r = decoder(twelvebyte_buf);
-  ASSERT(r.p == NULL);
-
-  TEST("\x00", 0UL);
-  TEST("\x01", 1UL);
-  TEST("\x81\x14", 0xa01UL);
-  TEST("\x81\x03", 0x181UL);
-  TEST("\x81\x83\x07", 0x1c181UL);
-  TEST("\x81\x83\x87\x0f", 0x1e1c181UL);
-  TEST("\x81\x83\x87\x8f\x1f", make_u64(0x1, 0xf1e1c181UL));
-  TEST("\x81\x83\x87\x8f\x9f\x3f", make_u64(0x1f9, 0xf1e1c181UL));
-  TEST("\x81\x83\x87\x8f\x9f\xbf\x7f", make_u64(0x1fdf9, 0xf1e1c181UL));
-  TEST("\x81\x83\x87\x8f\x9f\xbf\xff\x01", make_u64(0x3fdf9, 0xf1e1c181UL));
-  TEST("\x81\x83\x87\x8f\x9f\xbf\xff\x81\x03",
-       make_u64(0x303fdf9, 0xf1e1c181UL));
-  TEST("\x81\x83\x87\x8f\x9f\xbf\xff\x81\x83\x07",
-       make_u64(0x8303fdf9, 0xf1e1c181UL));
-#undef TEST
-
-  for (num = 5; num * 1.5 < UINT64_MAX; num *= 1.5) {
-    test_varint_for_num(decoder, num);
-  }
-  test_varint_for_num(decoder, 0);
-}
-
-
-#define TEST_VARINT_DECODER(decoder) \
-  /* Create non-inline versions for convenient inspection of assembly language \
-   * output. */ \
-  upb_decoderet _upb_vdecode_ ## decoder(const char *p) { \
-    return upb_vdecode_ ## decoder(p); \
-  } \
-  void test_ ## decoder(void) { \
-    test_varint_decoder(&_upb_vdecode_ ## decoder); \
-  } \
-
-TEST_VARINT_DECODER(check2_branch32)
-TEST_VARINT_DECODER(check2_branch64)
-
-int run_tests(int argc, char *argv[]) {
-  UPB_UNUSED(argc);
-  UPB_UNUSED(argv);
-  test_check2_branch32();
-  test_check2_branch64();
-  return 0;
-}
--- a/tests/test_cpp.cc
+++ b/tests/test_cpp.cc
@ -952,6 +952,31 @@ void TestArena() {
  }
 }

+void TestInlinedArena() {
+  int n = 100000;  // Loop count; each Decrementer destructor later subtracts one.
+
+  struct Decrementer {
+    Decrementer(int* _p) : p(_p) {}
+    ~Decrementer() { (*p)--; }  // Makes destruction observable through *p.
+    int* p;
+  };
+
+  {
+    upb::InlinedArena<1024> arena;  // Presumably 1024 bytes of inline storage - confirm against upb.hpp.
+    for (int i = 0; i < n; i++) {
+      arena.Own(new Decrementer(&n));  // Hands the object to the arena instead of deleting it here.
+
+      // Intersperse allocation and ensure we can write to it.
+      int* val = static_cast<int*>(upb_arena_malloc(arena.ptr(), sizeof(int)));
+      *val = i;
+    }
+
+    // Test a large allocation.
+    upb_arena_malloc(arena.ptr(), 1000000);  // Result is intentionally unused.
+  }
+  ASSERT(n == 0);  // Holds only if every owned Decrementer was destroyed exactly once by now.
+}
+
 extern "C" {

 int run_tests() {
--- a/tests/test_generated_code.c
+++ b/tests/test_generated_code.c
@ -24,7 +24,7 @@ const int32_t test_int32_2 = -20;
 const int32_t test_int32_3 = 30;
 const int32_t test_int32_4 = -40;

-static void test_scalars() {
+static void test_scalars(void) {
  upb_arena *arena = upb_arena_new();
  protobuf_test_messages_proto3_TestAllTypesProto3 *msg =
      protobuf_test_messages_proto3_TestAllTypesProto3_new(arena);
@ -117,7 +117,7 @@ static void check_string_map_one_entry(
  ASSERT(!const_ent);
 }

-static void test_string_double_map() {
+static void test_string_double_map(void) {
  upb_arena *arena = upb_arena_new();
  upb_strview serialized;
  upb_test_MapTest *msg = upb_test_MapTest_new(arena);
@ -141,7 +141,7 @@ static void test_string_double_map() {
  upb_arena_free(arena);
 }

-static void test_string_map() {
+static void test_string_map(void) {
  upb_arena *arena = upb_arena_new();
  protobuf_test_messages_proto3_TestAllTypesProto3 *msg =
      protobuf_test_messages_proto3_TestAllTypesProto3_new(arena);
@ -259,7 +259,7 @@ static void check_int32_map_one_entry(
  ASSERT(!const_ent);
 }

-static void test_int32_map() {
+static void test_int32_map(void) {
  upb_arena *arena = upb_arena_new();
  protobuf_test_messages_proto3_TestAllTypesProto3 *msg =
      protobuf_test_messages_proto3_TestAllTypesProto3_new(arena);
@ -328,7 +328,7 @@ static void test_int32_map() {
  upb_arena_free(arena);
 }

-void test_repeated() {
+void test_repeated(void) {
  upb_arena *arena = upb_arena_new();
  protobuf_test_messages_proto3_TestAllTypesProto3 *msg =
      protobuf_test_messages_proto3_TestAllTypesProto3_new(arena);
@ -347,7 +347,7 @@ void test_repeated() {
  upb_arena_free(arena);
 }

-void test_null_decode_buf() {
+void test_null_decode_buf(void) {
  upb_arena *arena = upb_arena_new();
  protobuf_test_messages_proto3_TestAllTypesProto3 *msg =
      protobuf_test_messages_proto3_TestAllTypesProto3_parse(NULL, 0, arena);
@ -359,7 +359,7 @@ void test_null_decode_buf() {
  upb_arena_free(arena);
 }

-void test_status_truncation() {
+void test_status_truncation(void) {
  int i, j;
  upb_status status;
  upb_status status2;
--- a/upb/bindings/lua/BUILD
+++ b/upb/bindings/lua/BUILD
@ -0,0 +1,40 @@
+load(
+    "//bazel:build_defs.bzl",
+    "UPB_DEFAULT_COPTS",
+    "UPB_DEFAULT_CPPOPTS",
+)
+
+licenses(["notice"])
+
+cc_library(
+    name = "lupb",  # C library from def.c, msg.c and upb.c; linked by //tests/bindings/lua:test_lua.
+    srcs = [
+        "def.c",
+        "msg.c",
+        "upb.c",
+    ],
+    hdrs = [
+        "upb.h",
+    ],
+    copts = UPB_DEFAULT_COPTS,
+    visibility = ["//visibility:public"],
+    deps = [
+        "//:reflection",
+        "//:textformat",
+        "//:upb",
+        "@lua//:liblua",
+    ],
+)
+
+cc_binary(
+    name = "protoc-gen-lua",  # Default "_upbc" executable of the lua_proto_library aspect.
+    srcs = ["upbc.cc"],
+    copts = UPB_DEFAULT_CPPOPTS,
+    visibility = ["//visibility:public"],
+    deps = [
+        "@com_google_absl//absl/strings",
+        "@com_google_protobuf//:protoc_lib",
+    ],
+)
+
+exports_files(["upb.lua"])  # Referenced by label as test data in //tests/bindings/lua.
--- a/upb/bindings/lua/def.c
+++ b/upb/bindings/lua/def.c
@ -56,8 +56,8 @@ static void lupb_wrapper_pushwrapper(lua_State *L, int narg, const void *def,
 * wrapper for field |f| of this msgdef.
 */
 void lupb_msgdef_pushsubmsgdef(lua_State *L, const upb_fielddef *f) {
-  assert(luaL_testudata(L, -1, LUPB_MSGDEF));
  const upb_msgdef *m = upb_fielddef_msgsubdef(f);
+  assert(m);
  assert(upb_fielddef_containingtype(f) == lupb_msgdef_check(L, -1));
  lupb_wrapper_pushwrapper(L, -1, m, LUPB_MSGDEF);
  lua_replace(L, -2);  /* Replace msgdef with submsgdef. */
@ -251,8 +251,8 @@ static int lupb_oneofiter_next(lua_State *L) {
 }

 static int lupb_oneofdef_fields(lua_State *L) {
-  lupb_oneofdef_check(L, 1);
  int *index = lua_newuserdata(L, sizeof(int));
+  lupb_oneofdef_check(L, 1);
  *index = 0;

  /* Closure upvalues are: oneofdef, index. */
@ -408,8 +408,8 @@ static int lupb_msgfielditer_next(lua_State *L) {
 }

 static int lupb_msgdef_fields(lua_State *L) {
-  lupb_msgdef_check(L, 1);
  int *index = lua_newuserdata(L, sizeof(int));
+  lupb_msgdef_check(L, 1);
  *index = 0;

  /* Closure upvalues are: msgdef, index. */
@ -441,8 +441,8 @@ static int lupb_msgoneofiter_next(lua_State *L) {
 }

 static int lupb_msgdef_oneofs(lua_State *L) {
-  lupb_msgdef_check(L, 1);
  int *index = lua_newuserdata(L, sizeof(int));
+  lupb_msgdef_check(L, 1);
  *index = 0;

  /* Closure upvalues are: msgdef, index. */
@ -750,7 +750,7 @@ static int lupb_symtab_addfile(lua_State *L) {
  size_t len;
  upb_symtab *s = lupb_symtab_check(L, 1);
  const char *str = luaL_checklstring(L, 2, &len);
-  upb_arena *arena = lupb_arena_pushnew(L);;
+  upb_arena *arena = lupb_arena_pushnew(L);
  const google_protobuf_FileDescriptorProto *file;
  const upb_filedef *file_def;
  upb_status status;
@ -776,7 +776,7 @@ static int lupb_symtab_addset(lua_State *L) {
  google_protobuf_FileDescriptorSet *set;
  upb_symtab *s = lupb_symtab_check(L, 1);
  const char *str = luaL_checklstring(L, 2, &len);
-  upb_arena *arena = lupb_arena_pushnew(L);;
+  upb_arena *arena = lupb_arena_pushnew(L);
  upb_status status;

  upb_status_clear(&status);
--- a/upb/bindings/lua/lua_proto_library.bzl
+++ b/upb/bindings/lua/lua_proto_library.bzl
@ -82,7 +82,7 @@ _lua_proto_library_aspect = aspect(
        "_upbc": attr.label(
            executable = True,
            cfg = "host",
-            default = "//:protoc-gen-lua",
+            default = "//upb/bindings/lua:protoc-gen-lua",
        ),
        "_protoc": attr.label(
            executable = True,
--- a/upb/bindings/lua/msg.c
+++ b/upb/bindings/lua/msg.c
@ -564,8 +564,8 @@ static int lupb_mapiter_next(lua_State *L) {
 *   pairs(map)
 */
 static int lupb_map_pairs(lua_State *L) {
-  lupb_map_check(L, 1);
  size_t *iter = lua_newuserdata(L, sizeof(*iter));
+  lupb_map_check(L, 1);

  *iter = UPB_MAP_BEGIN;
  lua_pushvalue(L, 1);
--- a/upb/bindings/stdc++/string.h
+++ b/upb/bindings/stdc++/string.h
@ -1,69 +0,0 @@
-
-#ifndef UPB_STDCPP_H_
-#define UPB_STDCPP_H_
-
-#include "upb/sink.h"
-
-#include "upb/port_def.inc"
-
-namespace upb {
-
-template <class T>
-class FillStringHandler {
- public:
-  static void SetHandler(upb_byteshandler* handler) {
-    upb_byteshandler_setstartstr(handler, &FillStringHandler::StartString,
-                                 NULL);
-    upb_byteshandler_setstring(handler, &FillStringHandler::StringBuf, NULL);
-  }
-
- private:
-  // TODO(haberman): add UpbBind/UpbMakeHandler support to BytesHandler so these
-  // can be prettier callbacks.
-  static void* StartString(void *c, const void *hd, size_t size) {
-    UPB_UNUSED(hd);
-    UPB_UNUSED(size);
-
-    T* str = static_cast<T*>(c);
-    str->clear();
-    return c;
-  }
-
-  static size_t StringBuf(void* c, const void* hd, const char* buf, size_t n,
-                          const upb_bufhandle* h) {
-    UPB_UNUSED(hd);
-    UPB_UNUSED(h);
-
-    T* str = static_cast<T*>(c);
-    try {
-      str->append(buf, n);
-      return n;
-    } catch (const std::exception&) {
-      return 0;
-    }
-  }
-};
-
-class StringSink {
- public:
-  template <class T>
-  explicit StringSink(T* target) {
-    // TODO(haberman): we need to avoid rebuilding a new handler every time,
-    // but with class globals disallowed for google3 C++ this is tricky.
-    upb_byteshandler_init(&handler_);
-    FillStringHandler<T>::SetHandler(&handler_);
-    input_.Reset(&handler_, target);
-  }
-
-  BytesSink input() { return input_; }
-
- private:
-  upb_byteshandler handler_;
-  BytesSink input_;
-};
-
-}  // namespace upb
-
-#include "upb/port_undef.inc"
-
-#endif  // UPB_STDCPP_H_
--- a/upb/decode.c
+++ b/upb/decode.c
@ -139,10 +139,14 @@ static const int8_t delim_ops[37] = {

 /* Data pertaining to the parse. */
 typedef struct {
-  const char *limit;       /* End of delimited region or end of buffer. */
-  upb_arena arena;
+  const char *end;         /* Can read up to 16 bytes slop beyond this. */
+  const char *limit_ptr;   /* = end + UPB_MIN(limit, 0) */
+  int limit;               /* Submessage limit relative to end. */
   int depth;
   uint32_t end_group; /* Set to field number of END_GROUP tag, if any. */
+  bool alias;              /* If set, decode_readstr() points strings at the input instead of copying. */
+  char patch[32];          /* Scratch: 16 bytes copied from end, then 16 zero bytes. */
+  upb_arena arena;
   jmp_buf err;
 } upb_decstate;

@ -150,7 +154,7 @@ typedef union {
  bool bool_val;
  uint32_t uint32_val;
  uint64_t uint64_val;
-  upb_strview str_val;
+  uint32_t size;
 } wireval;

 static const char *decode_msg(upb_decstate *d, const char *ptr, upb_msg *msg,
@ -200,41 +204,48 @@ static bool decode_reserve(upb_decstate *d, upb_array *arr, size_t elem) {
  return need_realloc;
 }

+typedef struct {
+  const char *ptr;  /* First byte after the varint, or NULL if decoding failed. */
+  uint64_t val;     /* Decoded value; 0 when ptr is NULL. */
+} decode_vret;
+
 UPB_NOINLINE
-static const char *decode_longvarint64(upb_decstate *d, const char *ptr,
-                                       const char *limit, uint64_t *val) {
-  uint8_t byte;
-  int bitpos = 0;
-  uint64_t out = 0;
-
-  do {
-    if (bitpos >= 70 || ptr == limit) decode_err(d);
-    byte = *ptr;
-    out |= (uint64_t)(byte & 0x7F) << bitpos;
-    ptr++;
-    bitpos += 7;
-  } while (byte & 0x80);
-
-  *val = out;
-  return ptr;
+static decode_vret decode_longvarint64(const char *ptr, uint64_t val) {  /* val: byte 0, continuation bit included. */
+  decode_vret ret = {NULL, 0};  /* Returned as-is if no terminating byte is found. */
+  uint64_t byte;
+  int i;
+  for (i = 1; i < 10; i++) {  /* Bytes 1..9; byte 0 is already in val. */
+    byte = (uint8_t)ptr[i];  /* No bounds check here - presumably covered by the 16-byte slop past end; confirm. */
+    val += (byte - 1) << (i * 7);  /* Adds byte i and cancels the previous byte's continuation bit. */
+    if (!(byte & 0x80)) {
+      ret.ptr = ptr + i + 1;
+      ret.val = val;
+      return ret;
+    }
+  }
+  return ret;
+}

 UPB_FORCEINLINE
 static const char *decode_varint64(upb_decstate *d, const char *ptr,
-                                   const char *limit, uint64_t *val) {
-  if (UPB_LIKELY(ptr < limit && (*ptr & 0x80) == 0)) {
-    *val = (uint8_t)*ptr;
+                                   uint64_t *val) {
+  uint64_t byte = (uint8_t)*ptr;
+  if (UPB_LIKELY((byte & 0x80) == 0)) {  /* Fast path: the varint is a single byte. */
+    *val = byte;
     return ptr + 1;
   } else {
-    return decode_longvarint64(d, ptr, limit, val);
+    decode_vret res = decode_longvarint64(ptr, byte);
+    if (!res.ptr) decode_err(d);  /* No terminating byte within 10 bytes. */
+    *val = res.val;
+    return res.ptr;
   }
 }

 UPB_FORCEINLINE
 static const char *decode_varint32(upb_decstate *d, const char *ptr,
-                                   const char *limit, uint32_t *val) {
+                                   uint32_t *val) {
   uint64_t u64;
-  ptr = decode_varint64(d, ptr, limit, &u64);
+  ptr = decode_varint64(d, ptr, &u64);  /* Decode at 64-bit width; the range check follows. */
   if (u64 > UINT32_MAX) decode_err(d);
   *val = (uint32_t)u64;
   return ptr;
  return _upb_msg_new_inl(subl, &d->arena);
 }

-static void decode_tosubmsg(upb_decstate *d, upb_msg *submsg,
-                            const upb_msglayout *layout,
-                            const upb_msglayout_field *field, upb_strview val) {
+/* Installs a new parse limit that lies size bytes past ptr.
+ *
+ * d->limit is kept as an offset relative to d->end, and d->limit_ptr is that
+ * limit clamped so it never points past d->end.  Returns the difference
+ * between the previous limit and the new one; pass it to decode_poplimit()
+ * to restore the previous limit. */
+static int decode_pushlimit(upb_decstate *d, const char *ptr, int size) {
+  int limit = size + (int)(ptr - d->end);
+  int delta = d->limit - limit;
+  d->limit = limit;
+  d->limit_ptr = d->end + UPB_MIN(0, limit);
+  return delta;
+}
+
+/* Undoes decode_pushlimit(): adds saved_delta (the value that call returned)
+ * back onto d->limit and recomputes the clamped d->limit_ptr. */
+static void decode_poplimit(upb_decstate *d, int saved_delta) {
+  d->limit += saved_delta;
+  d->limit_ptr = d->end + UPB_MIN(0, d->limit);
+}
+
+/* Pairs a success flag with a parse position.
+ * NOTE(review): nothing in the hunks shown here uses this type
+ * (decode_isdone() returns a plain bool) -- confirm it is still needed. */
+typedef struct {
+  bool ok;
+  const char *ptr;
+} decode_doneret;
+
+/* Out-of-line part of decode_isdone(), reached when ptr is at or past
+ * d->limit_ptr but not exactly at the limit.  overrun is ptr - d->end.
+ *
+ * If overrun < d->limit there is still data to parse: the 16 bytes starting
+ * at d->end are copied into d->patch (followed by 16 zero bytes) and parsing
+ * continues inside the patch buffer at the same overrun offset.  Otherwise
+ * ptr has gone past the limit and decode_err(d) is called. */
+UPB_NOINLINE
+static const char *decode_isdonefallback(upb_decstate *d, const char *ptr,
+                                         int overrun) {
+  if (overrun < d->limit) {
+    /* Need to copy remaining data into patch buffer. */
+    UPB_ASSERT(overrun < 16);
+    memset(d->patch + 16, 0, 16);
+    memcpy(d->patch, d->end, 16);
+    ptr = &d->patch[0] + overrun;
+    /* Rebase end/limit onto the patch buffer; end moved forward by 16 bytes
+     * of data, so the end-relative limit shrinks by 16. */
+    d->end = &d->patch[16];
+    d->limit -= 16;
+    d->limit_ptr = d->end + d->limit;
+    /* With aliasing off, decode_readstr() copies string data instead of
+     * pointing into the buffer being parsed. */
+    d->alias = false;
+    UPB_ASSERT(ptr < d->limit_ptr);
+    return ptr;
+  } else {
+    decode_err(d);
+  }
+}
+
+/* Reports whether parsing has reached the current limit.
+ *
+ * Returns false while *ptr is before d->limit_ptr and true when *ptr sits
+ * exactly on the limit.  In any other case decode_isdonefallback() is
+ * called, which either replaces *ptr with a new position (and false is
+ * returned) or calls decode_err(d). */
+UPB_FORCEINLINE
+static bool decode_isdone(upb_decstate *d, const char **ptr) {
+  int overrun = *ptr - d->end;
+  if (UPB_LIKELY(*ptr < d->limit_ptr)) {
+    return false;
+  } else if (UPB_LIKELY(overrun == d->limit)) {
+    return true;
+  } else {
+    *ptr = decode_isdonefallback(d, *ptr, overrun);
+    return false;
+  }
+}
+
+/* Stores the size bytes at ptr into *str and returns ptr + size.
+ *
+ * When d->alias is set, str->data points directly at ptr.  Otherwise the
+ * bytes are copied into memory from d->arena, and decode_err(d) is called
+ * if that allocation fails. */
+static const char *decode_readstr(upb_decstate *d, const char *ptr, int size,
+                                  upb_strview *str) {
+  if (d->alias) {
+    str->data = ptr;
+  } else {
+    char *data =  upb_arena_malloc(&d->arena, size);
+    if (!data) decode_err(d);
+    memcpy(data, ptr, size);
+    str->data = data;
+  }
+  str->size = size;
+  return ptr + size;
+}
+
+/* Parses the size bytes starting at ptr into submsg, using the sub-layout
+ * layout->submsgs[field->submsg_index].
+ *
+ * The parse limit is narrowed to those size bytes for the duration of the
+ * call and restored afterwards.  decode_err(d) is called if the nesting
+ * depth counter d->depth drops below zero or if d->end_group is non-zero
+ * once the submessage has been parsed.  Returns the position returned by
+ * decode_msg(). */
+static const char *decode_tosubmsg(upb_decstate *d, const char *ptr,
+                                   upb_msg *submsg, const upb_msglayout *layout,
+                                   const upb_msglayout_field *field, int size) {
  const upb_msglayout *subl = layout->submsgs[field->submsg_index];
-  const char *saved_limit = d->limit;
+  int saved_delta = decode_pushlimit(d, ptr, size);
  if (--d->depth < 0) decode_err(d);
-  d->limit = val.data + val.size;
-  decode_msg(d, val.data, submsg, subl);
-  d->limit = saved_limit;
+  ptr = decode_msg(d, ptr, submsg, subl);
+  decode_poplimit(d, saved_delta);
  if (d->end_group != 0) decode_err(d);
  d->depth++;
+  return ptr;
 }

 static const char *decode_group(upb_decstate *d, const char *ptr,
@ -345,15 +421,14 @@ static const char *decode_toarray(upb_decstate *d, const char *ptr,
      memcpy(mem, &val, 1 << op);
      return ptr;
    case OP_STRING:
-      decode_verifyutf8(d, val.str_val.data, val.str_val.size);
+      decode_verifyutf8(d, ptr, val.size);
      /* Fallthrough. */
-    case OP_BYTES:
+    case OP_BYTES: {
      /* Append bytes. */
-      mem =
-          UPB_PTR_AT(_upb_array_ptr(arr), arr->len * sizeof(upb_strview), void);
+      upb_strview *str = (upb_strview*)_upb_array_ptr(arr) + arr->len;
      arr->len++;
-      memcpy(mem, &val, sizeof(upb_strview));
-      return ptr;
+      return decode_readstr(d, ptr, val.size, str);
+    }
    case OP_SUBMSG: {
      /* Append submessage / group. */
      upb_msg *submsg = decode_newsubmsg(d, layout, field);
@ -361,26 +436,25 @@ static const char *decode_toarray(upb_decstate *d, const char *ptr,
          submsg;
      arr->len++;
      if (UPB_UNLIKELY(field->descriptortype == UPB_DTYPE_GROUP)) {
-        ptr = decode_togroup(d, ptr, submsg, layout, field);
+        return decode_togroup(d, ptr, submsg, layout, field);
      } else {
-        decode_tosubmsg(d, submsg, layout, field, val.str_val);
+        return decode_tosubmsg(d, ptr, submsg, layout, field, val.size);
      }
-      return ptr;
    }
    case OP_FIXPCK_LG2(2):
    case OP_FIXPCK_LG2(3): {
      /* Fixed packed. */
      int lg2 = op - OP_FIXPCK_LG2(0);
      int mask = (1 << lg2) - 1;
-      size_t count = val.str_val.size >> lg2;
-      if ((val.str_val.size & mask) != 0) {
+      size_t count = val.size >> lg2;
+      if ((val.size & mask) != 0) {
        decode_err(d); /* Length isn't a round multiple of elem size. */
      }
      decode_reserve(d, arr, count);
      mem = UPB_PTR_AT(_upb_array_ptr(arr), arr->len << lg2, void);
      arr->len += count;
-      memcpy(mem, val.str_val.data, val.str_val.size);
-      return ptr;
+      memcpy(mem, ptr, val.size);  /* XXX: ptr boundary. */
+      return ptr + val.size;
    }
    case OP_VARPCK_LG2(0):
    case OP_VARPCK_LG2(2):
@ -388,12 +462,11 @@ static const char *decode_toarray(upb_decstate *d, const char *ptr,
      /* Varint packed. */
      int lg2 = op - OP_VARPCK_LG2(0);
      int scale = 1 << lg2;
-      const char *ptr = val.str_val.data;
-      const char *end = ptr + val.str_val.size;
+      int saved_limit = decode_pushlimit(d, ptr, val.size);
      char *out = UPB_PTR_AT(_upb_array_ptr(arr), arr->len << lg2, void);
-      while (ptr < end) {
+      while (!decode_isdone(d, &ptr)) {
        wireval elem;
-        ptr = decode_varint64(d, ptr, end, &elem.uint64_val);
+        ptr = decode_varint64(d, ptr, &elem.uint64_val);
        decode_munge(field->descriptortype, &elem);
        if (decode_reserve(d, arr, 1)) {
          out = UPB_PTR_AT(_upb_array_ptr(arr), arr->len << lg2, void);
@ -402,7 +475,7 @@ static const char *decode_toarray(upb_decstate *d, const char *ptr,
        memcpy(out, &elem, scale);
        out += scale;
      }
-      if (ptr != end) decode_err(d);
+      decode_poplimit(d, saved_limit);
      return ptr;
    }
    default:
@ -410,9 +483,9 @@ static const char *decode_toarray(upb_decstate *d, const char *ptr,
  }
 }

-static void decode_tomap(upb_decstate *d, upb_msg *msg,
-                         const upb_msglayout *layout,
-                         const upb_msglayout_field *field, wireval val) {
+static const char *decode_tomap(upb_decstate *d, const char *ptr, upb_msg *msg,
+                                const upb_msglayout *layout,
+                                const upb_msglayout_field *field, wireval val) {
  upb_map **map_p = UPB_PTR_AT(msg, field->offset, upb_map *);
  upb_map *map = *map_p;
  upb_map_entry ent;
@ -440,10 +513,9 @@ static void decode_tomap(upb_decstate *d, upb_msg *msg,
    ent.v.val = upb_value_ptr(_upb_msg_new(entry->submsgs[0], &d->arena));
  }

-  decode_tosubmsg(d, &ent.k, layout, field, val.str_val);
-
-  /* Insert into map. */
+  ptr = decode_tosubmsg(d, ptr, &ent.k, layout, field, val.size);
  _upb_map_set(map, &ent.k, map->key_size, &ent.v, map->val_size, &d->arena);
+  return ptr;
 }

 static const char *decode_tomsg(upb_decstate *d, const char *ptr, upb_msg *msg,
@ -477,16 +549,15 @@ static const char *decode_tomsg(upb_decstate *d, const char *ptr, upb_msg *msg,
      if (UPB_UNLIKELY(type == UPB_DTYPE_GROUP)) {
        ptr = decode_togroup(d, ptr, submsg, layout, field);
      } else {
-        decode_tosubmsg(d, submsg, layout, field, val.str_val);
+        ptr = decode_tosubmsg(d, ptr, submsg, layout, field, val.size);
      }
      break;
    }
    case OP_STRING:
-      decode_verifyutf8(d, val.str_val.data, val.str_val.size);
+      decode_verifyutf8(d, ptr, val.size);
      /* Fallthrough. */
    case OP_BYTES:
-      memcpy(mem, &val, sizeof(upb_strview));
-      break;
+      return decode_readstr(d, ptr, val.size, mem);
    case OP_SCALAR_LG2(3):
      memcpy(mem, &val, 8);
      break;
@ -505,7 +576,7 @@ static const char *decode_tomsg(upb_decstate *d, const char *ptr, upb_msg *msg,

 static const char *decode_msg(upb_decstate *d, const char *ptr, upb_msg *msg,
                              const upb_msglayout *layout) {
-  while (ptr < d->limit) {
+  while (!decode_isdone(d, &ptr)) {
    uint32_t tag;
    const upb_msglayout_field *field;
    int field_number;
@ -514,7 +585,7 @@ static const char *decode_msg(upb_decstate *d, const char *ptr, upb_msg *msg,
    wireval val;
    int op;

-    ptr = decode_varint32(d, ptr, d->limit, &tag);
+    ptr = decode_varint32(d, ptr, &tag);
    field_number = tag >> 3;
    wire_type = tag & 7;

@ -522,12 +593,11 @@ static const char *decode_msg(upb_decstate *d, const char *ptr, upb_msg *msg,

    switch (wire_type) {
      case UPB_WIRE_TYPE_VARINT:
-        ptr = decode_varint64(d, ptr, d->limit, &val.uint64_val);
+        ptr = decode_varint64(d, ptr, &val.uint64_val);
        op = varint_ops[field->descriptortype];
        decode_munge(field->descriptortype, &val);
        break;
      case UPB_WIRE_TYPE_32BIT:
-        if (d->limit - ptr < 4) decode_err(d);
        memcpy(&val.uint32_val, ptr, 4);
        val.uint32_val = _upb_be_swap32(val.uint32_val);
        ptr += 4;
@ -535,7 +605,6 @@ static const char *decode_msg(upb_decstate *d, const char *ptr, upb_msg *msg,
        if (((1 << field->descriptortype) & fixed32_ok) == 0) goto unknown;
        break;
      case UPB_WIRE_TYPE_64BIT:
-        if (d->limit - ptr < 8) decode_err(d);
        memcpy(&val.uint64_val, ptr, 8);
        val.uint64_val = _upb_be_swap64(val.uint64_val);
        ptr += 8;
@ -543,16 +612,12 @@ static const char *decode_msg(upb_decstate *d, const char *ptr, upb_msg *msg,
        if (((1 << field->descriptortype) & fixed64_ok) == 0) goto unknown;
        break;
      case UPB_WIRE_TYPE_DELIMITED: {
-        uint32_t size;
        int ndx = field->descriptortype;
        if (_upb_isrepeated(field)) ndx += 18;
-        ptr = decode_varint32(d, ptr, d->limit, &size);
-        if (size >= INT32_MAX || (size_t)(d->limit - ptr) < size) {
+        ptr = decode_varint32(d, ptr, &val.size);
+        if (val.size >= INT32_MAX || ptr - d->end + val.size > d->limit) {
          decode_err(d); /* Length overflow. */
        }
-        val.str_val.data = ptr;
-        val.str_val.size = size;
-        ptr += size;
        op = delim_ops[ndx];
        break;
      }
@ -576,7 +641,7 @@ static const char *decode_msg(upb_decstate *d, const char *ptr, upb_msg *msg,
          ptr = decode_toarray(d, ptr, msg, layout, field, val, op);
          break;
        case _UPB_LABEL_MAP:
-          decode_tomap(d, msg, layout, field, val);
+          ptr = decode_tomap(d, ptr, msg, layout, field, val);
          break;
        default:
          ptr = decode_tomsg(d, ptr, msg, layout, field, val, op);
@ -590,6 +655,7 @@ static const char *decode_msg(upb_decstate *d, const char *ptr, upb_msg *msg,
        ptr = decode_group(d, ptr, NULL, NULL, field_number);
      }
      if (msg) {
+        if (wire_type == UPB_WIRE_TYPE_DELIMITED) ptr += val.size;
        if (!_upb_msg_addunknown(msg, field_start, ptr - field_start,
                                 &d->arena)) {
          decode_err(d);
@ -598,7 +664,6 @@ static const char *decode_msg(upb_decstate *d, const char *ptr, upb_msg *msg,
    }
  }

-  if (ptr != d->limit) decode_err(d);
  return ptr;
 }

@ -607,9 +672,22 @@ bool upb_decode(const char *buf, size_t size, void *msg, const upb_msglayout *l,
  bool ok;
  upb_decstate state;

-  if (size == 0) return true;
+  if (size == 0) {
+    return true;
+  } else if (size < 16) {
+    memset(&state.patch, 0, 32);
+    memcpy(&state.patch, buf, size);
+    buf = state.patch;
+    state.end = buf + size;
+    state.limit = 0;
+    state.alias = false;
+  } else {
+    state.end = buf + size - 16;
+    state.limit = 16;
+    state.alias = true;
+  }

-  state.limit = buf + size;
+  state.limit_ptr = state.end;
  state.depth = 64;
  state.end_group = 0;
  state.arena.head = arena->head;
--- a/upb/json/parser.rl
+++ b/upb/json/parser.rl
@ -951,7 +951,7 @@ static bool parse_number_from_buffer(upb_json_parser *p, const char *buf,
  upb_fieldtype_t type = upb_fielddef_type(p->top->f);
  double val;
  double dummy;
-  double inf = UPB_INFINITY;
+  double inf = INFINITY;

  errno = 0;

--- a/upb/json/printer.c
+++ b/upb/json/printer.c
@ -7,7 +7,9 @@

 #include <ctype.h>
 #include <inttypes.h>
+#include <math.h>
 #include <stdint.h>
+#include <stdio.h>
 #include <string.h>
 #include <time.h>

@ -139,7 +141,7 @@ static void putstring(upb_json_printer *p, const char *buf, size_t len) {
      char escape_buf[8];
      if (!escape) {
        unsigned char byte = (unsigned char)c;
-        _upb_snprintf(escape_buf, sizeof(escape_buf), "\\u%04x", (int)byte);
+        snprintf(escape_buf, sizeof(escape_buf), "\\u%04x", (int)byte);
        escape = escape_buf;
      }

@ -178,53 +180,53 @@ const char neginf[] = "\"-Infinity\"";
 const char inf[] = "\"Infinity\"";

+/* Writes the JSON text for val into buf (capacity length) and returns the
+ * number of characters written.  Positive and negative infinity become the
+ * quoted strings inf / neginf; every other value is printed with "%.17g".
+ * CHKLENGTH is defined outside this hunk; presumably it bails out when its
+ * condition is false -- confirm.
+ *
+ * NOTE(review): strcpy() also writes the terminating NUL, so a buffer with
+ * length == strlen(inf) passes the check but receives length + 1 bytes --
+ * confirm that callers always leave room for it. */
 static size_t fmt_double(double val, char* buf, size_t length) {
-  if (val == UPB_INFINITY) {
+  if (val == INFINITY) {
    CHKLENGTH(length >= strlen(inf));
    strcpy(buf, inf);
    return strlen(inf);
-  } else if (val == -UPB_INFINITY) {
+  } else if (val == -INFINITY) {
    CHKLENGTH(length >= strlen(neginf));
    strcpy(buf, neginf);
    return strlen(neginf);
  } else {
-    size_t n = _upb_snprintf(buf, length, "%.17g", val);
+    size_t n = snprintf(buf, length, "%.17g", val);
    CHKLENGTH(n > 0 && n < length);
    return n;
  }
 }

+/* Prints val with "%.8g" into buf (capacity length) and returns the number
+ * of characters written.  CHKLENGTH (defined outside this hunk) is handed the
+ * condition that the output is non-empty and was not truncated. */
 static size_t fmt_float(float val, char* buf, size_t length) {
-  size_t n = _upb_snprintf(buf, length, "%.8g", val);
+  size_t n = snprintf(buf, length, "%.8g", val);
  CHKLENGTH(n > 0 && n < length);
  return n;
 }

+/* Prints "true" or "false" for val into buf (capacity length) and returns
+ * the number of characters written.  CHKLENGTH (defined outside this hunk)
+ * is handed the condition that the output is non-empty and not truncated. */
 static size_t fmt_bool(bool val, char* buf, size_t length) {
-  size_t n = _upb_snprintf(buf, length, "%s", (val ? "true" : "false"));
+  size_t n = snprintf(buf, length, "%s", (val ? "true" : "false"));
  CHKLENGTH(n > 0 && n < length);
  return n;
 }

+/* Prints val as a bare decimal number into buf (capacity length) and returns
+ * the number of characters written.  CHKLENGTH (defined outside this hunk)
+ * is handed the condition that the output is non-empty and not truncated. */
 static size_t fmt_int64_as_number(int64_t val, char* buf, size_t length) {
-  size_t n = _upb_snprintf(buf, length, "%" PRId64, val);
+  size_t n = snprintf(buf, length, "%" PRId64, val);
  CHKLENGTH(n > 0 && n < length);
  return n;
 }

+/* Prints val as a bare unsigned decimal number into buf (capacity length)
+ * and returns the number of characters written.  CHKLENGTH (defined outside
+ * this hunk) is handed the condition that the output is non-empty and not
+ * truncated. */
 static size_t fmt_uint64_as_number(uint64_t val, char* buf, size_t length) {
-  size_t n = _upb_snprintf(buf, length, "%" PRIu64, val);
+  size_t n = snprintf(buf, length, "%" PRIu64, val);
  CHKLENGTH(n > 0 && n < length);
  return n;
 }

+/* Prints val as a decimal number wrapped in double quotes into buf (capacity
+ * length) and returns the number of characters written.  CHKLENGTH (defined
+ * outside this hunk) is handed the condition that the output is non-empty
+ * and not truncated. */
 static size_t fmt_int64_as_string(int64_t val, char* buf, size_t length) {
-  size_t n = _upb_snprintf(buf, length, "\"%" PRId64 "\"", val);
+  size_t n = snprintf(buf, length, "\"%" PRId64 "\"", val);
  CHKLENGTH(n > 0 && n < length);
  return n;
 }

+/* Prints val as an unsigned decimal number wrapped in double quotes into buf
+ * (capacity length) and returns the number of characters written.  CHKLENGTH
+ * (defined outside this hunk) is handed the condition that the output is
+ * non-empty and not truncated. */
 static size_t fmt_uint64_as_string(uint64_t val, char* buf, size_t length) {
-  size_t n = _upb_snprintf(buf, length, "\"%" PRIu64 "\"", val);
+  size_t n = snprintf(buf, length, "\"%" PRIu64 "\"", val);
  CHKLENGTH(n > 0 && n < length);
  return n;
 }
@ -870,12 +872,12 @@ static bool printer_enddurationmsg(void *closure, const void *handler_data,
    return false;
  }

-  _upb_snprintf(buffer, sizeof(buffer), "%ld", (long)p->seconds);
+  snprintf(buffer, sizeof(buffer), "%ld", (long)p->seconds);
  base_len = strlen(buffer);

  if (p->nanos != 0) {
    char nanos_buffer[UPB_DURATION_MAX_NANO_LEN + 3];
-    _upb_snprintf(nanos_buffer, sizeof(nanos_buffer), "%.9f",
+    snprintf(nanos_buffer, sizeof(nanos_buffer), "%.9f",
                  p->nanos / 1000000000.0);
    /* Remove trailing 0. */
    for (i = UPB_DURATION_MAX_NANO_LEN + 2;
@ -949,8 +951,8 @@ static bool printer_endtimestampmsg(void *closure, const void *handler_data,
           "%Y-%m-%dT%H:%M:%S", gmtime(&time));
  if (p->nanos != 0) {
    char nanos_buffer[UPB_TIMESTAMP_MAX_NANO_LEN + 3];
-    _upb_snprintf(nanos_buffer, sizeof(nanos_buffer), "%.9f",
-                  p->nanos / 1000000000.0);
+    snprintf(nanos_buffer, sizeof(nanos_buffer), "%.9f",
+             p->nanos / 1000000000.0);
    /* Remove trailing 0. */
    for (i = UPB_TIMESTAMP_MAX_NANO_LEN + 2;
         nanos_buffer[i] == '0'; i--) {
--- a/upb/json_decode.c
+++ b/upb/json_decode.c
@ -5,6 +5,7 @@
 #include <float.h>
 #include <inttypes.h>
 #include <limits.h>
+#include <math.h>
 #include <setjmp.h>
 #include <stdlib.h>
 #include <string.h>
@ -747,11 +748,11 @@ static upb_msgval jsondec_double(jsondec *d, const upb_fielddef *f) {
    case JD_STRING:
      str = jsondec_string(d);
      if (jsondec_streql(str, "NaN")) {
-        val.double_val = UPB_NAN;
+        val.double_val = NAN;
      } else if (jsondec_streql(str, "Infinity")) {
-        val.double_val = UPB_INFINITY;
+        val.double_val = INFINITY;
      } else if (jsondec_streql(str, "-Infinity")) {
-        val.double_val = -UPB_INFINITY;
+        val.double_val = -INFINITY;
      } else {
        val.double_val = strtod(str.data, NULL);
      }
@ -761,7 +762,7 @@ static upb_msgval jsondec_double(jsondec *d, const upb_fielddef *f) {
  }

  if (upb_fielddef_type(f) == UPB_TYPE_FLOAT) {
-    if (val.double_val != UPB_INFINITY && val.double_val != -UPB_INFINITY &&
+    if (val.double_val != INFINITY && val.double_val != -INFINITY &&
        (val.double_val > FLT_MAX || val.double_val < -FLT_MAX)) {
      jsondec_err(d, "Float out of range");
    }
@ -1096,6 +1097,7 @@ static void jsondec_duration(jsondec *d, upb_msg *msg, const upb_msgdef *m) {
  upb_strview str = jsondec_string(d);
  const char *ptr = str.data;
  const char *end = ptr + str.size;
+  const int64_t max = (uint64_t)3652500 * 86400;

  /* "3.000000001s", "3s", etc. */
  ptr = jsondec_buftoint64(d, ptr, end, &seconds.int64_val);
@ -1105,7 +1107,7 @@ static void jsondec_duration(jsondec *d, upb_msg *msg, const upb_msgdef *m) {
    jsondec_err(d, "Malformed duration");
  }

-  if (seconds.int64_val < -315576000000LL || seconds.int64_val > 315576000000LL) {
+  if (seconds.int64_val < -max || seconds.int64_val > max) {
    jsondec_err(d, "Duration out of range");
  }

--- a/upb/json_encode.c
+++ b/upb/json_encode.c
@ -4,14 +4,16 @@
 #include <ctype.h>
 #include <float.h>
 #include <inttypes.h>
+#include <math.h>
+#include <setjmp.h>
 #include <stdarg.h>
 #include <stdio.h>
 #include <string.h>
-#include <setjmp.h>

 #include "upb/decode.h"
 #include "upb/reflection.h"

+/* Must be last. */
 #include "upb/port_def.inc"

 typedef struct {
@ -76,7 +78,7 @@ static void jsonenc_printf(jsonenc *e, const char *fmt, ...) {
  va_list args;

  va_start(args, fmt);
-  n = _upb_vsnprintf(e->ptr, have, fmt, args);
+  n = vsnprintf(e->ptr, have, fmt, args);
  va_end(args);

  if (UPB_LIKELY(have > n)) {
@ -268,9 +270,9 @@ static void jsonenc_string(jsonenc *e, upb_strview str) {
 }

 static void jsonenc_double(jsonenc *e, const char *fmt, double val) {
-  if (val == UPB_INFINITY) {
+  if (val == INFINITY) {
    jsonenc_putstr(e, "\"Infinity\"");
-  } else if (val == -UPB_INFINITY) {
+  } else if (val == -INFINITY) {
    jsonenc_putstr(e, "\"-Infinity\"");
  } else if (val != val) {
    jsonenc_putstr(e, "\"NaN\"");
--- a/upb/pb/textprinter.c
+++ b/upb/pb/textprinter.c
@ -105,8 +105,8 @@ bool putf(upb_textprinter *p, const char *fmt, ...) {
  va_start(args, fmt);

  /* Run once to get the length of the string. */
-  _upb_va_copy(args_copy, args);
-  len = _upb_vsnprintf(NULL, 0, fmt, args_copy);
+  va_copy(args_copy, args);
+  len = vsnprintf(NULL, 0, fmt, args_copy);
  va_end(args_copy);

  /* + 1 for NULL terminator (vsprintf() requires it even if we don't). */
--- a/upb/pb/varint.int.h
+++ b/upb/pb/varint.int.h
@ -150,9 +150,7 @@ UPB_INLINE uint64_t upb_vencode32(uint32_t val) {
  uint64_t ret = 0;
  UPB_ASSERT(bytes <= 5);
  memcpy(&ret, buf, bytes);
-#ifdef UPB_BIG_ENDIAN
-  ret = byteswap64(ret);
-#endif
+  ret = _upb_be_swap64(ret);
  UPB_ASSERT(ret <= 0xffffffffffU);
  return ret;
 }
--- a/upb/port_def.inc
+++ b/upb/port_def.inc
@ -20,6 +20,15 @@
 *
 * This file is private and must not be included by users!
 */
+
+#if !(__STDC_VERSION__ >= 199901L || __cplusplus >= 201103L)
+#error upb requires C99 or C++11
+#endif
+
+#if (defined(_MSC_VER) && _MSC_VER < 1900)
+#error upb requires MSVC >= 2015.
+#endif
+
 #include <stdint.h>
 #include <stddef.h>

@ -68,12 +77,6 @@
 #define UPB_UNLIKELY(x) (x)
 #endif

-/* Define UPB_BIG_ENDIAN manually if you're on big endian and your compiler
- * doesn't provide these preprocessor symbols. */
-#if defined(__BYTE_ORDER__) && (__BYTE_ORDER__ == __ORDER_BIG_ENDIAN__)
-#define UPB_BIG_ENDIAN
-#endif
-
 /* Macros for function attributes on compilers that support them. */
 #ifdef __GNUC__
 #define UPB_FORCEINLINE __inline__ __attribute__((always_inline))
@ -89,49 +92,6 @@
 #define UPB_NORETURN
 #endif

-#if __STDC_VERSION__ >= 199901L || __cplusplus >= 201103L
-/* C99/C++11 versions. */
-#include <stdio.h>
-#define _upb_snprintf snprintf
-#define _upb_vsnprintf vsnprintf
-#define _upb_va_copy(a, b) va_copy(a, b)
-#elif defined(_MSC_VER)
-/* Microsoft C/C++ versions. */
-#include <stdarg.h>
-#include <stdio.h>
-#if _MSC_VER < 1900
-int msvc_snprintf(char* s, size_t n, const char* format, ...);
-int msvc_vsnprintf(char* s, size_t n, const char* format, va_list arg);
-#define UPB_MSVC_VSNPRINTF
-#define _upb_snprintf msvc_snprintf
-#define _upb_vsnprintf msvc_vsnprintf
-#else
-#define _upb_snprintf snprintf
-#define _upb_vsnprintf vsnprintf
-#endif
-#define _upb_va_copy(a, b) va_copy(a, b)
-#elif defined __GNUC__
-/* A few hacky workarounds for functions not in C89.
- * For internal use only!
- * TODO(haberman): fix these by including our own implementations, or finding
- * another workaround.
- */
-#define _upb_snprintf __builtin_snprintf
-#define _upb_vsnprintf __builtin_vsnprintf
-#define _upb_va_copy(a, b) __va_copy(a, b)
-#else
-#error Need implementations of [v]snprintf and va_copy
-#endif
-
-#ifdef __cplusplus
-#if __cplusplus >= 201103L || defined(__GXX_EXPERIMENTAL_CXX0X__) || \
-    (defined(_MSC_VER) && _MSC_VER >= 1900)
-/* C++11 is present */
-#else
-#error upb requires C++11 for C++ support
-#endif
-#endif
-
 #define UPB_MAX(x, y) ((x) > (y) ? (x) : (y))
 #define UPB_MIN(x, y) ((x) < (y) ? (x) : (y))

@ -159,29 +119,12 @@ int msvc_vsnprintf(char* s, size_t n, const char* format, va_list arg);
 #define UPB_ASSERT(expr) assert(expr)
 #endif

-/* UPB_ASSERT_DEBUGVAR(): assert that uses functions or variables that only
- * exist in debug mode.  This turns into regular assert. */
-#define UPB_ASSERT_DEBUGVAR(expr) assert(expr)
-
 #if defined(__GNUC__) || defined(__clang__)
 #define UPB_UNREACHABLE() do { assert(0); __builtin_unreachable(); } while(0)
 #else
 #define UPB_UNREACHABLE() do { assert(0); } while(0)
 #endif

-/* UPB_INFINITY representing floating-point positive infinity. */
-#include <math.h>
-#ifdef INFINITY
-#define UPB_INFINITY INFINITY
-#else
-#define UPB_INFINITY (1.0 / 0.0)
-#endif
-#ifdef NAN
-#define UPB_NAN NAN
-#else
-#define UPB_NAN (0.0 / 0.0)
-#endif
-
 #if defined(__SANITIZE_ADDRESS__)
 #define UPB_ASAN 1
 #ifdef __cplusplus
--- a/upb/port_undef.inc
+++ b/upb/port_undef.inc
@ -18,14 +18,7 @@
 #undef UPB_UNUSED
 #undef UPB_ASSUME
 #undef UPB_ASSERT
-#undef UPB_ASSERT_DEBUGVAR
 #undef UPB_UNREACHABLE
-#undef UPB_INFINITY
-#undef UPB_NAN
-#undef UPB_MSVC_VSNPRINTF
-#undef _upb_snprintf
-#undef _upb_vsnprintf
-#undef _upb_va_copy
 #undef UPB_POISON_MEMORY_REGION
 #undef UPB_UNPOISON_MEMORY_REGION
 #undef UPB_ASAN
--- a/upb/text_encode.c
+++ b/upb/text_encode.c
@ -43,7 +43,7 @@ static void txtenc_printf(txtenc *e, const char *fmt, ...) {
  va_list args;

  va_start(args, fmt);
-  n = _upb_vsnprintf(e->ptr, have, fmt, args);
+  n = vsnprintf(e->ptr, have, fmt, args);
  va_end(args);

  if (UPB_LIKELY(have > n)) {
--- a/upb/upb.c
+++ b/upb/upb.c
@ -40,7 +40,7 @@ void upb_status_seterrf(upb_status *status, const char *fmt, ...) {
+/* Marks status as failed and formats fmt/args into status->msg; the final
+ * byte of the buffer is always forced to NUL.  Does nothing when status is
+ * NULL. */
 void upb_status_vseterrf(upb_status *status, const char *fmt, va_list args) {
  if (!status) return;
  status->ok = false;
-  _upb_vsnprintf(status->msg, sizeof(status->msg), fmt, args);
+  vsnprintf(status->msg, sizeof(status->msg), fmt, args);
  status->msg[UPB_STATUS_MAX_MESSAGE - 1] = '\0';
 }

@ -49,7 +49,7 @@ void upb_status_vappenderrf(upb_status *status, const char *fmt, va_list args) {
  if (!status) return;
  status->ok = false;
  len = strlen(status->msg);
-  _upb_vsnprintf(status->msg + len, sizeof(status->msg) - len, fmt, args);
+  vsnprintf(status->msg + len, sizeof(status->msg) - len, fmt, args);
  status->msg[UPB_STATUS_MAX_MESSAGE - 1] = '\0';
 }

--- a/upb/upb.h
+++ b/upb/upb.h
@ -301,7 +301,7 @@ UPB_INLINE uint32_t _upb_be_swap32(uint32_t val) {
    return val;
  } else {
    return ((val & 0xff) << 24) | ((val & 0xff00) << 8) |
-           ((val & 0xff0000ULL) >> 8) | ((val & 0xff000000ULL) >> 24);
+           ((val & 0xff0000) >> 8) | ((val & 0xff000000) >> 24);
  }
 }

@ -309,11 +309,7 @@ UPB_INLINE uint64_t _upb_be_swap64(uint64_t val) {
  if (_upb_isle()) {
    return val;
  } else {
-    return ((val & 0xff) << 56) | ((val & 0xff00) << 40) |
-           ((val & 0xff0000) << 24) | ((val & 0xff000000) << 8) |
-           ((val & 0xff00000000ULL) >> 8) | ((val & 0xff0000000000ULL) >> 24) |
-           ((val & 0xff000000000000ULL) >> 40) |
-           ((val & 0xff00000000000000ULL) >> 56);
+    return ((uint64_t)_upb_be_swap32(val) << 32) | _upb_be_swap32(val >> 32);
  }
 }

--- a/upb/upb.hpp
+++ b/upb/upb.hpp
@ -41,6 +41,9 @@ class Arena {
 public:
  // A simple arena with no initial memory block and the default allocator.
  Arena() : ptr_(upb_arena_new(), upb_arena_free) {}
+  Arena(char *initial_block, size_t size)
+      : ptr_(upb_arena_init(initial_block, size, &upb_alloc_global),
+             upb_arena_free) {}

  upb_arena* ptr() { return ptr_.get(); }

@ -71,15 +74,12 @@ class Arena {
+// An Arena whose initial block is an N-byte array stored inside the object
+// itself; the block is handed to the Arena(char*, size_t) constructor.
+//
+// NOTE(review): the deleted members below take `const InlinedArena*`, not
+// `const InlinedArena&`, so they are not the copy constructor / copy
+// assignment operator -- confirm this is intended.
 template <int N>
 class InlinedArena : public Arena {
 public:
-  InlinedArena() : ptr_(upb_arena_new(&initial_block_, N, &upb_alloc_global)) {}
-
-  upb_arena* ptr() { return ptr_.get(); }
+  InlinedArena() : Arena(initial_block_, N) {}

 private:
  InlinedArena(const InlinedArena*) = delete;
  InlinedArena& operator=(const InlinedArena*) = delete;

-  std::unique_ptr<upb_arena, decltype(&upb_arena_free)> ptr_;
  char initial_block_[N];
 };

--- a/upbc/BUILD
+++ b/upbc/BUILD
@ -0,0 +1,35 @@
+load(
+    "//bazel:build_defs.bzl",
+    "UPB_DEFAULT_CPPOPTS",
+)
+
+licenses(["notice"])
+
+# Library holding the generator sources (generator.cc, message_layout.cc);
+# generator.h is its only public header.  Built with UPB_DEFAULT_CPPOPTS.
+cc_library(
+    name = "upbc_generator",
+    srcs = [
+        "generator.cc",
+        "message_layout.cc",
+        "message_layout.h",
+    ],
+    hdrs = ["generator.h"],
+    copts = UPB_DEFAULT_CPPOPTS,
+    deps = [
+        "@com_google_absl//absl/base:core_headers",
+        "@com_google_absl//absl/container:flat_hash_map",
+        "@com_google_absl//absl/strings",
+        "@com_google_protobuf//:protobuf",
+        "@com_google_protobuf//:protoc_lib",
+    ],
+)
+
+# Publicly visible executable built from main.cc on top of :upbc_generator
+# (presumably the protoc plugin for upb, going by its name -- confirm).
+cc_binary(
+    name = "protoc-gen-upb",
+    srcs = ["main.cc"],
+    copts = UPB_DEFAULT_CPPOPTS,
+    visibility = ["//visibility:public"],
+    deps = [
+        ":upbc_generator",
+        "@com_google_protobuf//:protoc_lib",
+    ],
+)