Merge branch 'main' into minitable-api

3 years ago · 6f18f3e57e
parent 74bc67bbbc 676a984bb3
commit 6f18f3e57e
39 changed files with 798 additions and 452 deletions
--- a/.github/workflows/bazel_tests.yml
+++ b/.github/workflows/bazel_tests.yml
@ -24,6 +24,7 @@ jobs:
          - { CC: clang, os: ubuntu-20.04, flags: "--//:fasttable_enabled=true -- -cmake:test_generated_files" }
          - { CC: clang, os: ubuntu-20.04, flags: "--config=asan -c dbg -- -benchmarks:benchmark -python/..." }
          - { CC: clang, os: ubuntu-20.04, flags: "--config=ubsan -c dbg -- -benchmarks:benchmark -python/... -upb/bindings/lua/...", install: "libunwind-dev" }
          - { CC: clang, os: ubuntu-20.04, flags: "--copt=-m32 --linkopt=-m32 -- -... benchmarks:benchmark ", install: "g++-multilib" }
          - { CC: clang, os: macos-11, flags: "" }
    steps:
@ -31,7 +32,7 @@ jobs:
      - name: Setup Python venv
        run: rm -rf /tmp/venv && python3 -m venv /tmp/venv
      - name: Install dependencies
-        run: sudo apt install -y ${{ matrix.install }}
+        run: sudo apt update && sudo apt install -y ${{ matrix.install }}
        if: matrix.install != ''
      - name: Run tests
        run: cd ${{ github.workspace }} && PATH=/tmp/venv/bin:$PATH CC=${{ matrix.CC }} bazel test --test_output=errors ... ${{ matrix.flags }}
--- a/37
+++ b/37
@ -77,7 +77,7 @@ cc_library(
        "upb/port_def.inc",
        "upb/port_undef.inc",
    ],
-    visibility = ["//tests:__pkg__"],
+    visibility = ["//:__subpackages__"],
 )
 cc_library(
@ -159,7 +159,9 @@ cc_library(
 cc_library(
    name = "generated_code_support__only_for_generated_code_do_not_use__i_give_permission_to_break_me",
    hdrs = [
        "upb/decode.h",
        "upb/decode_fast.h",
        "upb/encode.h",
        "upb/msg.h",
        "upb/msg_internal.h",
        "upb/port_def.inc",
@ -173,6 +175,22 @@ cc_library(
    ],
 )
 cc_library(
    name = "generated_reflection_support__only_for_generated_code_do_not_use__i_give_permission_to_break_me",
    hdrs = [
        "upb/def.h",
        "upb/port_def.inc",
        "upb/port_undef.inc",
    ],
    copts = UPB_DEFAULT_COPTS,
    visibility = ["//visibility:public"],
    deps = [
        ":descriptor_upb_proto",
        ":reflection",
        ":table",
    ],
 )
 upb_proto_library(
    name = "descriptor_upb_proto",
    visibility = ["//visibility:public"],
@ -222,6 +240,7 @@ cc_library(
    deps = [
        ":port",
        ":reflection",
        ":table",
    ],
 )
@ -254,6 +273,7 @@ cc_test(
        ":empty_upbdefs_proto",
        ":test_messages_proto3_proto_upb",
        ":test_upb_proto",
        ":upb",
        "@com_google_googletest//:gtest_main",
    ],
 )
@ -292,7 +312,11 @@ cc_test(
    srcs = ["upb/msg_test.cc"],
    deps = [
        ":json",
        ":msg_test_upb_proto",
        ":msg_test_upb_proto_reflection",
        ":reflection",
        ":test_messages_proto3_proto_upb",
        ":upb",
        "@com_google_googletest//:gtest_main",
    ],
 )
@ -304,6 +328,12 @@ proto_library(
    deps = ["@com_google_protobuf//:test_messages_proto3_proto"],
 )
 upb_proto_library(
    name = "msg_test_upb_proto",
    testonly = 1,
    deps = [":msg_test_proto"],
 )
 upb_proto_reflection_library(
    name = "msg_test_upb_proto_reflection",
    testonly = 1,
@ -436,7 +466,10 @@ cc_library(
        "upb/table_internal.h",
        "upb/upb.h",
    ],
-    visibility = ["//tests:__pkg__"],
+    visibility = [
        "//python:__pkg__",
        "//tests:__pkg__",
    ],
    deps = [
        ":port",
    ],
--- a/bazel/py_extension.bzl
+++ b/bazel/py_extension.bzl
@ -0,0 +1,39 @@
 load(
    "//bazel:build_defs.bzl",
    "UPB_DEFAULT_COPTS",
 )
 def py_extension(name, srcs, deps=[]):
    version_script = name + "_version_script.lds"
    symbol = "PyInit_" + name
    native.genrule(
        name = "gen_" + version_script,
        outs = [version_script],
        cmd = "echo 'message { global: " + symbol + "; local: *; };' > $@",
    )
    native.cc_binary(
        name = name,
        srcs = srcs,
        copts = UPB_DEFAULT_COPTS + [
            # The Python API requires patterns that are ISO C incompatible, like
            # casts between function pointers and object pointers.
            "-Wno-pedantic",
        ],
        # We use a linker script to hide all symbols except the entry point for
        # the module.
        linkopts = select({
            "@platforms//os:linux": ["-Wl,--version-script,$(location :" + version_script + ")"],
            "@platforms//os:macos": [
                "-Wl,-exported_symbol",
                "-Wl,_" + symbol,
            ],
        }),
        linkshared = True,
        linkstatic = True,
        deps = deps + [
            ":" + version_script,
            "@system_python//:python_headers",
        ],
    )
--- a/bazel/py_proto_library.bzl
+++ b/bazel/py_proto_library.bzl
@ -103,7 +103,7 @@ def _py_proto_library_aspect_impl(target, ctx):
    )
    outs_depset = depset(srcs)
    return [
-        PyInfo(transitive_sources = outs_depset)
+        PyInfo(transitive_sources = outs_depset),
    ]
 _py_proto_library_aspect = aspect(
--- a/bazel/pyproto_test_wrapper.bzl
+++ b/bazel/pyproto_test_wrapper.bzl
@ -0,0 +1,34 @@
 # copybara:strip_for_google3_begin
 def pyproto_test_wrapper(name):
    src = name + "_wrapper.py"
    native.py_test(
        name = name,
        srcs = [src],
        legacy_create_init = False,
        main = src,
        data = ["@com_google_protobuf//:testdata"],
        deps = [
            "//python:message_ext",
            "@com_google_protobuf//:python_common_test_protos",
            "@com_google_protobuf//:python_specific_test_protos",
            "@com_google_protobuf//:python_srcs",
        ],
    )
 # copybara:replace_for_google3_begin
 #
 # def pyproto_test_wrapper(name):
 #     src = name + "_wrapper.py"
 #     native.py_test(
 #         name = name,
 #         srcs = [src],
 #         main = src,
 #         deps = [
 #             "//net/proto2/python/internal:" + name + "_for_deps",
 #             "//net/proto2/python/public:use_upb_protos",
 #         ],
 #     )
 #
 # copybara:replace_for_google3_end
--- a/bazel/upb_proto_library.bzl
+++ b/bazel/upb_proto_library.bzl
@ -299,7 +299,6 @@ _upb_proto_library_aspect = aspect(
        ),
        "_upb": attr.label_list(default = [
            "//:generated_code_support__only_for_generated_code_do_not_use__i_give_permission_to_break_me",
            "//:upb",
        ]),
        "_fasttable_enabled": attr.label(default = "//:fasttable_enabled"),
    }),
@ -348,9 +347,7 @@ _upb_proto_reflection_library_aspect = aspect(
        ),
        "_upbdefs": attr.label_list(
            default = [
-                "//:generated_code_support__only_for_generated_code_do_not_use__i_give_permission_to_break_me",
+                "//:generated_reflection_support__only_for_generated_code_do_not_use__i_give_permission_to_break_me",
                "//:upb",
                "//:reflection",
            ],
        ),
    }),
--- a/bazel/workspace_deps.bzl
+++ b/bazel/workspace_deps.bzl
@ -23,7 +23,7 @@ def upb_deps():
            "rm python/google/protobuf/__init__.py",
            "rm python/google/protobuf/pyext/__init__.py",
            "rm python/google/protobuf/internal/__init__.py",
-        ]
+        ],
    )
    rules_python_version = "740825b7f74930c62f44af95c9a4c1bd428d2c53"  # Latest @ 2021-06-23
--- a/benchmarks/BUILD
+++ b/benchmarks/BUILD
@ -73,7 +73,7 @@ cc_proto_library(
    deps = [":benchmark_descriptor_sv_proto"],
 )
-cc_binary(
+cc_test(
    name = "benchmark",
    testonly = 1,
    srcs = ["benchmark.cc"],
--- a/cmake/CMakeLists.txt
+++ b/cmake/CMakeLists.txt
@ -103,6 +103,11 @@ add_library(generated_code_support__only_for_generated_code_do_not_use__i_give_p
 target_link_libraries(generated_code_support__only_for_generated_code_do_not_use__i_give_permission_to_break_me INTERFACE
  table
  upb)
 add_library(generated_reflection_support__only_for_generated_code_do_not_use__i_give_permission_to_break_me INTERFACE)
 target_link_libraries(generated_reflection_support__only_for_generated_code_do_not_use__i_give_permission_to_break_me INTERFACE
  descriptor_upb_proto
  reflection
  table)
 add_library(reflection
  ../upb/def.c
  ../upb/msg.h
@ -122,7 +127,8 @@ add_library(textformat
  ../upb/text_encode.h)
 target_link_libraries(textformat
  port
-  reflection)
+  reflection
  table)
 add_library(json
  ../upb/json_decode.c
  ../upb/json_encode.c
--- a/doc/vs-cpp-protos.md
+++ b/doc/vs-cpp-protos.md
@ -0,0 +1,254 @@
 # upb vs. C++ Protobuf Design
 [upb](https://github.com/protocolbuffers/upb) is a small C protobuf library.
 While some of the design follows in the footsteps of the C++ Protobuf Library,
 upb departs from C++'s design in several key ways.  This document compares
 and contrasts the two libraries on several design points.
 ## Design Goals
 Before we begin, it is worth calling out that upb and C++ have different design
 goals, and this motivates some of the differences we will see.
 C++ protobuf is a user-level library: it is designed to be used directly by C++
 applications.  These applications will expect a full-featured C++ API surface
 that uses C++ idioms.  The C++ library is also willing to add features to
 increase server performance, even if these features would add size or complexity
 to the library.  Because C++ protobuf is a user-level library, API stability is
 of utmost importance: breaking API changes are rare and carefully managed when
 they do occur.  The focus on C++ also means that ABI compatibility with C is not
 a priority.
 upb, on the other hand, is designed primarily to be wrapped by other languages.
 It is a C protobuf kernel that forms the basis on which a user-level protobuf
 library can be built.  This means we prefer to keep the API surface as small and
 orthogonal as possible.  While upb supports all protobuf features required for
 full conformance, upb prioritizes simplicity and small code size, and avoids
 adding features like lazy fields that can accelerate some use cases but at great
 cost in terms of complexity.  As upb is not aimed directly at users, there is
 much more freedom to make API-breaking changes when necessary, which helps the
 core to stay small and simple.  We want to be compatible with all FFI
 interfaces, so C ABI compatibility is a must.
 Despite these differences, C++ protos and upb offer [roughly the same core set
 of features](https://github.com/protocolbuffers/upb#features).
 ## Arenas
 upb and C++ protos both offer arena allocation, but there are some key
 differences.
 ### C++
 As a matter of history, when C++ protos were open-sourced in 2008, they did not
 support arenas.  Originally there was only unique ownership, whereby each
 message uniquely owns all child messages and will free them when the parent is
 freed.
 Arena allocation was added as a feature in 2014 as a way of dramatically
 reducing allocation and (especially) deallocation costs.  But the library was
 not at liberty to remove the unique ownership model, because it would break far
 too many users.  As a result, C++ has supported a **hybrid allocation model**
 ever since, allowing users to allocate messages either directly from the
 stack/heap or from an arena.  The library attempts to ensure that there are
 no dangling pointers by performing automatic copies in some cases (for example
 `a->set_allocated_b(b)`, where `a` and `b` are on different arenas).
 C++'s arena object itself `google::protobuf::Arena` is **thread-safe** by
 design, which allows users to allocate from multiple threads simultaneously
 without external synchronization.  The user can supply an initial block of
 memory to the arena, and can choose some parameters to control the arena block
 size.  The user can also supply block alloc/dealloc functions, but the alloc
 function is expected to always return some memory.  The C++ library in general
 does not attempt to handle out of memory conditions.
 ### upb
 upb uses **arena allocation exclusively**. All messages must be allocated from
 an arena, and can only be freed by freeing the arena.  It is entirely the user's
 responsibility to ensure that there are no dangling pointers: when a user sets a
 message field, this will always trivially overwrite the pointer and will never
 perform an implicit copy.
 upb's `upb::Arena` is **thread-compatible**, which means it cannot be used
 concurrently without synchronization.  The arena can be seeded with an initial
 block of memory, but it does not explicitly support any parameters for choosing
 block size.  It support a custom alloc/dealloc function, and this function is
 allowed to return `NULL` if no dynamic memory is available.  This allows upb
 arenas to have a max/fixed size, and makes it possible in theory to write code
 that is tolerant to out-of-memory errors.
 upb's arena also supports a novel operation known as **fuse**, which joins two
 arenas together into a single lifetime.  Though both arenas must still be freed
 separately, none of the memory will actually be freed until *both* arenas have
 been freed.  This is useful for avoiding dangling pointers when reparenting a
 message with one that may be on a different arena.
 ### Comparison
 **hybrid allocation vs. arena-only**:
 * The C++ hybrid allocation model introduces a great deal of complexity and
  unpredictability into the library.  upb benefits from having a much simpler
  and more predictable design.
 * Some of the complexity in C++'s hybrid model arises from the fact that arenas
  were added after the fact.  Designing for a hybrid model from the outset
  would likely yield a simpler result.
 * Unique ownership does support some usage patterns that arenas cannot directly
  accommodate.  For example, you can reparent a message and the child will precisely
  follow the lifetime of its new parent.  An arena would require you to either
  perform a deep copy or extend the lifetime.
 **thread-compatible vs. thread-safe arena**
 * A thread-safe arena (as in C++) is safer and easier to use.  A thread-compatible
  arena requires that the user prove that the arena cannot be used concurrently.
 * [Thread Sanitizer](https://github.com/google/sanitizers/wiki/ThreadSanitizerCppManual)
  is far more accessible than it was in 2014 (when C++ introduced a thread-safe
  arena).  We now have more tools at our disposal to ensure that we do not trigger
  data races in a thread-compatible arena like upb.
 * Thread-compatible arenas are more performant.
 * Thread-compatible arenas have a far simpler implementation.  The C++ thread-safe
  arena relies on thread-local variables, which introduce complications on some
  platforms.  It also requires far more subtle reasoning for correctness and
  performance.
 **fuse vs. no fuse**
 * The `upb_Arena_Fuse()` operation is a key part of how upb supports reparenting
  of messages when the parent may be on a different arena.  Without this, upb has
  no way of supporting `foo.bar = bar` in dynamic languages without performing a
  deep copy.
 * A downside of `upb_Arena_Fuse()` is that passing an arena to a function can allow
  that function to extend the lifetime of the arena in potentially
  unpredictable ways.  This can be prevented if necessary, as fuse can fail, eg. if
  one arena has an initial block.  But this adds some complexity by requiring callers
  to handle the case where fuse fails.
 ## Code Generation vs. Tables
 The C++ protobuf library has always been built around code generation, while upb
 generates only tables.  In other words, `foo.pb.cc` files contain functions,
 whereas `foo.upb.c` files emit only data structures.
 ### C++
 C++ generated code emits a large number of functions into `foo.pb.cc` files.
 An incomplete list:
 * `FooMsg::FooMsg()` (constructor): initializes all fields to their default value.
 * `FooMsg::~FooMsg()` (destructor): frees any present child messages.
 * `FooMsg::Clear()`: clears all fields back to their default/empty value.
 * `FooMsg::_InternalParse()`: generated code for parsing a message.
 * `FooMsg::_InternalSerialize()`: generated code for serializing a message.
 * `FooMsg::ByteSizeLong()`: calculates serialized size, as a first pass before serializing.
 * `FooMsg::MergeFrom()`: copies/appends present fields from another message.
 * `FooMsg::IsInitialized()`: checks whether required fields are set.
 This code lives in the `.text` section and contains function calls to the generated
 classes for child messages.
 ### upb
 upb does not generate any code into `foo.upb.c` files, only data structures.  upb uses a
 compact data table known as a *mini table* to represent the schema and all fields.
 upb uses mini tables to perform all of the operations that would traditionally be done
 with generated code.  Revisiting the list from the previous section:
 * `FooMsg::FooMsg()` (constructor): upb instead initializes all messages with `memset(msg, 0, size)`.
   Non-zero defaults are injected in the accessors.
 * `FooMsg::~FooMsg()` (destructor): upb messages are freed by freeing the arena.
 * `FooMsg::Clear()`: can be performed with `memset(msg, 0, size)`.
 * `FooMsg::_InternalParse()`: upb's parser uses mini tables as data, instead of generating code.
 * `FooMsg::_InternalSerialize()`: upb's serializer also uses mini-tables instead of generated code.
 * `FooMsg::ByteSizeLong()`: upb performs serialization in reverse so that an initial pass is not required.
 * `FooMsg::MergeFrom()`: upb supports this via serialize+parse from the other message.
 * `FooMsg::IsInitialized()`: upb's encoder and decoder have special flags to check for required fields.
  A util library `upb/util/required_fields.h` handles the corner cases.
 ### Comparison
 If we compare compiled code size, upb is far smaller.  Here is a comparison of the code
 size of a trivial binary that does nothing but a parse and serialize of `descriptor.proto`.
 This means we are seeing both the overhead of the core library itself as well as the
 generated code (or table) for `descriptor.proto`.  (For extra clarity we should break this
 down by generated code vs core library in the future).
 | Library         | `.text` | `.data` | `.bss` |
 |------------     |---------|---------|--------|
 | upb             |  26Ki   | 0.6Ki   | 0.01Ki |
 | C++ (lite)      | 187Ki   | 2.8Ki   | 1.25Ki |
 | C++ (code size) | 904Ki   | 6.1Ki   | 1.88Ki |
 | C++ (full)      | 983Ki   | 6.1Ki   | 1.88Ki |
 "C++ (code size)" refers to protos compiled with `optimize_for = CODE_SIZE`, a mode
 in which generated code contains reflection only, in an attempt to make the
 generated code size smaller (however it requires the full runtime instead
 of the lite runtime).
 ## Bifurcated vs. Optional Reflection
 upb and C++ protos both offer reflection without making it mandatory.  However
 the models for enabling/disabling reflection are very different.
 ### C++
 C++ messages offer full reflection by default.  Messages in C++ generally
 derive from `Message`, and the base class provides a member function
 `Reflection* Message::GetReflection()` which returns the reflection object.
 It follows that any message deriving from `Message` will always have reflection
 linked into the binary, whether or not the reflection object is ever used.
 Because `GetReflection()` is a function on the base class, it is not possible
 to statically determine if a given message's reflection is used:
 ```c++
 Reflection* GetReflection(const Message& message) {
    // Can refer to any message in the whole binary.
    return message.GetReflection();
 }
 ```
 The C++ library does provide a way of omitting reflection: `MessageLite`.  We can
 cause a message to be lite in two different ways:
 * `optimize_for = LITE_RUNTIME` in a `.proto` file will cause all messages in that
  file to be lite.
 * `lite` as a codegen param: this will force all messages to lite, even if the
  `.proto` file does not have `optimize_for = LITE_RUNTIME`.
 A lite message will derive from `MessageLite` instead of `Message`.  Since
 `MessageLite` has no `GetReflection()` function, this means no reflection is
 available, so we can avoid taking the code size hit.
 ### upb
 upb does not have the `Message` vs. `MessageLite` bifurcation.  There is only one
 kind of message type `upb_Message`, which means there is no need to configure in
 a `.proto` file which messages will need reflection and which will not.
 Every message has the *option* to link in reflection from a separate `foo.upbdefs.o`
 file, without needing to change the message itself in any way.
 upb does not provide the equivalent of `Message::GetReflection()`: there is no
 facility for retrieving the reflection of a message whose type is not known statically.
 It would be possible to layer such a facility on top of the upb core, though this
 would probably require some kind of code generation.
 ### Comparison
 * Most messages in C++ will not bother to declare themselves as "lite".  This means
  that many C++ messages will link in reflection even when it is never used, bloating
  binaries unnecessarily.
 * `optimize_for = LITE_RUNTIME` is difficult to use in practice, because it prevents 
  any non-lite protos from `import`ing that file.
 * Forcing all protos to lite via a codegen parameter (for example, when building for
  mobile) is more practical than `optimize_for = LITE_RUNTIME`.  But this will break
  the compile for any code that tries to upcast to `Message`, or tries to use a
  non-lite method.
 * The one major advantage of the C++ model is that it can support `msg.DebugString()`
  on a type-erased proto.  For upb you have to explicitly pass the `upb_MessageDef*`
  separately if you want to perform an operation like printing a proto to text format.
 ## Explicit Registration vs. Globals
 TODO
--- a/python/BUILD
+++ b/python/BUILD
@ -23,23 +23,14 @@
 # (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
 # SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-load(
+load("//bazel:py_proto_library.bzl", "py_proto_library")
-    "//bazel:build_defs.bzl",
+load("//bazel:py_extension.bzl", "py_extension")  # copybara:strip_for_google3
-    "UPB_DEFAULT_COPTS",
+load("@rules_python//python:packaging.bzl", "py_wheel")
 )
 load(
    "//bazel:py_proto_library.bzl",
    "py_proto_library",
 )
 load(
    "@rules_python//python:packaging.bzl",
    "py_wheel",
 )
 licenses(["notice"])
-cc_binary(
+py_extension(
-    name = "message",
+    name = "_message",
    srcs = [
        "convert.c",
        "convert.h",
@ -61,45 +52,38 @@ cc_binary(
        "repeated.c",
        "repeated.h",
    ],
    copts = UPB_DEFAULT_COPTS + [
        # The Python API requires patterns that are ISO C incompatible, like
        # casts between function pointers and object pointers.
        "-Wno-pedantic",
    ],
    # We use a linker script to hide all symbols except the entry point for
    # the module.
    linkopts = select({
        "@platforms//os:linux": ["-Wl,--version-script,$(location :version_script.lds)"],
        "@platforms//os:macos": [
            "-Wl,-exported_symbol",
            "-Wl,_PyInit__message",
        ],
    }),
    linkshared = True,
    linkstatic = True,
    deps = [
        ":version_script.lds",
        "//:descriptor_upb_proto_reflection",
        "//:reflection",
        "//:table",
        "//:textformat",
        "//:upb",
        "//upb/util:compare",
        "//upb/util:def_to_proto",
        "//upb/util:required_fields",
        "@system_python//:python_headers",
    ],
 )
-cc_binary(
+py_extension(
-    name = "api_implementation",
+    name = "_api_implementation",
    srcs = ["api_implementation.c"],
 )
 # copybara:strip_for_google3_begin
 py_test(
    name = "minimal_test",
    srcs = [
-        "api_implementation.c",
+        "minimal_test.py",
    ],
    imports = ["."],
    legacy_create_init = False,
    deps = [
        "//python:message_ext",
        "@com_google_protobuf//:python_common_test_protos",
        "@com_google_protobuf//:python_specific_test_protos",
        "@com_google_protobuf//:python_srcs",
    ],
    linkshared = True,
    linkstatic = True,
    # Enable once linker script is available.
    #copts = ["-fvisibility=hidden"],
    deps = ["@system_python//:python_headers"],
 )
 # Copy the extensions into the location recognized by Python.
@ -108,14 +92,14 @@ EXT_SUFFIX = ".abi3.so"
 genrule(
    name = "copy_message",
-    srcs = [":message"],
+    srcs = [":_message"],
    outs = ["google/protobuf/pyext/_message" + EXT_SUFFIX],
    cmd = "cp $< $@",
 )
 genrule(
    name = "copy_api_implementation",
-    srcs = [":api_implementation"],
+    srcs = [":_api_implementation"],
    outs = ["google/protobuf/internal/_api_implementation" + EXT_SUFFIX],
    cmd = "cp $< $@",
    visibility = ["//python:__subpackages__"],
@ -136,20 +120,6 @@ py_library(
    visibility = ["//python:__subpackages__"],
 )
 py_test(
    name = "minimal_test",
    srcs = [
        "minimal_test.py",
    ],
    imports = ["."],
    legacy_create_init = False,
    deps = [
        "//python:message_ext",
        "@com_google_protobuf//:python_common_test_protos",
        "@com_google_protobuf//:python_specific_test_protos",
        "@com_google_protobuf//:python_srcs",
    ],
 )
 py_proto_library(
    name = "well_known_proto_pb2",
@ -189,3 +159,5 @@ py_wheel(
        "@com_google_protobuf//:python_srcs",
    ],
 )
 # copybara:strip_end
--- a/python/descriptor.c
+++ b/python/descriptor.c
@ -107,7 +107,7 @@ static PyObject* PyUpb_DescriptorBase_GetOptions(PyUpb_DescriptorBase* self,
  if (!self->options) {
    // Load descriptors protos if they are not loaded already. We have to do
    // this lazily, otherwise, it would lead to circular imports.
-    PyObject* mod = PyImport_ImportModule("google.protobuf.descriptor_pb2");
+    PyObject* mod = PyImport_ImportModule(PYUPB_DESCRIPTOR_MODULE);
    Py_DECREF(mod);
    // Find the correct options message.
@ -323,7 +323,7 @@ static PyObject* PyUpb_Descriptor_GetOptions(PyObject* _self, PyObject* args) {
  return PyUpb_DescriptorBase_GetOptions(
      self, upb_MessageDef_Options(self->def),
      &google_protobuf_MessageOptions_msginit,
-      "google.protobuf.MessageOptions");
+      PYUPB_DESCRIPTOR_PROTO_PACKAGE ".MessageOptions");
 }
 static PyObject* PyUpb_Descriptor_CopyToProto(PyObject* _self,
@ -331,7 +331,7 @@ static PyObject* PyUpb_Descriptor_CopyToProto(PyObject* _self,
  return PyUpb_DescriptorBase_CopyToProto(
      _self, (PyUpb_ToProto_Func*)&upb_MessageDef_ToProto,
      &google_protobuf_DescriptorProto_msginit,
-      "google.protobuf.DescriptorProto", py_proto);
+      PYUPB_DESCRIPTOR_PROTO_PACKAGE ".DescriptorProto", py_proto);
 }
 static PyObject* PyUpb_Descriptor_EnumValueName(PyObject* _self,
@ -392,7 +392,7 @@ static PyObject* PyUpb_Descriptor_GetFieldsByNumber(PyObject* _self,
          (void*)&upb_MessageDef_Field,
          (void*)&PyUpb_FieldDescriptor_Get,
      },
-      (void*)&upb_MessageDef_FindFieldByNumberWithSize,
+      (void*)&upb_MessageDef_FindFieldByNumber,
      (void*)&upb_FieldDef_Number,
  };
  PyUpb_DescriptorBase* self = (void*)_self;
@ -733,7 +733,8 @@ static PyObject* PyUpb_EnumDescriptor_GetOptions(PyObject* _self,
  PyUpb_DescriptorBase* self = (void*)_self;
  return PyUpb_DescriptorBase_GetOptions(self, upb_EnumDef_Options(self->def),
                                         &google_protobuf_EnumOptions_msginit,
-                                         "google.protobuf.EnumOptions");
+                                         PYUPB_DESCRIPTOR_PROTO_PACKAGE
                                         ".EnumOptions");
 }
 static PyObject* PyUpb_EnumDescriptor_CopyToProto(PyObject* _self,
@ -741,7 +742,7 @@ static PyObject* PyUpb_EnumDescriptor_CopyToProto(PyObject* _self,
  return PyUpb_DescriptorBase_CopyToProto(
      _self, (PyUpb_ToProto_Func*)&upb_EnumDef_ToProto,
      &google_protobuf_EnumDescriptorProto_msginit,
-      "google.protobuf.EnumDescriptorProto", py_proto);
+      PYUPB_DESCRIPTOR_PROTO_PACKAGE ".EnumDescriptorProto", py_proto);
 }
 static PyGetSetDef PyUpb_EnumDescriptor_Getters[] = {
@ -822,7 +823,7 @@ static PyObject* PyUpb_EnumValueDescriptor_GetOptions(PyObject* _self,
  return PyUpb_DescriptorBase_GetOptions(
      self, upb_EnumValueDef_Options(self->def),
      &google_protobuf_EnumValueOptions_msginit,
-      "google.protobuf.EnumValueOptions");
+      PYUPB_DESCRIPTOR_PROTO_PACKAGE ".EnumValueOptions");
 }
 static PyGetSetDef PyUpb_EnumValueDescriptor_Getters[] = {
@ -1015,7 +1016,8 @@ static PyObject* PyUpb_FieldDescriptor_GetOptions(PyObject* _self,
  PyUpb_DescriptorBase* self = (void*)_self;
  return PyUpb_DescriptorBase_GetOptions(self, upb_FieldDef_Options(self->def),
                                         &google_protobuf_FieldOptions_msginit,
-                                         "google.protobuf.FieldOptions");
+                                         PYUPB_DESCRIPTOR_PROTO_PACKAGE
                                         ".FieldOptions");
 }
 static PyGetSetDef PyUpb_FieldDescriptor_Getters[] = {
@ -1254,7 +1256,8 @@ static PyObject* PyUpb_FileDescriptor_GetOptions(PyObject* _self,
  PyUpb_DescriptorBase* self = (void*)_self;
  return PyUpb_DescriptorBase_GetOptions(self, upb_FileDef_Options(self->def),
                                         &google_protobuf_FileOptions_msginit,
-                                         "google.protobuf.FileOptions");
+                                         PYUPB_DESCRIPTOR_PROTO_PACKAGE
                                         ".FileOptions");
 }
 static PyObject* PyUpb_FileDescriptor_CopyToProto(PyObject* _self,
@ -1262,7 +1265,7 @@ static PyObject* PyUpb_FileDescriptor_CopyToProto(PyObject* _self,
  return PyUpb_DescriptorBase_CopyToProto(
      _self, (PyUpb_ToProto_Func*)&upb_FileDef_ToProto,
      &google_protobuf_FileDescriptorProto_msginit,
-      "google.protobuf.FileDescriptorProto", py_proto);
+      PYUPB_DESCRIPTOR_PROTO_PACKAGE ".FileDescriptorProto", py_proto);
 }
 static PyGetSetDef PyUpb_FileDescriptor_Getters[] = {
@ -1338,6 +1341,12 @@ static PyObject* PyUpb_MethodDescriptor_GetFullName(PyObject* self,
  return PyUnicode_FromString(upb_MethodDef_FullName(m));
 }
 static PyObject* PyUpb_MethodDescriptor_GetIndex(PyObject* self,
                                                 void* closure) {
  const upb_MethodDef* oneof = PyUpb_MethodDescriptor_GetDef(self);
  return PyLong_FromLong(upb_MethodDef_Index(oneof));
 }
 static PyObject* PyUpb_MethodDescriptor_GetContainingService(PyObject* self,
                                                             void* closure) {
  const upb_MethodDef* m = PyUpb_MethodDescriptor_GetDef(self);
@ -1361,7 +1370,8 @@ static PyObject* PyUpb_MethodDescriptor_GetOptions(PyObject* _self,
  PyUpb_DescriptorBase* self = (void*)_self;
  return PyUpb_DescriptorBase_GetOptions(self, upb_MethodDef_Options(self->def),
                                         &google_protobuf_MethodOptions_msginit,
-                                         "google.protobuf.MethodOptions");
+                                         PYUPB_DESCRIPTOR_PROTO_PACKAGE
                                         ".MethodOptions");
 }
 static PyObject* PyUpb_MethodDescriptor_CopyToProto(PyObject* _self,
@ -1369,14 +1379,13 @@ static PyObject* PyUpb_MethodDescriptor_CopyToProto(PyObject* _self,
  return PyUpb_DescriptorBase_CopyToProto(
      _self, (PyUpb_ToProto_Func*)&upb_MethodDef_ToProto,
      &google_protobuf_MethodDescriptorProto_msginit,
-      "google.protobuf.MethodDescriptorProto", py_proto);
+      PYUPB_DESCRIPTOR_PROTO_PACKAGE ".MethodDescriptorProto", py_proto);
 }
 static PyGetSetDef PyUpb_MethodDescriptor_Getters[] = {
    {"name", PyUpb_MethodDescriptor_GetName, NULL, "Name", NULL},
    {"full_name", PyUpb_MethodDescriptor_GetFullName, NULL, "Full name", NULL},
-    // TODO(https://github.com/protocolbuffers/upb/issues/459)
+    {"index", PyUpb_MethodDescriptor_GetIndex, NULL, "Index", NULL},
    //{ "index", PyUpb_MethodDescriptor_GetIndex, NULL, "Index", NULL},
    {"containing_service", PyUpb_MethodDescriptor_GetContainingService, NULL,
     "Containing service", NULL},
    {"input_type", PyUpb_MethodDescriptor_GetInputType, NULL, "Input type",
@ -1466,7 +1475,8 @@ static PyObject* PyUpb_OneofDescriptor_GetOptions(PyObject* _self,
  PyUpb_DescriptorBase* self = (void*)_self;
  return PyUpb_DescriptorBase_GetOptions(self, upb_OneofDef_Options(self->def),
                                         &google_protobuf_OneofOptions_msginit,
-                                         "google.protobuf.OneofOptions");
+                                         PYUPB_DESCRIPTOR_PROTO_PACKAGE
                                         ".OneofOptions");
 }
 static PyGetSetDef PyUpb_OneofDescriptor_Getters[] = {
@ -1567,7 +1577,7 @@ static PyObject* PyUpb_ServiceDescriptor_GetOptions(PyObject* _self,
  return PyUpb_DescriptorBase_GetOptions(
      self, upb_ServiceDef_Options(self->def),
      &google_protobuf_ServiceOptions_msginit,
-      "google.protobuf.ServiceOptions");
+      PYUPB_DESCRIPTOR_PROTO_PACKAGE ".ServiceOptions");
 }
 static PyObject* PyUpb_ServiceDescriptor_CopyToProto(PyObject* _self,
@ -1575,7 +1585,7 @@ static PyObject* PyUpb_ServiceDescriptor_CopyToProto(PyObject* _self,
  return PyUpb_DescriptorBase_CopyToProto(
      _self, (PyUpb_ToProto_Func*)&upb_ServiceDef_ToProto,
      &google_protobuf_ServiceDescriptorProto_msginit,
-      "google.protobuf.ServiceDescriptorProto", py_proto);
+      PYUPB_DESCRIPTOR_PROTO_PACKAGE ".ServiceDescriptorProto", py_proto);
 }
 static PyObject* PyUpb_ServiceDescriptor_FindMethodByName(PyObject* _self,
--- a/python/descriptor_pool.c
+++ b/python/descriptor_pool.c
@ -251,7 +251,8 @@ static PyObject* PyUpb_DescriptorPool_DoAdd(PyObject* _self,
                                            PyObject* file_desc) {
  if (!PyUpb_CMessage_Verify(file_desc)) return NULL;
  const upb_MessageDef* m = PyUpb_CMessage_GetMsgdef(file_desc);
-  const char* file_proto_name = "google.protobuf.FileDescriptorProto";
+  const char* file_proto_name =
      PYUPB_DESCRIPTOR_PROTO_PACKAGE ".FileDescriptorProto";
  if (strcmp(upb_MessageDef_FullName(m), file_proto_name) != 0) {
    return PyErr_Format(PyExc_TypeError, "Can only add FileDescriptorProto");
  }
--- a/python/extension_dict.c
+++ b/python/extension_dict.c
@ -88,6 +88,23 @@ static void PyUpb_ExtensionDict_Dealloc(PyUpb_ExtensionDict* self) {
  PyUpb_Dealloc(self);
 }
 static PyObject* PyUpb_ExtensionDict_RichCompare(PyObject* _self,
                                                 PyObject* _other, int opid) {
  // Only equality comparisons are implemented.
  if (opid != Py_EQ && opid != Py_NE) {
    Py_INCREF(Py_NotImplemented);
    return Py_NotImplemented;
  }
  PyUpb_ExtensionDict* self = (PyUpb_ExtensionDict*)_self;
  bool equals = false;
  if (PyObject_TypeCheck(_other, Py_TYPE(_self))) {
    PyUpb_ExtensionDict* other = (PyUpb_ExtensionDict*)_other;
    equals = self->msg == other->msg;
  }
  bool ret = opid == Py_EQ ? equals : !equals;
  return PyBool_FromLong(ret);
 }
 static int PyUpb_ExtensionDict_Contains(PyObject* _self, PyObject* key) {
  PyUpb_ExtensionDict* self = (PyUpb_ExtensionDict*)_self;
  const upb_FieldDef* f = PyUpb_CMessage_GetExtensionDef(self->msg, key);
@ -143,7 +160,7 @@ static PyType_Slot PyUpb_ExtensionDict_Slots[] = {
    {Py_tp_methods, PyUpb_ExtensionDict_Methods},
    //{Py_tp_getset, PyUpb_ExtensionDict_Getters},
    //{Py_tp_hash, PyObject_HashNotImplemented},
-    //{Py_tp_richcompare, PyUpb_ExtensionDict_RichCompare},
+    {Py_tp_richcompare, PyUpb_ExtensionDict_RichCompare},
    {Py_tp_iter, PyUpb_ExtensionIterator_New},
    {Py_sq_contains, PyUpb_ExtensionDict_Contains},
    {Py_sq_length, PyUpb_ExtensionDict_Length},
--- a/python/message.c
+++ b/python/message.c
@ -1482,6 +1482,7 @@ static PyObject* PyUpb_CMessage_GetExtensionDict(PyObject* _self,
                                                 void* closure) {
  PyUpb_CMessage* self = (void*)_self;
  if (self->ext_dict) {
    Py_INCREF(self->ext_dict);
    return self->ext_dict;
  }
@ -1832,7 +1833,8 @@ bool PyUpb_InitMessage(PyObject* m) {
  state->listfields_item_key = PyObject_GetAttrString(
      (PyObject*)state->cmessage_type, "_ListFieldsItemKey");
-  PyObject* mod = PyImport_ImportModule("google.protobuf.message");
+  PyObject* mod =
      PyImport_ImportModule(PYUPB_PROTOBUF_PUBLIC_PACKAGE ".message");
  if (mod == NULL) return false;
  state->encode_error_class = PyObject_GetAttrString(mod, "EncodeError");
@ -1840,8 +1842,8 @@ bool PyUpb_InitMessage(PyObject* m) {
  state->message_class = PyObject_GetAttrString(mod, "Message");
  Py_DECREF(mod);
-  PyObject* enum_type_wrapper =
+  PyObject* enum_type_wrapper = PyImport_ImportModule(
-      PyImport_ImportModule("google.protobuf.internal.enum_type_wrapper");
+      PYUPB_PROTOBUF_INTERNAL_PACKAGE ".enum_type_wrapper");
  if (enum_type_wrapper == NULL) return false;
  state->enum_type_wrapper_class =
--- a/python/pb_unit_tests/BUILD
+++ b/python/pb_unit_tests/BUILD
@ -23,231 +23,40 @@
 # (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
 # SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 load("//bazel:pyproto_test_wrapper.bzl", "pyproto_test_wrapper")
 licenses(["notice"])
-py_test(
+pyproto_test_wrapper(name = "descriptor_database_test")
-    name = "descriptor_database_test",
+
-    srcs = ["descriptor_database_test_wrapper.py"],
+pyproto_test_wrapper(name = "descriptor_pool_test")
-    legacy_create_init = False,
+
-    main = "descriptor_database_test_wrapper.py",
+pyproto_test_wrapper(name = "descriptor_test")
-    deps = [
+
-        "//python:message_ext",
+pyproto_test_wrapper(name = "generator_test")  # copybara:strip_for_google3
-        "@com_google_protobuf//:python_common_test_protos",
+
-        "@com_google_protobuf//:python_specific_test_protos",
+pyproto_test_wrapper(name = "json_format_test")
-        "@com_google_protobuf//:python_srcs",
+
-    ],
+pyproto_test_wrapper(name = "keywords_test")
-)
+
-
+pyproto_test_wrapper(name = "message_factory_test")
-py_test(
+
-    name = "descriptor_pool_test",
+pyproto_test_wrapper(name = "message_test")
-    srcs = ["descriptor_pool_test_wrapper.py"],
+
-    legacy_create_init = False,
+pyproto_test_wrapper(name = "proto_builder_test")
-    main = "descriptor_pool_test_wrapper.py",
+
-    deps = [
+pyproto_test_wrapper(name = "reflection_test")
-        "//python:message_ext",
+
-        "@com_google_protobuf//:python_common_test_protos",
+pyproto_test_wrapper(name = "service_reflection_test")
-        "@com_google_protobuf//:python_specific_test_protos",
+
-        "@com_google_protobuf//:python_srcs",
+pyproto_test_wrapper(name = "symbol_database_test")
-    ],
+
-)
+pyproto_test_wrapper(name = "text_encoding_test")
-
+
-py_test(
+pyproto_test_wrapper(name = "text_format_test")
-    name = "descriptor_test",
+
-    srcs = ["descriptor_test_wrapper.py"],
+pyproto_test_wrapper(name = "unknown_fields_test")
-    legacy_create_init = False,
+
-    main = "descriptor_test_wrapper.py",
+pyproto_test_wrapper(name = "well_known_types_test")
-    deps = [
+
-        "//python:message_ext",
+pyproto_test_wrapper(name = "wire_format_test")
        "@com_google_protobuf//:python_common_test_protos",
        "@com_google_protobuf//:python_specific_test_protos",
        "@com_google_protobuf//:python_srcs",
    ],
 )
 py_test(
    name = "generator_test",
    srcs = ["generator_test_wrapper.py"],
    legacy_create_init = False,
    main = "generator_test_wrapper.py",
    deps = [
        "//python:message_ext",
        "@com_google_protobuf//:python_common_test_protos",
        "@com_google_protobuf//:python_specific_test_protos",
        "@com_google_protobuf//:python_srcs",
    ],
 )
 py_test(
    name = "json_format_test",
    srcs = ["json_format_test_wrapper.py"],
    legacy_create_init = False,
    main = "json_format_test_wrapper.py",
    deps = [
        "//python:message_ext",
        "@com_google_protobuf//:python_common_test_protos",
        "@com_google_protobuf//:python_specific_test_protos",
        "@com_google_protobuf//:python_srcs",
    ],
 )
 py_test(
    name = "keywords_test",
    srcs = ["keywords_test_wrapper.py"],
    legacy_create_init = False,
    main = "keywords_test_wrapper.py",
    deps = [
        "//python:message_ext",
        "@com_google_protobuf//:python_common_test_protos",
        "@com_google_protobuf//:python_specific_test_protos",
        "@com_google_protobuf//:python_srcs",
    ],
 )
 py_test(
    name = "message_factory_test",
    srcs = ["message_factory_test_wrapper.py"],
    legacy_create_init = False,
    main = "message_factory_test_wrapper.py",
    deps = [
        "//python:message_ext",
        "@com_google_protobuf//:python_common_test_protos",
        "@com_google_protobuf//:python_specific_test_protos",
        "@com_google_protobuf//:python_srcs",
    ],
 )
 py_test(
    name = "message_test",
    srcs = ["message_test_wrapper.py"],
    data = [
        "@com_google_protobuf//:testdata",
    ],
    legacy_create_init = False,
    main = "message_test_wrapper.py",
    deps = [
        "//python:message_ext",
        "@com_google_protobuf//:python_common_test_protos",
        "@com_google_protobuf//:python_specific_test_protos",
        "@com_google_protobuf//:python_srcs",
    ],
 )
 py_test(
    name = "proto_builder_test",
    srcs = ["proto_builder_test_wrapper.py"],
    legacy_create_init = False,
    main = "proto_builder_test_wrapper.py",
    deps = [
        "//python:message_ext",
        "@com_google_protobuf//:python_common_test_protos",
        "@com_google_protobuf//:python_specific_test_protos",
        "@com_google_protobuf//:python_srcs",
    ],
 )
 py_test(
    name = "reflection_test",
    srcs = ["reflection_test_wrapper.py"],
    legacy_create_init = False,
    main = "reflection_test_wrapper.py",
    deps = [
        "//python:message_ext",
        "@com_google_protobuf//:python_common_test_protos",
        "@com_google_protobuf//:python_specific_test_protos",
        "@com_google_protobuf//:python_srcs",
    ],
 )
 py_test(
    name = "service_reflection_test",
    srcs = ["service_reflection_test_wrapper.py"],
    legacy_create_init = False,
    main = "service_reflection_test_wrapper.py",
    deps = [
        "//python:message_ext",
        "@com_google_protobuf//:python_common_test_protos",
        "@com_google_protobuf//:python_specific_test_protos",
        "@com_google_protobuf//:python_srcs",
    ],
 )
 py_test(
    name = "symbol_database_test",
    srcs = ["symbol_database_test_wrapper.py"],
    legacy_create_init = False,
    main = "symbol_database_test_wrapper.py",
    deps = [
        "//python:message_ext",
        "@com_google_protobuf//:python_common_test_protos",
        "@com_google_protobuf//:python_specific_test_protos",
        "@com_google_protobuf//:python_srcs",
    ],
 )
 py_test(
    name = "text_encoding_test",
    srcs = ["text_encoding_test_wrapper.py"],
    legacy_create_init = False,
    main = "text_encoding_test_wrapper.py",
    deps = [
        "//python:message_ext",
        "@com_google_protobuf//:python_common_test_protos",
        "@com_google_protobuf//:python_specific_test_protos",
        "@com_google_protobuf//:python_srcs",
    ],
 )
 py_test(
    name = "text_format_test",
    srcs = ["text_format_test_wrapper.py"],
    data = [
        "@com_google_protobuf//:testdata",
    ],
    legacy_create_init = False,
    main = "text_format_test_wrapper.py",
    deps = [
        "//python:message_ext",
        "@com_google_protobuf//:python_common_test_protos",
        "@com_google_protobuf//:python_specific_test_protos",
        "@com_google_protobuf//:python_srcs",
    ],
 )
 py_test(
    name = "unknown_fields_test",
    srcs = ["unknown_fields_test_wrapper.py"],
    legacy_create_init = False,
    main = "unknown_fields_test_wrapper.py",
    deps = [
        "//python:message_ext",
        "@com_google_protobuf//:python_common_test_protos",
        "@com_google_protobuf//:python_specific_test_protos",
        "@com_google_protobuf//:python_srcs",
    ],
 )
 py_test(
    name = "well_known_types_test",
    srcs = ["well_known_types_test_wrapper.py"],
    legacy_create_init = False,
    main = "well_known_types_test_wrapper.py",
    deps = [
        "//python:message_ext",
        "@com_google_protobuf//:python_common_test_protos",
        "@com_google_protobuf//:python_specific_test_protos",
        "@com_google_protobuf//:python_srcs",
    ],
 )
 py_test(
    name = "wire_format_test",
    srcs = ["wire_format_test_wrapper.py"],
    legacy_create_init = False,
    main = "wire_format_test_wrapper.py",
    deps = [
        "//python:message_ext",
        "@com_google_protobuf//:python_common_test_protos",
        "@com_google_protobuf//:python_specific_test_protos",
        "@com_google_protobuf//:python_srcs",
    ],
 )
--- a/python/pb_unit_tests/descriptor_pool_test_wrapper.py
+++ b/python/pb_unit_tests/descriptor_pool_test_wrapper.py
@ -27,6 +27,10 @@ from google.protobuf.internal import descriptor_pool_test
 import unittest
 import copy
 # copybara:insert_for_google3_begin
 # from google3.testing.pybase import googletest
 # copybara:insert_end
 # This is testing that certain methods unconditionally throw TypeError.
 # In the new extension we simply don't define them at all.
 descriptor_pool_test.AddDescriptorTest.testAddTypeError.__unittest_expecting_failure__ = True
@ -34,4 +38,8 @@ descriptor_pool_test.AddDescriptorTest.testAddTypeError.__unittest_expecting_fai
 descriptor_pool_test.SecondaryDescriptorFromDescriptorDB.testErrorCollector.__unittest_expecting_failure__ = True
 if __name__ == '__main__':
  # copybara:strip_for_google3_begin
  unittest.main(module=descriptor_pool_test, verbosity=2)
  # copybara:replace_for_google3_begin
  # googletest.main()
  # copybara:replace_for_google3_end
--- a/python/pb_unit_tests/text_format_test_wrapper.py
+++ b/python/pb_unit_tests/text_format_test_wrapper.py
@ -25,14 +25,16 @@
 from google.protobuf.internal import text_format_test
 import unittest
 from google.protobuf.internal import _parameterized
 sep = _parameterized._SEPARATOR
 # These rely on the UnknownFields accessor, which we are trying to deprecate.
 text_format_test.OnlyWorksWithProto2RightNowTests.testPrintUnknownFields.__unittest_expecting_failure__ = True
 # copybara:strip_for_google3_begin
 from google.protobuf.internal import _parameterized  # copybara:strip_for_google3
 sep = _parameterized._SEPARATOR
 getattr(text_format_test.TextFormatMessageToStringTests, "testPrintUnknownFieldsEmbeddedMessageInBytes" + sep + "0").__unittest_expecting_failure__ = True
 getattr(text_format_test.TextFormatMessageToStringTests, "testPrintUnknownFieldsEmbeddedMessageInBytes" + sep + "1").__unittest_expecting_failure__ = True
 # copybara:strip_end
 if __name__ == '__main__':
  unittest.main(module=text_format_test, verbosity=2)
--- a/python/protobuf.c
+++ b/python/protobuf.c
@ -94,8 +94,8 @@ PyUpb_ModuleState* PyUpb_ModuleState_Get(void) {
 PyObject* PyUpb_GetWktBases(PyUpb_ModuleState* state) {
  if (!state->wkt_bases) {
-    PyObject* wkt_module =
+    PyObject* wkt_module = PyImport_ImportModule(PYUPB_PROTOBUF_INTERNAL_PACKAGE
-        PyImport_ImportModule("google.protobuf.internal.well_known_types");
+                                                 ".well_known_types");
    if (wkt_module == NULL) {
      return false;
--- a/python/protobuf.h
+++ b/python/protobuf.h
@ -34,7 +34,19 @@
 #include "python/python.h"
 #include "upb/table_internal.h"
 // copybara:strip_for_google3_begin
 #define PYUPB_PROTOBUF_PUBLIC_PACKAGE "google.protobuf"
 #define PYUPB_PROTOBUF_INTERNAL_PACKAGE "google.protobuf.internal"
 #define PYUPB_DESCRIPTOR_PROTO_PACKAGE "google.protobuf"
 #define PYUPB_DESCRIPTOR_MODULE "google.protobuf.descriptor_pb2"
 #define PYUPB_MODULE_NAME "google.protobuf.pyext._message"
 // copybara:replace_for_google3_begin
 // #define PYUPB_PROTOBUF_PUBLIC_PACKAGE "google3.net.proto2.python.public"
 // #define PYUPB_PROTOBUF_INTERNAL_PACKAGE "google3.net.proto2.python.internal"
 // #define PYUPB_DESCRIPTOR_PROTO_PACKAGE "proto2"
 // #define PYUPB_DESCRIPTOR_MODULE "google3.net.proto2.proto.descriptor_pb2"
 // #define PYUPB_MODULE_NAME "google3.third_party.upb.python._message"
 // copybara:replace_for_google3_end
 #define PYUPB_RETURN_OOM return PyErr_SetNone(PyExc_MemoryError), NULL
@ -193,7 +205,7 @@ PyObject* PyUpb_Forbidden_New(PyObject* cls, PyObject* args, PyObject* kwds);
 static inline void PyUpb_Dealloc(void* self) {
  PyTypeObject* tp = Py_TYPE(self);
  assert(PyType_GetFlags(tp) & Py_TPFLAGS_HEAPTYPE);
-  freefunc tp_free = PyType_GetSlot(tp, Py_tp_free);
+  freefunc tp_free = (freefunc)PyType_GetSlot(tp, Py_tp_free);
  tp_free(self);
  Py_DECREF(tp);
 }
--- a/rename.sed
+++ b/rename.sed
@ -71,7 +71,7 @@ s/upb_msgdef_field/upb_MessageDef_Field/g;
 s/upb_msgdef_oneof/upb_MessageDef_Oneof/g;
 s/upb_msgdef_ntooz/upb_MessageDef_FindOneofByName/g;
 s/upb_msgdef_ntofz/upb_MessageDef_FindFieldByName/g;
-s/upb_msgdef_itof/upb_MessageDef_FindFieldByNumberWithSize/g;
+s/upb_msgdef_itof/upb_MessageDef_FindFieldByNumber/g;
 s/upb_msgdef_ntof/upb_MessageDef_FindFieldByNameWithSize/g;
 s/upb_msgdef_ntoo/upb_MessageDef_FindOneofByNameWithSize/g;
 s/upb_msgdef_layout/upb_MessageDef_MiniTable/g;
@ -89,6 +89,7 @@ s/upb_msgdef_lookupjsonnamez/upb_MessageDef_FindByNameName/g;
 s/upb_msgdef_lookupjsonname/upb_MessageDef_FindByJsonNameWithSize/g;
 s/upb_msgdef/upb_MessageDef/g;
 s/WithSizez//g;
 s/upb_MessageDef_FindFieldByNumberWithSize/upb_MessageDef_FindFieldByNumber/g;
 s/upb_extrange_options/upb_ExtensionRange_Options/g;
 s/upb_extrange_hasoptions/upb_ExtensionRange_HasOptions/g;
--- a/upb/bindings/lua/def.c
+++ b/upb/bindings/lua/def.c
@ -368,7 +368,7 @@ static int lupb_MessageDef_Field(lua_State* L) {
  switch (lua_type(L, 2)) {
    case LUA_TNUMBER:
-      f = upb_MessageDef_FindFieldByNumberWithSize(m, lua_tointeger(L, 2));
+      f = upb_MessageDef_FindFieldByNumber(m, lua_tointeger(L, 2));
      break;
    case LUA_TSTRING:
      f = upb_MessageDef_FindFieldByName(m, lua_tostring(L, 2));
--- a/upb/bindings/lua/msg.c
+++ b/upb/bindings/lua/msg.c
@ -733,10 +733,10 @@ static void lupb_Message_Newwrapper(lua_State* L, int narg,
                                    upb_MutableMessageValue val) {
  if (upb_FieldDef_IsMap(f)) {
    const upb_MessageDef* entry = upb_FieldDef_MessageSubDef(f);
-    const upb_FieldDef* key_f = upb_MessageDef_FindFieldByNumberWithSize(
+    const upb_FieldDef* key_f =
-        entry, kUpb_MapEntry_KeyFieldNumber);
+        upb_MessageDef_FindFieldByNumber(entry, kUpb_MapEntry_KeyFieldNumber);
-    const upb_FieldDef* val_f = upb_MessageDef_FindFieldByNumberWithSize(
+    const upb_FieldDef* val_f =
-        entry, kUpb_MapEntry_ValueFieldNumber);
+        upb_MessageDef_FindFieldByNumber(entry, kUpb_MapEntry_ValueFieldNumber);
    lupb_map* lmap =
        lupb_Message_Newud(L, narg, sizeof(*lmap), LUPB_MAP, val_f);
    lmap->key_type = upb_FieldDef_CType(key_f);
@ -849,10 +849,10 @@ static int lupb_Message_Newindex(lua_State* L) {
  if (upb_FieldDef_IsMap(f)) {
    lupb_map* lmap = lupb_map_check(L, 3);
    const upb_MessageDef* entry = upb_FieldDef_MessageSubDef(f);
-    const upb_FieldDef* key_f = upb_MessageDef_FindFieldByNumberWithSize(
+    const upb_FieldDef* key_f =
-        entry, kUpb_MapEntry_KeyFieldNumber);
+        upb_MessageDef_FindFieldByNumber(entry, kUpb_MapEntry_KeyFieldNumber);
-    const upb_FieldDef* val_f = upb_MessageDef_FindFieldByNumberWithSize(
+    const upb_FieldDef* val_f =
-        entry, kUpb_MapEntry_ValueFieldNumber);
+        upb_MessageDef_FindFieldByNumber(entry, kUpb_MapEntry_ValueFieldNumber);
    upb_CType key_type = upb_FieldDef_CType(key_f);
    upb_CType value_type = upb_FieldDef_CType(val_f);
    luaL_argcheck(L, lmap->key_type == key_type, 3, "key type mismatch");
--- a/upb/def.c
+++ b/upb/def.c
@ -86,6 +86,9 @@ struct upb_FieldDef {
  bool has_json_name_;
  upb_FieldType type_;
  upb_Label label_;
 #if UINTPTR_MAX == 0xffffffff
  uint32_t padding;  // Increase size to a multiple of 8.
 #endif
 };
 struct upb_ExtensionRange {
@ -106,8 +109,8 @@ struct upb_MessageDef {
  upb_strtable ntof;
  /* All nested defs.
-   * MEM: We could save some space here by putting nested defs in a contigous
+   * MEM: We could save some space here by putting nested defs in a contiguous
-   * region and calculating counts from offets or vice-versa. */
+   * region and calculating counts from offsets or vice-versa. */
  const upb_FieldDef* fields;
  const upb_OneofDef* oneofs;
  const upb_ExtensionRange* ext_ranges;
@ -123,6 +126,9 @@ struct upb_MessageDef {
  int nested_ext_count;
  bool in_message_set;
  upb_WellKnown well_known_type;
 #if UINTPTR_MAX == 0xffffffff
  uint32_t padding;  // Increase size to a multiple of 8.
 #endif
 };
 struct upb_EnumDef {
@ -136,6 +142,9 @@ struct upb_EnumDef {
  const upb_EnumValueDef* values;
  int value_count;
  int32_t defaultval;
 #if UINTPTR_MAX == 0xffffffff
  uint32_t padding;  // Increase size to a multiple of 8.
 #endif
 };
 struct upb_EnumValueDef {
@ -154,6 +163,9 @@ struct upb_OneofDef {
  const upb_FieldDef** fields;
  upb_strtable ntof;
  upb_inttable itof;
 #if UINTPTR_MAX == 0xffffffff
  uint32_t padding;  // Increase size to a multiple of 8.
 #endif
 };
 struct upb_FileDef {
@ -188,6 +200,7 @@ struct upb_MethodDef {
  const char* full_name;
  const upb_MessageDef* input_type;
  const upb_MessageDef* output_type;
  int index;
  bool client_streaming;
  bool server_streaming;
 };
@ -246,6 +259,20 @@ static const void* unpack_def(upb_value v, upb_deftype_t type) {
 }
 static upb_value pack_def(const void* ptr, upb_deftype_t type) {
  // Our 3-bit pointer tagging requires all pointers to be multiples of 8.
  // The arena will always yield 8-byte-aligned addresses, however we put
  // the defs into arrays.  For each element in the array to be 8-byte-aligned,
  // the sizes of each def type must also be a multiple of 8.
  //
  // If any of these asserts fail, we need to add or remove padding on 32-bit
  // machines (64-bit machines will have 8-byte alignment already due to
  // pointers, which all of these structs have).
  UPB_ASSERT((sizeof(upb_FieldDef) & UPB_DEFTYPE_MASK) == 0);
  UPB_ASSERT((sizeof(upb_MessageDef) & UPB_DEFTYPE_MASK) == 0);
  UPB_ASSERT((sizeof(upb_EnumDef) & UPB_DEFTYPE_MASK) == 0);
  UPB_ASSERT((sizeof(upb_EnumValueDef) & UPB_DEFTYPE_MASK) == 0);
  UPB_ASSERT((sizeof(upb_ServiceDef) & UPB_DEFTYPE_MASK) == 0);
  UPB_ASSERT((sizeof(upb_OneofDef) & UPB_DEFTYPE_MASK) == 0);
  uintptr_t num = (uintptr_t)ptr;
  UPB_ASSERT((num & UPB_DEFTYPE_MASK) == 0);
  num |= type;
@ -691,8 +718,8 @@ upb_Syntax upb_MessageDef_Syntax(const upb_MessageDef* m) {
  return m->file->syntax;
 }
-const upb_FieldDef* upb_MessageDef_FindFieldByNumberWithSize(
+const upb_FieldDef* upb_MessageDef_FindFieldByNumber(const upb_MessageDef* m,
-    const upb_MessageDef* m, uint32_t i) {
+                                                     uint32_t i) {
  upb_value val;
  return upb_inttable_lookup(&m->itof, i, &val) ? upb_value_getconstptr(val)
                                                : NULL;
@ -980,6 +1007,8 @@ const char* upb_MethodDef_FullName(const upb_MethodDef* m) {
  return m->full_name;
 }
 int upb_MethodDef_Index(const upb_MethodDef* m) { return m->index; }
 const char* upb_MethodDef_Name(const upb_MethodDef* m) {
  return shortdefname(m->full_name);
 }
@ -1365,8 +1394,8 @@ static void assign_layout_indices(const upb_MessageDef* m, upb_MiniTable* l,
  int n = upb_MessageDef_numfields(m);
  int dense_below = 0;
  for (i = 0; i < n; i++) {
-    upb_FieldDef* f = (upb_FieldDef*)upb_MessageDef_FindFieldByNumberWithSize(
+    upb_FieldDef* f =
-        m, fields[i].number);
+        (upb_FieldDef*)upb_MessageDef_FindFieldByNumber(m, fields[i].number);
    UPB_ASSERT(f);
    f->layout_index = i;
    if (i < UINT8_MAX && fields[i].number == i + 1 &&
@ -1487,8 +1516,8 @@ static void make_layout(symtab_addctx* ctx, const upb_MessageDef* m) {
  if (upb_MessageDef_IsMapEntry(m)) {
    /* TODO(haberman): refactor this method so this special case is more
     * elegant. */
-    const upb_FieldDef* key = upb_MessageDef_FindFieldByNumberWithSize(m, 1);
+    const upb_FieldDef* key = upb_MessageDef_FindFieldByNumber(m, 1);
-    const upb_FieldDef* val = upb_MessageDef_FindFieldByNumberWithSize(m, 2);
+    const upb_FieldDef* val = upb_MessageDef_FindFieldByNumber(m, 2);
    fields[0].number = 1;
    fields[1].number = 2;
    fields[0].mode = kUpb_FieldMode_Scalar;
@ -1555,7 +1584,7 @@ static void make_layout(symtab_addctx* ctx, const upb_MessageDef* m) {
      field->submsg_index = sublayout_count++;
      subs[field->submsg_index].submsg = upb_FieldDef_MessageSubDef(f)->layout;
    } else if (upb_FieldDef_CType(f) == kUpb_CType_Enum &&
-               f->file->syntax == kUpb_Syntax_Proto2) {
+               f->sub.enumdef->file->syntax == kUpb_Syntax_Proto2) {
      field->submsg_index = sublayout_count++;
      subs[field->submsg_index].subenum = upb_FieldDef_EnumSubDef(f)->layout;
      UPB_ASSERT(subs[field->submsg_index].subenum);
@ -1601,6 +1630,10 @@ static void make_layout(symtab_addctx* ctx, const upb_MessageDef* m) {
    if (upb_OneofDef_IsSynthetic(o)) continue;
    if (o->field_count == 0) {
      symtab_errf(ctx, "Oneof must have at least one field (%s)", o->full_name);
    }
    /* Calculate field size: the max of all field sizes. */
    for (int j = 0; j < o->field_count; j++) {
      const upb_FieldDef* f = o->fields[j];
@ -1623,7 +1656,10 @@ static void make_layout(symtab_addctx* ctx, const upb_MessageDef* m) {
  l->size = UPB_ALIGN_UP(l->size, 8);
  /* Sort fields by number. */
-  qsort(fields, upb_MessageDef_numfields(m), sizeof(*fields), field_number_cmp);
+  if (fields) {
    qsort(fields, upb_MessageDef_numfields(m), sizeof(*fields),
          field_number_cmp);
  }
  assign_layout_indices(m, l, fields);
 }
@ -1784,8 +1820,8 @@ static const void* symtab_resolveany(symtab_addctx* ctx,
    }
  } else {
    /* Remove components from base until we find an entry or run out. */
-    size_t baselen = strlen(base);
+    size_t baselen = base ? strlen(base) : 0;
-    char* tmp = malloc(sym.size + strlen(base) + 1);
+    char* tmp = malloc(sym.size + baselen + 1);
    while (1) {
      char* p = tmp;
      if (baselen) {
@ -1821,10 +1857,10 @@ static const void* symtab_resolve(symtab_addctx* ctx, const char* from_name_dbg,
  const void* ret =
      symtab_resolveany(ctx, from_name_dbg, base, sym, &found_type);
  if (ret && found_type != type) {
-    symtab_errf(
+    symtab_errf(ctx,
-        ctx,
+                "type mismatch when resolving %s: couldn't find "
-        "type mismatch when resolving %s: couldn't find name %s with type=%d",
+                "name " UPB_STRINGVIEW_FORMAT " with type=%d",
-        from_name_dbg, sym.data, (int)type);
+                from_name_dbg, UPB_STRINGVIEW_ARGS(sym), (int)type);
  }
  return ret;
 }
@ -1844,6 +1880,11 @@ static void create_oneofdef(
  SET_OPTIONS(o->opts, OneofDescriptorProto, OneofOptions, oneof_proto);
  upb_value existing_v;
  if (upb_strtable_lookup2(&m->ntof, name.data, name.size, &existing_v)) {
    symtab_errf(ctx, "duplicate oneof name (%s)", o->full_name);
  }
  v = pack_def(o, UPB_DEFTYPE_ONEOF);
  CHK_OOM(upb_strtable_insert(&m->ntof, name.data, name.size, v, ctx->arena));
@ -2153,7 +2194,7 @@ static void create_fielddef(
  f->file = ctx->file; /* Must happen prior to symtab_add(). */
  if (!google_protobuf_FieldDescriptorProto_has_name(field_proto)) {
-    symtab_errf(ctx, "field has no name (%s)", upb_MessageDef_FullName(m));
+    symtab_errf(ctx, "field has no name");
  }
  name = google_protobuf_FieldDescriptorProto_name(field_proto);
@ -2370,6 +2411,7 @@ static void create_service(
    m->service = s;
    m->full_name = makefullname(ctx, s->full_name, name);
    m->index = i;
    m->client_streaming =
        google_protobuf_MethodDescriptorProto_client_streaming(method_proto);
    m->server_streaming =
@ -2397,6 +2439,12 @@ static int count_bits_debug(uint64_t x) {
  return n;
 }
 static int compare_int32(const void* a_ptr, const void* b_ptr) {
  int32_t a = *(int32_t*)a_ptr;
  int32_t b = *(int32_t*)b_ptr;
  return a < b ? -1 : (a == b ? 0 : 1);
 }
 upb_MiniTable_Enum* create_enumlayout(symtab_addctx* ctx,
                                      const upb_EnumDef* e) {
  int n = 0;
@ -2405,7 +2453,7 @@ upb_MiniTable_Enum* create_enumlayout(symtab_addctx* ctx,
  for (int i = 0; i < e->value_count; i++) {
    uint32_t val = (uint32_t)e->values[i].number;
    if (val < 64) {
-      mask |= 1 << val;
+      mask |= 1ULL << val;
    } else {
      n++;
    }
@ -2427,6 +2475,17 @@ upb_MiniTable_Enum* create_enumlayout(symtab_addctx* ctx,
    UPB_ASSERT(p == values + n);
  }
  // Enums can have duplicate values; we must sort+uniq them.
  if (values) qsort(values, n, sizeof(*values), &compare_int32);
  int dst = 0;
  for (int i = 0; i < n; dst++) {
    int32_t val = values[i];
    while (i < n && values[i] == val) i++;  // Skip duplicates.
    values[dst] = val;
  }
  n = dst;
  UPB_ASSERT(upb_inttable_count(&e->iton) == n + count_bits_debug(mask));
  upb_MiniTable_Enum* layout = symtab_alloc(ctx, sizeof(*layout));
@ -2510,7 +2569,7 @@ static void create_enumdef(
    if (ctx->layout) {
      UPB_ASSERT(ctx->enum_count < ctx->layout->enum_count);
      e->layout = ctx->layout->enums[ctx->enum_count++];
-      UPB_ASSERT(n ==
+      UPB_ASSERT(upb_inttable_count(&e->iton) ==
                 e->layout->value_count + count_bits_debug(e->layout->mask));
    } else {
      e->layout = create_enumlayout(ctx, e);
@ -2781,15 +2840,10 @@ static void resolve_msgdef(symtab_addctx* ctx, upb_MessageDef* m) {
    resolve_fielddef(ctx, m->full_name, (upb_FieldDef*)&m->fields[i]);
  }
  for (int i = 0; i < m->nested_ext_count; i++) {
    resolve_fielddef(ctx, m->full_name, (upb_FieldDef*)&m->nested_exts[i]);
  }
  if (!ctx->layout) make_layout(ctx, m);
  m->in_message_set = false;
-  if (m->nested_ext_count == 1) {
+  for (int i = 0; i < m->nested_ext_count; i++) {
-    const upb_FieldDef* ext = &m->nested_exts[0];
+    upb_FieldDef* ext = (upb_FieldDef*)&m->nested_exts[i];
    resolve_fielddef(ctx, m->full_name, ext);
    if (ext->type_ == kUpb_FieldType_Message &&
        ext->label_ == kUpb_Label_Optional && ext->sub.msgdef == m &&
        google_protobuf_MessageOptions_message_set_wire_format(
@ -2798,6 +2852,8 @@ static void resolve_msgdef(symtab_addctx* ctx, upb_MessageDef* m) {
    }
  }
  if (!ctx->layout) make_layout(ctx, m);
  for (int i = 0; i < m->nested_msg_count; i++) {
    resolve_msgdef(ctx, (upb_MessageDef*)&m->nested_msgs[i]);
  }
@ -2929,7 +2985,7 @@ static void build_filedef(
  int32_t* mutable_weak_deps = (int32_t*)file->weak_deps;
  for (i = 0; i < n; i++) {
    if (weak_deps[i] >= file->dep_count) {
-      symtab_errf(ctx, "public_dep %d is out of range", (int)public_deps[i]);
+      symtab_errf(ctx, "weak_dep %d is out of range", (int)weak_deps[i]);
    }
    mutable_weak_deps[i] = weak_deps[i];
  }
@ -3085,7 +3141,8 @@ const upb_FileDef* upb_DefPool_AddFile(
 /* Include here since we want most of this file to be stdio-free. */
 #include <stdio.h>
-bool _upb_DefPool_LoadDefInit(upb_DefPool* s, const _upb_DefPool_Init* init) {
+bool _upb_DefPool_LoadDefInitEx(upb_DefPool* s, const _upb_DefPool_Init* init,
                                bool rebuild_minitable) {
  /* Since this function should never fail (it would indicate a bug in upb) we
   * print errors to stderr instead of returning error status to the user. */
  _upb_DefPool_Init** deps = init->deps;
@ -3102,7 +3159,7 @@ bool _upb_DefPool_LoadDefInit(upb_DefPool* s, const _upb_DefPool_Init* init) {
  arena = upb_Arena_New();
  for (; *deps; deps++) {
-    if (!_upb_DefPool_LoadDefInit(s, *deps)) goto err;
+    if (!_upb_DefPool_LoadDefInitEx(s, *deps, rebuild_minitable)) goto err;
  }
  file = google_protobuf_FileDescriptorProto_parse_ex(
@ -3119,7 +3176,8 @@ bool _upb_DefPool_LoadDefInit(upb_DefPool* s, const _upb_DefPool_Init* init) {
    goto err;
  }
-  if (!_upb_DefPool_AddFile(s, file, init->layout, &status)) {
+  const upb_MiniTable_File* mt = rebuild_minitable ? NULL : init->layout;
  if (!_upb_DefPool_AddFile(s, file, mt, &status)) {
    goto err;
  }
--- a/upb/def.h
+++ b/upb/def.h
@ -188,8 +188,8 @@ const upb_ExtensionRange* upb_MessageDef_ExtensionRange(const upb_MessageDef* m,
                                                        int i);
 const upb_FieldDef* upb_MessageDef_Field(const upb_MessageDef* m, int i);
 const upb_OneofDef* upb_MessageDef_Oneof(const upb_MessageDef* m, int i);
-const upb_FieldDef* upb_MessageDef_FindFieldByNumberWithSize(
+const upb_FieldDef* upb_MessageDef_FindFieldByNumber(const upb_MessageDef* m,
-    const upb_MessageDef* m, uint32_t i);
+                                                     uint32_t i);
 const upb_FieldDef* upb_MessageDef_FindFieldByNameWithSize(
    const upb_MessageDef* m, const char* name, size_t len);
 const upb_OneofDef* upb_MessageDef_FindOneofByNameWithSize(
@ -317,6 +317,7 @@ const google_protobuf_MethodOptions* upb_MethodDef_Options(
    const upb_MethodDef* m);
 bool upb_MethodDef_HasOptions(const upb_MethodDef* m);
 const char* upb_MethodDef_FullName(const upb_MethodDef* m);
 int upb_MethodDef_Index(const upb_MethodDef* m);
 const char* upb_MethodDef_Name(const upb_MethodDef* m);
 const upb_ServiceDef* upb_MethodDef_Service(const upb_MethodDef* m);
 const upb_MessageDef* upb_MethodDef_InputType(const upb_MethodDef* m);
@ -389,7 +390,15 @@ typedef struct _upb_DefPool_Init {
  upb_StringView descriptor; /* Serialized descriptor. */
 } _upb_DefPool_Init;
-bool _upb_DefPool_LoadDefInit(upb_DefPool* s, const _upb_DefPool_Init* init);
+// Should only be directly called by tests.  This variant lets us suppress
 // the use of compiled-in tables, forcing a rebuild of the tables at runtime.
 bool _upb_DefPool_LoadDefInitEx(upb_DefPool* s, const _upb_DefPool_Init* init,
                                bool rebuild_minitable);
 UPB_INLINE bool _upb_DefPool_LoadDefInit(upb_DefPool* s,
                                         const _upb_DefPool_Init* init) {
  return _upb_DefPool_LoadDefInitEx(s, init, false);
 }
 #include "upb/port_undef.inc"
--- a/upb/def.hpp
+++ b/upb/def.hpp
@ -194,7 +194,7 @@ class MessageDefPtr {
  // These return null pointers if the field is not found.
  FieldDefPtr FindFieldByNumber(uint32_t number) const {
-    return FieldDefPtr(upb_MessageDef_FindFieldByNumberWithSize(ptr_, number));
+    return FieldDefPtr(upb_MessageDef_FindFieldByNumber(ptr_, number));
  }
  FieldDefPtr FindFieldByName(const char* name, size_t len) const {
    return FieldDefPtr(upb_MessageDef_FindFieldByNameWithSize(ptr_, name, len));
--- a/upb/fuzz/BUILD
+++ b/upb/fuzz/BUILD
@ -7,6 +7,7 @@ cc_fuzz_test(
    srcs = ["file_descriptor_parsenew_fuzzer.cc"],
    deps = [
        "//:descriptor_upb_proto",
        "//:reflection",
        "//:upb",
    ],
 )
--- a/upb/fuzz/file_descriptor_parsenew_fuzzer.cc
+++ b/upb/fuzz/file_descriptor_parsenew_fuzzer.cc
@ -26,11 +26,18 @@
 #include <cstdint>
 #include "google/protobuf/descriptor.upb.h"
 #include "upb/def.hpp"
 #include "upb/upb.hpp"
 extern "C" int LLVMFuzzerTestOneInput(const uint8_t* data, size_t size) {
  upb::Arena arena;
-  google_protobuf_FileDescriptorProto_parse(reinterpret_cast<const char*>(data),
+  google_protobuf_FileDescriptorProto* proto =
-                                            size, arena.ptr());
+      google_protobuf_FileDescriptorProto_parse(
          reinterpret_cast<const char*>(data), size, arena.ptr());
  if (proto) {
    upb::SymbolTable symtab;
    upb::Status status;
    symtab.AddFile(proto, &status);
  }
  return 0;
 }
--- a/upb/json_decode.c
+++ b/upb/json_decode.c
@ -894,10 +894,8 @@ static void jsondec_array(jsondec* d, upb_Message* msg, const upb_FieldDef* f) {
 static void jsondec_map(jsondec* d, upb_Message* msg, const upb_FieldDef* f) {
  upb_Map* map = upb_Message_Mutable(msg, f, d->arena).map;
  const upb_MessageDef* entry = upb_FieldDef_MessageSubDef(f);
-  const upb_FieldDef* key_f =
+  const upb_FieldDef* key_f = upb_MessageDef_FindFieldByNumber(entry, 1);
-      upb_MessageDef_FindFieldByNumberWithSize(entry, 1);
+  const upb_FieldDef* val_f = upb_MessageDef_FindFieldByNumber(entry, 2);
  const upb_FieldDef* val_f =
      upb_MessageDef_FindFieldByNumberWithSize(entry, 2);
  jsondec_objstart(d);
  while (jsondec_objnext(d)) {
@ -1139,10 +1137,9 @@ static void jsondec_timestamp(jsondec* d, upb_Message* msg,
    jsondec_err(d, "Timestamp out of range");
  }
-  upb_Message_Set(msg, upb_MessageDef_FindFieldByNumberWithSize(m, 1), seconds,
+  upb_Message_Set(msg, upb_MessageDef_FindFieldByNumber(m, 1), seconds,
                  d->arena);
  upb_Message_Set(msg, upb_MessageDef_FindFieldByNumberWithSize(m, 2), nanos,
                  d->arena);
  upb_Message_Set(msg, upb_MessageDef_FindFieldByNumber(m, 2), nanos, d->arena);
  return;
 malformed:
@ -1174,15 +1171,14 @@ static void jsondec_duration(jsondec* d, upb_Message* msg,
    nanos.int32_val = -nanos.int32_val;
  }
-  upb_Message_Set(msg, upb_MessageDef_FindFieldByNumberWithSize(m, 1), seconds,
+  upb_Message_Set(msg, upb_MessageDef_FindFieldByNumber(m, 1), seconds,
                  d->arena);
  upb_Message_Set(msg, upb_MessageDef_FindFieldByNumberWithSize(m, 2), nanos,
                  d->arena);
  upb_Message_Set(msg, upb_MessageDef_FindFieldByNumber(m, 2), nanos, d->arena);
 }
 static void jsondec_listvalue(jsondec* d, upb_Message* msg,
                              const upb_MessageDef* m) {
-  const upb_FieldDef* values_f = upb_MessageDef_FindFieldByNumberWithSize(m, 1);
+  const upb_FieldDef* values_f = upb_MessageDef_FindFieldByNumber(m, 1);
  const upb_MessageDef* value_m = upb_FieldDef_MessageSubDef(values_f);
  upb_Array* values = upb_Message_Mutable(msg, values_f, d->arena).array;
@ -1199,10 +1195,9 @@ static void jsondec_listvalue(jsondec* d, upb_Message* msg,
 static void jsondec_struct(jsondec* d, upb_Message* msg,
                           const upb_MessageDef* m) {
-  const upb_FieldDef* fields_f = upb_MessageDef_FindFieldByNumberWithSize(m, 1);
+  const upb_FieldDef* fields_f = upb_MessageDef_FindFieldByNumber(m, 1);
  const upb_MessageDef* entry_m = upb_FieldDef_MessageSubDef(fields_f);
-  const upb_FieldDef* value_f =
+  const upb_FieldDef* value_f = upb_MessageDef_FindFieldByNumber(entry_m, 2);
      upb_MessageDef_FindFieldByNumberWithSize(entry_m, 2);
  const upb_MessageDef* value_m = upb_FieldDef_MessageSubDef(value_f);
  upb_Map* fields = upb_Message_Mutable(msg, fields_f, d->arena).map;
@ -1228,42 +1223,42 @@ static void jsondec_wellknownvalue(jsondec* d, upb_Message* msg,
  switch (jsondec_peek(d)) {
    case JD_NUMBER:
      /* double number_value = 2; */
-      f = upb_MessageDef_FindFieldByNumberWithSize(m, 2);
+      f = upb_MessageDef_FindFieldByNumber(m, 2);
      val.double_val = jsondec_number(d);
      break;
    case JD_STRING:
      /* string string_value = 3; */
-      f = upb_MessageDef_FindFieldByNumberWithSize(m, 3);
+      f = upb_MessageDef_FindFieldByNumber(m, 3);
      val.str_val = jsondec_string(d);
      break;
    case JD_FALSE:
      /* bool bool_value = 4; */
-      f = upb_MessageDef_FindFieldByNumberWithSize(m, 4);
+      f = upb_MessageDef_FindFieldByNumber(m, 4);
      val.bool_val = false;
      jsondec_false(d);
      break;
    case JD_TRUE:
      /* bool bool_value = 4; */
-      f = upb_MessageDef_FindFieldByNumberWithSize(m, 4);
+      f = upb_MessageDef_FindFieldByNumber(m, 4);
      val.bool_val = true;
      jsondec_true(d);
      break;
    case JD_NULL:
      /* NullValue null_value = 1; */
-      f = upb_MessageDef_FindFieldByNumberWithSize(m, 1);
+      f = upb_MessageDef_FindFieldByNumber(m, 1);
      val.int32_val = 0;
      jsondec_null(d);
      break;
    /* Note: these cases return, because upb_Message_Mutable() is enough. */
    case JD_OBJECT:
      /* Struct struct_value = 5; */
-      f = upb_MessageDef_FindFieldByNumberWithSize(m, 5);
+      f = upb_MessageDef_FindFieldByNumber(m, 5);
      submsg = upb_Message_Mutable(msg, f, d->arena).msg;
      jsondec_struct(d, submsg, upb_FieldDef_MessageSubDef(f));
      return;
    case JD_ARRAY:
      /* ListValue list_value = 6; */
-      f = upb_MessageDef_FindFieldByNumberWithSize(m, 6);
+      f = upb_MessageDef_FindFieldByNumber(m, 6);
      submsg = upb_Message_Mutable(msg, f, d->arena).msg;
      jsondec_listvalue(d, submsg, upb_FieldDef_MessageSubDef(f));
      return;
@ -1310,7 +1305,7 @@ static upb_StringView jsondec_mask(jsondec* d, const char* buf,
 static void jsondec_fieldmask(jsondec* d, upb_Message* msg,
                              const upb_MessageDef* m) {
  /* repeated string paths = 1; */
-  const upb_FieldDef* paths_f = upb_MessageDef_FindFieldByNumberWithSize(m, 1);
+  const upb_FieldDef* paths_f = upb_MessageDef_FindFieldByNumber(m, 1);
  upb_Array* arr = upb_Message_Mutable(msg, paths_f, d->arena).array;
  upb_StringView str = jsondec_string(d);
  const char* ptr = str.data;
@ -1350,8 +1345,7 @@ static void jsondec_anyfield(jsondec* d, upb_Message* msg,
 static const upb_MessageDef* jsondec_typeurl(jsondec* d, upb_Message* msg,
                                             const upb_MessageDef* m) {
-  const upb_FieldDef* type_url_f =
+  const upb_FieldDef* type_url_f = upb_MessageDef_FindFieldByNumber(m, 1);
      upb_MessageDef_FindFieldByNumberWithSize(m, 1);
  const upb_MessageDef* type_m;
  upb_StringView type_url = jsondec_string(d);
  const char* end = type_url.data + type_url.size;
@ -1382,7 +1376,7 @@ static const upb_MessageDef* jsondec_typeurl(jsondec* d, upb_Message* msg,
 static void jsondec_any(jsondec* d, upb_Message* msg, const upb_MessageDef* m) {
  /* string type_url = 1;
   * bytes value = 2; */
-  const upb_FieldDef* value_f = upb_MessageDef_FindFieldByNumberWithSize(m, 2);
+  const upb_FieldDef* value_f = upb_MessageDef_FindFieldByNumber(m, 2);
  upb_Message* any_msg;
  const upb_MessageDef* any_m = NULL;
  const char* pre_type_data = NULL;
@ -1444,7 +1438,7 @@ static void jsondec_any(jsondec* d, upb_Message* msg, const upb_MessageDef* m) {
 static void jsondec_wrapper(jsondec* d, upb_Message* msg,
                            const upb_MessageDef* m) {
-  const upb_FieldDef* value_f = upb_MessageDef_FindFieldByNumberWithSize(m, 1);
+  const upb_FieldDef* value_f = upb_MessageDef_FindFieldByNumber(m, 1);
  upb_MessageValue val = jsondec_value(d, value_f);
  upb_Message_Set(msg, value_f, val, d->arena);
 }
--- a/upb/json_encode.c
+++ b/upb/json_encode.c
@ -141,9 +141,8 @@ static void jsonenc_nanos(jsonenc* e, int32_t nanos) {
 static void jsonenc_timestamp(jsonenc* e, const upb_Message* msg,
                              const upb_MessageDef* m) {
-  const upb_FieldDef* seconds_f =
+  const upb_FieldDef* seconds_f = upb_MessageDef_FindFieldByNumber(m, 1);
-      upb_MessageDef_FindFieldByNumberWithSize(m, 1);
+  const upb_FieldDef* nanos_f = upb_MessageDef_FindFieldByNumber(m, 2);
  const upb_FieldDef* nanos_f = upb_MessageDef_FindFieldByNumberWithSize(m, 2);
  int64_t seconds = upb_Message_Get(msg, seconds_f).int64_val;
  int32_t nanos = upb_Message_Get(msg, nanos_f).int32_val;
  int L, N, I, J, K, hour, min, sec;
@ -185,9 +184,8 @@ static void jsonenc_timestamp(jsonenc* e, const upb_Message* msg,
 static void jsonenc_duration(jsonenc* e, const upb_Message* msg,
                             const upb_MessageDef* m) {
-  const upb_FieldDef* seconds_f =
+  const upb_FieldDef* seconds_f = upb_MessageDef_FindFieldByNumber(m, 1);
-      upb_MessageDef_FindFieldByNumberWithSize(m, 1);
+  const upb_FieldDef* nanos_f = upb_MessageDef_FindFieldByNumber(m, 2);
  const upb_FieldDef* nanos_f = upb_MessageDef_FindFieldByNumberWithSize(m, 2);
  int64_t seconds = upb_Message_Get(msg, seconds_f).int64_val;
  int32_t nanos = upb_Message_Get(msg, nanos_f).int32_val;
@ -336,7 +334,7 @@ static void upb_JsonEncode_Float(jsonenc* e, float val) {
 static void jsonenc_wrapper(jsonenc* e, const upb_Message* msg,
                            const upb_MessageDef* m) {
-  const upb_FieldDef* val_f = upb_MessageDef_FindFieldByNumberWithSize(m, 1);
+  const upb_FieldDef* val_f = upb_MessageDef_FindFieldByNumber(m, 1);
  upb_MessageValue val = upb_Message_Get(msg, val_f);
  jsonenc_scalar(e, val, val_f);
 }
@ -380,9 +378,8 @@ badurl:
 static void jsonenc_any(jsonenc* e, const upb_Message* msg,
                        const upb_MessageDef* m) {
-  const upb_FieldDef* type_url_f =
+  const upb_FieldDef* type_url_f = upb_MessageDef_FindFieldByNumber(m, 1);
-      upb_MessageDef_FindFieldByNumberWithSize(m, 1);
+  const upb_FieldDef* value_f = upb_MessageDef_FindFieldByNumber(m, 2);
  const upb_FieldDef* value_f = upb_MessageDef_FindFieldByNumberWithSize(m, 2);
  upb_StringView type_url = upb_Message_Get(msg, type_url_f).str_val;
  upb_StringView value = upb_Message_Get(msg, value_f).str_val;
  const upb_MessageDef* any_m = jsonenc_getanymsg(e, type_url);
@ -441,7 +438,7 @@ static void jsonenc_fieldpath(jsonenc* e, upb_StringView path) {
 static void jsonenc_fieldmask(jsonenc* e, const upb_Message* msg,
                              const upb_MessageDef* m) {
-  const upb_FieldDef* paths_f = upb_MessageDef_FindFieldByNumberWithSize(m, 1);
+  const upb_FieldDef* paths_f = upb_MessageDef_FindFieldByNumber(m, 1);
  const upb_Array* paths = upb_Message_Get(msg, paths_f).array_val;
  bool first = true;
  size_t i, n = 0;
@ -460,11 +457,10 @@ static void jsonenc_fieldmask(jsonenc* e, const upb_Message* msg,
 static void jsonenc_struct(jsonenc* e, const upb_Message* msg,
                           const upb_MessageDef* m) {
-  const upb_FieldDef* fields_f = upb_MessageDef_FindFieldByNumberWithSize(m, 1);
+  const upb_FieldDef* fields_f = upb_MessageDef_FindFieldByNumber(m, 1);
  const upb_Map* fields = upb_Message_Get(msg, fields_f).map_val;
  const upb_MessageDef* entry_m = upb_FieldDef_MessageSubDef(fields_f);
-  const upb_FieldDef* value_f =
+  const upb_FieldDef* value_f = upb_MessageDef_FindFieldByNumber(entry_m, 2);
      upb_MessageDef_FindFieldByNumberWithSize(entry_m, 2);
  size_t iter = kUpb_Map_Begin;
  bool first = true;
@ -487,7 +483,7 @@ static void jsonenc_struct(jsonenc* e, const upb_Message* msg,
 static void jsonenc_listvalue(jsonenc* e, const upb_Message* msg,
                              const upb_MessageDef* m) {
-  const upb_FieldDef* values_f = upb_MessageDef_FindFieldByNumberWithSize(m, 1);
+  const upb_FieldDef* values_f = upb_MessageDef_FindFieldByNumber(m, 1);
  const upb_MessageDef* values_m = upb_FieldDef_MessageSubDef(values_f);
  const upb_Array* values = upb_Message_Get(msg, values_f).array_val;
  size_t i;
@ -669,10 +665,8 @@ static void jsonenc_array(jsonenc* e, const upb_Array* arr,
 static void jsonenc_map(jsonenc* e, const upb_Map* map, const upb_FieldDef* f) {
  const upb_MessageDef* entry = upb_FieldDef_MessageSubDef(f);
-  const upb_FieldDef* key_f =
+  const upb_FieldDef* key_f = upb_MessageDef_FindFieldByNumber(entry, 1);
-      upb_MessageDef_FindFieldByNumberWithSize(entry, 1);
+  const upb_FieldDef* val_f = upb_MessageDef_FindFieldByNumber(entry, 2);
  const upb_FieldDef* val_f =
      upb_MessageDef_FindFieldByNumberWithSize(entry, 2);
  size_t iter = kUpb_Map_Begin;
  bool first = true;
--- a/upb/reflection.c
+++ b/upb/reflection.c
@ -180,10 +180,10 @@ make:
  if (!a) return (upb_MutableMessageValue){.array = NULL};
  if (upb_FieldDef_IsMap(f)) {
    const upb_MessageDef* entry = upb_FieldDef_MessageSubDef(f);
-    const upb_FieldDef* key = upb_MessageDef_FindFieldByNumberWithSize(
+    const upb_FieldDef* key =
-        entry, kUpb_MapEntry_KeyFieldNumber);
+        upb_MessageDef_FindFieldByNumber(entry, kUpb_MapEntry_KeyFieldNumber);
-    const upb_FieldDef* value = upb_MessageDef_FindFieldByNumberWithSize(
+    const upb_FieldDef* value =
-        entry, kUpb_MapEntry_ValueFieldNumber);
+        upb_MessageDef_FindFieldByNumber(entry, kUpb_MapEntry_ValueFieldNumber);
    ret.map =
        upb_Map_New(a, upb_FieldDef_CType(key), upb_FieldDef_CType(value));
  } else if (upb_FieldDef_IsRepeated(f)) {
@ -313,8 +313,7 @@ bool _upb_Message_DiscardUnknown(upb_Message* msg, const upb_MessageDef* m,
    const upb_MessageDef* subm = upb_FieldDef_MessageSubDef(f);
    if (!subm) continue;
    if (upb_FieldDef_IsMap(f)) {
-      const upb_FieldDef* val_f =
+      const upb_FieldDef* val_f = upb_MessageDef_FindFieldByNumber(subm, 2);
          upb_MessageDef_FindFieldByNumberWithSize(subm, 2);
      const upb_MessageDef* val_m = upb_FieldDef_MessageSubDef(val_f);
      upb_Map* map = (upb_Map*)val.map_val;
      size_t iter = kUpb_Map_Begin;
--- a/upb/util/BUILD
+++ b/upb/util/BUILD
@ -13,7 +13,10 @@ cc_library(
    srcs = ["def_to_proto.c"],
    hdrs = ["def_to_proto.h"],
    visibility = ["//visibility:public"],
-    deps = ["//:reflection"],
+    deps = [
        "//:port",
        "//:reflection",
    ],
 )
 proto_library(
@ -22,7 +25,7 @@ proto_library(
        "def_to_proto_public_import_test.proto",
        "def_to_proto_regular_import_test.proto",
        "def_to_proto_test.proto",
-        "def_to_proto_weak_import_test.proto",
+        "def_to_proto_weak_import_test.proto",  # copybara:strip_for_google3
    ],
 )
@ -44,6 +47,8 @@ cc_test(
        ":def_to_proto_test_upb_proto",
        ":def_to_proto_test_upb_proto_reflection",
        "//:descriptor_upb_proto_reflection",
        "//:reflection",
        "//:upb",
        "@com_google_absl//absl/strings",
        "@com_google_googletest//:gtest_main",
        "@com_google_protobuf//:protobuf",
@ -57,7 +62,10 @@ cc_library(
    srcs = ["required_fields.c"],
    hdrs = ["required_fields.h"],
    visibility = ["//visibility:public"],
-    deps = ["//:reflection"],
+    deps = [
        "//:port",
        "//:reflection",
    ],
 )
 proto_library(
@ -83,6 +91,8 @@ cc_test(
        ":required_fields_test_upb_proto",
        ":required_fields_test_upb_proto_reflection",
        "//:json",
        "//:reflection",
        "//:upb",
        "@com_google_absl//absl/strings",
        "@com_google_googletest//:gtest_main",
    ],
@ -95,7 +105,10 @@ cc_library(
    srcs = ["compare.c"],
    hdrs = ["compare.h"],
    visibility = ["//visibility:public"],
-    deps = ["//:reflection"],
+    deps = [
        "//:port",
        "//:reflection",
    ],
 )
 cc_test(
--- a/upb/util/def_to_proto_public_import_test.proto
+++ b/upb/util/def_to_proto_public_import_test.proto
@ -26,3 +26,7 @@
 */
 syntax = "proto3";
 package pkg;
 message PublicImportMessage {}
--- a/upb/util/def_to_proto_regular_import_test.proto
+++ b/upb/util/def_to_proto_regular_import_test.proto
@ -26,3 +26,11 @@
 */
 syntax = "proto3";
 package pkg;
 message RegularImportMessage {}
 enum Proto3Enum {
    PROTO3_ENUM_ZERO = 0;
 }
--- a/upb/util/def_to_proto_test.cc
+++ b/upb/util/def_to_proto_test.cc
@ -125,3 +125,19 @@ TEST(DefToProto, Test) {
  upb::FileDefPtr file = msgdef.file();
  CheckFile(file, file_desc);
 }
 // Like the previous test, but uses a message layout built at runtime.
 TEST(DefToProto, TestRuntimeReflection) {
  upb::Arena arena;
  upb::SymbolTable symtab;
  upb_StringView test_file_desc =
      upb_util_def_to_proto_test_proto_upbdefinit.descriptor;
  const auto* file_desc = google_protobuf_FileDescriptorProto_parse(
      test_file_desc.data, test_file_desc.size, arena.ptr());
  _upb_DefPool_LoadDefInitEx(
      symtab.ptr(), &upb_util_def_to_proto_test_proto_upbdefinit, true);
  upb::FileDefPtr file = symtab.FindFileByName(
      upb_util_def_to_proto_test_proto_upbdefinit.filename);
  CheckFile(file, file_desc);
 }
--- a/upb/util/def_to_proto_test.proto
+++ b/upb/util/def_to_proto_test.proto
@ -29,7 +29,7 @@ syntax = "proto2";
 import "upb/util/def_to_proto_regular_import_test.proto";
 import public "upb/util/def_to_proto_public_import_test.proto";
-import weak "upb/util/def_to_proto_weak_import_test.proto";
+import weak "upb/util/def_to_proto_weak_import_test.proto";  // copybara:strip_for_google3
 package pkg;
@ -48,6 +48,9 @@ message Message {
    string oneof_bool = 3 [default = "true"];
    bytes oneof_bytes = 4 [default = "abc\xef\xfe"];
  }
  optional pkg.RegularImportMessage regular_import_message = 6;
  optional pkg.PublicImportMessage public_import_message = 7;
  optional pkg.Proto3Enum proto3_enum = 8;
  extensions 1000 to max;
  extend Message {
    optional int32 ext = 1000;
@ -67,6 +70,26 @@ enum Enum {
  NEGATIVE_ONE = -1;
 }
 enum EnumUpper32Value {
  UPPER32_VALUE = 40;
 }
 enum HasDuplicateValues {
  option allow_alias = true;
  A = 0;
  B = 1;
  C = 120;
  D = 130;
  G = 120;
  F = 1;
  E = 0;
  H = 121;
  I = 121;
  J = 121;
  K = 121;
 }
 service Service {
  rpc Bar(Message) returns (Message);
 }
@ -75,6 +98,10 @@ extend Message {
  optional int32 ext = 1001;
 }
 enum Has31 {
  VALUE_31 = 31;
 }
 message PretendMessageSet {
  option message_set_wire_format = true;
  // Since this is message_set_wire_format, "max" here means INT32_MAX.
--- a/upbc/protoc-gen-upb.cc
+++ b/upbc/protoc-gen-upb.cc
@ -96,6 +96,18 @@ std::vector<const protobuf::EnumDescriptor*> SortedEnums(
  return enums;
 }
 std::vector<int32_t> SortedUniqueEnumNumbers(
    const protobuf::EnumDescriptor* e) {
  std::vector<int32_t> values;
  for (int i = 0; i < e->value_count(); i++) {
    values.push_back(e->value(i)->number());
  }
  std::sort(values.begin(), values.end());
  auto last = std::unique(values.begin(), values.end());
  values.erase(last, values.end());
  return values;
 }
 void AddMessages(const protobuf::Descriptor* message,
                 std::vector<const protobuf::Descriptor*>* messages) {
  messages->push_back(message);
@ -1112,7 +1124,8 @@ SubLayoutArray::SubLayoutArray(const protobuf::Descriptor* message) {
  std::vector<const protobuf::FieldDescriptor*> sorted_subenums =
      SortedSubEnums(message);
  for (const auto* field : sorted_subenums) {
-    if (field->file()->syntax() != protobuf::FileDescriptor::SYNTAX_PROTO2) {
+    if (field->enum_type()->file()->syntax() !=
        protobuf::FileDescriptor::SYNTAX_PROTO2) {
      continue;
    }
    if (!indexes_.try_emplace(field->enum_type(), i).second) {
@ -1160,7 +1173,8 @@ bool TryFillTableEntry(const protobuf::Descriptor* message,
      type = "b1";
      break;
    case protobuf::FieldDescriptor::TYPE_ENUM:
-      if (field->file()->syntax() == protobuf::FileDescriptor::SYNTAX_PROTO2) {
+      if (field->enum_type()->file()->syntax() ==
          protobuf::FileDescriptor::SYNTAX_PROTO2) {
        // We don't have the means to test proto2 enum fields for valid values.
        return false;
      }
@ -1495,23 +1509,19 @@ int WriteEnums(const protobuf::FileDescriptor* file, Output& output) {
  for (const auto* e : this_file_enums) {
    uint64_t mask = 0;
-    absl::flat_hash_set<int32_t> values;
+    std::vector<int32_t> values;
-    for (int i = 0; i < e->value_count(); i++) {
+    for (auto number : SortedUniqueEnumNumbers(e)) {
      int32_t number = e->value(i)->number();
      if (static_cast<uint32_t>(number) < 64) {
-        mask |= 1 << number;
+        mask |= 1ULL << number;
      } else {
-        values.insert(number);
+        values.push_back(number);
      }
    }
    std::vector<int32_t> values_vec(values.begin(), values.end());
    std::sort(values_vec.begin(), values_vec.end());
-    if (!values_vec.empty()) {
+    if (!values.empty()) {
      values_init = EnumInit(e) + "_values";
-      output("static const int32_t $0[$1] = {\n", values_init,
+      output("static const int32_t $0[$1] = {\n", values_init, values.size());
-             values_vec.size());
+      for (int32_t value : values) {
      for (auto value : values_vec) {
        output("  $0,\n", value);
      }
      output("};\n\n");
@ -1520,7 +1530,7 @@ int WriteEnums(const protobuf::FileDescriptor* file, Output& output) {
    output("const upb_MiniTable_Enum $0 = {\n", EnumInit(e));
    output("  $0,\n", values_init);
    output("  0x$0ULL,\n", absl::Hex(mask));
-    output("  $0,\n", values_vec.size());
+    output("  $0,\n", values.size());
    output("};\n\n");
  }
@ -1682,10 +1692,14 @@ bool Generator::Generate(const protobuf::FileDescriptor* file,
  FileLayout layout(file);
-  Output h_output(context->Open(HeaderFilename(file)));
+  std::unique_ptr<protobuf::io::ZeroCopyOutputStream> h_output_stream(
      context->Open(HeaderFilename(file)));
  Output h_output(h_output_stream.get());
  WriteHeader(file, h_output);
-  Output c_output(context->Open(SourceFilename(file)));
+  std::unique_ptr<protobuf::io::ZeroCopyOutputStream> c_output_stream(
      context->Open(SourceFilename(file)));
  Output c_output(c_output_stream.get());
  WriteSource(layout, c_output, fasttable_enabled);
  return true;
--- a/upbc/protoc-gen-upbdefs.cc
+++ b/upbc/protoc-gen-upbdefs.cc
@ -170,10 +170,14 @@ bool Generator::Generate(const protobuf::FileDescriptor* file,
    return false;
  }
-  Output h_def_output(context->Open(DefHeaderFilename(file->name())));
+  std::unique_ptr<protobuf::io::ZeroCopyOutputStream> h_output_stream(
      context->Open(DefHeaderFilename(file->name())));
  Output h_def_output(h_output_stream.get());
  WriteDefHeader(file, h_def_output);
-  Output c_def_output(context->Open(DefSourceFilename(file->name())));
+  std::unique_ptr<protobuf::io::ZeroCopyOutputStream> c_output_stream(
      context->Open(DefSourceFilename(file->name())));
  Output c_def_output(c_output_stream.get());
  WriteDefSource(file, c_def_output);
  return true;