diff --git a/.github/workflows/bazel_tests.yml b/.github/workflows/bazel_tests.yml index c45348c43c..3592710713 100644 --- a/.github/workflows/bazel_tests.yml +++ b/.github/workflows/bazel_tests.yml @@ -24,6 +24,7 @@ jobs: - { CC: clang, os: ubuntu-20.04, flags: "--//:fasttable_enabled=true -- -cmake:test_generated_files" } - { CC: clang, os: ubuntu-20.04, flags: "--config=asan -c dbg -- -benchmarks:benchmark -python/..." } - { CC: clang, os: ubuntu-20.04, flags: "--config=ubsan -c dbg -- -benchmarks:benchmark -python/... -upb/bindings/lua/...", install: "libunwind-dev" } + - { CC: clang, os: ubuntu-20.04, flags: "--copt=-m32 --linkopt=-m32 -- -... benchmarks:benchmark ", install: "g++-multilib" } - { CC: clang, os: macos-11, flags: "" } steps: @@ -31,7 +32,7 @@ jobs: - name: Setup Python venv run: rm -rf /tmp/venv && python3 -m venv /tmp/venv - name: Install dependencies - run: sudo apt install -y ${{ matrix.install }} + run: sudo apt update && sudo apt install -y ${{ matrix.install }} if: matrix.install != '' - name: Run tests run: cd ${{ github.workspace }} && PATH=/tmp/venv/bin:$PATH CC=${{ matrix.CC }} bazel test --test_output=errors ... ${{ matrix.flags }} diff --git a/BUILD b/BUILD index 56a8720828..1158e48d0e 100644 --- a/BUILD +++ b/BUILD @@ -77,7 +77,7 @@ cc_library( "upb/port_def.inc", "upb/port_undef.inc", ], - visibility = ["//tests:__pkg__"], + visibility = ["//:__subpackages__"], ) cc_library( @@ -159,7 +159,9 @@ cc_library( cc_library( name = "generated_code_support__only_for_generated_code_do_not_use__i_give_permission_to_break_me", hdrs = [ + "upb/decode.h", "upb/decode_fast.h", + "upb/encode.h", "upb/msg.h", "upb/msg_internal.h", "upb/port_def.inc", @@ -173,6 +175,22 @@ cc_library( ], ) +cc_library( + name = "generated_reflection_support__only_for_generated_code_do_not_use__i_give_permission_to_break_me", + hdrs = [ + "upb/def.h", + "upb/port_def.inc", + "upb/port_undef.inc", + ], + copts = UPB_DEFAULT_COPTS, + visibility = ["//visibility:public"], + deps = [ + ":descriptor_upb_proto", + ":reflection", + ":table", + ], +) + upb_proto_library( name = "descriptor_upb_proto", visibility = ["//visibility:public"], @@ -222,6 +240,7 @@ cc_library( deps = [ ":port", ":reflection", + ":table", ], ) @@ -254,6 +273,7 @@ cc_test( ":empty_upbdefs_proto", ":test_messages_proto3_proto_upb", ":test_upb_proto", + ":upb", "@com_google_googletest//:gtest_main", ], ) @@ -292,7 +312,11 @@ cc_test( srcs = ["upb/msg_test.cc"], deps = [ ":json", + ":msg_test_upb_proto", ":msg_test_upb_proto_reflection", + ":reflection", + ":test_messages_proto3_proto_upb", + ":upb", "@com_google_googletest//:gtest_main", ], ) @@ -304,6 +328,12 @@ proto_library( deps = ["@com_google_protobuf//:test_messages_proto3_proto"], ) +upb_proto_library( + name = "msg_test_upb_proto", + testonly = 1, + deps = [":msg_test_proto"], +) + upb_proto_reflection_library( name = "msg_test_upb_proto_reflection", testonly = 1, @@ -436,7 +466,10 @@ cc_library( "upb/table_internal.h", "upb/upb.h", ], - visibility = ["//tests:__pkg__"], + visibility = [ + "//python:__pkg__", + "//tests:__pkg__", + ], deps = [ ":port", ], diff --git a/bazel/py_extension.bzl b/bazel/py_extension.bzl new file mode 100644 index 0000000000..beee32148f --- /dev/null +++ b/bazel/py_extension.bzl @@ -0,0 +1,39 @@ + +load( + "//bazel:build_defs.bzl", + "UPB_DEFAULT_COPTS", +) + +def py_extension(name, srcs, deps=[]): + version_script = name + "_version_script.lds" + symbol = "PyInit_" + name + native.genrule( + name = "gen_" + version_script, + outs = [version_script], + cmd = "echo 'message { global: " + symbol + "; local: *; };' > $@", + ) + + native.cc_binary( + name = name, + srcs = srcs, + copts = UPB_DEFAULT_COPTS + [ + # The Python API requires patterns that are ISO C incompatible, like + # casts between function pointers and object pointers. + "-Wno-pedantic", + ], + # We use a linker script to hide all symbols except the entry point for + # the module. + linkopts = select({ + "@platforms//os:linux": ["-Wl,--version-script,$(location :" + version_script + ")"], + "@platforms//os:macos": [ + "-Wl,-exported_symbol", + "-Wl,_" + symbol, + ], + }), + linkshared = True, + linkstatic = True, + deps = deps + [ + ":" + version_script, + "@system_python//:python_headers", + ], + ) diff --git a/bazel/py_proto_library.bzl b/bazel/py_proto_library.bzl index 9f99112346..8ef4d9708d 100644 --- a/bazel/py_proto_library.bzl +++ b/bazel/py_proto_library.bzl @@ -103,7 +103,7 @@ def _py_proto_library_aspect_impl(target, ctx): ) outs_depset = depset(srcs) return [ - PyInfo(transitive_sources = outs_depset) + PyInfo(transitive_sources = outs_depset), ] _py_proto_library_aspect = aspect( diff --git a/bazel/pyproto_test_wrapper.bzl b/bazel/pyproto_test_wrapper.bzl new file mode 100644 index 0000000000..fb909a1db8 --- /dev/null +++ b/bazel/pyproto_test_wrapper.bzl @@ -0,0 +1,34 @@ + +# copybara:strip_for_google3_begin + +def pyproto_test_wrapper(name): + src = name + "_wrapper.py" + native.py_test( + name = name, + srcs = [src], + legacy_create_init = False, + main = src, + data = ["@com_google_protobuf//:testdata"], + deps = [ + "//python:message_ext", + "@com_google_protobuf//:python_common_test_protos", + "@com_google_protobuf//:python_specific_test_protos", + "@com_google_protobuf//:python_srcs", + ], + ) + +# copybara:replace_for_google3_begin +# +# def pyproto_test_wrapper(name): +# src = name + "_wrapper.py" +# native.py_test( +# name = name, +# srcs = [src], +# main = src, +# deps = [ +# "//net/proto2/python/internal:" + name + "_for_deps", +# "//net/proto2/python/public:use_upb_protos", +# ], +# ) +# +# copybara:replace_for_google3_end diff --git a/bazel/upb_proto_library.bzl b/bazel/upb_proto_library.bzl index ae0434af6b..0a30eee611 100644 --- a/bazel/upb_proto_library.bzl +++ b/bazel/upb_proto_library.bzl @@ -299,7 +299,6 @@ _upb_proto_library_aspect = aspect( ), "_upb": attr.label_list(default = [ "//:generated_code_support__only_for_generated_code_do_not_use__i_give_permission_to_break_me", - "//:upb", ]), "_fasttable_enabled": attr.label(default = "//:fasttable_enabled"), }), @@ -348,9 +347,7 @@ _upb_proto_reflection_library_aspect = aspect( ), "_upbdefs": attr.label_list( default = [ - "//:generated_code_support__only_for_generated_code_do_not_use__i_give_permission_to_break_me", - "//:upb", - "//:reflection", + "//:generated_reflection_support__only_for_generated_code_do_not_use__i_give_permission_to_break_me", ], ), }), diff --git a/bazel/workspace_defs.bzl b/bazel/workspace_defs.bzl index 2f596f01fb..03655b2409 100644 --- a/bazel/workspace_defs.bzl +++ b/bazel/workspace_defs.bzl @@ -53,17 +53,17 @@ toolchain( """ def _get_config_var(repository_ctx, name): - py_program = "import sysconfig; print(sysconfig.get_config_var('%s'), end='')" - result = repository_ctx.execute(["python3", "-c", py_program % (name)]) - if result.return_code != 0: - fail("No python3 executable available on the system") - return result.stdout + py_program = "import sysconfig; print(sysconfig.get_config_var('%s'), end='')" + result = repository_ctx.execute(["python3", "-c", py_program % (name)]) + if result.return_code != 0: + fail("No python3 executable available on the system") + return result.stdout def _python_headers_impl(repository_ctx): - path = _get_config_var(repository_ctx, "INCLUDEPY") - repository_ctx.symlink(path, "python") - python3 = repository_ctx.which("python3") - repository_ctx.file("BUILD.bazel", _build_file % python3) + path = _get_config_var(repository_ctx, "INCLUDEPY") + repository_ctx.symlink(path, "python") + python3 = repository_ctx.which("python3") + repository_ctx.file("BUILD.bazel", _build_file % python3) # The system_python() repository rule exposes Python headers from the system. # diff --git a/bazel/workspace_deps.bzl b/bazel/workspace_deps.bzl index a5a3564449..db3afd7a79 100644 --- a/bazel/workspace_deps.bzl +++ b/bazel/workspace_deps.bzl @@ -23,10 +23,10 @@ def upb_deps(): "rm python/google/protobuf/__init__.py", "rm python/google/protobuf/pyext/__init__.py", "rm python/google/protobuf/internal/__init__.py", - ] + ], ) - rules_python_version = "740825b7f74930c62f44af95c9a4c1bd428d2c53" # Latest @ 2021-06-23 + rules_python_version = "740825b7f74930c62f44af95c9a4c1bd428d2c53" # Latest @ 2021-06-23 maybe( http_archive, diff --git a/benchmarks/BUILD b/benchmarks/BUILD index cce5cb11da..ae4b1995b2 100644 --- a/benchmarks/BUILD +++ b/benchmarks/BUILD @@ -73,7 +73,7 @@ cc_proto_library( deps = [":benchmark_descriptor_sv_proto"], ) -cc_binary( +cc_test( name = "benchmark", testonly = 1, srcs = ["benchmark.cc"], diff --git a/cmake/CMakeLists.txt b/cmake/CMakeLists.txt index 2e45fb3c4e..a06bd604ac 100644 --- a/cmake/CMakeLists.txt +++ b/cmake/CMakeLists.txt @@ -103,6 +103,11 @@ add_library(generated_code_support__only_for_generated_code_do_not_use__i_give_p target_link_libraries(generated_code_support__only_for_generated_code_do_not_use__i_give_permission_to_break_me INTERFACE table upb) +add_library(generated_reflection_support__only_for_generated_code_do_not_use__i_give_permission_to_break_me INTERFACE) +target_link_libraries(generated_reflection_support__only_for_generated_code_do_not_use__i_give_permission_to_break_me INTERFACE + descriptor_upb_proto + reflection + table) add_library(reflection ../upb/def.c ../upb/msg.h @@ -122,7 +127,8 @@ add_library(textformat ../upb/text_encode.h) target_link_libraries(textformat port - reflection) + reflection + table) add_library(json ../upb/json_decode.c ../upb/json_encode.c diff --git a/doc/vs-cpp-protos.md b/doc/vs-cpp-protos.md new file mode 100644 index 0000000000..5d30c54a84 --- /dev/null +++ b/doc/vs-cpp-protos.md @@ -0,0 +1,254 @@ + +# upb vs. C++ Protobuf Design + +[upb](https://github.com/protocolbuffers/upb) is a small C protobuf library. +While some of the design follows in the footsteps of the C++ Protobuf Library, +upb departs from C++'s design in several key ways. This document compares +and contrasts the two libraries on several design points. + +## Design Goals + +Before we begin, it is worth calling out that upb and C++ have different design +goals, and this motivates some of the differences we will see. + +C++ protobuf is a user-level library: it is designed to be used directly by C++ +applications. These applications will expect a full-featured C++ API surface +that uses C++ idioms. The C++ library is also willing to add features to +increase server performance, even if these features would add size or complexity +to the library. Because C++ protobuf is a user-level library, API stability is +of utmost importance: breaking API changes are rare and carefully managed when +they do occur. The focus on C++ also means that ABI compatibility with C is not +a priority. + +upb, on the other hand, is designed primarily to be wrapped by other languages. +It is a C protobuf kernel that forms the basis on which a user-level protobuf +library can be built. This means we prefer to keep the API surface as small and +orthogonal as possible. While upb supports all protobuf features required for +full conformance, upb prioritizes simplicity and small code size, and avoids +adding features like lazy fields that can accelerate some use cases but at great +cost in terms of complexity. As upb is not aimed directly at users, there is +much more freedom to make API-breaking changes when necessary, which helps the +core to stay small and simple. We want to be compatible with all FFI +interfaces, so C ABI compatibility is a must. + +Despite these differences, C++ protos and upb offer [roughly the same core set +of features](https://github.com/protocolbuffers/upb#features). + +## Arenas + +upb and C++ protos both offer arena allocation, but there are some key +differences. + +### C++ + +As a matter of history, when C++ protos were open-sourced in 2008, they did not +support arenas. Originally there was only unique ownership, whereby each +message uniquely owns all child messages and will free them when the parent is +freed. + +Arena allocation was added as a feature in 2014 as a way of dramatically +reducing allocation and (especially) deallocation costs. But the library was +not at liberty to remove the unique ownership model, because it would break far +too many users. As a result, C++ has supported a **hybrid allocation model** +ever since, allowing users to allocate messages either directly from the +stack/heap or from an arena. The library attempts to ensure that there are +no dangling pointers by performing automatic copies in some cases (for example +`a->set_allocated_b(b)`, where `a` and `b` are on different arenas). + +C++'s arena object itself `google::protobuf::Arena` is **thread-safe** by +design, which allows users to allocate from multiple threads simultaneously +without external synchronization. The user can supply an initial block of +memory to the arena, and can choose some parameters to control the arena block +size. The user can also supply block alloc/dealloc functions, but the alloc +function is expected to always return some memory. The C++ library in general +does not attempt to handle out of memory conditions. + +### upb + +upb uses **arena allocation exclusively**. All messages must be allocated from +an arena, and can only be freed by freeing the arena. It is entirely the user's +responsibility to ensure that there are no dangling pointers: when a user sets a +message field, this will always trivially overwrite the pointer and will never +perform an implicit copy. + +upb's `upb::Arena` is **thread-compatible**, which means it cannot be used +concurrently without synchronization. The arena can be seeded with an initial +block of memory, but it does not explicitly support any parameters for choosing +block size. It support a custom alloc/dealloc function, and this function is +allowed to return `NULL` if no dynamic memory is available. This allows upb +arenas to have a max/fixed size, and makes it possible in theory to write code +that is tolerant to out-of-memory errors. + +upb's arena also supports a novel operation known as **fuse**, which joins two +arenas together into a single lifetime. Though both arenas must still be freed +separately, none of the memory will actually be freed until *both* arenas have +been freed. This is useful for avoiding dangling pointers when reparenting a +message with one that may be on a different arena. + +### Comparison + +**hybrid allocation vs. arena-only**: +* The C++ hybrid allocation model introduces a great deal of complexity and + unpredictability into the library. upb benefits from having a much simpler + and more predictable design. +* Some of the complexity in C++'s hybrid model arises from the fact that arenas + were added after the fact. Designing for a hybrid model from the outset + would likely yield a simpler result. +* Unique ownership does support some usage patterns that arenas cannot directly + accommodate. For example, you can reparent a message and the child will precisely + follow the lifetime of its new parent. An arena would require you to either + perform a deep copy or extend the lifetime. + +**thread-compatible vs. thread-safe arena** +* A thread-safe arena (as in C++) is safer and easier to use. A thread-compatible + arena requires that the user prove that the arena cannot be used concurrently. +* [Thread Sanitizer](https://github.com/google/sanitizers/wiki/ThreadSanitizerCppManual) + is far more accessible than it was in 2014 (when C++ introduced a thread-safe + arena). We now have more tools at our disposal to ensure that we do not trigger + data races in a thread-compatible arena like upb. +* Thread-compatible arenas are more performant. +* Thread-compatible arenas have a far simpler implementation. The C++ thread-safe + arena relies on thread-local variables, which introduce complications on some + platforms. It also requires far more subtle reasoning for correctness and + performance. + +**fuse vs. no fuse** +* The `upb_Arena_Fuse()` operation is a key part of how upb supports reparenting + of messages when the parent may be on a different arena. Without this, upb has + no way of supporting `foo.bar = bar` in dynamic languages without performing a + deep copy. +* A downside of `upb_Arena_Fuse()` is that passing an arena to a function can allow + that function to extend the lifetime of the arena in potentially + unpredictable ways. This can be prevented if necessary, as fuse can fail, eg. if + one arena has an initial block. But this adds some complexity by requiring callers + to handle the case where fuse fails. + +## Code Generation vs. Tables + +The C++ protobuf library has always been built around code generation, while upb +generates only tables. In other words, `foo.pb.cc` files contain functions, +whereas `foo.upb.c` files emit only data structures. + +### C++ + +C++ generated code emits a large number of functions into `foo.pb.cc` files. +An incomplete list: + +* `FooMsg::FooMsg()` (constructor): initializes all fields to their default value. +* `FooMsg::~FooMsg()` (destructor): frees any present child messages. +* `FooMsg::Clear()`: clears all fields back to their default/empty value. +* `FooMsg::_InternalParse()`: generated code for parsing a message. +* `FooMsg::_InternalSerialize()`: generated code for serializing a message. +* `FooMsg::ByteSizeLong()`: calculates serialized size, as a first pass before serializing. +* `FooMsg::MergeFrom()`: copies/appends present fields from another message. +* `FooMsg::IsInitialized()`: checks whether required fields are set. + +This code lives in the `.text` section and contains function calls to the generated +classes for child messages. + +### upb + +upb does not generate any code into `foo.upb.c` files, only data structures. upb uses a +compact data table known as a *mini table* to represent the schema and all fields. + +upb uses mini tables to perform all of the operations that would traditionally be done +with generated code. Revisiting the list from the previous section: + +* `FooMsg::FooMsg()` (constructor): upb instead initializes all messages with `memset(msg, 0, size)`. + Non-zero defaults are injected in the accessors. +* `FooMsg::~FooMsg()` (destructor): upb messages are freed by freeing the arena. +* `FooMsg::Clear()`: can be performed with `memset(msg, 0, size)`. +* `FooMsg::_InternalParse()`: upb's parser uses mini tables as data, instead of generating code. +* `FooMsg::_InternalSerialize()`: upb's serializer also uses mini-tables instead of generated code. +* `FooMsg::ByteSizeLong()`: upb performs serialization in reverse so that an initial pass is not required. +* `FooMsg::MergeFrom()`: upb supports this via serialize+parse from the other message. +* `FooMsg::IsInitialized()`: upb's encoder and decoder have special flags to check for required fields. + A util library `upb/util/required_fields.h` handles the corner cases. + +### Comparison + +If we compare compiled code size, upb is far smaller. Here is a comparison of the code +size of a trivial binary that does nothing but a parse and serialize of `descriptor.proto`. +This means we are seeing both the overhead of the core library itself as well as the +generated code (or table) for `descriptor.proto`. (For extra clarity we should break this +down by generated code vs core library in the future). + + +| Library | `.text` | `.data` | `.bss` | +|------------ |---------|---------|--------| +| upb | 26Ki | 0.6Ki | 0.01Ki | +| C++ (lite) | 187Ki | 2.8Ki | 1.25Ki | +| C++ (code size) | 904Ki | 6.1Ki | 1.88Ki | +| C++ (full) | 983Ki | 6.1Ki | 1.88Ki | + +"C++ (code size)" refers to protos compiled with `optimize_for = CODE_SIZE`, a mode +in which generated code contains reflection only, in an attempt to make the +generated code size smaller (however it requires the full runtime instead +of the lite runtime). + +## Bifurcated vs. Optional Reflection + +upb and C++ protos both offer reflection without making it mandatory. However +the models for enabling/disabling reflection are very different. + +### C++ + +C++ messages offer full reflection by default. Messages in C++ generally +derive from `Message`, and the base class provides a member function +`Reflection* Message::GetReflection()` which returns the reflection object. + +It follows that any message deriving from `Message` will always have reflection +linked into the binary, whether or not the reflection object is ever used. +Because `GetReflection()` is a function on the base class, it is not possible +to statically determine if a given message's reflection is used: + +```c++ +Reflection* GetReflection(const Message& message) { + // Can refer to any message in the whole binary. + return message.GetReflection(); +} +``` + +The C++ library does provide a way of omitting reflection: `MessageLite`. We can +cause a message to be lite in two different ways: + +* `optimize_for = LITE_RUNTIME` in a `.proto` file will cause all messages in that + file to be lite. +* `lite` as a codegen param: this will force all messages to lite, even if the + `.proto` file does not have `optimize_for = LITE_RUNTIME`. + +A lite message will derive from `MessageLite` instead of `Message`. Since +`MessageLite` has no `GetReflection()` function, this means no reflection is +available, so we can avoid taking the code size hit. + +### upb + +upb does not have the `Message` vs. `MessageLite` bifurcation. There is only one +kind of message type `upb_Message`, which means there is no need to configure in +a `.proto` file which messages will need reflection and which will not. +Every message has the *option* to link in reflection from a separate `foo.upbdefs.o` +file, without needing to change the message itself in any way. + +upb does not provide the equivalent of `Message::GetReflection()`: there is no +facility for retrieving the reflection of a message whose type is not known statically. +It would be possible to layer such a facility on top of the upb core, though this +would probably require some kind of code generation. + +### Comparison + +* Most messages in C++ will not bother to declare themselves as "lite". This means + that many C++ messages will link in reflection even when it is never used, bloating + binaries unnecessarily. +* `optimize_for = LITE_RUNTIME` is difficult to use in practice, because it prevents + any non-lite protos from `import`ing that file. +* Forcing all protos to lite via a codegen parameter (for example, when building for + mobile) is more practical than `optimize_for = LITE_RUNTIME`. But this will break + the compile for any code that tries to upcast to `Message`, or tries to use a + non-lite method. +* The one major advantage of the C++ model is that it can support `msg.DebugString()` + on a type-erased proto. For upb you have to explicitly pass the `upb_MessageDef*` + separately if you want to perform an operation like printing a proto to text format. + +## Explicit Registration vs. Globals + +TODO diff --git a/python/BUILD b/python/BUILD index 8126f52200..cb48bc6109 100644 --- a/python/BUILD +++ b/python/BUILD @@ -23,23 +23,14 @@ # (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS # SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -load( - "//bazel:build_defs.bzl", - "UPB_DEFAULT_COPTS", -) -load( - "//bazel:py_proto_library.bzl", - "py_proto_library", -) -load( - "@rules_python//python:packaging.bzl", - "py_wheel", -) +load("//bazel:py_proto_library.bzl", "py_proto_library") +load("//bazel:py_extension.bzl", "py_extension") # copybara:strip_for_google3 +load("@rules_python//python:packaging.bzl", "py_wheel") licenses(["notice"]) -cc_binary( - name = "message", +py_extension( + name = "_message", srcs = [ "convert.c", "convert.h", @@ -61,45 +52,38 @@ cc_binary( "repeated.c", "repeated.h", ], - copts = UPB_DEFAULT_COPTS + [ - # The Python API requires patterns that are ISO C incompatible, like - # casts between function pointers and object pointers. - "-Wno-pedantic", - ], - # We use a linker script to hide all symbols except the entry point for - # the module. - linkopts = select({ - "@platforms//os:linux": ["-Wl,--version-script,$(location :version_script.lds)"], - "@platforms//os:macos": [ - "-Wl,-exported_symbol", - "-Wl,_PyInit__message", - ], - }), - linkshared = True, - linkstatic = True, deps = [ - ":version_script.lds", "//:descriptor_upb_proto_reflection", "//:reflection", + "//:table", "//:textformat", "//:upb", "//upb/util:compare", "//upb/util:def_to_proto", "//upb/util:required_fields", - "@system_python//:python_headers", ], ) -cc_binary( - name = "api_implementation", +py_extension( + name = "_api_implementation", + srcs = ["api_implementation.c"], +) + +# copybara:strip_for_google3_begin + +py_test( + name = "minimal_test", srcs = [ - "api_implementation.c", + "minimal_test.py", + ], + imports = ["."], + legacy_create_init = False, + deps = [ + "//python:message_ext", + "@com_google_protobuf//:python_common_test_protos", + "@com_google_protobuf//:python_specific_test_protos", + "@com_google_protobuf//:python_srcs", ], - linkshared = True, - linkstatic = True, - # Enable once linker script is available. - #copts = ["-fvisibility=hidden"], - deps = ["@system_python//:python_headers"], ) # Copy the extensions into the location recognized by Python. @@ -108,14 +92,14 @@ EXT_SUFFIX = ".abi3.so" genrule( name = "copy_message", - srcs = [":message"], + srcs = [":_message"], outs = ["google/protobuf/pyext/_message" + EXT_SUFFIX], cmd = "cp $< $@", ) genrule( name = "copy_api_implementation", - srcs = [":api_implementation"], + srcs = [":_api_implementation"], outs = ["google/protobuf/internal/_api_implementation" + EXT_SUFFIX], cmd = "cp $< $@", visibility = ["//python:__subpackages__"], @@ -136,20 +120,6 @@ py_library( visibility = ["//python:__subpackages__"], ) -py_test( - name = "minimal_test", - srcs = [ - "minimal_test.py", - ], - imports = ["."], - legacy_create_init = False, - deps = [ - "//python:message_ext", - "@com_google_protobuf//:python_common_test_protos", - "@com_google_protobuf//:python_specific_test_protos", - "@com_google_protobuf//:python_srcs", - ], -) py_proto_library( name = "well_known_proto_pb2", @@ -189,3 +159,5 @@ py_wheel( "@com_google_protobuf//:python_srcs", ], ) + +# copybara:strip_end diff --git a/python/descriptor.c b/python/descriptor.c index 815157c7a9..543c9d7a8c 100644 --- a/python/descriptor.c +++ b/python/descriptor.c @@ -107,7 +107,7 @@ static PyObject* PyUpb_DescriptorBase_GetOptions(PyUpb_DescriptorBase* self, if (!self->options) { // Load descriptors protos if they are not loaded already. We have to do // this lazily, otherwise, it would lead to circular imports. - PyObject* mod = PyImport_ImportModule("google.protobuf.descriptor_pb2"); + PyObject* mod = PyImport_ImportModule(PYUPB_DESCRIPTOR_MODULE); Py_DECREF(mod); // Find the correct options message. @@ -323,7 +323,7 @@ static PyObject* PyUpb_Descriptor_GetOptions(PyObject* _self, PyObject* args) { return PyUpb_DescriptorBase_GetOptions( self, upb_MessageDef_Options(self->def), &google_protobuf_MessageOptions_msginit, - "google.protobuf.MessageOptions"); + PYUPB_DESCRIPTOR_PROTO_PACKAGE ".MessageOptions"); } static PyObject* PyUpb_Descriptor_CopyToProto(PyObject* _self, @@ -331,7 +331,7 @@ static PyObject* PyUpb_Descriptor_CopyToProto(PyObject* _self, return PyUpb_DescriptorBase_CopyToProto( _self, (PyUpb_ToProto_Func*)&upb_MessageDef_ToProto, &google_protobuf_DescriptorProto_msginit, - "google.protobuf.DescriptorProto", py_proto); + PYUPB_DESCRIPTOR_PROTO_PACKAGE ".DescriptorProto", py_proto); } static PyObject* PyUpb_Descriptor_EnumValueName(PyObject* _self, @@ -392,7 +392,7 @@ static PyObject* PyUpb_Descriptor_GetFieldsByNumber(PyObject* _self, (void*)&upb_MessageDef_Field, (void*)&PyUpb_FieldDescriptor_Get, }, - (void*)&upb_MessageDef_FindFieldByNumberWithSize, + (void*)&upb_MessageDef_FindFieldByNumber, (void*)&upb_FieldDef_Number, }; PyUpb_DescriptorBase* self = (void*)_self; @@ -733,7 +733,8 @@ static PyObject* PyUpb_EnumDescriptor_GetOptions(PyObject* _self, PyUpb_DescriptorBase* self = (void*)_self; return PyUpb_DescriptorBase_GetOptions(self, upb_EnumDef_Options(self->def), &google_protobuf_EnumOptions_msginit, - "google.protobuf.EnumOptions"); + PYUPB_DESCRIPTOR_PROTO_PACKAGE + ".EnumOptions"); } static PyObject* PyUpb_EnumDescriptor_CopyToProto(PyObject* _self, @@ -741,7 +742,7 @@ static PyObject* PyUpb_EnumDescriptor_CopyToProto(PyObject* _self, return PyUpb_DescriptorBase_CopyToProto( _self, (PyUpb_ToProto_Func*)&upb_EnumDef_ToProto, &google_protobuf_EnumDescriptorProto_msginit, - "google.protobuf.EnumDescriptorProto", py_proto); + PYUPB_DESCRIPTOR_PROTO_PACKAGE ".EnumDescriptorProto", py_proto); } static PyGetSetDef PyUpb_EnumDescriptor_Getters[] = { @@ -822,7 +823,7 @@ static PyObject* PyUpb_EnumValueDescriptor_GetOptions(PyObject* _self, return PyUpb_DescriptorBase_GetOptions( self, upb_EnumValueDef_Options(self->def), &google_protobuf_EnumValueOptions_msginit, - "google.protobuf.EnumValueOptions"); + PYUPB_DESCRIPTOR_PROTO_PACKAGE ".EnumValueOptions"); } static PyGetSetDef PyUpb_EnumValueDescriptor_Getters[] = { @@ -1015,7 +1016,8 @@ static PyObject* PyUpb_FieldDescriptor_GetOptions(PyObject* _self, PyUpb_DescriptorBase* self = (void*)_self; return PyUpb_DescriptorBase_GetOptions(self, upb_FieldDef_Options(self->def), &google_protobuf_FieldOptions_msginit, - "google.protobuf.FieldOptions"); + PYUPB_DESCRIPTOR_PROTO_PACKAGE + ".FieldOptions"); } static PyGetSetDef PyUpb_FieldDescriptor_Getters[] = { @@ -1254,7 +1256,8 @@ static PyObject* PyUpb_FileDescriptor_GetOptions(PyObject* _self, PyUpb_DescriptorBase* self = (void*)_self; return PyUpb_DescriptorBase_GetOptions(self, upb_FileDef_Options(self->def), &google_protobuf_FileOptions_msginit, - "google.protobuf.FileOptions"); + PYUPB_DESCRIPTOR_PROTO_PACKAGE + ".FileOptions"); } static PyObject* PyUpb_FileDescriptor_CopyToProto(PyObject* _self, @@ -1262,7 +1265,7 @@ static PyObject* PyUpb_FileDescriptor_CopyToProto(PyObject* _self, return PyUpb_DescriptorBase_CopyToProto( _self, (PyUpb_ToProto_Func*)&upb_FileDef_ToProto, &google_protobuf_FileDescriptorProto_msginit, - "google.protobuf.FileDescriptorProto", py_proto); + PYUPB_DESCRIPTOR_PROTO_PACKAGE ".FileDescriptorProto", py_proto); } static PyGetSetDef PyUpb_FileDescriptor_Getters[] = { @@ -1338,6 +1341,12 @@ static PyObject* PyUpb_MethodDescriptor_GetFullName(PyObject* self, return PyUnicode_FromString(upb_MethodDef_FullName(m)); } +static PyObject* PyUpb_MethodDescriptor_GetIndex(PyObject* self, + void* closure) { + const upb_MethodDef* oneof = PyUpb_MethodDescriptor_GetDef(self); + return PyLong_FromLong(upb_MethodDef_Index(oneof)); +} + static PyObject* PyUpb_MethodDescriptor_GetContainingService(PyObject* self, void* closure) { const upb_MethodDef* m = PyUpb_MethodDescriptor_GetDef(self); @@ -1361,7 +1370,8 @@ static PyObject* PyUpb_MethodDescriptor_GetOptions(PyObject* _self, PyUpb_DescriptorBase* self = (void*)_self; return PyUpb_DescriptorBase_GetOptions(self, upb_MethodDef_Options(self->def), &google_protobuf_MethodOptions_msginit, - "google.protobuf.MethodOptions"); + PYUPB_DESCRIPTOR_PROTO_PACKAGE + ".MethodOptions"); } static PyObject* PyUpb_MethodDescriptor_CopyToProto(PyObject* _self, @@ -1369,14 +1379,13 @@ static PyObject* PyUpb_MethodDescriptor_CopyToProto(PyObject* _self, return PyUpb_DescriptorBase_CopyToProto( _self, (PyUpb_ToProto_Func*)&upb_MethodDef_ToProto, &google_protobuf_MethodDescriptorProto_msginit, - "google.protobuf.MethodDescriptorProto", py_proto); + PYUPB_DESCRIPTOR_PROTO_PACKAGE ".MethodDescriptorProto", py_proto); } static PyGetSetDef PyUpb_MethodDescriptor_Getters[] = { {"name", PyUpb_MethodDescriptor_GetName, NULL, "Name", NULL}, {"full_name", PyUpb_MethodDescriptor_GetFullName, NULL, "Full name", NULL}, - // TODO(https://github.com/protocolbuffers/upb/issues/459) - //{ "index", PyUpb_MethodDescriptor_GetIndex, NULL, "Index", NULL}, + {"index", PyUpb_MethodDescriptor_GetIndex, NULL, "Index", NULL}, {"containing_service", PyUpb_MethodDescriptor_GetContainingService, NULL, "Containing service", NULL}, {"input_type", PyUpb_MethodDescriptor_GetInputType, NULL, "Input type", @@ -1466,7 +1475,8 @@ static PyObject* PyUpb_OneofDescriptor_GetOptions(PyObject* _self, PyUpb_DescriptorBase* self = (void*)_self; return PyUpb_DescriptorBase_GetOptions(self, upb_OneofDef_Options(self->def), &google_protobuf_OneofOptions_msginit, - "google.protobuf.OneofOptions"); + PYUPB_DESCRIPTOR_PROTO_PACKAGE + ".OneofOptions"); } static PyGetSetDef PyUpb_OneofDescriptor_Getters[] = { @@ -1567,7 +1577,7 @@ static PyObject* PyUpb_ServiceDescriptor_GetOptions(PyObject* _self, return PyUpb_DescriptorBase_GetOptions( self, upb_ServiceDef_Options(self->def), &google_protobuf_ServiceOptions_msginit, - "google.protobuf.ServiceOptions"); + PYUPB_DESCRIPTOR_PROTO_PACKAGE ".ServiceOptions"); } static PyObject* PyUpb_ServiceDescriptor_CopyToProto(PyObject* _self, @@ -1575,7 +1585,7 @@ static PyObject* PyUpb_ServiceDescriptor_CopyToProto(PyObject* _self, return PyUpb_DescriptorBase_CopyToProto( _self, (PyUpb_ToProto_Func*)&upb_ServiceDef_ToProto, &google_protobuf_ServiceDescriptorProto_msginit, - "google.protobuf.ServiceDescriptorProto", py_proto); + PYUPB_DESCRIPTOR_PROTO_PACKAGE ".ServiceDescriptorProto", py_proto); } static PyObject* PyUpb_ServiceDescriptor_FindMethodByName(PyObject* _self, diff --git a/python/descriptor_pool.c b/python/descriptor_pool.c index 06011d702e..5a61b4e6c7 100644 --- a/python/descriptor_pool.c +++ b/python/descriptor_pool.c @@ -251,7 +251,8 @@ static PyObject* PyUpb_DescriptorPool_DoAdd(PyObject* _self, PyObject* file_desc) { if (!PyUpb_CMessage_Verify(file_desc)) return NULL; const upb_MessageDef* m = PyUpb_CMessage_GetMsgdef(file_desc); - const char* file_proto_name = "google.protobuf.FileDescriptorProto"; + const char* file_proto_name = + PYUPB_DESCRIPTOR_PROTO_PACKAGE ".FileDescriptorProto"; if (strcmp(upb_MessageDef_FullName(m), file_proto_name) != 0) { return PyErr_Format(PyExc_TypeError, "Can only add FileDescriptorProto"); } diff --git a/python/extension_dict.c b/python/extension_dict.c index 2a95bdf733..9fefcb06d9 100644 --- a/python/extension_dict.c +++ b/python/extension_dict.c @@ -88,6 +88,23 @@ static void PyUpb_ExtensionDict_Dealloc(PyUpb_ExtensionDict* self) { PyUpb_Dealloc(self); } +static PyObject* PyUpb_ExtensionDict_RichCompare(PyObject* _self, + PyObject* _other, int opid) { + // Only equality comparisons are implemented. + if (opid != Py_EQ && opid != Py_NE) { + Py_INCREF(Py_NotImplemented); + return Py_NotImplemented; + } + PyUpb_ExtensionDict* self = (PyUpb_ExtensionDict*)_self; + bool equals = false; + if (PyObject_TypeCheck(_other, Py_TYPE(_self))) { + PyUpb_ExtensionDict* other = (PyUpb_ExtensionDict*)_other; + equals = self->msg == other->msg; + } + bool ret = opid == Py_EQ ? equals : !equals; + return PyBool_FromLong(ret); +} + static int PyUpb_ExtensionDict_Contains(PyObject* _self, PyObject* key) { PyUpb_ExtensionDict* self = (PyUpb_ExtensionDict*)_self; const upb_FieldDef* f = PyUpb_CMessage_GetExtensionDef(self->msg, key); @@ -143,7 +160,7 @@ static PyType_Slot PyUpb_ExtensionDict_Slots[] = { {Py_tp_methods, PyUpb_ExtensionDict_Methods}, //{Py_tp_getset, PyUpb_ExtensionDict_Getters}, //{Py_tp_hash, PyObject_HashNotImplemented}, - //{Py_tp_richcompare, PyUpb_ExtensionDict_RichCompare}, + {Py_tp_richcompare, PyUpb_ExtensionDict_RichCompare}, {Py_tp_iter, PyUpb_ExtensionIterator_New}, {Py_sq_contains, PyUpb_ExtensionDict_Contains}, {Py_sq_length, PyUpb_ExtensionDict_Length}, diff --git a/python/message.c b/python/message.c index 11b3a0828e..384c5b31d1 100644 --- a/python/message.c +++ b/python/message.c @@ -1482,6 +1482,7 @@ static PyObject* PyUpb_CMessage_GetExtensionDict(PyObject* _self, void* closure) { PyUpb_CMessage* self = (void*)_self; if (self->ext_dict) { + Py_INCREF(self->ext_dict); return self->ext_dict; } @@ -1832,7 +1833,8 @@ bool PyUpb_InitMessage(PyObject* m) { state->listfields_item_key = PyObject_GetAttrString( (PyObject*)state->cmessage_type, "_ListFieldsItemKey"); - PyObject* mod = PyImport_ImportModule("google.protobuf.message"); + PyObject* mod = + PyImport_ImportModule(PYUPB_PROTOBUF_PUBLIC_PACKAGE ".message"); if (mod == NULL) return false; state->encode_error_class = PyObject_GetAttrString(mod, "EncodeError"); @@ -1840,8 +1842,8 @@ bool PyUpb_InitMessage(PyObject* m) { state->message_class = PyObject_GetAttrString(mod, "Message"); Py_DECREF(mod); - PyObject* enum_type_wrapper = - PyImport_ImportModule("google.protobuf.internal.enum_type_wrapper"); + PyObject* enum_type_wrapper = PyImport_ImportModule( + PYUPB_PROTOBUF_INTERNAL_PACKAGE ".enum_type_wrapper"); if (enum_type_wrapper == NULL) return false; state->enum_type_wrapper_class = diff --git a/python/pb_unit_tests/BUILD b/python/pb_unit_tests/BUILD index bcea96ecf1..ecb1d7b900 100644 --- a/python/pb_unit_tests/BUILD +++ b/python/pb_unit_tests/BUILD @@ -23,231 +23,40 @@ # (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS # SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +load("//bazel:pyproto_test_wrapper.bzl", "pyproto_test_wrapper") + licenses(["notice"]) -py_test( - name = "descriptor_database_test", - srcs = ["descriptor_database_test_wrapper.py"], - legacy_create_init = False, - main = "descriptor_database_test_wrapper.py", - deps = [ - "//python:message_ext", - "@com_google_protobuf//:python_common_test_protos", - "@com_google_protobuf//:python_specific_test_protos", - "@com_google_protobuf//:python_srcs", - ], -) - -py_test( - name = "descriptor_pool_test", - srcs = ["descriptor_pool_test_wrapper.py"], - legacy_create_init = False, - main = "descriptor_pool_test_wrapper.py", - deps = [ - "//python:message_ext", - "@com_google_protobuf//:python_common_test_protos", - "@com_google_protobuf//:python_specific_test_protos", - "@com_google_protobuf//:python_srcs", - ], -) - -py_test( - name = "descriptor_test", - srcs = ["descriptor_test_wrapper.py"], - legacy_create_init = False, - main = "descriptor_test_wrapper.py", - deps = [ - "//python:message_ext", - "@com_google_protobuf//:python_common_test_protos", - "@com_google_protobuf//:python_specific_test_protos", - "@com_google_protobuf//:python_srcs", - ], -) - -py_test( - name = "generator_test", - srcs = ["generator_test_wrapper.py"], - legacy_create_init = False, - main = "generator_test_wrapper.py", - deps = [ - "//python:message_ext", - "@com_google_protobuf//:python_common_test_protos", - "@com_google_protobuf//:python_specific_test_protos", - "@com_google_protobuf//:python_srcs", - ], -) - -py_test( - name = "json_format_test", - srcs = ["json_format_test_wrapper.py"], - legacy_create_init = False, - main = "json_format_test_wrapper.py", - deps = [ - "//python:message_ext", - "@com_google_protobuf//:python_common_test_protos", - "@com_google_protobuf//:python_specific_test_protos", - "@com_google_protobuf//:python_srcs", - ], -) - -py_test( - name = "keywords_test", - srcs = ["keywords_test_wrapper.py"], - legacy_create_init = False, - main = "keywords_test_wrapper.py", - deps = [ - "//python:message_ext", - "@com_google_protobuf//:python_common_test_protos", - "@com_google_protobuf//:python_specific_test_protos", - "@com_google_protobuf//:python_srcs", - ], -) - -py_test( - name = "message_factory_test", - srcs = ["message_factory_test_wrapper.py"], - legacy_create_init = False, - main = "message_factory_test_wrapper.py", - deps = [ - "//python:message_ext", - "@com_google_protobuf//:python_common_test_protos", - "@com_google_protobuf//:python_specific_test_protos", - "@com_google_protobuf//:python_srcs", - ], -) - -py_test( - name = "message_test", - srcs = ["message_test_wrapper.py"], - data = [ - "@com_google_protobuf//:testdata", - ], - legacy_create_init = False, - main = "message_test_wrapper.py", - deps = [ - "//python:message_ext", - "@com_google_protobuf//:python_common_test_protos", - "@com_google_protobuf//:python_specific_test_protos", - "@com_google_protobuf//:python_srcs", - ], -) - -py_test( - name = "proto_builder_test", - srcs = ["proto_builder_test_wrapper.py"], - legacy_create_init = False, - main = "proto_builder_test_wrapper.py", - deps = [ - "//python:message_ext", - "@com_google_protobuf//:python_common_test_protos", - "@com_google_protobuf//:python_specific_test_protos", - "@com_google_protobuf//:python_srcs", - ], -) - -py_test( - name = "reflection_test", - srcs = ["reflection_test_wrapper.py"], - legacy_create_init = False, - main = "reflection_test_wrapper.py", - deps = [ - "//python:message_ext", - "@com_google_protobuf//:python_common_test_protos", - "@com_google_protobuf//:python_specific_test_protos", - "@com_google_protobuf//:python_srcs", - ], -) - -py_test( - name = "service_reflection_test", - srcs = ["service_reflection_test_wrapper.py"], - legacy_create_init = False, - main = "service_reflection_test_wrapper.py", - deps = [ - "//python:message_ext", - "@com_google_protobuf//:python_common_test_protos", - "@com_google_protobuf//:python_specific_test_protos", - "@com_google_protobuf//:python_srcs", - ], -) - -py_test( - name = "symbol_database_test", - srcs = ["symbol_database_test_wrapper.py"], - legacy_create_init = False, - main = "symbol_database_test_wrapper.py", - deps = [ - "//python:message_ext", - "@com_google_protobuf//:python_common_test_protos", - "@com_google_protobuf//:python_specific_test_protos", - "@com_google_protobuf//:python_srcs", - ], -) - -py_test( - name = "text_encoding_test", - srcs = ["text_encoding_test_wrapper.py"], - legacy_create_init = False, - main = "text_encoding_test_wrapper.py", - deps = [ - "//python:message_ext", - "@com_google_protobuf//:python_common_test_protos", - "@com_google_protobuf//:python_specific_test_protos", - "@com_google_protobuf//:python_srcs", - ], -) - -py_test( - name = "text_format_test", - srcs = ["text_format_test_wrapper.py"], - data = [ - "@com_google_protobuf//:testdata", - ], - legacy_create_init = False, - main = "text_format_test_wrapper.py", - deps = [ - "//python:message_ext", - "@com_google_protobuf//:python_common_test_protos", - "@com_google_protobuf//:python_specific_test_protos", - "@com_google_protobuf//:python_srcs", - ], -) - -py_test( - name = "unknown_fields_test", - srcs = ["unknown_fields_test_wrapper.py"], - legacy_create_init = False, - main = "unknown_fields_test_wrapper.py", - deps = [ - "//python:message_ext", - "@com_google_protobuf//:python_common_test_protos", - "@com_google_protobuf//:python_specific_test_protos", - "@com_google_protobuf//:python_srcs", - ], -) - -py_test( - name = "well_known_types_test", - srcs = ["well_known_types_test_wrapper.py"], - legacy_create_init = False, - main = "well_known_types_test_wrapper.py", - deps = [ - "//python:message_ext", - "@com_google_protobuf//:python_common_test_protos", - "@com_google_protobuf//:python_specific_test_protos", - "@com_google_protobuf//:python_srcs", - ], -) - -py_test( - name = "wire_format_test", - srcs = ["wire_format_test_wrapper.py"], - legacy_create_init = False, - main = "wire_format_test_wrapper.py", - deps = [ - "//python:message_ext", - "@com_google_protobuf//:python_common_test_protos", - "@com_google_protobuf//:python_specific_test_protos", - "@com_google_protobuf//:python_srcs", - ], -) +pyproto_test_wrapper(name = "descriptor_database_test") + +pyproto_test_wrapper(name = "descriptor_pool_test") + +pyproto_test_wrapper(name = "descriptor_test") + +pyproto_test_wrapper(name = "generator_test") # copybara:strip_for_google3 + +pyproto_test_wrapper(name = "json_format_test") + +pyproto_test_wrapper(name = "keywords_test") + +pyproto_test_wrapper(name = "message_factory_test") + +pyproto_test_wrapper(name = "message_test") + +pyproto_test_wrapper(name = "proto_builder_test") + +pyproto_test_wrapper(name = "reflection_test") + +pyproto_test_wrapper(name = "service_reflection_test") + +pyproto_test_wrapper(name = "symbol_database_test") + +pyproto_test_wrapper(name = "text_encoding_test") + +pyproto_test_wrapper(name = "text_format_test") + +pyproto_test_wrapper(name = "unknown_fields_test") + +pyproto_test_wrapper(name = "well_known_types_test") + +pyproto_test_wrapper(name = "wire_format_test") diff --git a/python/pb_unit_tests/descriptor_pool_test_wrapper.py b/python/pb_unit_tests/descriptor_pool_test_wrapper.py index a4dd686c71..800ff4826c 100644 --- a/python/pb_unit_tests/descriptor_pool_test_wrapper.py +++ b/python/pb_unit_tests/descriptor_pool_test_wrapper.py @@ -27,6 +27,10 @@ from google.protobuf.internal import descriptor_pool_test import unittest import copy +# copybara:insert_for_google3_begin +# from google3.testing.pybase import googletest +# copybara:insert_end + # This is testing that certain methods unconditionally throw TypeError. # In the new extension we simply don't define them at all. descriptor_pool_test.AddDescriptorTest.testAddTypeError.__unittest_expecting_failure__ = True @@ -34,4 +38,8 @@ descriptor_pool_test.AddDescriptorTest.testAddTypeError.__unittest_expecting_fai descriptor_pool_test.SecondaryDescriptorFromDescriptorDB.testErrorCollector.__unittest_expecting_failure__ = True if __name__ == '__main__': + # copybara:strip_for_google3_begin unittest.main(module=descriptor_pool_test, verbosity=2) + # copybara:replace_for_google3_begin + # googletest.main() + # copybara:replace_for_google3_end diff --git a/python/pb_unit_tests/text_format_test_wrapper.py b/python/pb_unit_tests/text_format_test_wrapper.py index 151196458a..1183a5b8be 100644 --- a/python/pb_unit_tests/text_format_test_wrapper.py +++ b/python/pb_unit_tests/text_format_test_wrapper.py @@ -25,14 +25,16 @@ from google.protobuf.internal import text_format_test import unittest -from google.protobuf.internal import _parameterized - -sep = _parameterized._SEPARATOR # These rely on the UnknownFields accessor, which we are trying to deprecate. text_format_test.OnlyWorksWithProto2RightNowTests.testPrintUnknownFields.__unittest_expecting_failure__ = True + +# copybara:strip_for_google3_begin +from google.protobuf.internal import _parameterized # copybara:strip_for_google3 +sep = _parameterized._SEPARATOR getattr(text_format_test.TextFormatMessageToStringTests, "testPrintUnknownFieldsEmbeddedMessageInBytes" + sep + "0").__unittest_expecting_failure__ = True getattr(text_format_test.TextFormatMessageToStringTests, "testPrintUnknownFieldsEmbeddedMessageInBytes" + sep + "1").__unittest_expecting_failure__ = True +# copybara:strip_end if __name__ == '__main__': unittest.main(module=text_format_test, verbosity=2) diff --git a/python/protobuf.c b/python/protobuf.c index 35600ae788..ae6b6d5c29 100644 --- a/python/protobuf.c +++ b/python/protobuf.c @@ -94,8 +94,8 @@ PyUpb_ModuleState* PyUpb_ModuleState_Get(void) { PyObject* PyUpb_GetWktBases(PyUpb_ModuleState* state) { if (!state->wkt_bases) { - PyObject* wkt_module = - PyImport_ImportModule("google.protobuf.internal.well_known_types"); + PyObject* wkt_module = PyImport_ImportModule(PYUPB_PROTOBUF_INTERNAL_PACKAGE + ".well_known_types"); if (wkt_module == NULL) { return false; diff --git a/python/protobuf.h b/python/protobuf.h index a45632b6d5..d09ab0e8db 100644 --- a/python/protobuf.h +++ b/python/protobuf.h @@ -34,7 +34,19 @@ #include "python/python.h" #include "upb/table_internal.h" +// copybara:strip_for_google3_begin +#define PYUPB_PROTOBUF_PUBLIC_PACKAGE "google.protobuf" +#define PYUPB_PROTOBUF_INTERNAL_PACKAGE "google.protobuf.internal" +#define PYUPB_DESCRIPTOR_PROTO_PACKAGE "google.protobuf" +#define PYUPB_DESCRIPTOR_MODULE "google.protobuf.descriptor_pb2" #define PYUPB_MODULE_NAME "google.protobuf.pyext._message" +// copybara:replace_for_google3_begin +// #define PYUPB_PROTOBUF_PUBLIC_PACKAGE "google3.net.proto2.python.public" +// #define PYUPB_PROTOBUF_INTERNAL_PACKAGE "google3.net.proto2.python.internal" +// #define PYUPB_DESCRIPTOR_PROTO_PACKAGE "proto2" +// #define PYUPB_DESCRIPTOR_MODULE "google3.net.proto2.proto.descriptor_pb2" +// #define PYUPB_MODULE_NAME "google3.third_party.upb.python._message" +// copybara:replace_for_google3_end #define PYUPB_RETURN_OOM return PyErr_SetNone(PyExc_MemoryError), NULL @@ -193,7 +205,7 @@ PyObject* PyUpb_Forbidden_New(PyObject* cls, PyObject* args, PyObject* kwds); static inline void PyUpb_Dealloc(void* self) { PyTypeObject* tp = Py_TYPE(self); assert(PyType_GetFlags(tp) & Py_TPFLAGS_HEAPTYPE); - freefunc tp_free = PyType_GetSlot(tp, Py_tp_free); + freefunc tp_free = (freefunc)PyType_GetSlot(tp, Py_tp_free); tp_free(self); Py_DECREF(tp); } diff --git a/rename.sed b/rename.sed index 815c62b497..6447dfdbb7 100644 --- a/rename.sed +++ b/rename.sed @@ -71,7 +71,7 @@ s/upb_msgdef_field/upb_MessageDef_Field/g; s/upb_msgdef_oneof/upb_MessageDef_Oneof/g; s/upb_msgdef_ntooz/upb_MessageDef_FindOneofByName/g; s/upb_msgdef_ntofz/upb_MessageDef_FindFieldByName/g; -s/upb_msgdef_itof/upb_MessageDef_FindFieldByNumberWithSize/g; +s/upb_msgdef_itof/upb_MessageDef_FindFieldByNumber/g; s/upb_msgdef_ntof/upb_MessageDef_FindFieldByNameWithSize/g; s/upb_msgdef_ntoo/upb_MessageDef_FindOneofByNameWithSize/g; s/upb_msgdef_layout/upb_MessageDef_MiniTable/g; @@ -89,6 +89,7 @@ s/upb_msgdef_lookupjsonnamez/upb_MessageDef_FindByNameName/g; s/upb_msgdef_lookupjsonname/upb_MessageDef_FindByJsonNameWithSize/g; s/upb_msgdef/upb_MessageDef/g; s/WithSizez//g; +s/upb_MessageDef_FindFieldByNumberWithSize/upb_MessageDef_FindFieldByNumber/g; s/upb_extrange_options/upb_ExtensionRange_Options/g; s/upb_extrange_hasoptions/upb_ExtensionRange_HasOptions/g; diff --git a/upb/bindings/lua/def.c b/upb/bindings/lua/def.c index 6716b26a72..132116719a 100644 --- a/upb/bindings/lua/def.c +++ b/upb/bindings/lua/def.c @@ -368,7 +368,7 @@ static int lupb_MessageDef_Field(lua_State* L) { switch (lua_type(L, 2)) { case LUA_TNUMBER: - f = upb_MessageDef_FindFieldByNumberWithSize(m, lua_tointeger(L, 2)); + f = upb_MessageDef_FindFieldByNumber(m, lua_tointeger(L, 2)); break; case LUA_TSTRING: f = upb_MessageDef_FindFieldByName(m, lua_tostring(L, 2)); diff --git a/upb/bindings/lua/msg.c b/upb/bindings/lua/msg.c index dd846570d9..a314f4dbee 100644 --- a/upb/bindings/lua/msg.c +++ b/upb/bindings/lua/msg.c @@ -733,10 +733,10 @@ static void lupb_Message_Newwrapper(lua_State* L, int narg, upb_MutableMessageValue val) { if (upb_FieldDef_IsMap(f)) { const upb_MessageDef* entry = upb_FieldDef_MessageSubDef(f); - const upb_FieldDef* key_f = upb_MessageDef_FindFieldByNumberWithSize( - entry, kUpb_MapEntry_KeyFieldNumber); - const upb_FieldDef* val_f = upb_MessageDef_FindFieldByNumberWithSize( - entry, kUpb_MapEntry_ValueFieldNumber); + const upb_FieldDef* key_f = + upb_MessageDef_FindFieldByNumber(entry, kUpb_MapEntry_KeyFieldNumber); + const upb_FieldDef* val_f = + upb_MessageDef_FindFieldByNumber(entry, kUpb_MapEntry_ValueFieldNumber); lupb_map* lmap = lupb_Message_Newud(L, narg, sizeof(*lmap), LUPB_MAP, val_f); lmap->key_type = upb_FieldDef_CType(key_f); @@ -849,10 +849,10 @@ static int lupb_Message_Newindex(lua_State* L) { if (upb_FieldDef_IsMap(f)) { lupb_map* lmap = lupb_map_check(L, 3); const upb_MessageDef* entry = upb_FieldDef_MessageSubDef(f); - const upb_FieldDef* key_f = upb_MessageDef_FindFieldByNumberWithSize( - entry, kUpb_MapEntry_KeyFieldNumber); - const upb_FieldDef* val_f = upb_MessageDef_FindFieldByNumberWithSize( - entry, kUpb_MapEntry_ValueFieldNumber); + const upb_FieldDef* key_f = + upb_MessageDef_FindFieldByNumber(entry, kUpb_MapEntry_KeyFieldNumber); + const upb_FieldDef* val_f = + upb_MessageDef_FindFieldByNumber(entry, kUpb_MapEntry_ValueFieldNumber); upb_CType key_type = upb_FieldDef_CType(key_f); upb_CType value_type = upb_FieldDef_CType(val_f); luaL_argcheck(L, lmap->key_type == key_type, 3, "key type mismatch"); diff --git a/upb/def.c b/upb/def.c index c0d28e663b..c83f72858c 100644 --- a/upb/def.c +++ b/upb/def.c @@ -86,6 +86,9 @@ struct upb_FieldDef { bool has_json_name_; upb_FieldType type_; upb_Label label_; +#if UINTPTR_MAX == 0xffffffff + uint32_t padding; // Increase size to a multiple of 8. +#endif }; struct upb_ExtensionRange { @@ -106,8 +109,8 @@ struct upb_MessageDef { upb_strtable ntof; /* All nested defs. - * MEM: We could save some space here by putting nested defs in a contigous - * region and calculating counts from offets or vice-versa. */ + * MEM: We could save some space here by putting nested defs in a contiguous + * region and calculating counts from offsets or vice-versa. */ const upb_FieldDef* fields; const upb_OneofDef* oneofs; const upb_ExtensionRange* ext_ranges; @@ -123,6 +126,9 @@ struct upb_MessageDef { int nested_ext_count; bool in_message_set; upb_WellKnown well_known_type; +#if UINTPTR_MAX == 0xffffffff + uint32_t padding; // Increase size to a multiple of 8. +#endif }; struct upb_EnumDef { @@ -136,6 +142,9 @@ struct upb_EnumDef { const upb_EnumValueDef* values; int value_count; int32_t defaultval; +#if UINTPTR_MAX == 0xffffffff + uint32_t padding; // Increase size to a multiple of 8. +#endif }; struct upb_EnumValueDef { @@ -154,6 +163,9 @@ struct upb_OneofDef { const upb_FieldDef** fields; upb_strtable ntof; upb_inttable itof; +#if UINTPTR_MAX == 0xffffffff + uint32_t padding; // Increase size to a multiple of 8. +#endif }; struct upb_FileDef { @@ -188,6 +200,7 @@ struct upb_MethodDef { const char* full_name; const upb_MessageDef* input_type; const upb_MessageDef* output_type; + int index; bool client_streaming; bool server_streaming; }; @@ -246,6 +259,20 @@ static const void* unpack_def(upb_value v, upb_deftype_t type) { } static upb_value pack_def(const void* ptr, upb_deftype_t type) { + // Our 3-bit pointer tagging requires all pointers to be multiples of 8. + // The arena will always yield 8-byte-aligned addresses, however we put + // the defs into arrays. For each element in the array to be 8-byte-aligned, + // the sizes of each def type must also be a multiple of 8. + // + // If any of these asserts fail, we need to add or remove padding on 32-bit + // machines (64-bit machines will have 8-byte alignment already due to + // pointers, which all of these structs have). + UPB_ASSERT((sizeof(upb_FieldDef) & UPB_DEFTYPE_MASK) == 0); + UPB_ASSERT((sizeof(upb_MessageDef) & UPB_DEFTYPE_MASK) == 0); + UPB_ASSERT((sizeof(upb_EnumDef) & UPB_DEFTYPE_MASK) == 0); + UPB_ASSERT((sizeof(upb_EnumValueDef) & UPB_DEFTYPE_MASK) == 0); + UPB_ASSERT((sizeof(upb_ServiceDef) & UPB_DEFTYPE_MASK) == 0); + UPB_ASSERT((sizeof(upb_OneofDef) & UPB_DEFTYPE_MASK) == 0); uintptr_t num = (uintptr_t)ptr; UPB_ASSERT((num & UPB_DEFTYPE_MASK) == 0); num |= type; @@ -691,8 +718,8 @@ upb_Syntax upb_MessageDef_Syntax(const upb_MessageDef* m) { return m->file->syntax; } -const upb_FieldDef* upb_MessageDef_FindFieldByNumberWithSize( - const upb_MessageDef* m, uint32_t i) { +const upb_FieldDef* upb_MessageDef_FindFieldByNumber(const upb_MessageDef* m, + uint32_t i) { upb_value val; return upb_inttable_lookup(&m->itof, i, &val) ? upb_value_getconstptr(val) : NULL; @@ -980,6 +1007,8 @@ const char* upb_MethodDef_FullName(const upb_MethodDef* m) { return m->full_name; } +int upb_MethodDef_Index(const upb_MethodDef* m) { return m->index; } + const char* upb_MethodDef_Name(const upb_MethodDef* m) { return shortdefname(m->full_name); } @@ -1365,8 +1394,8 @@ static void assign_layout_indices(const upb_MessageDef* m, upb_MiniTable* l, int n = upb_MessageDef_numfields(m); int dense_below = 0; for (i = 0; i < n; i++) { - upb_FieldDef* f = (upb_FieldDef*)upb_MessageDef_FindFieldByNumberWithSize( - m, fields[i].number); + upb_FieldDef* f = + (upb_FieldDef*)upb_MessageDef_FindFieldByNumber(m, fields[i].number); UPB_ASSERT(f); f->layout_index = i; if (i < UINT8_MAX && fields[i].number == i + 1 && @@ -1487,8 +1516,8 @@ static void make_layout(symtab_addctx* ctx, const upb_MessageDef* m) { if (upb_MessageDef_IsMapEntry(m)) { /* TODO(haberman): refactor this method so this special case is more * elegant. */ - const upb_FieldDef* key = upb_MessageDef_FindFieldByNumberWithSize(m, 1); - const upb_FieldDef* val = upb_MessageDef_FindFieldByNumberWithSize(m, 2); + const upb_FieldDef* key = upb_MessageDef_FindFieldByNumber(m, 1); + const upb_FieldDef* val = upb_MessageDef_FindFieldByNumber(m, 2); fields[0].number = 1; fields[1].number = 2; fields[0].mode = kUpb_FieldMode_Scalar; @@ -1555,7 +1584,7 @@ static void make_layout(symtab_addctx* ctx, const upb_MessageDef* m) { field->submsg_index = sublayout_count++; subs[field->submsg_index].submsg = upb_FieldDef_MessageSubDef(f)->layout; } else if (upb_FieldDef_CType(f) == kUpb_CType_Enum && - f->file->syntax == kUpb_Syntax_Proto2) { + f->sub.enumdef->file->syntax == kUpb_Syntax_Proto2) { field->submsg_index = sublayout_count++; subs[field->submsg_index].subenum = upb_FieldDef_EnumSubDef(f)->layout; UPB_ASSERT(subs[field->submsg_index].subenum); @@ -1601,6 +1630,10 @@ static void make_layout(symtab_addctx* ctx, const upb_MessageDef* m) { if (upb_OneofDef_IsSynthetic(o)) continue; + if (o->field_count == 0) { + symtab_errf(ctx, "Oneof must have at least one field (%s)", o->full_name); + } + /* Calculate field size: the max of all field sizes. */ for (int j = 0; j < o->field_count; j++) { const upb_FieldDef* f = o->fields[j]; @@ -1623,7 +1656,10 @@ static void make_layout(symtab_addctx* ctx, const upb_MessageDef* m) { l->size = UPB_ALIGN_UP(l->size, 8); /* Sort fields by number. */ - qsort(fields, upb_MessageDef_numfields(m), sizeof(*fields), field_number_cmp); + if (fields) { + qsort(fields, upb_MessageDef_numfields(m), sizeof(*fields), + field_number_cmp); + } assign_layout_indices(m, l, fields); } @@ -1784,8 +1820,8 @@ static const void* symtab_resolveany(symtab_addctx* ctx, } } else { /* Remove components from base until we find an entry or run out. */ - size_t baselen = strlen(base); - char* tmp = malloc(sym.size + strlen(base) + 1); + size_t baselen = base ? strlen(base) : 0; + char* tmp = malloc(sym.size + baselen + 1); while (1) { char* p = tmp; if (baselen) { @@ -1821,10 +1857,10 @@ static const void* symtab_resolve(symtab_addctx* ctx, const char* from_name_dbg, const void* ret = symtab_resolveany(ctx, from_name_dbg, base, sym, &found_type); if (ret && found_type != type) { - symtab_errf( - ctx, - "type mismatch when resolving %s: couldn't find name %s with type=%d", - from_name_dbg, sym.data, (int)type); + symtab_errf(ctx, + "type mismatch when resolving %s: couldn't find " + "name " UPB_STRINGVIEW_FORMAT " with type=%d", + from_name_dbg, UPB_STRINGVIEW_ARGS(sym), (int)type); } return ret; } @@ -1844,6 +1880,11 @@ static void create_oneofdef( SET_OPTIONS(o->opts, OneofDescriptorProto, OneofOptions, oneof_proto); + upb_value existing_v; + if (upb_strtable_lookup2(&m->ntof, name.data, name.size, &existing_v)) { + symtab_errf(ctx, "duplicate oneof name (%s)", o->full_name); + } + v = pack_def(o, UPB_DEFTYPE_ONEOF); CHK_OOM(upb_strtable_insert(&m->ntof, name.data, name.size, v, ctx->arena)); @@ -2153,7 +2194,7 @@ static void create_fielddef( f->file = ctx->file; /* Must happen prior to symtab_add(). */ if (!google_protobuf_FieldDescriptorProto_has_name(field_proto)) { - symtab_errf(ctx, "field has no name (%s)", upb_MessageDef_FullName(m)); + symtab_errf(ctx, "field has no name"); } name = google_protobuf_FieldDescriptorProto_name(field_proto); @@ -2370,6 +2411,7 @@ static void create_service( m->service = s; m->full_name = makefullname(ctx, s->full_name, name); + m->index = i; m->client_streaming = google_protobuf_MethodDescriptorProto_client_streaming(method_proto); m->server_streaming = @@ -2397,6 +2439,12 @@ static int count_bits_debug(uint64_t x) { return n; } +static int compare_int32(const void* a_ptr, const void* b_ptr) { + int32_t a = *(int32_t*)a_ptr; + int32_t b = *(int32_t*)b_ptr; + return a < b ? -1 : (a == b ? 0 : 1); +} + upb_MiniTable_Enum* create_enumlayout(symtab_addctx* ctx, const upb_EnumDef* e) { int n = 0; @@ -2405,7 +2453,7 @@ upb_MiniTable_Enum* create_enumlayout(symtab_addctx* ctx, for (int i = 0; i < e->value_count; i++) { uint32_t val = (uint32_t)e->values[i].number; if (val < 64) { - mask |= 1 << val; + mask |= 1ULL << val; } else { n++; } @@ -2427,6 +2475,17 @@ upb_MiniTable_Enum* create_enumlayout(symtab_addctx* ctx, UPB_ASSERT(p == values + n); } + // Enums can have duplicate values; we must sort+uniq them. + if (values) qsort(values, n, sizeof(*values), &compare_int32); + + int dst = 0; + for (int i = 0; i < n; dst++) { + int32_t val = values[i]; + while (i < n && values[i] == val) i++; // Skip duplicates. + values[dst] = val; + } + n = dst; + UPB_ASSERT(upb_inttable_count(&e->iton) == n + count_bits_debug(mask)); upb_MiniTable_Enum* layout = symtab_alloc(ctx, sizeof(*layout)); @@ -2510,7 +2569,7 @@ static void create_enumdef( if (ctx->layout) { UPB_ASSERT(ctx->enum_count < ctx->layout->enum_count); e->layout = ctx->layout->enums[ctx->enum_count++]; - UPB_ASSERT(n == + UPB_ASSERT(upb_inttable_count(&e->iton) == e->layout->value_count + count_bits_debug(e->layout->mask)); } else { e->layout = create_enumlayout(ctx, e); @@ -2781,15 +2840,10 @@ static void resolve_msgdef(symtab_addctx* ctx, upb_MessageDef* m) { resolve_fielddef(ctx, m->full_name, (upb_FieldDef*)&m->fields[i]); } - for (int i = 0; i < m->nested_ext_count; i++) { - resolve_fielddef(ctx, m->full_name, (upb_FieldDef*)&m->nested_exts[i]); - } - - if (!ctx->layout) make_layout(ctx, m); - m->in_message_set = false; - if (m->nested_ext_count == 1) { - const upb_FieldDef* ext = &m->nested_exts[0]; + for (int i = 0; i < m->nested_ext_count; i++) { + upb_FieldDef* ext = (upb_FieldDef*)&m->nested_exts[i]; + resolve_fielddef(ctx, m->full_name, ext); if (ext->type_ == kUpb_FieldType_Message && ext->label_ == kUpb_Label_Optional && ext->sub.msgdef == m && google_protobuf_MessageOptions_message_set_wire_format( @@ -2798,6 +2852,8 @@ static void resolve_msgdef(symtab_addctx* ctx, upb_MessageDef* m) { } } + if (!ctx->layout) make_layout(ctx, m); + for (int i = 0; i < m->nested_msg_count; i++) { resolve_msgdef(ctx, (upb_MessageDef*)&m->nested_msgs[i]); } @@ -2929,7 +2985,7 @@ static void build_filedef( int32_t* mutable_weak_deps = (int32_t*)file->weak_deps; for (i = 0; i < n; i++) { if (weak_deps[i] >= file->dep_count) { - symtab_errf(ctx, "public_dep %d is out of range", (int)public_deps[i]); + symtab_errf(ctx, "weak_dep %d is out of range", (int)weak_deps[i]); } mutable_weak_deps[i] = weak_deps[i]; } @@ -3085,7 +3141,8 @@ const upb_FileDef* upb_DefPool_AddFile( /* Include here since we want most of this file to be stdio-free. */ #include -bool _upb_DefPool_LoadDefInit(upb_DefPool* s, const _upb_DefPool_Init* init) { +bool _upb_DefPool_LoadDefInitEx(upb_DefPool* s, const _upb_DefPool_Init* init, + bool rebuild_minitable) { /* Since this function should never fail (it would indicate a bug in upb) we * print errors to stderr instead of returning error status to the user. */ _upb_DefPool_Init** deps = init->deps; @@ -3102,7 +3159,7 @@ bool _upb_DefPool_LoadDefInit(upb_DefPool* s, const _upb_DefPool_Init* init) { arena = upb_Arena_New(); for (; *deps; deps++) { - if (!_upb_DefPool_LoadDefInit(s, *deps)) goto err; + if (!_upb_DefPool_LoadDefInitEx(s, *deps, rebuild_minitable)) goto err; } file = google_protobuf_FileDescriptorProto_parse_ex( @@ -3119,7 +3176,8 @@ bool _upb_DefPool_LoadDefInit(upb_DefPool* s, const _upb_DefPool_Init* init) { goto err; } - if (!_upb_DefPool_AddFile(s, file, init->layout, &status)) { + const upb_MiniTable_File* mt = rebuild_minitable ? NULL : init->layout; + if (!_upb_DefPool_AddFile(s, file, mt, &status)) { goto err; } diff --git a/upb/def.h b/upb/def.h index 895aa1a90e..1c7adb4b15 100644 --- a/upb/def.h +++ b/upb/def.h @@ -188,8 +188,8 @@ const upb_ExtensionRange* upb_MessageDef_ExtensionRange(const upb_MessageDef* m, int i); const upb_FieldDef* upb_MessageDef_Field(const upb_MessageDef* m, int i); const upb_OneofDef* upb_MessageDef_Oneof(const upb_MessageDef* m, int i); -const upb_FieldDef* upb_MessageDef_FindFieldByNumberWithSize( - const upb_MessageDef* m, uint32_t i); +const upb_FieldDef* upb_MessageDef_FindFieldByNumber(const upb_MessageDef* m, + uint32_t i); const upb_FieldDef* upb_MessageDef_FindFieldByNameWithSize( const upb_MessageDef* m, const char* name, size_t len); const upb_OneofDef* upb_MessageDef_FindOneofByNameWithSize( @@ -317,6 +317,7 @@ const google_protobuf_MethodOptions* upb_MethodDef_Options( const upb_MethodDef* m); bool upb_MethodDef_HasOptions(const upb_MethodDef* m); const char* upb_MethodDef_FullName(const upb_MethodDef* m); +int upb_MethodDef_Index(const upb_MethodDef* m); const char* upb_MethodDef_Name(const upb_MethodDef* m); const upb_ServiceDef* upb_MethodDef_Service(const upb_MethodDef* m); const upb_MessageDef* upb_MethodDef_InputType(const upb_MethodDef* m); @@ -389,7 +390,15 @@ typedef struct _upb_DefPool_Init { upb_StringView descriptor; /* Serialized descriptor. */ } _upb_DefPool_Init; -bool _upb_DefPool_LoadDefInit(upb_DefPool* s, const _upb_DefPool_Init* init); +// Should only be directly called by tests. This variant lets us suppress +// the use of compiled-in tables, forcing a rebuild of the tables at runtime. +bool _upb_DefPool_LoadDefInitEx(upb_DefPool* s, const _upb_DefPool_Init* init, + bool rebuild_minitable); + +UPB_INLINE bool _upb_DefPool_LoadDefInit(upb_DefPool* s, + const _upb_DefPool_Init* init) { + return _upb_DefPool_LoadDefInitEx(s, init, false); +} #include "upb/port_undef.inc" diff --git a/upb/def.hpp b/upb/def.hpp index 0fd093bbcd..aaf9496226 100644 --- a/upb/def.hpp +++ b/upb/def.hpp @@ -194,7 +194,7 @@ class MessageDefPtr { // These return null pointers if the field is not found. FieldDefPtr FindFieldByNumber(uint32_t number) const { - return FieldDefPtr(upb_MessageDef_FindFieldByNumberWithSize(ptr_, number)); + return FieldDefPtr(upb_MessageDef_FindFieldByNumber(ptr_, number)); } FieldDefPtr FindFieldByName(const char* name, size_t len) const { return FieldDefPtr(upb_MessageDef_FindFieldByNameWithSize(ptr_, name, len)); diff --git a/upb/fuzz/BUILD b/upb/fuzz/BUILD index 4719531fb2..fa0767e3df 100644 --- a/upb/fuzz/BUILD +++ b/upb/fuzz/BUILD @@ -7,6 +7,7 @@ cc_fuzz_test( srcs = ["file_descriptor_parsenew_fuzzer.cc"], deps = [ "//:descriptor_upb_proto", + "//:reflection", "//:upb", ], ) diff --git a/upb/fuzz/file_descriptor_parsenew_fuzzer.cc b/upb/fuzz/file_descriptor_parsenew_fuzzer.cc index 91983bec0f..3d9c4c6f05 100644 --- a/upb/fuzz/file_descriptor_parsenew_fuzzer.cc +++ b/upb/fuzz/file_descriptor_parsenew_fuzzer.cc @@ -26,11 +26,18 @@ #include #include "google/protobuf/descriptor.upb.h" +#include "upb/def.hpp" #include "upb/upb.hpp" extern "C" int LLVMFuzzerTestOneInput(const uint8_t* data, size_t size) { upb::Arena arena; - google_protobuf_FileDescriptorProto_parse(reinterpret_cast(data), - size, arena.ptr()); + google_protobuf_FileDescriptorProto* proto = + google_protobuf_FileDescriptorProto_parse( + reinterpret_cast(data), size, arena.ptr()); + if (proto) { + upb::SymbolTable symtab; + upb::Status status; + symtab.AddFile(proto, &status); + } return 0; } diff --git a/upb/json_decode.c b/upb/json_decode.c index 566e9d4a1b..c4698f03bf 100644 --- a/upb/json_decode.c +++ b/upb/json_decode.c @@ -894,10 +894,8 @@ static void jsondec_array(jsondec* d, upb_Message* msg, const upb_FieldDef* f) { static void jsondec_map(jsondec* d, upb_Message* msg, const upb_FieldDef* f) { upb_Map* map = upb_Message_Mutable(msg, f, d->arena).map; const upb_MessageDef* entry = upb_FieldDef_MessageSubDef(f); - const upb_FieldDef* key_f = - upb_MessageDef_FindFieldByNumberWithSize(entry, 1); - const upb_FieldDef* val_f = - upb_MessageDef_FindFieldByNumberWithSize(entry, 2); + const upb_FieldDef* key_f = upb_MessageDef_FindFieldByNumber(entry, 1); + const upb_FieldDef* val_f = upb_MessageDef_FindFieldByNumber(entry, 2); jsondec_objstart(d); while (jsondec_objnext(d)) { @@ -1139,10 +1137,9 @@ static void jsondec_timestamp(jsondec* d, upb_Message* msg, jsondec_err(d, "Timestamp out of range"); } - upb_Message_Set(msg, upb_MessageDef_FindFieldByNumberWithSize(m, 1), seconds, - d->arena); - upb_Message_Set(msg, upb_MessageDef_FindFieldByNumberWithSize(m, 2), nanos, + upb_Message_Set(msg, upb_MessageDef_FindFieldByNumber(m, 1), seconds, d->arena); + upb_Message_Set(msg, upb_MessageDef_FindFieldByNumber(m, 2), nanos, d->arena); return; malformed: @@ -1174,15 +1171,14 @@ static void jsondec_duration(jsondec* d, upb_Message* msg, nanos.int32_val = -nanos.int32_val; } - upb_Message_Set(msg, upb_MessageDef_FindFieldByNumberWithSize(m, 1), seconds, - d->arena); - upb_Message_Set(msg, upb_MessageDef_FindFieldByNumberWithSize(m, 2), nanos, + upb_Message_Set(msg, upb_MessageDef_FindFieldByNumber(m, 1), seconds, d->arena); + upb_Message_Set(msg, upb_MessageDef_FindFieldByNumber(m, 2), nanos, d->arena); } static void jsondec_listvalue(jsondec* d, upb_Message* msg, const upb_MessageDef* m) { - const upb_FieldDef* values_f = upb_MessageDef_FindFieldByNumberWithSize(m, 1); + const upb_FieldDef* values_f = upb_MessageDef_FindFieldByNumber(m, 1); const upb_MessageDef* value_m = upb_FieldDef_MessageSubDef(values_f); upb_Array* values = upb_Message_Mutable(msg, values_f, d->arena).array; @@ -1199,10 +1195,9 @@ static void jsondec_listvalue(jsondec* d, upb_Message* msg, static void jsondec_struct(jsondec* d, upb_Message* msg, const upb_MessageDef* m) { - const upb_FieldDef* fields_f = upb_MessageDef_FindFieldByNumberWithSize(m, 1); + const upb_FieldDef* fields_f = upb_MessageDef_FindFieldByNumber(m, 1); const upb_MessageDef* entry_m = upb_FieldDef_MessageSubDef(fields_f); - const upb_FieldDef* value_f = - upb_MessageDef_FindFieldByNumberWithSize(entry_m, 2); + const upb_FieldDef* value_f = upb_MessageDef_FindFieldByNumber(entry_m, 2); const upb_MessageDef* value_m = upb_FieldDef_MessageSubDef(value_f); upb_Map* fields = upb_Message_Mutable(msg, fields_f, d->arena).map; @@ -1228,42 +1223,42 @@ static void jsondec_wellknownvalue(jsondec* d, upb_Message* msg, switch (jsondec_peek(d)) { case JD_NUMBER: /* double number_value = 2; */ - f = upb_MessageDef_FindFieldByNumberWithSize(m, 2); + f = upb_MessageDef_FindFieldByNumber(m, 2); val.double_val = jsondec_number(d); break; case JD_STRING: /* string string_value = 3; */ - f = upb_MessageDef_FindFieldByNumberWithSize(m, 3); + f = upb_MessageDef_FindFieldByNumber(m, 3); val.str_val = jsondec_string(d); break; case JD_FALSE: /* bool bool_value = 4; */ - f = upb_MessageDef_FindFieldByNumberWithSize(m, 4); + f = upb_MessageDef_FindFieldByNumber(m, 4); val.bool_val = false; jsondec_false(d); break; case JD_TRUE: /* bool bool_value = 4; */ - f = upb_MessageDef_FindFieldByNumberWithSize(m, 4); + f = upb_MessageDef_FindFieldByNumber(m, 4); val.bool_val = true; jsondec_true(d); break; case JD_NULL: /* NullValue null_value = 1; */ - f = upb_MessageDef_FindFieldByNumberWithSize(m, 1); + f = upb_MessageDef_FindFieldByNumber(m, 1); val.int32_val = 0; jsondec_null(d); break; /* Note: these cases return, because upb_Message_Mutable() is enough. */ case JD_OBJECT: /* Struct struct_value = 5; */ - f = upb_MessageDef_FindFieldByNumberWithSize(m, 5); + f = upb_MessageDef_FindFieldByNumber(m, 5); submsg = upb_Message_Mutable(msg, f, d->arena).msg; jsondec_struct(d, submsg, upb_FieldDef_MessageSubDef(f)); return; case JD_ARRAY: /* ListValue list_value = 6; */ - f = upb_MessageDef_FindFieldByNumberWithSize(m, 6); + f = upb_MessageDef_FindFieldByNumber(m, 6); submsg = upb_Message_Mutable(msg, f, d->arena).msg; jsondec_listvalue(d, submsg, upb_FieldDef_MessageSubDef(f)); return; @@ -1310,7 +1305,7 @@ static upb_StringView jsondec_mask(jsondec* d, const char* buf, static void jsondec_fieldmask(jsondec* d, upb_Message* msg, const upb_MessageDef* m) { /* repeated string paths = 1; */ - const upb_FieldDef* paths_f = upb_MessageDef_FindFieldByNumberWithSize(m, 1); + const upb_FieldDef* paths_f = upb_MessageDef_FindFieldByNumber(m, 1); upb_Array* arr = upb_Message_Mutable(msg, paths_f, d->arena).array; upb_StringView str = jsondec_string(d); const char* ptr = str.data; @@ -1350,8 +1345,7 @@ static void jsondec_anyfield(jsondec* d, upb_Message* msg, static const upb_MessageDef* jsondec_typeurl(jsondec* d, upb_Message* msg, const upb_MessageDef* m) { - const upb_FieldDef* type_url_f = - upb_MessageDef_FindFieldByNumberWithSize(m, 1); + const upb_FieldDef* type_url_f = upb_MessageDef_FindFieldByNumber(m, 1); const upb_MessageDef* type_m; upb_StringView type_url = jsondec_string(d); const char* end = type_url.data + type_url.size; @@ -1382,7 +1376,7 @@ static const upb_MessageDef* jsondec_typeurl(jsondec* d, upb_Message* msg, static void jsondec_any(jsondec* d, upb_Message* msg, const upb_MessageDef* m) { /* string type_url = 1; * bytes value = 2; */ - const upb_FieldDef* value_f = upb_MessageDef_FindFieldByNumberWithSize(m, 2); + const upb_FieldDef* value_f = upb_MessageDef_FindFieldByNumber(m, 2); upb_Message* any_msg; const upb_MessageDef* any_m = NULL; const char* pre_type_data = NULL; @@ -1444,7 +1438,7 @@ static void jsondec_any(jsondec* d, upb_Message* msg, const upb_MessageDef* m) { static void jsondec_wrapper(jsondec* d, upb_Message* msg, const upb_MessageDef* m) { - const upb_FieldDef* value_f = upb_MessageDef_FindFieldByNumberWithSize(m, 1); + const upb_FieldDef* value_f = upb_MessageDef_FindFieldByNumber(m, 1); upb_MessageValue val = jsondec_value(d, value_f); upb_Message_Set(msg, value_f, val, d->arena); } diff --git a/upb/json_encode.c b/upb/json_encode.c index e0b35f1320..9668a426d2 100644 --- a/upb/json_encode.c +++ b/upb/json_encode.c @@ -141,9 +141,8 @@ static void jsonenc_nanos(jsonenc* e, int32_t nanos) { static void jsonenc_timestamp(jsonenc* e, const upb_Message* msg, const upb_MessageDef* m) { - const upb_FieldDef* seconds_f = - upb_MessageDef_FindFieldByNumberWithSize(m, 1); - const upb_FieldDef* nanos_f = upb_MessageDef_FindFieldByNumberWithSize(m, 2); + const upb_FieldDef* seconds_f = upb_MessageDef_FindFieldByNumber(m, 1); + const upb_FieldDef* nanos_f = upb_MessageDef_FindFieldByNumber(m, 2); int64_t seconds = upb_Message_Get(msg, seconds_f).int64_val; int32_t nanos = upb_Message_Get(msg, nanos_f).int32_val; int L, N, I, J, K, hour, min, sec; @@ -185,9 +184,8 @@ static void jsonenc_timestamp(jsonenc* e, const upb_Message* msg, static void jsonenc_duration(jsonenc* e, const upb_Message* msg, const upb_MessageDef* m) { - const upb_FieldDef* seconds_f = - upb_MessageDef_FindFieldByNumberWithSize(m, 1); - const upb_FieldDef* nanos_f = upb_MessageDef_FindFieldByNumberWithSize(m, 2); + const upb_FieldDef* seconds_f = upb_MessageDef_FindFieldByNumber(m, 1); + const upb_FieldDef* nanos_f = upb_MessageDef_FindFieldByNumber(m, 2); int64_t seconds = upb_Message_Get(msg, seconds_f).int64_val; int32_t nanos = upb_Message_Get(msg, nanos_f).int32_val; @@ -336,7 +334,7 @@ static void upb_JsonEncode_Float(jsonenc* e, float val) { static void jsonenc_wrapper(jsonenc* e, const upb_Message* msg, const upb_MessageDef* m) { - const upb_FieldDef* val_f = upb_MessageDef_FindFieldByNumberWithSize(m, 1); + const upb_FieldDef* val_f = upb_MessageDef_FindFieldByNumber(m, 1); upb_MessageValue val = upb_Message_Get(msg, val_f); jsonenc_scalar(e, val, val_f); } @@ -380,9 +378,8 @@ badurl: static void jsonenc_any(jsonenc* e, const upb_Message* msg, const upb_MessageDef* m) { - const upb_FieldDef* type_url_f = - upb_MessageDef_FindFieldByNumberWithSize(m, 1); - const upb_FieldDef* value_f = upb_MessageDef_FindFieldByNumberWithSize(m, 2); + const upb_FieldDef* type_url_f = upb_MessageDef_FindFieldByNumber(m, 1); + const upb_FieldDef* value_f = upb_MessageDef_FindFieldByNumber(m, 2); upb_StringView type_url = upb_Message_Get(msg, type_url_f).str_val; upb_StringView value = upb_Message_Get(msg, value_f).str_val; const upb_MessageDef* any_m = jsonenc_getanymsg(e, type_url); @@ -441,7 +438,7 @@ static void jsonenc_fieldpath(jsonenc* e, upb_StringView path) { static void jsonenc_fieldmask(jsonenc* e, const upb_Message* msg, const upb_MessageDef* m) { - const upb_FieldDef* paths_f = upb_MessageDef_FindFieldByNumberWithSize(m, 1); + const upb_FieldDef* paths_f = upb_MessageDef_FindFieldByNumber(m, 1); const upb_Array* paths = upb_Message_Get(msg, paths_f).array_val; bool first = true; size_t i, n = 0; @@ -460,11 +457,10 @@ static void jsonenc_fieldmask(jsonenc* e, const upb_Message* msg, static void jsonenc_struct(jsonenc* e, const upb_Message* msg, const upb_MessageDef* m) { - const upb_FieldDef* fields_f = upb_MessageDef_FindFieldByNumberWithSize(m, 1); + const upb_FieldDef* fields_f = upb_MessageDef_FindFieldByNumber(m, 1); const upb_Map* fields = upb_Message_Get(msg, fields_f).map_val; const upb_MessageDef* entry_m = upb_FieldDef_MessageSubDef(fields_f); - const upb_FieldDef* value_f = - upb_MessageDef_FindFieldByNumberWithSize(entry_m, 2); + const upb_FieldDef* value_f = upb_MessageDef_FindFieldByNumber(entry_m, 2); size_t iter = kUpb_Map_Begin; bool first = true; @@ -487,7 +483,7 @@ static void jsonenc_struct(jsonenc* e, const upb_Message* msg, static void jsonenc_listvalue(jsonenc* e, const upb_Message* msg, const upb_MessageDef* m) { - const upb_FieldDef* values_f = upb_MessageDef_FindFieldByNumberWithSize(m, 1); + const upb_FieldDef* values_f = upb_MessageDef_FindFieldByNumber(m, 1); const upb_MessageDef* values_m = upb_FieldDef_MessageSubDef(values_f); const upb_Array* values = upb_Message_Get(msg, values_f).array_val; size_t i; @@ -669,10 +665,8 @@ static void jsonenc_array(jsonenc* e, const upb_Array* arr, static void jsonenc_map(jsonenc* e, const upb_Map* map, const upb_FieldDef* f) { const upb_MessageDef* entry = upb_FieldDef_MessageSubDef(f); - const upb_FieldDef* key_f = - upb_MessageDef_FindFieldByNumberWithSize(entry, 1); - const upb_FieldDef* val_f = - upb_MessageDef_FindFieldByNumberWithSize(entry, 2); + const upb_FieldDef* key_f = upb_MessageDef_FindFieldByNumber(entry, 1); + const upb_FieldDef* val_f = upb_MessageDef_FindFieldByNumber(entry, 2); size_t iter = kUpb_Map_Begin; bool first = true; diff --git a/upb/reflection.c b/upb/reflection.c index dca1ec25a6..a3a64d2780 100644 --- a/upb/reflection.c +++ b/upb/reflection.c @@ -180,10 +180,10 @@ make: if (!a) return (upb_MutableMessageValue){.array = NULL}; if (upb_FieldDef_IsMap(f)) { const upb_MessageDef* entry = upb_FieldDef_MessageSubDef(f); - const upb_FieldDef* key = upb_MessageDef_FindFieldByNumberWithSize( - entry, kUpb_MapEntry_KeyFieldNumber); - const upb_FieldDef* value = upb_MessageDef_FindFieldByNumberWithSize( - entry, kUpb_MapEntry_ValueFieldNumber); + const upb_FieldDef* key = + upb_MessageDef_FindFieldByNumber(entry, kUpb_MapEntry_KeyFieldNumber); + const upb_FieldDef* value = + upb_MessageDef_FindFieldByNumber(entry, kUpb_MapEntry_ValueFieldNumber); ret.map = upb_Map_New(a, upb_FieldDef_CType(key), upb_FieldDef_CType(value)); } else if (upb_FieldDef_IsRepeated(f)) { @@ -313,8 +313,7 @@ bool _upb_Message_DiscardUnknown(upb_Message* msg, const upb_MessageDef* m, const upb_MessageDef* subm = upb_FieldDef_MessageSubDef(f); if (!subm) continue; if (upb_FieldDef_IsMap(f)) { - const upb_FieldDef* val_f = - upb_MessageDef_FindFieldByNumberWithSize(subm, 2); + const upb_FieldDef* val_f = upb_MessageDef_FindFieldByNumber(subm, 2); const upb_MessageDef* val_m = upb_FieldDef_MessageSubDef(val_f); upb_Map* map = (upb_Map*)val.map_val; size_t iter = kUpb_Map_Begin; diff --git a/upb/util/BUILD b/upb/util/BUILD index d81cb43cb8..e39044a111 100644 --- a/upb/util/BUILD +++ b/upb/util/BUILD @@ -13,7 +13,10 @@ cc_library( srcs = ["def_to_proto.c"], hdrs = ["def_to_proto.h"], visibility = ["//visibility:public"], - deps = ["//:reflection"], + deps = [ + "//:port", + "//:reflection", + ], ) proto_library( @@ -22,7 +25,7 @@ proto_library( "def_to_proto_public_import_test.proto", "def_to_proto_regular_import_test.proto", "def_to_proto_test.proto", - "def_to_proto_weak_import_test.proto", + "def_to_proto_weak_import_test.proto", # copybara:strip_for_google3 ], ) @@ -44,6 +47,8 @@ cc_test( ":def_to_proto_test_upb_proto", ":def_to_proto_test_upb_proto_reflection", "//:descriptor_upb_proto_reflection", + "//:reflection", + "//:upb", "@com_google_absl//absl/strings", "@com_google_googletest//:gtest_main", "@com_google_protobuf//:protobuf", @@ -57,7 +62,10 @@ cc_library( srcs = ["required_fields.c"], hdrs = ["required_fields.h"], visibility = ["//visibility:public"], - deps = ["//:reflection"], + deps = [ + "//:port", + "//:reflection", + ], ) proto_library( @@ -83,6 +91,8 @@ cc_test( ":required_fields_test_upb_proto", ":required_fields_test_upb_proto_reflection", "//:json", + "//:reflection", + "//:upb", "@com_google_absl//absl/strings", "@com_google_googletest//:gtest_main", ], @@ -95,7 +105,10 @@ cc_library( srcs = ["compare.c"], hdrs = ["compare.h"], visibility = ["//visibility:public"], - deps = ["//:reflection"], + deps = [ + "//:port", + "//:reflection", + ], ) cc_test( diff --git a/upb/util/def_to_proto_public_import_test.proto b/upb/util/def_to_proto_public_import_test.proto index 10f1c11733..bb4d6c9caa 100644 --- a/upb/util/def_to_proto_public_import_test.proto +++ b/upb/util/def_to_proto_public_import_test.proto @@ -26,3 +26,7 @@ */ syntax = "proto3"; + +package pkg; + +message PublicImportMessage {} diff --git a/upb/util/def_to_proto_regular_import_test.proto b/upb/util/def_to_proto_regular_import_test.proto index 10f1c11733..9eb6e564f1 100644 --- a/upb/util/def_to_proto_regular_import_test.proto +++ b/upb/util/def_to_proto_regular_import_test.proto @@ -26,3 +26,11 @@ */ syntax = "proto3"; + +package pkg; + +message RegularImportMessage {} + +enum Proto3Enum { + PROTO3_ENUM_ZERO = 0; +} diff --git a/upb/util/def_to_proto_test.cc b/upb/util/def_to_proto_test.cc index 22c99e5e7b..decc51f88a 100644 --- a/upb/util/def_to_proto_test.cc +++ b/upb/util/def_to_proto_test.cc @@ -125,3 +125,19 @@ TEST(DefToProto, Test) { upb::FileDefPtr file = msgdef.file(); CheckFile(file, file_desc); } + +// Like the previous test, but uses a message layout built at runtime. +TEST(DefToProto, TestRuntimeReflection) { + upb::Arena arena; + upb::SymbolTable symtab; + upb_StringView test_file_desc = + upb_util_def_to_proto_test_proto_upbdefinit.descriptor; + const auto* file_desc = google_protobuf_FileDescriptorProto_parse( + test_file_desc.data, test_file_desc.size, arena.ptr()); + + _upb_DefPool_LoadDefInitEx( + symtab.ptr(), &upb_util_def_to_proto_test_proto_upbdefinit, true); + upb::FileDefPtr file = symtab.FindFileByName( + upb_util_def_to_proto_test_proto_upbdefinit.filename); + CheckFile(file, file_desc); +} diff --git a/upb/util/def_to_proto_test.proto b/upb/util/def_to_proto_test.proto index 3b531fda04..56ebbd9522 100644 --- a/upb/util/def_to_proto_test.proto +++ b/upb/util/def_to_proto_test.proto @@ -29,7 +29,7 @@ syntax = "proto2"; import "upb/util/def_to_proto_regular_import_test.proto"; import public "upb/util/def_to_proto_public_import_test.proto"; -import weak "upb/util/def_to_proto_weak_import_test.proto"; +import weak "upb/util/def_to_proto_weak_import_test.proto"; // copybara:strip_for_google3 package pkg; @@ -48,6 +48,9 @@ message Message { string oneof_bool = 3 [default = "true"]; bytes oneof_bytes = 4 [default = "abc\xef\xfe"]; } + optional pkg.RegularImportMessage regular_import_message = 6; + optional pkg.PublicImportMessage public_import_message = 7; + optional pkg.Proto3Enum proto3_enum = 8; extensions 1000 to max; extend Message { optional int32 ext = 1000; @@ -67,6 +70,26 @@ enum Enum { NEGATIVE_ONE = -1; } +enum EnumUpper32Value { + UPPER32_VALUE = 40; +} + +enum HasDuplicateValues { + option allow_alias = true; + A = 0; + B = 1; + C = 120; + D = 130; + + G = 120; + F = 1; + E = 0; + H = 121; + I = 121; + J = 121; + K = 121; +} + service Service { rpc Bar(Message) returns (Message); } @@ -75,6 +98,10 @@ extend Message { optional int32 ext = 1001; } +enum Has31 { + VALUE_31 = 31; +} + message PretendMessageSet { option message_set_wire_format = true; // Since this is message_set_wire_format, "max" here means INT32_MAX. diff --git a/upbc/protoc-gen-upb.cc b/upbc/protoc-gen-upb.cc index 5c6ffeba90..62ce9a7850 100644 --- a/upbc/protoc-gen-upb.cc +++ b/upbc/protoc-gen-upb.cc @@ -96,6 +96,18 @@ std::vector SortedEnums( return enums; } +std::vector SortedUniqueEnumNumbers( + const protobuf::EnumDescriptor* e) { + std::vector values; + for (int i = 0; i < e->value_count(); i++) { + values.push_back(e->value(i)->number()); + } + std::sort(values.begin(), values.end()); + auto last = std::unique(values.begin(), values.end()); + values.erase(last, values.end()); + return values; +} + void AddMessages(const protobuf::Descriptor* message, std::vector* messages) { messages->push_back(message); @@ -1112,7 +1124,8 @@ SubLayoutArray::SubLayoutArray(const protobuf::Descriptor* message) { std::vector sorted_subenums = SortedSubEnums(message); for (const auto* field : sorted_subenums) { - if (field->file()->syntax() != protobuf::FileDescriptor::SYNTAX_PROTO2) { + if (field->enum_type()->file()->syntax() != + protobuf::FileDescriptor::SYNTAX_PROTO2) { continue; } if (!indexes_.try_emplace(field->enum_type(), i).second) { @@ -1160,7 +1173,8 @@ bool TryFillTableEntry(const protobuf::Descriptor* message, type = "b1"; break; case protobuf::FieldDescriptor::TYPE_ENUM: - if (field->file()->syntax() == protobuf::FileDescriptor::SYNTAX_PROTO2) { + if (field->enum_type()->file()->syntax() == + protobuf::FileDescriptor::SYNTAX_PROTO2) { // We don't have the means to test proto2 enum fields for valid values. return false; } @@ -1495,23 +1509,19 @@ int WriteEnums(const protobuf::FileDescriptor* file, Output& output) { for (const auto* e : this_file_enums) { uint64_t mask = 0; - absl::flat_hash_set values; - for (int i = 0; i < e->value_count(); i++) { - int32_t number = e->value(i)->number(); + std::vector values; + for (auto number : SortedUniqueEnumNumbers(e)) { if (static_cast(number) < 64) { - mask |= 1 << number; + mask |= 1ULL << number; } else { - values.insert(number); + values.push_back(number); } } - std::vector values_vec(values.begin(), values.end()); - std::sort(values_vec.begin(), values_vec.end()); - if (!values_vec.empty()) { + if (!values.empty()) { values_init = EnumInit(e) + "_values"; - output("static const int32_t $0[$1] = {\n", values_init, - values_vec.size()); - for (auto value : values_vec) { + output("static const int32_t $0[$1] = {\n", values_init, values.size()); + for (int32_t value : values) { output(" $0,\n", value); } output("};\n\n"); @@ -1520,7 +1530,7 @@ int WriteEnums(const protobuf::FileDescriptor* file, Output& output) { output("const upb_MiniTable_Enum $0 = {\n", EnumInit(e)); output(" $0,\n", values_init); output(" 0x$0ULL,\n", absl::Hex(mask)); - output(" $0,\n", values_vec.size()); + output(" $0,\n", values.size()); output("};\n\n"); } @@ -1682,10 +1692,14 @@ bool Generator::Generate(const protobuf::FileDescriptor* file, FileLayout layout(file); - Output h_output(context->Open(HeaderFilename(file))); + std::unique_ptr h_output_stream( + context->Open(HeaderFilename(file))); + Output h_output(h_output_stream.get()); WriteHeader(file, h_output); - Output c_output(context->Open(SourceFilename(file))); + std::unique_ptr c_output_stream( + context->Open(SourceFilename(file))); + Output c_output(c_output_stream.get()); WriteSource(layout, c_output, fasttable_enabled); return true; diff --git a/upbc/protoc-gen-upbdefs.cc b/upbc/protoc-gen-upbdefs.cc index a5e07560fb..51b3b95927 100644 --- a/upbc/protoc-gen-upbdefs.cc +++ b/upbc/protoc-gen-upbdefs.cc @@ -170,10 +170,14 @@ bool Generator::Generate(const protobuf::FileDescriptor* file, return false; } - Output h_def_output(context->Open(DefHeaderFilename(file->name()))); + std::unique_ptr h_output_stream( + context->Open(DefHeaderFilename(file->name()))); + Output h_def_output(h_output_stream.get()); WriteDefHeader(file, h_def_output); - Output c_def_output(context->Open(DefSourceFilename(file->name()))); + std::unique_ptr c_output_stream( + context->Open(DefSourceFilename(file->name()))); + Output c_def_output(c_output_stream.get()); WriteDefSource(file, c_def_output); return true;