diff --git a/BUILD b/BUILD index 3dfb7a4eea..67f03c7c7f 100644 --- a/BUILD +++ b/BUILD @@ -112,10 +112,8 @@ cc_library( cc_library( name = "upb", srcs = [ - "upb/collections/map_sorter_internal.h", - "upb/collections/message_value.h", - "upb/msg.c", - "upb/msg_internal.h", + "upb/message/internal.h", + "upb/message/message.c", "upb/wire/decode.c", "upb/wire/encode.c", "upb/wire/swap_internal.h", @@ -134,6 +132,8 @@ cc_library( "upb/map.h", "upb/mem/alloc.h", "upb/mem/arena.h", + "upb/message/extension_internal.h", + "upb/message/message.h", "upb/msg.h", "upb/status.h", "upb/string_view.h", @@ -152,6 +152,7 @@ cc_library( ":hash", ":lex", ":mem", + ":mini_table_internal", ":port", ":wire_internal", ], @@ -177,9 +178,6 @@ cc_library( name = "extension_registry", srcs = [ "upb/extension_registry.c", - "upb/msg.h", - "upb/msg_internal.h", - "upb/upb.h", ], hdrs = [ "upb/extension_registry.h", @@ -188,78 +186,73 @@ cc_library( visibility = ["//visibility:public"], deps = [ ":base", - ":collections_internal", ":hash", ":mem", + ":mini_table_internal", ":port", ], ) cc_library( - name = "mini_table_internal", - srcs = [ - "upb/mini_table/common.h", - "upb/msg_internal.h", - ], + name = "mini_table", hdrs = [ - "upb/mini_table/common_internal.h", - "upb/mini_table/encode_internal.h", - "upb/mini_table/encode_internal.hpp", + "upb/mini_table.h", + "upb/mini_table/decode.h", + "upb/mini_table/types.h", ], - visibility = ["//:__subpackages__"], + copts = UPB_DEFAULT_COPTS, + visibility = ["//visibility:public"], deps = [ ":base", - ":collections_internal", - ":extension_registry", - ":hash", + ":mem", + ":mini_table_internal", ":port", - ":upb", ], ) cc_library( - name = "mini_table", + name = "mini_table_internal", srcs = [ "upb/mini_table/common.c", - "upb/mini_table/common.h", - "upb/mini_table/common_internal.h", "upb/mini_table/decode.c", "upb/mini_table/encode.c", - "upb/mini_table/encode_internal.h", - "upb/msg_internal.h", ], hdrs = [ - "upb/mini_table.h", + 
"upb/mini_table/common.h", + "upb/mini_table/common_internal.h", "upb/mini_table/decode.h", + "upb/mini_table/encode_internal.h", + "upb/mini_table/encode_internal.hpp", + "upb/mini_table/enum_internal.h", + "upb/mini_table/extension_internal.h", + "upb/mini_table/field_internal.h", + "upb/mini_table/file_internal.h", + "upb/mini_table/message_internal.h", + "upb/mini_table/sub_internal.h", + "upb/mini_table/types.h", ], - copts = UPB_DEFAULT_COPTS, visibility = ["//visibility:public"], deps = [ ":base", - ":collections_internal", - ":extension_registry", - ":hash", - ":mini_table_internal", + ":mem", ":port", - ":upb", ], ) cc_library( - name = "mini_table_accessors", + name = "message_accessors", srcs = [ - "upb/mini_table/accessors.c", - "upb/mini_table/common.h", - "upb/msg_internal.h", + "upb/message/accessors.c", + "upb/message/extension_internal.h", + "upb/message/internal.h", ], hdrs = [ - "upb/mini_table/accessors.h", + "upb/message/accessors.h", ], copts = UPB_DEFAULT_COPTS, visibility = ["//visibility:public"], deps = [ ":collections_internal", - ":hash", ":mini_table_internal", ":port", ":upb", @@ -269,12 +262,14 @@ cc_library( cc_test( name = "mini_table_encode_test", srcs = [ + "upb/message/extension_internal.h", + "upb/message/internal.h", "upb/mini_table/encode_test.cc", ], deps = [ + ":collections_internal", ":extension_registry", ":hash", - ":mini_table", ":mini_table_internal", ":port", ":upb", @@ -285,12 +280,11 @@ cc_test( ) cc_test( - name = "mini_table_accessors_test", - srcs = ["upb/mini_table/accessors_test.cc"], + name = "message_accessors_test", + srcs = ["upb/message/accessors_test.cc"], deps = [ ":collections", - ":mini_table", - ":mini_table_accessors", + ":message_accessors", ":mini_table_internal", ":test_messages_proto2_proto_upb", ":test_messages_proto3_proto_upb", @@ -305,10 +299,9 @@ cc_test( cc_library( name = "fastdecode", srcs = [ - "upb/decode.h", - "upb/decode_fast.h", - "upb/msg.h", - "upb/msg_internal.h", + 
"upb/message/extension_internal.h", + "upb/message/internal.h", + "upb/message/message.h", "upb/wire/decode.h", "upb/wire/decode_fast.c", "upb/wire/decode_fast.h", @@ -320,6 +313,7 @@ cc_library( ":extension_registry", ":hash", ":mem_internal", + ":mini_table_internal", ":port", ":wire_internal", ], @@ -339,10 +333,18 @@ cc_library( "upb/collections/map_gencode_util.h", "upb/collections/message_value.h", "upb/extension_registry.h", - "upb/mini_table/accessors.h", + "upb/message/accessors.h", + "upb/message/extension_internal.h", + "upb/message/internal.h", + "upb/message/message.h", "upb/mini_table/common.h", - "upb/msg.h", - "upb/msg_internal.h", + "upb/mini_table/enum_internal.h", + "upb/mini_table/extension_internal.h", + "upb/mini_table/field_internal.h", + "upb/mini_table/file_internal.h", + "upb/mini_table/message_internal.h", + "upb/mini_table/sub_internal.h", + "upb/mini_table/types.h", "upb/port/def.inc", "upb/port/undef.inc", "upb/wire/decode.h", @@ -363,8 +365,16 @@ cc_library( cc_library( name = "generated_cpp_support__only_for_generated_code_do_not_use__i_give_permission_to_break_me", hdrs = [ - "upb/msg.h", - "upb/msg_internal.h", + "upb/message/extension_internal.h", + "upb/message/internal.h", + "upb/message/message.h", + "upb/mini_table/enum_internal.h", + "upb/mini_table/extension_internal.h", + "upb/mini_table/field_internal.h", + "upb/mini_table/file_internal.h", + "upb/mini_table/message_internal.h", + "upb/mini_table/sub_internal.h", + "upb/mini_table/types.h", "upb/port/def.inc", "upb/port/undef.inc", "upb/upb.hpp", @@ -385,20 +395,6 @@ cc_library( cc_library( name = "generated_reflection_support__only_for_generated_code_do_not_use__i_give_permission_to_break_me", - srcs = [ - "upb/reflection/common.h", - "upb/reflection/def_pool.h", - "upb/reflection/def_type.h", - "upb/reflection/enum_def.h", - "upb/reflection/enum_value_def.h", - "upb/reflection/extension_range.h", - "upb/reflection/field_def.h", - "upb/reflection/file_def.h", - 
"upb/reflection/message_def.h", - "upb/reflection/method_def.h", - "upb/reflection/oneof_def.h", - "upb/reflection/service_def.h", - ], hdrs = [ "upb/port/def.inc", "upb/port/undef.inc", @@ -448,9 +444,9 @@ cc_library( "upb/collections/array.c", "upb/collections/map.c", "upb/collections/map_sorter.c", - "upb/extension_registry.h", - "upb/msg.h", - "upb/msg_internal.h", + "upb/message/extension_internal.h", + "upb/message/internal.h", + "upb/message/message.h", ], hdrs = [ "upb/collections/array.h", @@ -465,8 +461,10 @@ cc_library( visibility = ["//:__subpackages__"], deps = [ ":base", + ":extension_registry", ":hash", ":mem", + ":mini_table_internal", ":port", ], ) @@ -549,7 +547,6 @@ cc_library( ":collections", ":descriptor_upb_proto", ":hash", - ":mini_table", ":mini_table_internal", ":port", ":upb", @@ -723,13 +720,13 @@ cc_test( ) cc_test( - name = "msg_test", - srcs = ["upb/msg_test.cc"], + name = "message_test", + srcs = ["upb/message/test.cc"], deps = [ ":fuzz_test_util", ":json", - ":msg_test_upb_proto", - ":msg_test_upb_proto_reflection", + ":message_test_upb_proto", + ":message_test_upb_proto_reflection", ":reflection", ":test_messages_proto3_proto_upb", ":upb", @@ -738,16 +735,16 @@ cc_test( ) proto_library( - name = "msg_test_proto", + name = "message_test_proto", testonly = 1, - srcs = ["upb/msg_test.proto"], + srcs = ["upb/message/test.proto"], deps = ["@com_google_protobuf//src/google/protobuf:test_messages_proto3_proto"], ) upb_proto_library( - name = "msg_test_upb_proto", + name = "message_test_upb_proto", testonly = 1, - deps = [":msg_test_proto"], + deps = [":message_test_proto"], ) proto_library( @@ -770,9 +767,9 @@ upb_proto_reflection_library( ) upb_proto_reflection_library( - name = "msg_test_upb_proto_reflection", + name = "message_test_upb_proto_reflection", testonly = 1, - deps = [":msg_test_proto"], + deps = [":message_test_proto"], ) proto_library( @@ -980,7 +977,6 @@ cc_library( srcs = ["upb/fuzz_test_util.cc"], hdrs = 
["upb/fuzz_test_util.h"], deps = [ - ":mini_table", ":mini_table_internal", ":upb", ], @@ -1021,9 +1017,9 @@ cc_library( cc_library( name = "wire_internal", srcs = [ - "upb/extension_registry.h", - "upb/msg.h", - "upb/msg_internal.h", + "upb/message/extension_internal.h", + "upb/message/internal.h", + "upb/message/message.h", "upb/wire/decode.h", ], hdrs = [ @@ -1036,8 +1032,10 @@ cc_library( deps = [ ":base", ":collections_internal", + ":extension_registry", ":hash", ":mem_internal", + ":mini_table_internal", ":port", "//third_party/utf8_range", ], @@ -1100,8 +1098,8 @@ upb_amalgamation( ":hash", ":lex", ":mem_internal", - ":mini_table", - ":mini_table_accessors", + ":mini_table_internal", + ":message_accessors", ":port", ":reflection", ":reflection_internal", @@ -1136,8 +1134,8 @@ upb_amalgamation( ":json", ":lex", ":mem_internal", - ":mini_table", - ":mini_table_accessors", + ":mini_table_internal", + ":message_accessors", ":port", ":reflection", ":reflection_internal", @@ -1173,8 +1171,8 @@ upb_amalgamation( ":json", ":lex", ":mem_internal", - ":mini_table", - ":mini_table_accessors", + ":mini_table_internal", + ":message_accessors", ":port", ":reflection", ":reflection_internal", diff --git a/docs/wrapping-upb.md b/docs/wrapping-upb.md index 3c3114947a..46cc0753d1 100644 --- a/docs/wrapping-upb.md +++ b/docs/wrapping-upb.md @@ -235,7 +235,7 @@ APIs to do this do not exist yet. To use MiniTable-based access: 1. Load and access MiniDescriptors data using the interfaces in google3/third_party/upb/upb/mini_table.h. -2. Access message data using the interfaces in google3/third_party/upb/upb/mini_table_accessors.h. +2. Access message data using the interfaces in google3/third_party/upb/upb/message/accessors.h. 
## Memory Management diff --git a/protos/protos.h b/protos/protos.h index 63c72ffd46..c84e4861be 100644 --- a/protos/protos.h +++ b/protos/protos.h @@ -32,10 +32,11 @@ #include "absl/status/status.h" #include "absl/status/statusor.h" -#include "upb/decode.h" -#include "upb/encode.h" -#include "upb/mini_table.h" +#include "upb/message/extension_internal.h" +#include "upb/mini_table/types.h" #include "upb/upb.hpp" +#include "upb/wire/decode.h" +#include "upb/wire/encode.h" namespace protos { diff --git a/protos_generator/protoc-gen-upb-protos.cc b/protos_generator/protoc-gen-upb-protos.cc index 9a81b3405e..12de664956 100644 --- a/protos_generator/protoc-gen-upb-protos.cc +++ b/protos_generator/protoc-gen-upb-protos.cc @@ -135,7 +135,7 @@ void WriteHeader(const FileLayout& layout, Output& output) { #include "absl/strings/string_view.h" #include "absl/status/statusor.h" -#include "upb/msg_internal.h" +#include "upb/message/internal.h" )cc", ToPreproc(file->name())); @@ -198,7 +198,7 @@ void WriteSource(const FileLayout& layout, Output& output, R"cc( #include #include "absl/strings/string_view.h" -#include "upb/msg_internal.h" +#include "upb/message/internal.h" #include "protos/protos.h" #include "$0" )cc", diff --git a/upb/collections/map.c b/upb/collections/map.c index 54ff1cee41..eaeaaadaea 100644 --- a/upb/collections/map.c +++ b/upb/collections/map.c @@ -31,7 +31,7 @@ #include "upb/collections/map_internal.h" #include "upb/mem/arena.h" -#include "upb/msg.h" +#include "upb/message/message.h" // Must be last. #include "upb/port/def.inc" diff --git a/upb/collections/map_sorter_internal.h b/upb/collections/map_sorter_internal.h index 793a279d56..2642f04def 100644 --- a/upb/collections/map_sorter_internal.h +++ b/upb/collections/map_sorter_internal.h @@ -30,7 +30,7 @@ #ifndef UPB_COLLECTIONS_MAP_SORTER_INTERNAL_H_ #define UPB_COLLECTIONS_MAP_SORTER_INTERNAL_H_ -#include "upb/msg_internal.h" +#include "upb/message/internal.h" // Must be last. 
#include "upb/port/def.inc" diff --git a/upb/collections/message_value.h b/upb/collections/message_value.h index f3b0128284..3adb976a8a 100644 --- a/upb/collections/message_value.h +++ b/upb/collections/message_value.h @@ -29,7 +29,7 @@ #define UPB_MESSAGE_VALUE_H_ #include "upb/base/string_view.h" -#include "upb/msg.h" +#include "upb/message/message.h" // Must be last. #include "upb/port/def.inc" diff --git a/upb/extension_registry.c b/upb/extension_registry.c index 5ca85c93d6..3765515206 100644 --- a/upb/extension_registry.c +++ b/upb/extension_registry.c @@ -28,19 +28,18 @@ #include "upb/extension_registry.h" #include "upb/hash/str_table.h" -#include "upb/msg.h" -#include "upb/msg_internal.h" +#include "upb/mini_table/extension_internal.h" // Must be last. #include "upb/port/def.inc" +#define EXTREG_KEY_SIZE (sizeof(upb_MiniTable*) + sizeof(uint32_t)) + struct upb_ExtensionRegistry { upb_Arena* arena; - upb_strtable exts; /* Key is upb_MiniTable* concatenated with fieldnum. */ + upb_strtable exts; // Key is upb_MiniTable* concatenated with fieldnum. }; -#define EXTREG_KEY_SIZE (sizeof(upb_MiniTable*) + sizeof(uint32_t)) - static void extreg_key(char* buf, const upb_MiniTable* l, uint32_t fieldnum) { memcpy(buf, &l, sizeof(l)); memcpy(buf + sizeof(l), &fieldnum, sizeof(fieldnum)); @@ -75,7 +74,7 @@ bool upb_ExtensionRegistry_AddArray(upb_ExtensionRegistry* r, return true; failure: - /* Back out the entries previously added. */ + // Back out the entries previously added. for (end = e, e = start; e < end; e++) { const upb_MiniTableExtension* ext = *e; extreg_key(buf, ext->extendee, ext->field.number); diff --git a/upb/extension_registry.h b/upb/extension_registry.h index 4f31273c78..34103dd76f 100644 --- a/upb/extension_registry.h +++ b/upb/extension_registry.h @@ -28,7 +28,8 @@ #ifndef UPB_EXTENSION_REGISTRY_H_ #define UPB_EXTENSION_REGISTRY_H_ -#include "upb/msg.h" +#include "upb/mem/arena.h" +#include "upb/mini_table/types.h" // Must be last. 
#include "upb/port/def.inc" @@ -72,7 +73,6 @@ extern "C" { */ typedef struct upb_ExtensionRegistry upb_ExtensionRegistry; -typedef struct upb_MiniTableExtension upb_MiniTableExtension; // Creates a upb_ExtensionRegistry in the given arena. // The arena must outlive any use of the extreg. diff --git a/upb/fuzz_test_util.cc b/upb/fuzz_test_util.cc index 04e9c7ffab..fb5f872c9b 100644 --- a/upb/fuzz_test_util.cc +++ b/upb/fuzz_test_util.cc @@ -27,7 +27,10 @@ #include "upb/fuzz_test_util.h" -#include "upb/msg.h" +#include "upb/extension_registry.h" +#include "upb/message/message.h" +#include "upb/mini_table/decode.h" +#include "upb/mini_table/extension_internal.h" #include "upb/upb.hpp" namespace upb { diff --git a/upb/fuzz_test_util.h b/upb/fuzz_test_util.h index 9127b17106..10728fd883 100644 --- a/upb/fuzz_test_util.h +++ b/upb/fuzz_test_util.h @@ -31,7 +31,8 @@ #include #include -#include "upb/mini_table.h" +#include "upb/extension_registry.h" +#include "upb/mini_table/types.h" namespace upb { namespace fuzz { diff --git a/upb/mini_table/accessors.c b/upb/message/accessors.c similarity index 99% rename from upb/mini_table/accessors.c rename to upb/message/accessors.c index dd95e01dda..2d0b0f36c9 100644 --- a/upb/mini_table/accessors.c +++ b/upb/message/accessors.c @@ -25,14 +25,14 @@ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ -#include "upb/mini_table/accessors.h" +#include "upb/message/accessors.h" #include "upb/collections/array_internal.h" +#include "upb/message/message.h" #include "upb/wire/decode.h" #include "upb/wire/encode.h" // Must be last. 
-#include "upb/msg.h" #include "upb/port/def.inc" static size_t _upb_MiniTableField_Size(const upb_MiniTableField* f) { diff --git a/upb/mini_table/accessors.h b/upb/message/accessors.h similarity index 98% rename from upb/mini_table/accessors.h rename to upb/message/accessors.h index 67eb029570..8c41dc969d 100644 --- a/upb/mini_table/accessors.h +++ b/upb/message/accessors.h @@ -25,11 +25,14 @@ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ -#ifndef UPB_MINI_TABLE_ACCESSORS_H_ -#define UPB_MINI_TABLE_ACCESSORS_H_ +#ifndef UPB_MESSAGE_ACCESSORS_H_ +#define UPB_MESSAGE_ACCESSORS_H_ #include "upb/collections/array.h" +#include "upb/message/internal.h" #include "upb/mini_table/common.h" +#include "upb/mini_table/enum_internal.h" +#include "upb/mini_table/field_internal.h" // Must be last. #include "upb/port/def.inc" @@ -358,4 +361,4 @@ upb_UnknownToMessage_Status upb_MiniTable_PromoteUnknownToMessageArray( #include "upb/port/undef.inc" -#endif // UPB_MINI_TABLE_ACCESSORS_H_ +#endif // UPB_MESSAGE_ACCESSORS_H_ diff --git a/upb/mini_table/accessors_test.cc b/upb/message/accessors_test.cc similarity index 99% rename from upb/mini_table/accessors_test.cc rename to upb/message/accessors_test.cc index 100a8594ab..4873061a73 100644 --- a/upb/mini_table/accessors_test.cc +++ b/upb/message/accessors_test.cc @@ -31,7 +31,7 @@ * accessed through reflective APIs exposed through mini table accessors. 
*/ -#include "upb/mini_table/accessors.h" +#include "upb/message/accessors.h" #include "gtest/gtest.h" #include "google/protobuf/test_messages_proto2.upb.h" @@ -40,6 +40,7 @@ #include "upb/mini_table/decode.h" #include "upb/mini_table/encode_internal.hpp" #include "upb/test.upb.h" +#include "upb/upb.h" #include "upb/wire/decode.h" namespace { diff --git a/upb/message/extension_internal.h b/upb/message/extension_internal.h new file mode 100644 index 0000000000..2ea95d3f0a --- /dev/null +++ b/upb/message/extension_internal.h @@ -0,0 +1,85 @@ +/* + * Copyright (c) 2009-2021, Google LLC + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * * Neither the name of Google LLC nor the + * names of its contributors may be used to endorse or promote products + * derived from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. 
IN NO EVENT SHALL Google LLC BE LIABLE FOR ANY DIRECT, + * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES + * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND + * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef UPB_MESSAGE_EXTENSION_INTERNAL_H_ +#define UPB_MESSAGE_EXTENSION_INTERNAL_H_ + +#include "upb/base/descriptor_constants.h" +#include "upb/base/string_view.h" +#include "upb/mem/arena.h" +#include "upb/message/message.h" +#include "upb/mini_table/extension_internal.h" + +// Must be last. +#include "upb/port/def.inc" + +// The internal representation of an extension is self-describing: it contains +// enough information that we can serialize it to binary format without needing +// to look it up in a upb_ExtensionRegistry. +// +// This representation allocates 16 bytes to data on 64-bit platforms. +// This is rather wasteful for scalars (in the extreme case of bool, +// it wastes 15 bytes). We accept this because we expect messages to be +// the most common extension type. +typedef struct { + const upb_MiniTableExtension* ext; + union { + upb_StringView str; + void* ptr; + char scalar_data[8]; + } data; +} upb_Message_Extension; + +#ifdef __cplusplus +extern "C" { +#endif + +// Adds the given extension data to the given message. +// |ext| is copied into the message instance. +// This logically replaces any previously-added extension with this number. +upb_Message_Extension* _upb_Message_GetOrCreateExtension( + upb_Message* msg, const upb_MiniTableExtension* ext, upb_Arena* arena); + +// Returns an array of extensions for this message. +// Note: the array is ordered in reverse relative to the order of creation. 
+const upb_Message_Extension* _upb_Message_Getexts(const upb_Message* msg, + size_t* count); + +// Returns an extension for the given field number, or NULL if no extension +// exists for this field number. +const upb_Message_Extension* _upb_Message_Getext( + const upb_Message* msg, const upb_MiniTableExtension* ext); + +void _upb_Message_Clearext(upb_Message* msg, const upb_MiniTableExtension* ext); + +#ifdef __cplusplus +} /* extern "C" */ +#endif + +#include "upb/port/undef.inc" + +#endif /* UPB_MESSAGE_EXTENSION_INTERNAL_H_ */ diff --git a/upb/message/internal.h b/upb/message/internal.h new file mode 100644 index 0000000000..e880866471 --- /dev/null +++ b/upb/message/internal.h @@ -0,0 +1,149 @@ +/* + * Copyright (c) 2009-2021, Google LLC + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * * Neither the name of Google LLC nor the + * names of its contributors may be used to endorse or promote products + * derived from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. 
IN NO EVENT SHALL Google LLC BE LIABLE FOR ANY DIRECT, + * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES + * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND + * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +/* +** Our memory representation for parsing tables and messages themselves. +** Functions in this file are used by generated code and possibly reflection. +** +** The definitions in this file are internal to upb. +**/ + +#ifndef UPB_MESSAGE_INTERNAL_H_ +#define UPB_MESSAGE_INTERNAL_H_ + +#include +#include + +#include "upb/collections/map_internal.h" +#include "upb/extension_registry.h" +#include "upb/message/extension_internal.h" +#include "upb/message/message.h" +#include "upb/mini_table/extension_internal.h" +#include "upb/mini_table/file_internal.h" +#include "upb/mini_table/message_internal.h" + +// Must be last. +#include "upb/port/def.inc" + +#ifdef __cplusplus +extern "C" { +#endif + +extern const float kUpb_FltInfinity; +extern const double kUpb_Infinity; + +/* Internal members of a upb_Message that track unknown fields and/or + * extensions. We can change this without breaking binary compatibility. We put + * these before the user's data. The user's upb_Message* points after the + * upb_Message_Internal. */ + +typedef struct { + /* Total size of this structure, including the data that follows. + * Must be aligned to 8, which is alignof(upb_Message_Extension) */ + uint32_t size; + + /* Offsets relative to the beginning of this structure. + * + * Unknown data grows forward from the beginning to unknown_end. + * Extension data grows backward from size to ext_begin. + * When the two meet, we're out of data and have to realloc. 
+ * + * If we imagine that the final member of this struct is: + * char data[size - overhead]; // overhead = + * sizeof(upb_Message_InternalData) + * + * Then we have: + * unknown data: data[0 .. (unknown_end - overhead)] + * extensions data: data[(ext_begin - overhead) .. (size - overhead)] */ + uint32_t unknown_end; + uint32_t ext_begin; + /* Data follows, as if there were an array: + * char data[size - sizeof(upb_Message_InternalData)]; */ +} upb_Message_InternalData; + +typedef struct { + upb_Message_InternalData* internal; + /* Message data follows. */ +} upb_Message_Internal; + +/* Maps upb_CType -> memory size. */ +extern char _upb_CTypeo_size[12]; + +UPB_INLINE size_t upb_msg_sizeof(const upb_MiniTable* t) { + return t->size + sizeof(upb_Message_Internal); +} + +// Inline version upb_Message_New(), for internal use. +UPB_INLINE upb_Message* _upb_Message_New(const upb_MiniTable* mini_table, + upb_Arena* arena) { + size_t size = upb_msg_sizeof(mini_table); + void* mem = upb_Arena_Malloc(arena, size + sizeof(upb_Message_Internal)); + if (UPB_UNLIKELY(!mem)) return NULL; + upb_Message* msg = UPB_PTR_AT(mem, sizeof(upb_Message_Internal), upb_Message); + memset(mem, 0, size); + return msg; +} + +UPB_INLINE upb_Message_Internal* upb_Message_Getinternal( + const upb_Message* msg) { + ptrdiff_t size = sizeof(upb_Message_Internal); + return (upb_Message_Internal*)((char*)msg - size); +} + +// Clears the given message. +void _upb_Message_Clear(upb_Message* msg, const upb_MiniTable* l); + +// Discards the unknown fields for this message only. +void _upb_Message_DiscardUnknown_shallow(upb_Message* msg); + +// Adds unknown data (serialized protobuf data) to the given message. +// The data is copied into the message instance. +bool _upb_Message_AddUnknown(upb_Message* msg, const char* data, size_t len, + upb_Arena* arena); + +/* Map entries aren't actually stored, they are only used during parsing. 
For + * parsing, it helps a lot if all map entry messages have the same layout. + * The compiler and def.c must ensure that all map entries have this layout. */ +typedef struct { + upb_Message_Internal internal; + union { + upb_StringView str; /* For str/bytes. */ + upb_value val; /* For all other types. */ + } k; + union { + upb_StringView str; /* For str/bytes. */ + upb_value val; /* For all other types. */ + } v; +} upb_MapEntry; + +#ifdef __cplusplus +} /* extern "C" */ +#endif + +#include "upb/port/undef.inc" + +#endif /* UPB_MESSAGE_INTERNAL_H_ */ diff --git a/upb/msg.c b/upb/message/message.c similarity index 94% rename from upb/msg.c rename to upb/message/message.c index b53f03a792..7d9001f494 100644 --- a/upb/msg.c +++ b/upb/message/message.c @@ -25,12 +25,12 @@ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ -#include "upb/msg.h" +#include "upb/message/message.h" #include #include "upb/base/log2.h" -#include "upb/msg_internal.h" +#include "upb/message/internal.h" // Must be last. #include "upb/port/def.inc" @@ -40,19 +40,14 @@ const double kUpb_Infinity = INFINITY; static const size_t overhead = sizeof(upb_Message_InternalData); -static const upb_Message_Internal* upb_Message_Getinternal_const( - const upb_Message* msg) { - ptrdiff_t size = sizeof(upb_Message_Internal); - return (upb_Message_Internal*)((char*)msg - size); -} - upb_Message* upb_Message_New(const upb_MiniTable* mini_table, upb_Arena* arena) { return _upb_Message_New(mini_table, arena); } void _upb_Message_Clear(upb_Message* msg, const upb_MiniTable* l) { - void* mem = (char*)msg - sizeof(upb_Message_Internal); + // Note: Can't use UPB_PTR_AT() here because we are doing pointer subtraction. 
+ char* mem = (char*)msg - sizeof(upb_Message_Internal); memset(mem, 0, upb_msg_sizeof(l)); } @@ -105,7 +100,7 @@ void _upb_Message_DiscardUnknown_shallow(upb_Message* msg) { } const char* upb_Message_GetUnknown(const upb_Message* msg, size_t* len) { - const upb_Message_Internal* in = upb_Message_Getinternal_const(msg); + const upb_Message_Internal* in = upb_Message_Getinternal(msg); if (in->internal) { *len = in->internal->unknown_end - overhead; return (char*)(in->internal + 1); @@ -135,7 +130,7 @@ void upb_Message_DeleteUnknown(upb_Message* msg, const char* data, size_t len) { const upb_Message_Extension* _upb_Message_Getexts(const upb_Message* msg, size_t* count) { - const upb_Message_Internal* in = upb_Message_Getinternal_const(msg); + const upb_Message_Internal* in = upb_Message_Getinternal(msg); if (in->internal) { *count = (in->internal->size - in->internal->ext_begin) / sizeof(upb_Message_Extension); diff --git a/upb/message/message.h b/upb/message/message.h new file mode 100644 index 0000000000..64aeb38816 --- /dev/null +++ b/upb/message/message.h @@ -0,0 +1,70 @@ +/* + * Copyright (c) 2009-2021, Google LLC + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * * Neither the name of Google LLC nor the + * names of its contributors may be used to endorse or promote products + * derived from this software without specific prior written permission. 
+ * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL Google LLC BE LIABLE FOR ANY DIRECT, + * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES + * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND + * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +// Public APIs for message operations that do not require descriptors. +// These functions can be used even in a build that does not want to depend on +// reflection or descriptors. +// +// Descriptor-based reflection functionality lives in reflection.h. + +#ifndef UPB_MESSAGE_MESSAGE_H_ +#define UPB_MESSAGE_MESSAGE_H_ + +#include "upb/mem/arena.h" +#include "upb/mini_table/types.h" + +// Must be last. +#include "upb/port/def.inc" + +#ifdef __cplusplus +extern "C" { +#endif + +// Creates a new message with the given mini_table on the given arena. +upb_Message* upb_Message_New(const upb_MiniTable* mini_table, upb_Arena* arena); + +// Adds unknown data (serialized protobuf data) to the given message. +// The data is copied into the message instance. +void upb_Message_AddUnknown(upb_Message* msg, const char* data, size_t len, + upb_Arena* arena); + +// Returns a reference to the message's unknown data. +const char* upb_Message_GetUnknown(const upb_Message* msg, size_t* len); + +// Removes partial unknown data from message. +void upb_Message_DeleteUnknown(upb_Message* msg, const char* data, size_t len); + +// Returns the number of extensions present in this message. 
+size_t upb_Message_ExtensionCount(const upb_Message* msg); + +#ifdef __cplusplus +} /* extern "C" */ +#endif + +#include "upb/port/undef.inc" + +#endif /* UPB_MESSAGE_MESSAGE_H_ */ diff --git a/upb/msg_test.cc b/upb/message/test.cc similarity index 99% rename from upb/msg_test.cc rename to upb/message/test.cc index de59261cdf..8c967612c7 100644 --- a/upb/msg_test.cc +++ b/upb/message/test.cc @@ -31,8 +31,8 @@ #include "upb/fuzz_test_util.h" #include "upb/json/decode.h" #include "upb/json/encode.h" -#include "upb/msg_test.upb.h" -#include "upb/msg_test.upbdefs.h" +#include "upb/message/test.upb.h" +#include "upb/message/test.upbdefs.h" #include "upb/reflection/def.hpp" #include "upb/upb.hpp" diff --git a/upb/msg_test.proto b/upb/message/test.proto similarity index 100% rename from upb/msg_test.proto rename to upb/message/test.proto diff --git a/upb/mini_table/common.h b/upb/mini_table/common.h index de70816bd4..19f4f2a34e 100644 --- a/upb/mini_table/common.h +++ b/upb/mini_table/common.h @@ -28,7 +28,9 @@ #ifndef UPB_MINI_TABLE_COMMON_H_ #define UPB_MINI_TABLE_COMMON_H_ -#include "upb/msg_internal.h" +#include "upb/mini_table/field_internal.h" +#include "upb/mini_table/message_internal.h" +#include "upb/mini_table/sub_internal.h" // Must be last. #include "upb/port/def.inc" diff --git a/upb/mini_table/decode.c b/upb/mini_table/decode.c index 1e7d4cf7a1..a6edf8dd8c 100644 --- a/upb/mini_table/decode.c +++ b/upb/mini_table/decode.c @@ -28,11 +28,15 @@ #include "upb/mini_table/decode.h" #include +#include #include "upb/base/log2.h" +#include "upb/base/string_view.h" #include "upb/mem/arena.h" #include "upb/mini_table/common.h" #include "upb/mini_table/common_internal.h" +#include "upb/mini_table/enum_internal.h" +#include "upb/mini_table/extension_internal.h" // Must be last. 
#include "upb/port/def.inc" diff --git a/upb/mini_table/decode.h b/upb/mini_table/decode.h index 4b97ef245a..8ac4baf2fb 100644 --- a/upb/mini_table/decode.h +++ b/upb/mini_table/decode.h @@ -28,6 +28,8 @@ #ifndef UPB_MINI_TABLE_DECODE_H_ #define UPB_MINI_TABLE_DECODE_H_ +#include "upb/base/status.h" +#include "upb/mem/arena.h" #include "upb/mini_table/common.h" // Must be last. diff --git a/upb/mini_table/encode_test.cc b/upb/mini_table/encode_test.cc index 2dcb254dd1..88c57bedda 100644 --- a/upb/mini_table/encode_test.cc +++ b/upb/mini_table/encode_test.cc @@ -29,9 +29,11 @@ #include "gtest/gtest.h" #include "absl/container/flat_hash_set.h" #include "google/protobuf/descriptor.h" +#include "upb/message/internal.h" #include "upb/mini_table/common_internal.h" #include "upb/mini_table/decode.h" #include "upb/mini_table/encode_internal.hpp" +#include "upb/mini_table/enum_internal.h" #include "upb/upb.hpp" #include "upb/wire/decode.h" diff --git a/upb/mini_table/enum_internal.h b/upb/mini_table/enum_internal.h new file mode 100644 index 0000000000..f86a06bd01 --- /dev/null +++ b/upb/mini_table/enum_internal.h @@ -0,0 +1,88 @@ +/* + * Copyright (c) 2009-2021, Google LLC + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * * Neither the name of Google LLC nor the + * names of its contributors may be used to endorse or promote products + * derived from this software without specific prior written permission. 
+ * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL Google LLC BE LIABLE FOR ANY DIRECT, + * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES + * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND + * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef UPB_MINI_TABLE_ENUM_INTERNAL_H_ +#define UPB_MINI_TABLE_ENUM_INTERNAL_H_ + +#include "upb/mini_table/types.h" + +// Must be last. +#include "upb/port/def.inc" + +struct upb_MiniTableEnum { + uint32_t mask_limit; // Limit enum value that can be tested with mask. + uint32_t value_count; // Number of values after the bitfield. + uint32_t data[]; // Bitmask + enumerated values follow. +}; + +typedef enum { + _kUpb_FastEnumCheck_ValueIsInEnum = 0, + _kUpb_FastEnumCheck_ValueIsNotInEnum = 1, + _kUpb_FastEnumCheck_CannotCheckFast = 2, +} _kUpb_FastEnumCheck_Status; + +#ifdef __cplusplus +extern "C" { +#endif + +UPB_INLINE _kUpb_FastEnumCheck_Status +_upb_MiniTable_CheckEnumValueFast(const upb_MiniTableEnum* e, uint32_t val) { + if (UPB_UNLIKELY(val >= 64)) return _kUpb_FastEnumCheck_CannotCheckFast; + uint64_t mask = e->data[0] | ((uint64_t)e->data[1] << 32); + return (mask & (1ULL << val)) ? _kUpb_FastEnumCheck_ValueIsInEnum + : _kUpb_FastEnumCheck_ValueIsNotInEnum; +} + +UPB_INLINE bool _upb_MiniTable_CheckEnumValueSlow(const upb_MiniTableEnum* e, + uint32_t val) { + if (val < e->mask_limit) return e->data[val / 32] & (1ULL << (val % 32)); + // OPT: binary search long lists? 
+ const uint32_t* start = &e->data[e->mask_limit / 32]; + const uint32_t* limit = &e->data[(e->mask_limit / 32) + e->value_count]; + for (const uint32_t* p = start; p < limit; p++) { + if (*p == val) return true; + } + return false; +} + +// Validates enum value against range defined by enum mini table. +UPB_INLINE bool upb_MiniTableEnum_CheckValue(const upb_MiniTableEnum* e, + uint32_t val) { + _kUpb_FastEnumCheck_Status status = _upb_MiniTable_CheckEnumValueFast(e, val); + if (UPB_UNLIKELY(status == _kUpb_FastEnumCheck_CannotCheckFast)) { + return _upb_MiniTable_CheckEnumValueSlow(e, val); + } + return status == _kUpb_FastEnumCheck_ValueIsInEnum ? true : false; +} + +#ifdef __cplusplus +} /* extern "C" */ +#endif + +#include "upb/port/undef.inc" + +#endif /* UPB_MINI_TABLE_ENUM_INTERNAL_H_ */ diff --git a/upb/mini_table/extension_internal.h b/upb/mini_table/extension_internal.h new file mode 100644 index 0000000000..73499c3f93 --- /dev/null +++ b/upb/mini_table/extension_internal.h @@ -0,0 +1,45 @@ +/* + * Copyright (c) 2009-2021, Google LLC + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * * Neither the name of Google LLC nor the + * names of its contributors may be used to endorse or promote products + * derived from this software without specific prior written permission. 
+ * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL Google LLC BE LIABLE FOR ANY DIRECT, + * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES + * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND + * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef UPB_MINI_TABLE_EXTENSION_INTERNAL_H_ +#define UPB_MINI_TABLE_EXTENSION_INTERNAL_H_ + +#include "upb/mini_table/field_internal.h" +#include "upb/mini_table/sub_internal.h" + +// Must be last. +#include "upb/port/def.inc" + +struct upb_MiniTableExtension { + upb_MiniTableField field; + const upb_MiniTable* extendee; + upb_MiniTableSub sub; // NULL unless submessage or proto2 enum +}; + +#include "upb/port/undef.inc" + +#endif /* UPB_MINI_TABLE_EXTENSION_INTERNAL_H_ */ diff --git a/upb/mini_table/field_internal.h b/upb/mini_table/field_internal.h new file mode 100644 index 0000000000..2f0ba7bb40 --- /dev/null +++ b/upb/mini_table/field_internal.h @@ -0,0 +1,178 @@ +/* + * Copyright (c) 2009-2021, Google LLC + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. 
+ * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * * Neither the name of Google LLC nor the + * names of its contributors may be used to endorse or promote products + * derived from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL Google LLC BE LIABLE FOR ANY DIRECT, + * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES + * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND + * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef UPB_MINI_TABLE_FIELD_INTERNAL_H_ +#define UPB_MINI_TABLE_FIELD_INTERNAL_H_ + +#include "upb/base/descriptor_constants.h" +#include "upb/mini_table/types.h" + +// Must be last. +#include "upb/port/def.inc" + +struct upb_MiniTableField { + uint32_t number; + uint16_t offset; + int16_t presence; // If >0, hasbit_index. If <0, ~oneof_index + uint16_t submsg_index; // kUpb_NoSub if descriptortype != MESSAGE/GROUP/ENUM + uint8_t descriptortype; + + // upb_FieldMode | upb_LabelFlags | (upb_FieldRep << kUpb_FieldRep_Shift) + uint8_t mode; +}; + +#define kUpb_NoSub ((uint16_t)-1) + +typedef enum { + kUpb_FieldMode_Map = 0, + kUpb_FieldMode_Array = 1, + kUpb_FieldMode_Scalar = 2, +} upb_FieldMode; + +// Mask to isolate the upb_FieldMode from field.mode. 
+#define kUpb_FieldMode_Mask 3 + +// Extra flags on the mode field. +typedef enum { + kUpb_LabelFlags_IsPacked = 4, + kUpb_LabelFlags_IsExtension = 8, + // Indicates that this descriptor type is an "alternate type": + // - for Int32, this indicates that the actual type is Enum (but was + // rewritten to Int32 because it is an open enum that requires no check). + // - for Bytes, this indicates that the actual type is String (but does + // not require any UTF-8 check). + kUpb_LabelFlags_IsAlternate = 16, +} upb_LabelFlags; + +// Note: we sort by this number when calculating layout order. +typedef enum { + kUpb_FieldRep_1Byte = 0, + kUpb_FieldRep_4Byte = 1, + kUpb_FieldRep_StringView = 2, + kUpb_FieldRep_8Byte = 3, + + kUpb_FieldRep_Shift = 6, // Bit offset of the rep in upb_MiniTableField.mode + kUpb_FieldRep_Max = kUpb_FieldRep_8Byte, +} upb_FieldRep; + +#ifdef __cplusplus +extern "C" { +#endif + +UPB_INLINE upb_FieldMode upb_FieldMode_Get(const upb_MiniTableField* field) { + return (upb_FieldMode)(field->mode & kUpb_FieldMode_Mask); +} + +UPB_INLINE bool upb_IsRepeatedOrMap(const upb_MiniTableField* field) { + // This works because upb_FieldMode has no value 3.
+ return !(field->mode & kUpb_FieldMode_Scalar); +} + +UPB_INLINE bool upb_IsSubMessage(const upb_MiniTableField* field) { + return field->descriptortype == kUpb_FieldType_Message || + field->descriptortype == kUpb_FieldType_Group; +} + +// LINT.IfChange(presence_logic) + +// Hasbit access /////////////////////////////////////////////////////////////// + +UPB_INLINE size_t _upb_hasbit_ofs(size_t idx) { return idx / 8; } + +UPB_INLINE char _upb_hasbit_mask(size_t idx) { return 1 << (idx % 8); } + +UPB_INLINE bool _upb_hasbit(const upb_Message* msg, size_t idx) { + return (*UPB_PTR_AT(msg, _upb_hasbit_ofs(idx), const char) & + _upb_hasbit_mask(idx)) != 0; +} + +UPB_INLINE void _upb_sethas(const upb_Message* msg, size_t idx) { + (*UPB_PTR_AT(msg, _upb_hasbit_ofs(idx), char)) |= _upb_hasbit_mask(idx); +} + +UPB_INLINE void _upb_clearhas(const upb_Message* msg, size_t idx) { + (*UPB_PTR_AT(msg, _upb_hasbit_ofs(idx), char)) &= ~_upb_hasbit_mask(idx); +} + +UPB_INLINE size_t _upb_Message_Hasidx(const upb_MiniTableField* f) { + UPB_ASSERT(f->presence > 0); + return f->presence; +} + +UPB_INLINE bool _upb_hasbit_field(const upb_Message* msg, + const upb_MiniTableField* f) { + return _upb_hasbit(msg, _upb_Message_Hasidx(f)); +} + +UPB_INLINE void _upb_sethas_field(const upb_Message* msg, + const upb_MiniTableField* f) { + _upb_sethas(msg, _upb_Message_Hasidx(f)); +} + +UPB_INLINE void _upb_clearhas_field(const upb_Message* msg, + const upb_MiniTableField* f) { + _upb_clearhas(msg, _upb_Message_Hasidx(f)); +} + +// Oneof case access /////////////////////////////////////////////////////////// + +UPB_INLINE uint32_t* _upb_oneofcase(upb_Message* msg, size_t case_ofs) { + return UPB_PTR_AT(msg, case_ofs, uint32_t); +} + +UPB_INLINE uint32_t _upb_getoneofcase(const void* msg, size_t case_ofs) { + return *UPB_PTR_AT(msg, case_ofs, const uint32_t); +} + +UPB_INLINE size_t _upb_oneofcase_ofs(const upb_MiniTableField* f) { + UPB_ASSERT(f->presence < 0); + return ~(ptrdiff_t)f->presence; +}
+ +UPB_INLINE uint32_t* _upb_oneofcase_field(upb_Message* msg, + const upb_MiniTableField* f) { + return _upb_oneofcase(msg, _upb_oneofcase_ofs(f)); +} + +UPB_INLINE uint32_t _upb_getoneofcase_field(const upb_Message* msg, + const upb_MiniTableField* f) { + return _upb_getoneofcase(msg, _upb_oneofcase_ofs(f)); +} + +UPB_INLINE bool _upb_has_submsg_nohasbit(const upb_Message* msg, size_t ofs) { + return *UPB_PTR_AT(msg, ofs, const upb_Message*) != NULL; +} + +// LINT.ThenChange(GoogleInternalName2) + +#ifdef __cplusplus +} /* extern "C" */ +#endif + +#include "upb/port/undef.inc" + +#endif /* UPB_MINI_TABLE_FIELD_INTERNAL_H_ */ diff --git a/upb/mini_table/file_internal.h b/upb/mini_table/file_internal.h new file mode 100644 index 0000000000..b102bdfca2 --- /dev/null +++ b/upb/mini_table/file_internal.h @@ -0,0 +1,47 @@ +/* + * Copyright (c) 2009-2021, Google LLC + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * * Neither the name of Google LLC nor the + * names of its contributors may be used to endorse or promote products + * derived from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. 
IN NO EVENT SHALL Google LLC BE LIABLE FOR ANY DIRECT, + * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES + * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND + * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef UPB_MINI_TABLE_FILE_INTERNAL_H_ +#define UPB_MINI_TABLE_FILE_INTERNAL_H_ + +#include "upb/mini_table/types.h" + +// Must be last. +#include "upb/port/def.inc" + +struct upb_MiniTableFile { + const upb_MiniTable** msgs; + const upb_MiniTableEnum** enums; + const upb_MiniTableExtension** exts; + int msg_count; + int enum_count; + int ext_count; +}; + +#include "upb/port/undef.inc" + +#endif /* UPB_MINI_TABLE_FILE_INTERNAL_H_ */ diff --git a/upb/mini_table/message_internal.h b/upb/mini_table/message_internal.h new file mode 100644 index 0000000000..10e19a77ab --- /dev/null +++ b/upb/mini_table/message_internal.h @@ -0,0 +1,102 @@ +/* + * Copyright (c) 2009-2021, Google LLC + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * * Neither the name of Google LLC nor the + * names of its contributors may be used to endorse or promote products + * derived from this software without specific prior written permission. 
+ * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL Google LLC BE LIABLE FOR ANY DIRECT, + * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES + * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND + * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef UPB_MINI_TABLE_MESSAGE_INTERNAL_H_ +#define UPB_MINI_TABLE_MESSAGE_INTERNAL_H_ + +#include "upb/mini_table/types.h" + +// Must be last. +#include "upb/port/def.inc" + +struct upb_Decoder; +typedef const char* _upb_FieldParser(struct upb_Decoder* d, const char* ptr, + upb_Message* msg, intptr_t table, + uint64_t hasbits, uint64_t data); +typedef struct { + uint64_t field_data; + _upb_FieldParser* field_parser; +} _upb_FastTable_Entry; + +typedef enum { + kUpb_ExtMode_NonExtendable = 0, // Non-extendable message. + kUpb_ExtMode_Extendable = 1, // Normal extendable message. + kUpb_ExtMode_IsMessageSet = 2, // MessageSet message. + kUpb_ExtMode_IsMessageSet_ITEM = + 3, // MessageSet item (temporary only, see decode.c) + + // During table building we steal a bit to indicate that the message is a map + // entry. *Only* used during table building! + kUpb_ExtMode_IsMapEntry = 4, +} upb_ExtMode; + +// upb_MiniTable represents the memory layout of a given upb_MessageDef. +// The members are public so generated code can initialize them, +// but users MUST NOT directly read or write any of its members. 
+struct upb_MiniTable { + const upb_MiniTableSub* subs; + const upb_MiniTableField* fields; + + // Must be aligned to sizeof(void*). Doesn't include internal members like + // unknown fields, extension dict, pointer to msglayout, etc. + uint16_t size; + + uint16_t field_count; + uint8_t ext; // upb_ExtMode, declared as uint8_t so sizeof(ext) == 1 + uint8_t dense_below; + uint8_t table_mask; + uint8_t required_count; // Required fields have the lowest hasbits. + + // To statically initialize the tables of variable length, we need a flexible + // array member, and we need to compile in gnu99 mode (constant initialization + // of flexible array members is a GNU extension, not in C99 unfortunately). + _upb_FastTable_Entry fasttable[]; +}; + +#ifdef __cplusplus +extern "C" { +#endif + +// Computes a bitmask in which the |l->required_count| lowest bits are set, +// except that we skip the lowest bit (because upb never uses hasbit 0). +// +// Sample output: +// requiredmask(1) => 0b10 (0x2) +// requiredmask(5) => 0b111110 (0x3e) +UPB_INLINE uint64_t upb_MiniTable_requiredmask(const upb_MiniTable* l) { + int n = l->required_count; + UPB_ASSERT(0 < n && n <= 63); + return ((1ULL << n) - 1) << 1; +} + +#ifdef __cplusplus +} /* extern "C" */ +#endif + +#include "upb/port/undef.inc" + +#endif /* UPB_MINI_TABLE_MESSAGE_INTERNAL_H_ */ diff --git a/upb/mini_table/sub_internal.h b/upb/mini_table/sub_internal.h new file mode 100644 index 0000000000..981a286d22 --- /dev/null +++ b/upb/mini_table/sub_internal.h @@ -0,0 +1,38 @@ +/* + * Copyright (c) 2009-2021, Google LLC + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * * Neither the name of Google LLC nor the + * names of its contributors may be used to endorse or promote products + * derived from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL Google LLC BE LIABLE FOR ANY DIRECT, + * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES + * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND + * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef UPB_MINI_TABLE_SUB_INTERNAL_H_ +#define UPB_MINI_TABLE_SUB_INTERNAL_H_ + +#include "upb/mini_table/types.h" + +union upb_MiniTableSub { + const upb_MiniTable* submsg; + const upb_MiniTableEnum* subenum; +}; + +#endif /* UPB_MINI_TABLE_SUB_INTERNAL_H_ */ diff --git a/upb/mini_table/types.h b/upb/mini_table/types.h new file mode 100644 index 0000000000..8f0a9c4382 --- /dev/null +++ b/upb/mini_table/types.h @@ -0,0 +1,40 @@ +/* + * Copyright (c) 2009-2021, Google LLC + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. 
+ * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * * Neither the name of Google LLC nor the + * names of its contributors may be used to endorse or promote products + * derived from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL Google LLC BE LIABLE FOR ANY DIRECT, + * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES + * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND + * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef UPB_MINI_TABLE_TYPES_H_ +#define UPB_MINI_TABLE_TYPES_H_ + +typedef void upb_Message; + +typedef struct upb_MiniTable upb_MiniTable; +typedef struct upb_MiniTableEnum upb_MiniTableEnum; +typedef struct upb_MiniTableExtension upb_MiniTableExtension; +typedef struct upb_MiniTableField upb_MiniTableField; +typedef struct upb_MiniTableFile upb_MiniTableFile; +typedef union upb_MiniTableSub upb_MiniTableSub; + +#endif /* UPB_MINI_TABLE_TYPES_H_ */ diff --git a/upb/msg.h b/upb/msg.h index abb132e59e..1d70f6f0ed 100644 --- a/upb/msg.h +++ b/upb/msg.h @@ -25,53 +25,11 @@ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ -/* - * Public APIs for message operations that do not require descriptors. 
- * These functions can be used even in build that does not want to depend on - * reflection or descriptors. - * - * Descriptor-based reflection functionality lives in reflection.h. - */ +// This header is deprecated, use upb/message/message.h instead #ifndef UPB_MSG_H_ #define UPB_MSG_H_ -#include "upb/mem/arena.h" - -// Must be last. -#include "upb/port/def.inc" - -#ifdef __cplusplus -extern "C" { -#endif - -typedef void upb_Message; - -/* For users these are opaque. They can be obtained from - * upb_MessageDef_MiniTable() but users cannot access any of the members. */ -typedef struct upb_MiniTable upb_MiniTable; - -/* Creates a new message with the given mini_table on the given arena. */ -upb_Message* upb_Message_New(const upb_MiniTable* mini_table, upb_Arena* arena); - -/* Adds unknown data (serialized protobuf data) to the given message. The data - * is copied into the message instance. */ -void upb_Message_AddUnknown(upb_Message* msg, const char* data, size_t len, - upb_Arena* arena); - -/* Returns a reference to the message's unknown data. */ -const char* upb_Message_GetUnknown(const upb_Message* msg, size_t* len); - -/* Removes partial unknown data from message. */ -void upb_Message_DeleteUnknown(upb_Message* msg, const char* data, size_t len); - -/* Returns the number of extensions present in this message. */ -size_t upb_Message_ExtensionCount(const upb_Message* msg); - -#ifdef __cplusplus -} /* extern "C" */ -#endif - -#include "upb/port/undef.inc" +#include "upb/message/message.h" #endif /* UPB_MSG_INT_H_ */ diff --git a/upb/msg_internal.h b/upb/msg_internal.h index 4a1b235c82..39d6d6b210 100644 --- a/upb/msg_internal.h +++ b/upb/msg_internal.h @@ -25,411 +25,11 @@ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ -/* -** Our memory representation for parsing tables and messages themselves. -** Functions in this file are used by generated code and possibly reflection. -** -** The definitions in this file are internal to upb. 
-**/ +// This header is deprecated, use upb/message/internal.h instead #ifndef UPB_MSG_INT_H_ #define UPB_MSG_INT_H_ -#include -#include - -#include "upb/base/status.h" -#include "upb/collections/map_internal.h" -#include "upb/extension_registry.h" -#include "upb/msg.h" - -// Must be last. -#include "upb/port/def.inc" - -#ifdef __cplusplus -extern "C" { -#endif - -extern const float kUpb_FltInfinity; -extern const double kUpb_Infinity; - -/** upb_MiniTable *************************************************************/ - -/* upb_MiniTable represents the memory layout of a given upb_MessageDef. The - * members are public so generated code can initialize them, but users MUST NOT - * read or write any of its members. */ - -typedef struct { - uint32_t number; - uint16_t offset; - int16_t presence; // If >0, hasbit_index. If <0, ~oneof_index - uint16_t submsg_index; // kUpb_NoSub if descriptortype != MESSAGE/GROUP/ENUM - uint8_t descriptortype; - uint8_t mode; /* upb_FieldMode | upb_LabelFlags | - (upb_FieldRep << kUpb_FieldRep_Shift) */ -} upb_MiniTableField; - -#define kUpb_NoSub ((uint16_t)-1) - -typedef enum { - kUpb_FieldMode_Map = 0, - kUpb_FieldMode_Array = 1, - kUpb_FieldMode_Scalar = 2, -} upb_FieldMode; - -// Mask to isolate the upb_FieldMode from field.mode. -#define kUpb_FieldMode_Mask 3 - -// Extra flags on the mode field. -typedef enum { - kUpb_LabelFlags_IsPacked = 4, - kUpb_LabelFlags_IsExtension = 8, - // Indicates that this descriptor type is an "alternate type": - // - for Int32, this indicates that the actual type is Enum (but was - // rewritten to Int32 because it is an open enum that requires no check). - // - for Bytes, this indicates that the actual type is String (but does - // not require any UTF-8 check). - kUpb_LabelFlags_IsAlternate = 16, -} upb_LabelFlags; - -// Note: we sort by this number when calculating layout order. 
-typedef enum { - kUpb_FieldRep_1Byte = 0, - kUpb_FieldRep_4Byte = 1, - kUpb_FieldRep_StringView = 2, - kUpb_FieldRep_8Byte = 3, - - kUpb_FieldRep_Shift = 6, // Bit offset of the rep in upb_MiniTableField.mode - kUpb_FieldRep_Max = kUpb_FieldRep_8Byte, -} upb_FieldRep; - -UPB_INLINE upb_FieldMode upb_FieldMode_Get(const upb_MiniTableField* field) { - return (upb_FieldMode)(field->mode & 3); -} - -UPB_INLINE bool upb_IsRepeatedOrMap(const upb_MiniTableField* field) { - /* This works because upb_FieldMode has no value 3. */ - return !(field->mode & kUpb_FieldMode_Scalar); -} - -UPB_INLINE bool upb_IsSubMessage(const upb_MiniTableField* field) { - return field->descriptortype == kUpb_FieldType_Message || - field->descriptortype == kUpb_FieldType_Group; -} - -struct upb_Decoder; -struct upb_MiniTable; - -typedef const char* _upb_FieldParser(struct upb_Decoder* d, const char* ptr, - upb_Message* msg, intptr_t table, - uint64_t hasbits, uint64_t data); - -typedef struct { - uint64_t field_data; - _upb_FieldParser* field_parser; -} _upb_FastTable_Entry; - -typedef struct { - uint32_t mask_limit; // Limit enum value that can be tested with mask. - uint32_t value_count; // Number of values after the bitfield. - uint32_t data[]; // Bitmask + enumerated values follow. -} upb_MiniTableEnum; - -typedef enum { - _kUpb_FastEnumCheck_ValueIsInEnum = 0, - _kUpb_FastEnumCheck_ValueIsNotInEnum = 1, - _kUpb_FastEnumCheck_CannotCheckFast = 2, -} _kUpb_FastEnumCheck_Status; - -UPB_INLINE _kUpb_FastEnumCheck_Status -_upb_MiniTable_CheckEnumValueFast(const upb_MiniTableEnum* e, uint32_t val) { - if (UPB_UNLIKELY(val >= 64)) return _kUpb_FastEnumCheck_CannotCheckFast; - uint64_t mask = e->data[0] | ((uint64_t)e->data[1] << 32); - return (mask & (1ULL << val)) ? 
_kUpb_FastEnumCheck_ValueIsInEnum - : _kUpb_FastEnumCheck_ValueIsNotInEnum; -} - -UPB_INLINE bool _upb_MiniTable_CheckEnumValueSlow(const upb_MiniTableEnum* e, - uint32_t val) { - if (val < e->mask_limit) return e->data[val / 32] & (1ULL << (val % 32)); - // OPT: binary search long lists? - const uint32_t* start = &e->data[e->mask_limit / 32]; - const uint32_t* limit = &e->data[(e->mask_limit / 32) + e->value_count]; - for (const uint32_t* p = start; p < limit; p++) { - if (*p == val) return true; - } - return false; -} - -// Validates enum value against range defined by enum mini table. -UPB_INLINE bool upb_MiniTableEnum_CheckValue(const upb_MiniTableEnum* e, - uint32_t val) { - _kUpb_FastEnumCheck_Status status = _upb_MiniTable_CheckEnumValueFast(e, val); - if (UPB_UNLIKELY(status == _kUpb_FastEnumCheck_CannotCheckFast)) { - return _upb_MiniTable_CheckEnumValueSlow(e, val); - } - return status == _kUpb_FastEnumCheck_ValueIsInEnum ? true : false; -} - -typedef union { - const struct upb_MiniTable* submsg; - const upb_MiniTableEnum* subenum; -} upb_MiniTableSub; - -typedef enum { - kUpb_ExtMode_NonExtendable = 0, // Non-extendable message. - kUpb_ExtMode_Extendable = 1, // Normal extendable message. - kUpb_ExtMode_IsMessageSet = 2, // MessageSet message. - kUpb_ExtMode_IsMessageSet_ITEM = - 3, // MessageSet item (temporary only, see decode.c) - - // During table building we steal a bit to indicate that the message is a map - // entry. *Only* used during table building! - kUpb_ExtMode_IsMapEntry = 4, -} upb_ExtMode; - -struct upb_MiniTable { - const upb_MiniTableSub* subs; - const upb_MiniTableField* fields; - /* Must be aligned to sizeof(void*). Doesn't include internal members like - * unknown fields, extension dict, pointer to msglayout, etc. 
*/ - uint16_t size; - uint16_t field_count; - uint8_t ext; // upb_ExtMode, declared as uint8_t so sizeof(ext) == 1 - uint8_t dense_below; - uint8_t table_mask; - uint8_t required_count; // Required fields have the lowest hasbits. - /* To statically initialize the tables of variable length, we need a flexible - * array member, and we need to compile in gnu99 mode (constant initialization - * of flexible array members is a GNU extension, not in C99 unfortunately. */ - _upb_FastTable_Entry fasttable[]; -}; - -struct upb_MiniTableExtension { - upb_MiniTableField field; - const upb_MiniTable* extendee; - upb_MiniTableSub sub; /* NULL unless submessage or proto2 enum */ -}; - -typedef struct { - const upb_MiniTable** msgs; - const upb_MiniTableEnum** enums; - const upb_MiniTableExtension** exts; - int msg_count; - int enum_count; - int ext_count; -} upb_MiniTableFile; - -// Computes a bitmask in which the |l->required_count| lowest bits are set, -// except that we skip the lowest bit (because upb never uses hasbit 0). -// -// Sample output: -// requiredmask(1) => 0b10 (0x2) -// requiredmask(5) => 0b111110 (0x3e) -UPB_INLINE uint64_t upb_MiniTable_requiredmask(const upb_MiniTable* l) { - int n = l->required_count; - assert(0 < n && n <= 63); - return ((1ULL << n) - 1) << 1; -} - -/** upb_Message ***************************************************************/ - -/* Internal members of a upb_Message that track unknown fields and/or - * extensions. We can change this without breaking binary compatibility. We put - * these before the user's data. The user's upb_Message* points after the - * upb_Message_Internal. */ - -typedef struct { - /* Total size of this structure, including the data that follows. - * Must be aligned to 8, which is alignof(upb_Message_Extension) */ - uint32_t size; - - /* Offsets relative to the beginning of this structure. - * - * Unknown data grows forward from the beginning to unknown_end. - * Extension data grows backward from size to ext_begin. 
- * When the two meet, we're out of data and have to realloc. - * - * If we imagine that the final member of this struct is: - * char data[size - overhead]; // overhead = - * sizeof(upb_Message_InternalData) - * - * Then we have: - * unknown data: data[0 .. (unknown_end - overhead)] - * extensions data: data[(ext_begin - overhead) .. (size - overhead)] */ - uint32_t unknown_end; - uint32_t ext_begin; - /* Data follows, as if there were an array: - * char data[size - sizeof(upb_Message_InternalData)]; */ -} upb_Message_InternalData; - -typedef struct { - upb_Message_InternalData* internal; - /* Message data follows. */ -} upb_Message_Internal; - -/* Maps upb_CType -> memory size. */ -extern char _upb_CTypeo_size[12]; - -UPB_INLINE size_t upb_msg_sizeof(const upb_MiniTable* l) { - return l->size + sizeof(upb_Message_Internal); -} - -/* Inline version upb_Message_New(), for internal use */ -UPB_INLINE upb_Message* _upb_Message_New(const upb_MiniTable* mini_table, - upb_Arena* arena) { - size_t size = upb_msg_sizeof(mini_table); - void* mem = upb_Arena_Malloc(arena, size + sizeof(upb_Message_Internal)); - if (UPB_UNLIKELY(!mem)) return NULL; - upb_Message* msg = UPB_PTR_AT(mem, sizeof(upb_Message_Internal), upb_Message); - memset(mem, 0, size); - return msg; -} - -UPB_INLINE upb_Message_Internal* upb_Message_Getinternal(upb_Message* msg) { - ptrdiff_t size = sizeof(upb_Message_Internal); - return (upb_Message_Internal*)((char*)msg - size); -} - -/* Clears the given message. */ -void _upb_Message_Clear(upb_Message* msg, const upb_MiniTable* l); - -/* Discards the unknown fields for this message only. */ -void _upb_Message_DiscardUnknown_shallow(upb_Message* msg); - -/* Adds unknown data (serialized protobuf data) to the given message. The data - * is copied into the message instance. 
*/ -bool _upb_Message_AddUnknown(upb_Message* msg, const char* data, size_t len, - upb_Arena* arena); - -/** upb_Message_Extension *****************************************************/ - -/* The internal representation of an extension is self-describing: it contains - * enough information that we can serialize it to binary format without needing - * to look it up in a upb_ExtensionRegistry. - * - * This representation allocates 16 bytes to data on 64-bit platforms. This is - * rather wasteful for scalars (in the extreme case of bool, it wastes 15 - * bytes). We accept this because we expect messages to be the most common - * extension type. */ -typedef struct { - const upb_MiniTableExtension* ext; - union { - upb_StringView str; - void* ptr; - char scalar_data[8]; - } data; -} upb_Message_Extension; - -/* Adds the given extension data to the given message. |ext| is copied into the - * message instance. This logically replaces any previously-added extension with - * this number */ -upb_Message_Extension* _upb_Message_GetOrCreateExtension( - upb_Message* msg, const upb_MiniTableExtension* ext, upb_Arena* arena); - -/* Returns an array of extensions for this message. Note: the array is - * ordered in reverse relative to the order of creation. */ -const upb_Message_Extension* _upb_Message_Getexts(const upb_Message* msg, - size_t* count); - -/* Returns an extension for the given field number, or NULL if no extension - * exists for this field number. 
*/ -const upb_Message_Extension* _upb_Message_Getext( - const upb_Message* msg, const upb_MiniTableExtension* ext); - -void _upb_Message_Clearext(upb_Message* msg, const upb_MiniTableExtension* ext); - -// LINT.IfChange(presence_logic) - -/** Hasbit access *************************************************************/ - -UPB_INLINE size_t _upb_hasbit_ofs(size_t idx) { return idx / 8; } - -UPB_INLINE char _upb_hasbit_mask(size_t idx) { return 1 << (idx % 8); } - -UPB_INLINE bool _upb_hasbit(const upb_Message* msg, size_t idx) { - return (*UPB_PTR_AT(msg, _upb_hasbit_ofs(idx), const char) & - _upb_hasbit_mask(idx)) != 0; -} - -UPB_INLINE void _upb_sethas(const upb_Message* msg, size_t idx) { - (*UPB_PTR_AT(msg, _upb_hasbit_ofs(idx), char)) |= _upb_hasbit_mask(idx); -} - -UPB_INLINE void _upb_clearhas(const upb_Message* msg, size_t idx) { - (*UPB_PTR_AT(msg, _upb_hasbit_ofs(idx), char)) &= ~_upb_hasbit_mask(idx); -} - -UPB_INLINE size_t _upb_Message_Hasidx(const upb_MiniTableField* f) { - UPB_ASSERT(f->presence > 0); - return f->presence; -} - -UPB_INLINE bool _upb_hasbit_field(const upb_Message* msg, - const upb_MiniTableField* f) { - return _upb_hasbit(msg, _upb_Message_Hasidx(f)); -} - -UPB_INLINE void _upb_sethas_field(const upb_Message* msg, - const upb_MiniTableField* f) { - _upb_sethas(msg, _upb_Message_Hasidx(f)); -} - -UPB_INLINE void _upb_clearhas_field(const upb_Message* msg, - const upb_MiniTableField* f) { - _upb_clearhas(msg, _upb_Message_Hasidx(f)); -} - -/** Oneof case access *********************************************************/ - -UPB_INLINE uint32_t* _upb_oneofcase(upb_Message* msg, size_t case_ofs) { - return UPB_PTR_AT(msg, case_ofs, uint32_t); -} - -UPB_INLINE uint32_t _upb_getoneofcase(const void* msg, size_t case_ofs) { - return *UPB_PTR_AT(msg, case_ofs, uint32_t); -} - -UPB_INLINE size_t _upb_oneofcase_ofs(const upb_MiniTableField* f) { - UPB_ASSERT(f->presence < 0); - return ~(ptrdiff_t)f->presence; -} - -UPB_INLINE uint32_t* 
_upb_oneofcase_field(upb_Message* msg, - const upb_MiniTableField* f) { - return _upb_oneofcase(msg, _upb_oneofcase_ofs(f)); -} - -UPB_INLINE uint32_t _upb_getoneofcase_field(const upb_Message* msg, - const upb_MiniTableField* f) { - return _upb_getoneofcase(msg, _upb_oneofcase_ofs(f)); -} - -UPB_INLINE bool _upb_has_submsg_nohasbit(const upb_Message* msg, size_t ofs) { - return *UPB_PTR_AT(msg, ofs, const upb_Message*) != NULL; -} - -// LINT.ThenChange(GoogleInternalName2) - -/* Map entries aren't actually stored, they are only used during parsing. For - * parsing, it helps a lot if all map entry messages have the same layout. - * The compiler and def.c must ensure that all map entries have this layout. */ -typedef struct { - upb_Message_Internal internal; - union { - upb_StringView str; /* For str/bytes. */ - upb_value val; /* For all other types. */ - } k; - union { - upb_StringView str; /* For str/bytes. */ - upb_value val; /* For all other types. */ - } v; -} upb_MapEntry; - -#ifdef __cplusplus -} /* extern "C" */ -#endif - -#include "upb/port/undef.inc" +#include "upb/message/internal.h" #endif /* UPB_MSG_INT_H_ */ diff --git a/upb/reflection/def_pool.h b/upb/reflection/def_pool.h index dd02f73be9..abe6a3fdb9 100644 --- a/upb/reflection/def_pool.h +++ b/upb/reflection/def_pool.h @@ -30,6 +30,7 @@ #ifndef UPB_REFLECTION_DEF_POOL_H_ #define UPB_REFLECTION_DEF_POOL_H_ +#include "upb/base/status.h" #include "upb/base/string_view.h" #include "upb/reflection/common.h" #include "upb/reflection/def_type.h" diff --git a/upb/reflection/desc_state_internal.h b/upb/reflection/desc_state_internal.h index 8b431f1da9..f25724e92c 100644 --- a/upb/reflection/desc_state_internal.h +++ b/upb/reflection/desc_state_internal.h @@ -28,6 +28,7 @@ #ifndef UPB_REFLECTION_DESC_STATE_INTERNAL_H_ #define UPB_REFLECTION_DESC_STATE_INTERNAL_H_ +#include "upb/mem/arena.h" #include "upb/mini_table/encode_internal.h" // Must be last. 
diff --git a/upb/reflection/message.c b/upb/reflection/message.c index 2fa1d1d907..1a7f591bd7 100644 --- a/upb/reflection/message.c +++ b/upb/reflection/message.c @@ -31,7 +31,7 @@ #include "upb/collections/map.h" #include "upb/hash/common.h" -#include "upb/msg.h" +#include "upb/message/message.h" #include "upb/reflection/def_pool.h" #include "upb/reflection/def_type.h" #include "upb/reflection/field_def_internal.h" diff --git a/upb/wire/decode.c b/upb/wire/decode.c index a3167ffe11..61bd60fba7 100644 --- a/upb/wire/decode.c +++ b/upb/wire/decode.c @@ -31,6 +31,7 @@ #include "upb/collections/array_internal.h" #include "upb/collections/map_internal.h" +#include "upb/mini_table/enum_internal.h" #include "upb/wire/common_internal.h" #include "upb/wire/decode_internal.h" #include "upb/wire/swap_internal.h" diff --git a/upb/wire/decode.h b/upb/wire/decode.h index b693a8feaa..e0041737eb 100644 --- a/upb/wire/decode.h +++ b/upb/wire/decode.h @@ -30,9 +30,9 @@ #ifndef UPB_WIRE_DECODE_H_ #define UPB_WIRE_DECODE_H_ -#include "upb/mem/arena.h" #include "upb/extension_registry.h" -#include "upb/msg.h" +#include "upb/mem/arena.h" +#include "upb/message/message.h" // Must be last. #include "upb/port/def.inc" diff --git a/upb/wire/decode_fast.h b/upb/wire/decode_fast.h index d5e7853e25..da606a832a 100644 --- a/upb/wire/decode_fast.h +++ b/upb/wire/decode_fast.h @@ -62,7 +62,7 @@ #ifndef UPB_WIRE_DECODE_FAST_H_ #define UPB_WIRE_DECODE_FAST_H_ -#include "upb/msg.h" +#include "upb/message/message.h" // Must be last. 
#include "upb/port/def.inc" diff --git a/upb/wire/decode_internal.h b/upb/wire/decode_internal.h index dd3322bf9f..83892a5f33 100644 --- a/upb/wire/decode_internal.h +++ b/upb/wire/decode_internal.h @@ -34,7 +34,7 @@ #define UPB_WIRE_DECODE_INTERNAL_H_ #include "upb/mem/arena_internal.h" -#include "upb/msg_internal.h" +#include "upb/message/internal.h" #include "upb/wire/decode.h" #include "third_party/utf8_range/utf8_range.h" diff --git a/upb/wire/encode.h b/upb/wire/encode.h index 983b6da8dc..8067fcbeb6 100644 --- a/upb/wire/encode.h +++ b/upb/wire/encode.h @@ -25,14 +25,12 @@ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ -/* - * upb_Encode: parsing from a upb_Message using a upb_MiniTable. - */ +// upb_Encode: parsing from a upb_Message using a upb_MiniTable. #ifndef UPB_WIRE_ENCODE_H_ #define UPB_WIRE_ENCODE_H_ -#include "upb/msg.h" +#include "upb/message/message.h" // Must be last. #include "upb/port/def.inc" diff --git a/upbc/BUILD b/upbc/BUILD index 38d1e1ae0c..40f4ed9eeb 100644 --- a/upbc/BUILD +++ b/upbc/BUILD @@ -188,6 +188,7 @@ cc_binary( ":plugin_upb_proto_reflection", "//:json", "//:mini_table", + "//:mini_table_internal", "//:port", "//:reflection", "//:upb", diff --git a/upbc/file_layout.cc b/upbc/file_layout.cc index 9f0541c520..fab686f918 100644 --- a/upbc/file_layout.cc +++ b/upbc/file_layout.cc @@ -29,6 +29,7 @@ #include #include "upb/mini_table/encode_internal.hpp" +#include "upb/mini_table/extension_internal.h" #include "upbc/common.h" namespace upbc { diff --git a/upbc/file_layout.h b/upbc/file_layout.h index c57653798c..0d5f537487 100644 --- a/upbc/file_layout.h +++ b/upbc/file_layout.h @@ -32,6 +32,7 @@ #include "absl/strings/substitute.h" #include "upb/mini_table/decode.h" #include "upb/mini_table/encode_internal.hpp" +#include "upb/mini_table/extension_internal.h" #include "upb/upb.hpp" namespace upbc { diff --git a/upbc/protoc-gen-upb.cc b/upbc/protoc-gen-upb.cc index 6564f43d4e..b01ecd0326 100644 --- 
a/upbc/protoc-gen-upb.cc +++ b/upbc/protoc-gen-upb.cc @@ -34,6 +34,8 @@ #include "google/protobuf/descriptor.h" #include "google/protobuf/wire_format.h" #include "upb/mini_table/encode_internal.hpp" +#include "upb/mini_table/enum_internal.h" +#include "upb/mini_table/extension_internal.h" #include "upbc/common.h" #include "upbc/file_layout.h" #include "upbc/names.h" @@ -889,8 +891,9 @@ void WriteHeader(const FileLayout& layout, Output& output) { "#define $0_UPB_H_\n\n" "#include \"upb/collections/array_internal.h\"\n" "#include \"upb/collections/map_gencode_util.h\"\n" - "#include \"upb/mini_table/accessors.h\"\n" - "#include \"upb/msg_internal.h\"\n" + "#include \"upb/message/accessors.h\"\n" + "#include \"upb/message/internal.h\"\n" + "#include \"upb/mini_table/enum_internal.h\"\n" "#include \"upb/wire/decode.h\"\n" "#include \"upb/wire/decode_fast.h\"\n" "#include \"upb/wire/encode.h\"\n\n", @@ -1509,7 +1512,8 @@ void WriteSource(const FileLayout& layout, Output& output, output( "#include \n" "#include \"upb/collections/array_internal.h\"\n" - "#include \"upb/msg_internal.h\"\n" + "#include \"upb/message/internal.h\"\n" + "#include \"upb/mini_table/enum_internal.h\"\n" "#include \"$0\"\n", HeaderFilename(file));