diff --git a/DESIGN.md b/DESIGN.md index aa6a288280..41a2097e7f 100644 --- a/DESIGN.md +++ b/DESIGN.md @@ -123,7 +123,7 @@ static const upb_msglayout_field upb_test_MessageName__fields[2] = { {2, UPB_SIZE(8, 8), 2, 0, 5, _UPB_MODE_SCALAR}, }; -const upb_msglayout upb_test_MessageName_msginit = { +const upb_msglayout upb_test_MessageName_msg_init = { NULL, &upb_test_MessageName__fields[0], UPB_SIZE(16, 16), 2, false, 2, 255, diff --git a/python/descriptor.c b/python/descriptor.c index b30b348277..e7bef1214b 100644 --- a/python/descriptor.c +++ b/python/descriptor.c @@ -324,8 +324,7 @@ static PyObject* PyUpb_Descriptor_GetOneofs(PyObject* _self, void* closure) { static PyObject* PyUpb_Descriptor_GetOptions(PyObject* _self, PyObject* args) { PyUpb_DescriptorBase* self = (void*)_self; return PyUpb_DescriptorBase_GetOptions( - self, upb_MessageDef_Options(self->def), - &google_protobuf_MessageOptions_msginit, + self, upb_MessageDef_Options(self->def), &google_protobuf_MessageOptions_msg_init, PYUPB_DESCRIPTOR_PROTO_PACKAGE ".MessageOptions"); } @@ -333,7 +332,7 @@ static PyObject* PyUpb_Descriptor_CopyToProto(PyObject* _self, PyObject* py_proto) { return PyUpb_DescriptorBase_CopyToProto( _self, (PyUpb_ToProto_Func*)&upb_MessageDef_ToProto, - &google_protobuf_DescriptorProto_msginit, + &google_protobuf_DescriptorProto_msg_init, PYUPB_DESCRIPTOR_PROTO_PACKAGE ".DescriptorProto", py_proto); } @@ -734,17 +733,16 @@ static PyObject* PyUpb_EnumDescriptor_GetHasOptions(PyObject* _self, static PyObject* PyUpb_EnumDescriptor_GetOptions(PyObject* _self, PyObject* args) { PyUpb_DescriptorBase* self = (void*)_self; - return PyUpb_DescriptorBase_GetOptions(self, upb_EnumDef_Options(self->def), - &google_protobuf_EnumOptions_msginit, - PYUPB_DESCRIPTOR_PROTO_PACKAGE - ".EnumOptions"); + return PyUpb_DescriptorBase_GetOptions( + self, upb_EnumDef_Options(self->def), &google_protobuf_EnumOptions_msg_init, + PYUPB_DESCRIPTOR_PROTO_PACKAGE ".EnumOptions"); } static PyObject* 
PyUpb_EnumDescriptor_CopyToProto(PyObject* _self, PyObject* py_proto) { return PyUpb_DescriptorBase_CopyToProto( _self, (PyUpb_ToProto_Func*)&upb_EnumDef_ToProto, - &google_protobuf_EnumDescriptorProto_msginit, + &google_protobuf_EnumDescriptorProto_msg_init, PYUPB_DESCRIPTOR_PROTO_PACKAGE ".EnumDescriptorProto", py_proto); } @@ -825,7 +823,7 @@ static PyObject* PyUpb_EnumValueDescriptor_GetOptions(PyObject* _self, PyUpb_DescriptorBase* self = (void*)_self; return PyUpb_DescriptorBase_GetOptions( self, upb_EnumValueDef_Options(self->def), - &google_protobuf_EnumValueOptions_msginit, + &google_protobuf_EnumValueOptions_msg_init, PYUPB_DESCRIPTOR_PROTO_PACKAGE ".EnumValueOptions"); } @@ -1023,10 +1021,9 @@ static PyObject* PyUpb_FieldDescriptor_GetHasPresence( static PyObject* PyUpb_FieldDescriptor_GetOptions(PyObject* _self, PyObject* args) { PyUpb_DescriptorBase* self = (void*)_self; - return PyUpb_DescriptorBase_GetOptions(self, upb_FieldDef_Options(self->def), - &google_protobuf_FieldOptions_msginit, - PYUPB_DESCRIPTOR_PROTO_PACKAGE - ".FieldOptions"); + return PyUpb_DescriptorBase_GetOptions( + self, upb_FieldDef_Options(self->def), &google_protobuf_FieldOptions_msg_init, + PYUPB_DESCRIPTOR_PROTO_PACKAGE ".FieldOptions"); } static PyGetSetDef PyUpb_FieldDescriptor_Getters[] = { @@ -1163,7 +1160,7 @@ static PyObject* PyUpb_FileDescriptor_GetSerializedPb(PyObject* self, void* closure) { return PyUpb_DescriptorBase_GetSerializedProto( self, (PyUpb_ToProto_Func*)&upb_FileDef_ToProto, - &google_protobuf_FileDescriptorProto_msginit); + &google_protobuf_FileDescriptorProto_msg_init); } static PyObject* PyUpb_FileDescriptor_GetMessageTypesByName(PyObject* _self, @@ -1265,17 +1262,16 @@ static PyObject* PyUpb_FileDescriptor_GetHasOptions(PyObject* _self, static PyObject* PyUpb_FileDescriptor_GetOptions(PyObject* _self, PyObject* args) { PyUpb_DescriptorBase* self = (void*)_self; - return PyUpb_DescriptorBase_GetOptions(self, upb_FileDef_Options(self->def), - 
&google_protobuf_FileOptions_msginit, - PYUPB_DESCRIPTOR_PROTO_PACKAGE - ".FileOptions"); + return PyUpb_DescriptorBase_GetOptions( + self, upb_FileDef_Options(self->def), &google_protobuf_FileOptions_msg_init, + PYUPB_DESCRIPTOR_PROTO_PACKAGE ".FileOptions"); } static PyObject* PyUpb_FileDescriptor_CopyToProto(PyObject* _self, PyObject* py_proto) { return PyUpb_DescriptorBase_CopyToProto( _self, (PyUpb_ToProto_Func*)&upb_FileDef_ToProto, - &google_protobuf_FileDescriptorProto_msginit, + &google_protobuf_FileDescriptorProto_msg_init, PYUPB_DESCRIPTOR_PROTO_PACKAGE ".FileDescriptorProto", py_proto); } @@ -1379,17 +1375,16 @@ static PyObject* PyUpb_MethodDescriptor_GetOutputType(PyObject* self, static PyObject* PyUpb_MethodDescriptor_GetOptions(PyObject* _self, PyObject* args) { PyUpb_DescriptorBase* self = (void*)_self; - return PyUpb_DescriptorBase_GetOptions(self, upb_MethodDef_Options(self->def), - &google_protobuf_MethodOptions_msginit, - PYUPB_DESCRIPTOR_PROTO_PACKAGE - ".MethodOptions"); + return PyUpb_DescriptorBase_GetOptions( + self, upb_MethodDef_Options(self->def), &google_protobuf_MethodOptions_msg_init, + PYUPB_DESCRIPTOR_PROTO_PACKAGE ".MethodOptions"); } static PyObject* PyUpb_MethodDescriptor_CopyToProto(PyObject* _self, PyObject* py_proto) { return PyUpb_DescriptorBase_CopyToProto( _self, (PyUpb_ToProto_Func*)&upb_MethodDef_ToProto, - &google_protobuf_MethodDescriptorProto_msginit, + &google_protobuf_MethodDescriptorProto_msg_init, PYUPB_DESCRIPTOR_PROTO_PACKAGE ".MethodDescriptorProto", py_proto); } @@ -1484,10 +1479,9 @@ static PyObject* PyUpb_OneofDescriptor_GetFields(PyObject* _self, static PyObject* PyUpb_OneofDescriptor_GetOptions(PyObject* _self, PyObject* args) { PyUpb_DescriptorBase* self = (void*)_self; - return PyUpb_DescriptorBase_GetOptions(self, upb_OneofDef_Options(self->def), - &google_protobuf_OneofOptions_msginit, - PYUPB_DESCRIPTOR_PROTO_PACKAGE - ".OneofOptions"); + return PyUpb_DescriptorBase_GetOptions( + self, 
upb_OneofDef_Options(self->def), &google_protobuf_OneofOptions_msg_init, + PYUPB_DESCRIPTOR_PROTO_PACKAGE ".OneofOptions"); } static PyGetSetDef PyUpb_OneofDescriptor_Getters[] = { @@ -1586,8 +1580,7 @@ static PyObject* PyUpb_ServiceDescriptor_GetOptions(PyObject* _self, PyObject* args) { PyUpb_DescriptorBase* self = (void*)_self; return PyUpb_DescriptorBase_GetOptions( - self, upb_ServiceDef_Options(self->def), - &google_protobuf_ServiceOptions_msginit, + self, upb_ServiceDef_Options(self->def), &google_protobuf_ServiceOptions_msg_init, PYUPB_DESCRIPTOR_PROTO_PACKAGE ".ServiceOptions"); } @@ -1595,7 +1588,7 @@ static PyObject* PyUpb_ServiceDescriptor_CopyToProto(PyObject* _self, PyObject* py_proto) { return PyUpb_DescriptorBase_CopyToProto( _self, (PyUpb_ToProto_Func*)&upb_ServiceDef_ToProto, - &google_protobuf_ServiceDescriptorProto_msginit, + &google_protobuf_ServiceDescriptorProto_msg_init, PYUPB_DESCRIPTOR_PROTO_PACKAGE ".ServiceDescriptorProto", py_proto); } diff --git a/upb/mini_table_accessors_test.cc b/upb/mini_table_accessors_test.cc index 16274476ad..d79374d32e 100644 --- a/upb/mini_table_accessors_test.cc +++ b/upb/mini_table_accessors_test.cc @@ -64,12 +64,12 @@ const uint64_t kTestUInt64 = 0xFEDCBAFF87654321; const upb_MiniTable_Field* find_proto3_field(int field_number) { return upb_MiniTable_FindFieldByNumber( - &protobuf_test_messages_proto3_TestAllTypesProto3_msginit, field_number); + &protobuf_test_messages_proto3_TestAllTypesProto3_msg_init, field_number); } const upb_MiniTable_Field* find_proto2_field(int field_number) { return upb_MiniTable_FindFieldByNumber( - &protobuf_test_messages_proto2_TestAllTypesProto2_msginit, field_number); + &protobuf_test_messages_proto2_TestAllTypesProto2_msg_init, field_number); } TEST(GeneratedCode, HazzersProto2) { @@ -275,7 +275,7 @@ TEST(GeneratedCode, SubMessage) { const upb_MiniTable_Field* nested_message_a_field = upb_MiniTable_FindFieldByNumber( - 
&protobuf_test_messages_proto2_TestAllTypesProto2_NestedMessage_msginit, + &protobuf_test_messages_proto2_TestAllTypesProto2_NestedMessage_msg_init, kFieldOptionalNestedMessageA); EXPECT_EQ(5, upb_MiniTable_GetInt32(sub_message, nested_message_a_field)); @@ -292,7 +292,7 @@ TEST(GeneratedCode, SubMessage) { upb_MiniTable_SetMessage(msg, optional_message_field, new_nested_message); upb_Message* mutable_message = upb_MiniTable_GetMutableMessage( - msg, &protobuf_test_messages_proto2_TestAllTypesProto2_msginit, + msg, &protobuf_test_messages_proto2_TestAllTypesProto2_msg_init, optional_message_field, arena); EXPECT_EQ( true, @@ -365,10 +365,10 @@ TEST(GeneratedCode, GetMutableMessage) { const upb_MiniTable_Field* optional_message_field = find_proto2_field(kFieldOptionalNestedMessage); upb_Message* msg1 = upb_MiniTable_GetMutableMessage( - msg, &protobuf_test_messages_proto2_TestAllTypesProto2_msginit, + msg, &protobuf_test_messages_proto2_TestAllTypesProto2_msg_init, optional_message_field, arena); upb_Message* msg2 = upb_MiniTable_GetMutableMessage( - msg, &protobuf_test_messages_proto2_TestAllTypesProto2_msginit, + msg, &protobuf_test_messages_proto2_TestAllTypesProto2_msg_init, optional_message_field, arena); // Verify that newly constructed sub message is stored in msg. EXPECT_EQ(msg1, msg2); diff --git a/upb/msg_test.cc b/upb/msg_test.cc index d49e8ef5b0..33b103a87f 100644 --- a/upb/msg_test.cc +++ b/upb/msg_test.cc @@ -313,7 +313,7 @@ TEST(MessageTest, DecodeRequiredFieldsTopLevelMessage) { // Fails, because required fields are missing. EXPECT_EQ(kUpb_DecodeStatus_MissingRequired, - upb_Decode(NULL, 0, test_msg, &upb_test_TestRequiredFields_msginit, + upb_Decode(NULL, 0, test_msg, &upb_test_TestRequiredFields_msg_init, NULL, kUpb_DecodeOption_CheckRequired, arena.ptr())); upb_test_TestRequiredFields_set_required_int32(test_msg, 1); @@ -327,7 +327,7 @@ TEST(MessageTest, DecodeRequiredFieldsTopLevelMessage) { // payload is not empty. 
EXPECT_EQ(kUpb_DecodeStatus_MissingRequired, upb_Decode(serialized, size, test_msg, - &upb_test_TestRequiredFields_msginit, NULL, + &upb_test_TestRequiredFields_msg_init, NULL, kUpb_DecodeOption_CheckRequired, arena.ptr())); empty_msg = upb_test_EmptyMessage_new(arena.ptr()); @@ -337,7 +337,7 @@ TEST(MessageTest, DecodeRequiredFieldsTopLevelMessage) { // Succeeds, because required fields are present (though not in the input). EXPECT_EQ(kUpb_DecodeStatus_Ok, - upb_Decode(NULL, 0, test_msg, &upb_test_TestRequiredFields_msginit, + upb_Decode(NULL, 0, test_msg, &upb_test_TestRequiredFields_msg_init, NULL, kUpb_DecodeOption_CheckRequired, arena.ptr())); // Serialize a complete payload. @@ -356,7 +356,7 @@ TEST(MessageTest, DecodeRequiredFieldsTopLevelMessage) { test_msg2, upb_test_TestRequiredFields_new(arena.ptr())); EXPECT_EQ(kUpb_DecodeStatus_Ok, upb_Decode(serialized, size, test_msg2, - &upb_test_TestRequiredFields_msginit, NULL, + &upb_test_TestRequiredFields_msg_init, NULL, kUpb_DecodeOption_CheckRequired, arena.ptr())); } diff --git a/upbc/BUILD b/upbc/BUILD index d32ffa45a0..6d538282b2 100644 --- a/upbc/BUILD +++ b/upbc/BUILD @@ -68,8 +68,12 @@ upb_proto_reflection_library( cc_library( name = "common", - srcs = ["common.cc"], - hdrs = ["common.h"], + srcs = [ + "common.cc", + ], + hdrs = [ + "common.h", + ], copts = UPB_DEFAULT_CPPOPTS, deps = [ "@com_google_absl//absl/strings", @@ -77,6 +81,36 @@ cc_library( ], ) +cc_library( + name = "file_layout", + srcs = [ + "file_layout.cc", + ], + hdrs = [ + "file_layout.h", + ], + copts = UPB_DEFAULT_CPPOPTS, + deps = [ + ":common", + "//:mini_table", + "//:upb", + "@com_google_absl//absl/container:flat_hash_map", + "@com_google_absl//absl/strings", + "@com_google_protobuf//:protoc_lib", + ], +) + +cc_library( + name = "keywords", + srcs = [ + "keywords.cc", + ], + hdrs = [ + "keywords.h", + ], + copts = UPB_DEFAULT_CPPOPTS, +) + cc_binary( name = "protoc-gen-upb", srcs = ["protoc-gen-upb.cc"], @@ -84,6 +118,7 @@ 
cc_binary( visibility = ["//visibility:public"], deps = [ ":common", + ":file_layout", "//:mini_table", "//:port", "//:upb", diff --git a/upbc/common.cc b/upbc/common.cc index a4e162d814..9299eb0674 100644 --- a/upbc/common.cc +++ b/upbc/common.cc @@ -74,4 +74,12 @@ std::string HeaderFilename(const google::protobuf::FileDescriptor* file) { return StripExtension(file->name()) + ".upb.h"; } +std::string MessageInit(const protobuf::Descriptor* descriptor) { + return MessageName(descriptor) + "_msg_init"; +} + +std::string EnumInit(const protobuf::EnumDescriptor* descriptor) { + return ToCIdent(descriptor->full_name()) + "_enum_init"; +} + } // namespace upbc diff --git a/upbc/common.h b/upbc/common.h index 08de234383..5ec7ba2f0f 100644 --- a/upbc/common.h +++ b/upbc/common.h @@ -30,17 +30,16 @@ #include +#include "google/protobuf/io/zero_copy_stream.h" +#include "google/protobuf/descriptor.h" #include "absl/strings/str_replace.h" #include "absl/strings/substitute.h" -#include "google/protobuf/descriptor.h" -#include "google/protobuf/io/zero_copy_stream.h" namespace upbc { class Output { public: - Output(google::protobuf::io::ZeroCopyOutputStream* stream) - : stream_(stream) {} + Output(google::protobuf::io::ZeroCopyOutputStream* stream) : stream_(stream) {} ~Output() { stream_->BackUp((int)size_); } template @@ -101,12 +100,14 @@ class Output { std::string StripExtension(absl::string_view fname); std::string ToCIdent(absl::string_view str); std::string ToPreproc(absl::string_view str); -void EmitFileWarning(const google::protobuf::FileDescriptor* file, - Output& output); +void EmitFileWarning(const google::protobuf::FileDescriptor* file, Output& output); std::string MessageName(const google::protobuf::Descriptor* descriptor); std::string FileLayoutName(const google::protobuf::FileDescriptor* file); std::string HeaderFilename(const google::protobuf::FileDescriptor* file); +std::string MessageInit(const google::protobuf::Descriptor* descriptor); +std::string 
EnumInit(const google::protobuf::EnumDescriptor* descriptor); + } // namespace upbc #endif // UPBC_COMMON_H diff --git a/upbc/file_layout.cc b/upbc/file_layout.cc new file mode 100644 index 0000000000..49268aab27 --- /dev/null +++ b/upbc/file_layout.cc @@ -0,0 +1,383 @@ +// Copyright (c) 2009-2021, Google LLC +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are met: +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// * Neither the name of Google LLC nor the +// names of its contributors may be used to endorse or promote products +// derived from this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +// ARE DISCLAIMED. IN NO EVENT SHALL Google LLC BE LIABLE FOR ANY DIRECT, +// INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES +// (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; +// LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND +// ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ +#include "upbc/file_layout.h" + +#include +#include + +#include "upb/mini_table.hpp" +#include "upbc/common.h" + +namespace upbc { + +namespace protobuf = ::google::protobuf; + +const char* kEnumsInit = "enums_layout"; +const char* kExtensionsInit = "extensions_layout"; +const char* kMessagesInit = "messages_layout"; + +void AddEnums(const protobuf::Descriptor* message, + std::vector* enums) { + enums->reserve(enums->size() + message->enum_type_count()); + for (int i = 0; i < message->enum_type_count(); i++) { + enums->push_back(message->enum_type(i)); + } + for (int i = 0; i < message->nested_type_count(); i++) { + AddEnums(message->nested_type(i), enums); + } +} + +std::vector SortedEnums( + const protobuf::FileDescriptor* file) { + std::vector enums; + enums.reserve(file->enum_type_count()); + for (int i = 0; i < file->enum_type_count(); i++) { + enums.push_back(file->enum_type(i)); + } + for (int i = 0; i < file->message_type_count(); i++) { + AddEnums(file->message_type(i), &enums); + } + return enums; +} + +std::vector SortedUniqueEnumNumbers( + const protobuf::EnumDescriptor* e) { + std::vector values; + values.reserve(e->value_count()); + for (int i = 0; i < e->value_count(); i++) { + values.push_back(static_cast(e->value(i)->number())); + } + std::sort(values.begin(), values.end()); + auto last = std::unique(values.begin(), values.end()); + values.erase(last, values.end()); + return values; +} + +void AddMessages(const protobuf::Descriptor* message, + std::vector* messages) { + messages->push_back(message); + for (int i = 0; i < message->nested_type_count(); i++) { + AddMessages(message->nested_type(i), messages); + } +} + +// Ordering must match upb/def.c! +// +// The ordering is significant because each upb_MessageDef* will point at the +// corresponding upb_MiniTable and we just iterate through the list without +// any search or lookup. 
+std::vector SortedMessages( + const protobuf::FileDescriptor* file) { + std::vector messages; + for (int i = 0; i < file->message_type_count(); i++) { + AddMessages(file->message_type(i), &messages); + } + return messages; +} + +void AddExtensionsFromMessage( + const protobuf::Descriptor* message, + std::vector* exts) { + for (int i = 0; i < message->extension_count(); i++) { + exts->push_back(message->extension(i)); + } + for (int i = 0; i < message->nested_type_count(); i++) { + AddExtensionsFromMessage(message->nested_type(i), exts); + } +} + +// Ordering must match upb/def.c! +// +// The ordering is significant because each upb_FieldDef* will point at the +// corresponding upb_MiniTable_Extension and we just iterate through the list +// without any search or lookup. +std::vector SortedExtensions( + const protobuf::FileDescriptor* file) { + std::vector ret; + for (int i = 0; i < file->extension_count(); i++) { + ret.push_back(file->extension(i)); + } + for (int i = 0; i < file->message_type_count(); i++) { + AddExtensionsFromMessage(file->message_type(i), &ret); + } + return ret; +} + +std::vector FieldNumberOrder( + const protobuf::Descriptor* message) { + std::vector fields; + for (int i = 0; i < message->field_count(); i++) { + fields.push_back(message->field(i)); + } + std::sort(fields.begin(), fields.end(), + [](const protobuf::FieldDescriptor* a, + const protobuf::FieldDescriptor* b) { + return a->number() < b->number(); + }); + return fields; +} + +upb_MiniTable* FilePlatformLayout::GetMiniTable( + const protobuf::Descriptor* m) const { + auto it = table_map_.find(m); + assert(it != table_map_.end()); + return it->second; +} + +upb_MiniTable_Enum* FilePlatformLayout::GetEnumTable( + const protobuf::EnumDescriptor* d) const { + auto it = enum_map_.find(d); + assert(it != enum_map_.end()); + return it->second; +} + +const upb_MiniTable_Extension* FilePlatformLayout::GetExtension( + const protobuf::FieldDescriptor* fd) const { + auto it = 
extension_map_.find(fd); + assert(it != extension_map_.end()); + return &it->second; +} + +void FilePlatformLayout::ResolveIntraFileReferences() { + // This properly resolves references within a file, in order to set any + // necessary flags (eg. is a map). + for (const auto& pair : table_map_) { + upb_MiniTable* mt = pair.second; + // First we properly resolve for defs within the file. + for (const auto* f : FieldNumberOrder(pair.first)) { + if (f->message_type() && f->message_type()->file() == f->file()) { + // const_cast is safe because the mini-table is owned exclusively + // by us, and was allocated from an arena (known-writable memory). + upb_MiniTable_Field* mt_f = const_cast( + upb_MiniTable_FindFieldByNumber(mt, f->number())); + upb_MiniTable* sub_mt = GetMiniTable(f->message_type()); + upb_MiniTable_SetSubMessage(mt, mt_f, sub_mt); + } + // We don't worry about enums here, because resolving an enum will + // never alter the mini-table. + } + } +} + +upb_MiniTable_Sub FilePlatformLayout::PackSub(const char* data, SubTag tag) { + uintptr_t val = reinterpret_cast(data); + assert((val & kMask) == 0); + upb_MiniTable_Sub sub; + sub.submsg = reinterpret_cast(val | tag); + return sub; +} + +bool FilePlatformLayout::IsNull(upb_MiniTable_Sub sub) { + return reinterpret_cast(sub.subenum) == 0; +} + +std::string FilePlatformLayout::GetSub(upb_MiniTable_Sub sub) { + uintptr_t as_int = reinterpret_cast(sub.submsg); + const char* str = reinterpret_cast(as_int & ~SubTag::kMask); + switch (as_int & SubTag::kMask) { + case SubTag::kMessage: + return absl::Substitute("{.submsg = &$0}", str); + case SubTag::kEnum: + return absl::Substitute("{.subenum = &$0}", str); + default: + return std::string("{.submsg = NULL}"); + } + return std::string("ERROR in GetSub"); +} + +void FilePlatformLayout::SetSubTableStrings() { + for (const auto& pair : table_map_) { + upb_MiniTable* mt = pair.second; + for (const auto* f : FieldNumberOrder(pair.first)) { + upb_MiniTable_Field* mt_f = 
const_cast( + upb_MiniTable_FindFieldByNumber(mt, f->number())); + assert(mt_f); + upb_MiniTable_Sub sub = PackSubForField(f, mt_f); + if (IsNull(sub)) continue; + // const_cast is safe because the mini-table is owned exclusively + // by us, and was allocated from an arena (known-writable memory). + *const_cast(&mt->subs[mt_f->submsg_index]) = sub; + } + } +} + +upb_MiniTable_Sub FilePlatformLayout::PackSubForField( + const protobuf::FieldDescriptor* f, const upb_MiniTable_Field* mt_f) { + if (mt_f->submsg_index == kUpb_NoSub) { + return PackSub(nullptr, SubTag::kNull); + } else if (f->message_type()) { + return PackSub(AllocStr(MessageInit(f->message_type())), SubTag::kMessage); + } else { + ABSL_ASSERT(f->enum_type()); + return PackSub(AllocStr(EnumInit(f->enum_type())), SubTag::kEnum); + } +} + +const char* FilePlatformLayout::AllocStr(absl::string_view str) { + char* ret = + static_cast(upb_Arena_Malloc(arena_.ptr(), str.size() + 1)); + memcpy(ret, str.data(), str.size()); + ret[str.size()] = '\0'; + return ret; +} + +void FilePlatformLayout::BuildMiniTables(const protobuf::FileDescriptor* fd) { + for (const auto& m : SortedMessages(fd)) { + table_map_[m] = MakeMiniTable(m); + } + for (const auto& e : SortedEnums(fd)) { + enum_map_[e] = MakeMiniTableEnum(e); + } + ResolveIntraFileReferences(); + SetSubTableStrings(); +} + +void FilePlatformLayout::BuildExtensions(const protobuf::FileDescriptor* fd) { + std::vector sorted = SortedExtensions(fd); + upb::Status status; + for (const auto* f : sorted) { + upb::MtDataEncoder e; + e.StartMessage(0); + e.PutField(static_cast(f->type()), f->number(), + GetFieldModifiers(f)); + upb_MiniTable_Extension& ext = extension_map_[f]; + upb_MiniTable_Sub sub; + bool ok = upb_MiniTable_BuildExtension(e.data().data(), e.data().size(), + &ext, sub, status.ptr()); + if (!ok) { + // TODO(haberman): Use ABSL CHECK() when it is available. 
+ fprintf(stderr, "Error building mini-table: %s\n", + status.error_message()); + } + ABSL_ASSERT(ok); + ext.extendee = reinterpret_cast( + AllocStr(MessageInit(f->containing_type()))); + ext.sub = PackSubForField(f, &ext.field); + } +} + +upb_MiniTable* FilePlatformLayout::MakeMiniTable( + const protobuf::Descriptor* m) { + if (m->options().message_set_wire_format()) { + return upb_MiniTable_BuildMessageSet(platform_, arena_.ptr()); + } else if (m->options().map_entry()) { + return upb_MiniTable_BuildMapEntry( + static_cast(m->map_key()->type()), + static_cast(m->map_value()->type()), + m->map_value()->enum_type() && + m->map_value()->enum_type()->file()->syntax() == + protobuf::FileDescriptor::SYNTAX_PROTO3, + platform_, arena_.ptr()); + } else { + return MakeRegularMiniTable(m); + } +} + +upb_MiniTable* FilePlatformLayout::MakeRegularMiniTable( + const protobuf::Descriptor* m) { + upb::MtDataEncoder e; + e.StartMessage(GetMessageModifiers(m)); + for (const auto* f : FieldNumberOrder(m)) { + e.PutField(static_cast(f->type()), f->number(), + GetFieldModifiers(f)); + } + for (int i = 0; i < m->real_oneof_decl_count(); i++) { + const protobuf::OneofDescriptor* oneof = m->oneof_decl(i); + e.StartOneof(); + for (int j = 0; j < oneof->field_count(); j++) { + const protobuf::FieldDescriptor* f = oneof->field(j); + e.PutOneofField(f->number()); + } + } + absl::string_view str = e.data(); + upb::Status status; + upb_MiniTable* ret = upb_MiniTable_Build(str.data(), str.size(), platform_, + arena_.ptr(), status.ptr()); + if (!ret) { + fprintf(stderr, "Error building mini-table: %s\n", status.error_message()); + } + assert(ret); + return ret; +} + +upb_MiniTable_Enum* FilePlatformLayout::MakeMiniTableEnum( + const protobuf::EnumDescriptor* d) { + upb::Arena arena; + upb::MtDataEncoder e; + + e.StartEnum(); + for (uint32_t i : SortedUniqueEnumNumbers(d)) { + e.PutEnumValue(i); + } + e.EndEnum(); + + absl::string_view str = e.data(); + upb::Status status; + upb_MiniTable_Enum* 
ret = upb_MiniTable_BuildEnum(str.data(), str.size(), + arena_.ptr(), status.ptr()); + if (!ret) { + fprintf(stderr, "Error building mini-table: %s\n", status.error_message()); + } + assert(ret); + return ret; +} + +uint64_t FilePlatformLayout::GetMessageModifiers( + const protobuf::Descriptor* m) { + uint64_t ret = 0; + + if (m->file()->syntax() == protobuf::FileDescriptor::SYNTAX_PROTO3) { + ret |= kUpb_MessageModifier_ValidateUtf8; + ret |= kUpb_MessageModifier_DefaultIsPacked; + } + + if (m->extension_range_count() > 0) { + ret |= kUpb_MessageModifier_IsExtendable; + } + + assert(!m->options().map_entry()); + return ret; +} + +uint64_t FilePlatformLayout::GetFieldModifiers( + const protobuf::FieldDescriptor* f) { + uint64_t ret = 0; + + if (f->is_repeated()) ret |= kUpb_FieldModifier_IsRepeated; + if (f->is_required()) ret |= kUpb_FieldModifier_IsRequired; + if (f->is_packed()) ret |= kUpb_FieldModifier_IsPacked; + if (f->enum_type() && f->enum_type()->file()->syntax() == + protobuf::FileDescriptor::SYNTAX_PROTO2) { + ret |= kUpb_FieldModifier_IsClosedEnum; + } + if (f->is_optional() && !f->has_presence()) { + ret |= kUpb_FieldModifier_IsProto3Singular; + } + + return ret; +} + +} // namespace upbc diff --git a/upbc/file_layout.h b/upbc/file_layout.h new file mode 100644 index 0000000000..271db45c39 --- /dev/null +++ b/upbc/file_layout.h @@ -0,0 +1,211 @@ +// Copyright (c) 2009-2021, Google LLC +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are met: +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. 
+// * Neither the name of Google LLC nor the +// names of its contributors may be used to endorse or promote products +// derived from this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +// ARE DISCLAIMED. IN NO EVENT SHALL Google LLC BE LIABLE FOR ANY DIRECT, +// INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES +// (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; +// LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND +// ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +#ifndef UPBC_FILE_LAYOUT_H +#define UPBC_FILE_LAYOUT_H + +#include + +#include "google/protobuf/descriptor.pb.h" +#include "absl/container/flat_hash_map.h" +#include "absl/strings/substitute.h" +#include "upb/mini_table.hpp" +#include "upb/upb.hpp" + +namespace upbc { + +namespace protoc = ::google::protobuf::compiler; +namespace protobuf = ::google::protobuf; + +std::vector SortedEnums( + const protobuf::FileDescriptor* file); + +// Ordering must match upb/def.c! +// +// The ordering is significant because each upb_MessageDef* will point at the +// corresponding upb_MiniTable and we just iterate through the list without +// any search or lookup. +std::vector SortedMessages( + const protobuf::FileDescriptor* file); + +// Ordering must match upb/def.c! +// +// The ordering is significant because each upb_FieldDef* will point at the +// corresponding upb_MiniTable_Extension and we just iterate through the list +// without any search or lookup. 
+std::vector SortedExtensions( + const protobuf::FileDescriptor* file); + +std::vector FieldNumberOrder( + const protobuf::Descriptor* message); + +//////////////////////////////////////////////////////////////////////////////// +// FilePlatformLayout +//////////////////////////////////////////////////////////////////////////////// + +// FilePlatformLayout builds and vends upb MiniTables for a given platform (32 +// or 64 bit). +class FilePlatformLayout { + public: + FilePlatformLayout(const protobuf::FileDescriptor* fd, + upb_MiniTablePlatform platform) + : platform_(platform) { + BuildMiniTables(fd); + BuildExtensions(fd); + } + + // Retrieves a upb MiniTable or Extension given a protobuf descriptor. The + // descriptor must be from this layout's file. + upb_MiniTable* GetMiniTable(const protobuf::Descriptor* m) const; + upb_MiniTable_Enum* GetEnumTable(const protobuf::EnumDescriptor* d) const; + const upb_MiniTable_Extension* GetExtension( + const protobuf::FieldDescriptor* fd) const; + + // Get the initializer for the given sub-message/sub-enum link. + static std::string GetSub(upb_MiniTable_Sub sub); + + private: + // Functions to build mini-tables for this file's messages and extensions. + void BuildMiniTables(const protobuf::FileDescriptor* fd); + void BuildExtensions(const protobuf::FileDescriptor* fd); + upb_MiniTable* MakeMiniTable(const protobuf::Descriptor* m); + upb_MiniTable* MakeRegularMiniTable(const protobuf::Descriptor* m); + upb_MiniTable_Enum* MakeMiniTableEnum(const protobuf::EnumDescriptor* d); + uint64_t GetMessageModifiers(const protobuf::Descriptor* m); + uint64_t GetFieldModifiers(const protobuf::FieldDescriptor* f); + void ResolveIntraFileReferences(); + + // When we are generating code, tables are linked to sub-tables via name (ie. + // a string) rather than by pointer. We need to emit an initializer like + // `&foo_sub_table`. 
To do this, we store `const char*` strings in all the + // links that would normally be pointers: + // field -> sub-message + // field -> enum table (proto2 only) + // extension -> extendee + // + // This requires a bit of reinterpret_cast<>(), but it's confined to a few + // functions. We tag the pointer so we know which member of the union to + // initialize. + enum SubTag { + kNull = 0, + kMessage = 1, + kEnum = 2, + kMask = 3, + }; + + static upb_MiniTable_Sub PackSub(const char* data, SubTag tag); + static bool IsNull(upb_MiniTable_Sub sub); + void SetSubTableStrings(); + upb_MiniTable_Sub PackSubForField(const protobuf::FieldDescriptor* f, + const upb_MiniTable_Field* mt_f); + const char* AllocStr(absl::string_view str); + + private: + using TableMap = + absl::flat_hash_map; + using EnumMap = + absl::flat_hash_map; + using ExtensionMap = absl::flat_hash_map; + upb::Arena arena_; + TableMap table_map_; + EnumMap enum_map_; + ExtensionMap extension_map_; + upb_MiniTablePlatform platform_; +}; + +//////////////////////////////////////////////////////////////////////////////// +// FileLayout +//////////////////////////////////////////////////////////////////////////////// + +// FileLayout is a pair of platform layouts: one for 32-bit and one for 64-bit. 
+class FileLayout { + public: + FileLayout(const protobuf::FileDescriptor* fd) + : descriptor_(fd), + layout32_(fd, kUpb_MiniTablePlatform_32Bit), + layout64_(fd, kUpb_MiniTablePlatform_64Bit) {} + + const protobuf::FileDescriptor* descriptor() const { return descriptor_; } + + const upb_MiniTable* GetMiniTable32(const protobuf::Descriptor* m) const { + return layout32_.GetMiniTable(m); + } + + const upb_MiniTable* GetMiniTable64(const protobuf::Descriptor* m) const { + return layout64_.GetMiniTable(m); + } + + const upb_MiniTable_Enum* GetEnumTable( + const protobuf::EnumDescriptor* d) const { + return layout64_.GetEnumTable(d); + } + + std::string GetFieldOffset(const protobuf::FieldDescriptor* f) const { + const upb_MiniTable_Field* f_32 = upb_MiniTable_FindFieldByNumber( + GetMiniTable32(f->containing_type()), f->number()); + const upb_MiniTable_Field* f_64 = upb_MiniTable_FindFieldByNumber( + GetMiniTable64(f->containing_type()), f->number()); + return absl::Substitute("UPB_SIZE($0, $1)", f_32->offset, f_64->offset); + } + + std::string GetOneofCaseOffset(const protobuf::OneofDescriptor* o) const { + const protobuf::FieldDescriptor* f = o->field(0); + const upb_MiniTable_Field* f_32 = upb_MiniTable_FindFieldByNumber( + GetMiniTable32(f->containing_type()), f->number()); + const upb_MiniTable_Field* f_64 = upb_MiniTable_FindFieldByNumber( + GetMiniTable64(f->containing_type()), f->number()); + return absl::Substitute("UPB_SIZE($0, $1)", ~f_32->presence, + ~f_64->presence); + } + + std::string GetMessageSize(const protobuf::Descriptor* d) const { + return absl::Substitute("UPB_SIZE($0, $1)", GetMiniTable32(d)->size, + GetMiniTable64(d)->size); + } + + int GetHasbitIndex(const protobuf::FieldDescriptor* f) const { + const upb_MiniTable_Field* f_64 = upb_MiniTable_FindFieldByNumber( + GetMiniTable64(f->containing_type()), f->number()); + return f_64->presence; + } + + bool HasHasbit(const protobuf::FieldDescriptor* f) const { + return GetHasbitIndex(f) > 0; + } + 
+ const upb_MiniTable_Extension* GetExtension( + const protobuf::FieldDescriptor* f) const { + return layout64_.GetExtension(f); + } + + private: + const protobuf::FileDescriptor* descriptor_; + FilePlatformLayout layout32_; + FilePlatformLayout layout64_; +}; + +} // namespace upbc + +#endif // UPBC_FILE_LAYOUT_H diff --git a/upbc/keywords.cc b/upbc/keywords.cc new file mode 100644 index 0000000000..44a9ed5139 --- /dev/null +++ b/upbc/keywords.cc @@ -0,0 +1,151 @@ +/* + * Copyright (c) 2007-2021, Google LLC + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * * Neither the name of Google LLC nor the + * names of its contributors may be used to endorse or promote products + * derived from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. 
IN NO EVENT SHALL Google LLC BE LIABLE FOR ANY DIRECT, + * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES + * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND + * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#include "upbc/keywords.h" + +#include +#include + +namespace upbc { + +static const char* const kKeywordList[] = { + // + "NULL", + "alignas", + "alignof", + "and", + "and_eq", + "asm", + "auto", + "bitand", + "bitor", + "bool", + "break", + "case", + "catch", + "char", + "class", + "compl", + "const", + "constexpr", + "const_cast", + "continue", + "decltype", + "default", + "delete", + "do", + "double", + "dynamic_cast", + "else", + "enum", + "explicit", + "export", + "extern", + "false", + "float", + "for", + "friend", + "goto", + "if", + "inline", + "int", + "long", + "mutable", + "namespace", + "new", + "noexcept", + "not", + "not_eq", + "nullptr", + "operator", + "or", + "or_eq", + "private", + "protected", + "public", + "register", + "reinterpret_cast", + "return", + "short", + "signed", + "sizeof", + "static", + "static_assert", + "static_cast", + "struct", + "switch", + "template", + "this", + "thread_local", + "throw", + "true", + "try", + "typedef", + "typeid", + "typename", + "union", + "unsigned", + "using", + "virtual", + "void", + "volatile", + "wchar_t", + "while", + "xor", + "xor_eq", +#ifdef PROTOBUF_FUTURE_CPP20_KEYWORDS // C++20 keywords. 
+ "char8_t", + "char16_t", + "char32_t", + "concept", + "consteval", + "constinit", + "co_await", + "co_return", + "co_yield", + "requires", +#endif // PROTOBUF_FUTURE_CPP20_KEYWORDS +}; + +static std::unordered_set* MakeKeywordsMap() { + auto* result = new std::unordered_set(); + for (const auto keyword : kKeywordList) { + result->emplace(keyword); + } + return result; +} + +static std::unordered_set& kKeywords = *MakeKeywordsMap(); + +std::string ResolveKeywordConflict(const std::string& name) { + if (kKeywords.count(name) > 0) { + return name + "_"; + } + return name; +} + +} // namespace upbc diff --git a/upbc/keywords.h b/upbc/keywords.h new file mode 100644 index 0000000000..767c220f74 --- /dev/null +++ b/upbc/keywords.h @@ -0,0 +1,40 @@ +/* + * Copyright (c) 2009-2021, Google LLC + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * * Neither the name of Google LLC nor the + * names of its contributors may be used to endorse or promote products + * derived from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. 
IN NO EVENT SHALL Google LLC BE LIABLE FOR ANY DIRECT, + * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES + * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND + * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef UPB_PROTOS_GENERATOR_KEYWORDS_H +#define UPB_PROTOS_GENERATOR_KEYWORDS_H + +#include + +namespace upbc { + +// Resolves conflicts between proto field names and C++ reserved keywords. +std::string ResolveKeywordConflict(const std::string& name); + +} // namespace upbc + +#endif // UPB_PROTOS_GENERATOR_KEYWORDS_H diff --git a/upbc/protoc-gen-upb.cc b/upbc/protoc-gen-upb.cc index 524ebccaae..5e6f8d209c 100644 --- a/upbc/protoc-gen-upb.cc +++ b/upbc/protoc-gen-upb.cc @@ -25,18 +25,19 @@ #include -#include "absl/container/flat_hash_map.h" -#include "absl/container/flat_hash_set.h" -#include "absl/strings/ascii.h" -#include "absl/strings/substitute.h" #include "google/protobuf/compiler/code_generator.h" #include "google/protobuf/compiler/plugin.h" -#include "google/protobuf/descriptor.h" #include "google/protobuf/descriptor.pb.h" +#include "google/protobuf/descriptor.h" #include "google/protobuf/wire_format.h" +#include "absl/container/flat_hash_map.h" +#include "absl/container/flat_hash_set.h" +#include "absl/strings/ascii.h" +#include "absl/strings/substitute.h" #include "upb/mini_table.hpp" #include "upb/upb.hpp" #include "upbc/common.h" +#include "upbc/file_layout.h" // Must be last. 
#include "upb/port_def.inc" @@ -70,11 +71,11 @@ std::string SourceFilename(const google::protobuf::FileDescriptor* file) { } std::string MessageInit(const protobuf::Descriptor* descriptor) { - return MessageName(descriptor) + "_msginit"; + return MessageName(descriptor) + "_msg_init"; } std::string EnumInit(const protobuf::EnumDescriptor* descriptor) { - return ToCIdent(descriptor->full_name()) + "_enuminit"; + return ToCIdent(descriptor->full_name()) + "_enum_init"; } std::string ExtensionIdentBase(const protobuf::FieldDescriptor* ext) { @@ -119,83 +120,6 @@ std::vector SortedEnums( return enums; } -std::vector SortedUniqueEnumNumbers( - const protobuf::EnumDescriptor* e) { - std::vector values; - values.reserve(e->value_count()); - for (int i = 0; i < e->value_count(); i++) { - values.push_back(static_cast(e->value(i)->number())); - } - std::sort(values.begin(), values.end()); - auto last = std::unique(values.begin(), values.end()); - values.erase(last, values.end()); - return values; -} - -void AddMessages(const protobuf::Descriptor* message, - std::vector* messages) { - messages->push_back(message); - for (int i = 0; i < message->nested_type_count(); i++) { - AddMessages(message->nested_type(i), messages); - } -} - -// Ordering must match upb/def.c! -// -// The ordering is significant because each upb_MessageDef* will point at the -// corresponding upb_MiniTable and we just iterate through the list without -// any search or lookup. 
-std::vector SortedMessages( - const protobuf::FileDescriptor* file) { - std::vector messages; - for (int i = 0; i < file->message_type_count(); i++) { - AddMessages(file->message_type(i), &messages); - } - return messages; -} - -void AddExtensionsFromMessage( - const protobuf::Descriptor* message, - std::vector* exts) { - for (int i = 0; i < message->extension_count(); i++) { - exts->push_back(message->extension(i)); - } - for (int i = 0; i < message->nested_type_count(); i++) { - AddExtensionsFromMessage(message->nested_type(i), exts); - } -} - -// Ordering must match upb/def.c! -// -// The ordering is significant because each upb_FieldDef* will point at the -// corresponding upb_MiniTable_Extension and we just iterate through the list -// without any search or lookup. -std::vector SortedExtensions( - const protobuf::FileDescriptor* file) { - std::vector ret; - for (int i = 0; i < file->extension_count(); i++) { - ret.push_back(file->extension(i)); - } - for (int i = 0; i < file->message_type_count(); i++) { - AddExtensionsFromMessage(file->message_type(i), &ret); - } - return ret; -} - -std::vector FieldNumberOrder( - const protobuf::Descriptor* message) { - std::vector fields; - for (int i = 0; i < message->field_count(); i++) { - fields.push_back(message->field(i)); - } - std::sort(fields.begin(), fields.end(), - [](const protobuf::FieldDescriptor* a, - const protobuf::FieldDescriptor* b) { - return a->number() < b->number(); - }); - return fields; -} - std::string EnumValueSymbol(const protobuf::EnumValueDescriptor* value) { return ToCIdent(value->full_name()); } @@ -332,392 +256,6 @@ std::string CTypeConst(const protobuf::FieldDescriptor* field) { return CTypeInternal(field, true); } -//////////////////////////////////////////////////////////////////////////////// -// FilePlatformLayout -//////////////////////////////////////////////////////////////////////////////// - -// FilePlatformLayout builds and vends upb MiniTables for a given platform (32 -// or 64 
bit). -class FilePlatformLayout { - public: - FilePlatformLayout(const protobuf::FileDescriptor* fd, - upb_MiniTablePlatform platform) - : platform_(platform) { - BuildMiniTables(fd); - BuildExtensions(fd); - } - - // Retrieves a upb MiniTable or Extension given a protobuf descriptor. The - // descriptor must be from this layout's file. - upb_MiniTable* GetMiniTable(const protobuf::Descriptor* m) const; - upb_MiniTable_Enum* GetEnumTable(const protobuf::EnumDescriptor* d) const; - const upb_MiniTable_Extension* GetExtension( - const protobuf::FieldDescriptor* fd) const; - - // Get the initializer for the given sub-message/sub-enum link. - static std::string GetSub(upb_MiniTable_Sub sub); - - private: - // Functions to build mini-tables for this file's messages and extensions. - void BuildMiniTables(const protobuf::FileDescriptor* fd); - void BuildExtensions(const protobuf::FileDescriptor* fd); - upb_MiniTable* MakeMiniTable(const protobuf::Descriptor* m); - upb_MiniTable* MakeRegularMiniTable(const protobuf::Descriptor* m); - upb_MiniTable_Enum* MakeMiniTableEnum(const protobuf::EnumDescriptor* d); - uint64_t GetMessageModifiers(const protobuf::Descriptor* m); - uint64_t GetFieldModifiers(const protobuf::FieldDescriptor* f); - void ResolveIntraFileReferences(); - - // When we are generating code, tables are linked to sub-tables via name (ie. - // a string) rather than by pointer. We need to emit an initializer like - // `&foo_sub_table`. To do this, we store `const char*` strings in all the - // links that would normally be pointers: - // field -> sub-message - // field -> enum table (proto2 only) - // extension -> extendee - // - // This requires a bit of reinterpret_cast<>(), but it's confined to a few - // functions. We tag the pointer so we know which member of the union to - // initialize. 
- enum SubTag { - kNull = 0, - kMessage = 1, - kEnum = 2, - kMask = 3, - }; - - static upb_MiniTable_Sub PackSub(const char* data, SubTag tag); - static bool IsNull(upb_MiniTable_Sub sub); - void SetSubTableStrings(); - upb_MiniTable_Sub PackSubForField(const protobuf::FieldDescriptor* f, - const upb_MiniTable_Field* mt_f); - const char* AllocStr(absl::string_view str); - - private: - using TableMap = - absl::flat_hash_map; - using EnumMap = - absl::flat_hash_map; - using ExtensionMap = absl::flat_hash_map; - upb::Arena arena_; - TableMap table_map_; - EnumMap enum_map_; - ExtensionMap extension_map_; - upb_MiniTablePlatform platform_; -}; - -upb_MiniTable* FilePlatformLayout::GetMiniTable( - const protobuf::Descriptor* m) const { - auto it = table_map_.find(m); - assert(it != table_map_.end()); - return it->second; -} - -upb_MiniTable_Enum* FilePlatformLayout::GetEnumTable( - const protobuf::EnumDescriptor* d) const { - auto it = enum_map_.find(d); - assert(it != enum_map_.end()); - return it->second; -} - -const upb_MiniTable_Extension* FilePlatformLayout::GetExtension( - const protobuf::FieldDescriptor* fd) const { - auto it = extension_map_.find(fd); - assert(it != extension_map_.end()); - return &it->second; -} - -void FilePlatformLayout::ResolveIntraFileReferences() { - // This properly resolves references within a file, in order to set any - // necessary flags (eg. is a map). - for (const auto& pair : table_map_) { - upb_MiniTable* mt = pair.second; - // First we properly resolve for defs within the file. - for (const auto* f : FieldNumberOrder(pair.first)) { - if (f->message_type() && f->message_type()->file() == f->file()) { - // const_cast is safe because the mini-table is owned exclusively - // by us, and was allocated from an arena (known-writable memory). 
- upb_MiniTable_Field* mt_f = const_cast( - upb_MiniTable_FindFieldByNumber(mt, f->number())); - upb_MiniTable* sub_mt = GetMiniTable(f->message_type()); - upb_MiniTable_SetSubMessage(mt, mt_f, sub_mt); - } - // We don't worry about enums here, because resolving an enum will - // never alter the mini-table. - } - } -} - -upb_MiniTable_Sub FilePlatformLayout::PackSub(const char* data, SubTag tag) { - uintptr_t val = reinterpret_cast(data); - assert((val & kMask) == 0); - upb_MiniTable_Sub sub; - sub.submsg = reinterpret_cast(val | tag); - return sub; -} - -bool FilePlatformLayout::IsNull(upb_MiniTable_Sub sub) { - return reinterpret_cast(sub.subenum) == 0; -} - -std::string FilePlatformLayout::GetSub(upb_MiniTable_Sub sub) { - uintptr_t as_int = reinterpret_cast(sub.submsg); - const char* str = reinterpret_cast(as_int & ~SubTag::kMask); - switch (as_int & SubTag::kMask) { - case SubTag::kMessage: - return absl::Substitute("{.submsg = &$0}", str); - case SubTag::kEnum: - return absl::Substitute("{.subenum = &$0}", str); - default: - return std::string("{.submsg = NULL}"); - } - return std::string("ERROR in GetSub"); -} - -void FilePlatformLayout::SetSubTableStrings() { - for (const auto& pair : table_map_) { - upb_MiniTable* mt = pair.second; - for (const auto* f : FieldNumberOrder(pair.first)) { - upb_MiniTable_Field* mt_f = const_cast( - upb_MiniTable_FindFieldByNumber(mt, f->number())); - assert(mt_f); - upb_MiniTable_Sub sub = PackSubForField(f, mt_f); - if (IsNull(sub)) continue; - // const_cast is safe because the mini-table is owned exclusively - // by us, and was allocated from an arena (known-writable memory). 
- *const_cast(&mt->subs[mt_f->submsg_index]) = sub; - } - } -} - -upb_MiniTable_Sub FilePlatformLayout::PackSubForField( - const protobuf::FieldDescriptor* f, const upb_MiniTable_Field* mt_f) { - if (mt_f->submsg_index == kUpb_NoSub) { - return PackSub(nullptr, SubTag::kNull); - } else if (f->message_type()) { - return PackSub(AllocStr(MessageInit(f->message_type())), SubTag::kMessage); - } else { - ABSL_ASSERT(f->enum_type()); - return PackSub(AllocStr(EnumInit(f->enum_type())), SubTag::kEnum); - } -} - -const char* FilePlatformLayout::AllocStr(absl::string_view str) { - char* ret = - static_cast(upb_Arena_Malloc(arena_.ptr(), str.size() + 1)); - memcpy(ret, str.data(), str.size()); - ret[str.size()] = '\0'; - return ret; -} - -void FilePlatformLayout::BuildMiniTables(const protobuf::FileDescriptor* fd) { - for (const auto& m : SortedMessages(fd)) { - table_map_[m] = MakeMiniTable(m); - } - for (const auto& e : SortedEnums(fd)) { - enum_map_[e] = MakeMiniTableEnum(e); - } - ResolveIntraFileReferences(); - SetSubTableStrings(); -} - -void FilePlatformLayout::BuildExtensions(const protobuf::FileDescriptor* fd) { - std::vector sorted = SortedExtensions(fd); - upb::Status status; - for (const auto* f : sorted) { - upb::MtDataEncoder e; - e.StartMessage(0); - e.PutField(static_cast(f->type()), f->number(), - GetFieldModifiers(f)); - upb_MiniTable_Extension& ext = extension_map_[f]; - upb_MiniTable_Sub sub; - bool ok = upb_MiniTable_BuildExtension(e.data().data(), e.data().size(), - &ext, sub, status.ptr()); - if (!ok) { - // TODO(haberman): Use ABSL CHECK() when it is available. 
- fprintf(stderr, "Error building mini-table: %s\n", - status.error_message()); - } - ABSL_ASSERT(ok); - ext.extendee = reinterpret_cast( - AllocStr(MessageInit(f->containing_type()))); - ext.sub = PackSubForField(f, &ext.field); - } -} - -upb_MiniTable* FilePlatformLayout::MakeMiniTable( - const protobuf::Descriptor* m) { - if (m->options().message_set_wire_format()) { - return upb_MiniTable_BuildMessageSet(platform_, arena_.ptr()); - } else if (m->options().map_entry()) { - return upb_MiniTable_BuildMapEntry( - static_cast(m->map_key()->type()), - static_cast(m->map_value()->type()), - m->map_value()->enum_type() && - m->map_value()->enum_type()->file()->syntax() == - protobuf::FileDescriptor::SYNTAX_PROTO3, - platform_, arena_.ptr()); - } else { - return MakeRegularMiniTable(m); - } -} - -upb_MiniTable* FilePlatformLayout::MakeRegularMiniTable( - const protobuf::Descriptor* m) { - upb::MtDataEncoder e; - e.StartMessage(GetMessageModifiers(m)); - for (const auto* f : FieldNumberOrder(m)) { - e.PutField(static_cast(f->type()), f->number(), - GetFieldModifiers(f)); - } - for (int i = 0; i < m->real_oneof_decl_count(); i++) { - const protobuf::OneofDescriptor* oneof = m->oneof_decl(i); - e.StartOneof(); - for (int j = 0; j < oneof->field_count(); j++) { - const protobuf::FieldDescriptor* f = oneof->field(j); - e.PutOneofField(f->number()); - } - } - absl::string_view str = e.data(); - upb::Status status; - upb_MiniTable* ret = upb_MiniTable_Build(str.data(), str.size(), platform_, - arena_.ptr(), status.ptr()); - if (!ret) { - fprintf(stderr, "Error building mini-table: %s\n", status.error_message()); - } - assert(ret); - return ret; -} - -upb_MiniTable_Enum* FilePlatformLayout::MakeMiniTableEnum( - const protobuf::EnumDescriptor* d) { - upb::Arena arena; - upb::MtDataEncoder e; - - e.StartEnum(); - for (uint32_t i : SortedUniqueEnumNumbers(d)) { - e.PutEnumValue(i); - } - e.EndEnum(); - - absl::string_view str = e.data(); - upb::Status status; - upb_MiniTable_Enum* 
ret = upb_MiniTable_BuildEnum(str.data(), str.size(), - arena_.ptr(), status.ptr()); - if (!ret) { - fprintf(stderr, "Error building mini-table: %s\n", status.error_message()); - } - assert(ret); - return ret; -} - -uint64_t FilePlatformLayout::GetMessageModifiers( - const protobuf::Descriptor* m) { - uint64_t ret = 0; - - if (m->file()->syntax() == protobuf::FileDescriptor::SYNTAX_PROTO3) { - ret |= kUpb_MessageModifier_ValidateUtf8; - ret |= kUpb_MessageModifier_DefaultIsPacked; - } - - if (m->extension_range_count() > 0) { - ret |= kUpb_MessageModifier_IsExtendable; - } - - assert(!m->options().map_entry()); - return ret; -} - -uint64_t FilePlatformLayout::GetFieldModifiers( - const protobuf::FieldDescriptor* f) { - uint64_t ret = 0; - - if (f->is_repeated()) ret |= kUpb_FieldModifier_IsRepeated; - if (f->is_required()) ret |= kUpb_FieldModifier_IsRequired; - if (f->is_packed()) ret |= kUpb_FieldModifier_IsPacked; - if (f->enum_type() && f->enum_type()->file()->syntax() == - protobuf::FileDescriptor::SYNTAX_PROTO2) { - ret |= kUpb_FieldModifier_IsClosedEnum; - } - if (f->is_optional() && !f->has_presence()) { - ret |= kUpb_FieldModifier_IsProto3Singular; - } - - return ret; -} - -//////////////////////////////////////////////////////////////////////////////// -// FileLayout -//////////////////////////////////////////////////////////////////////////////// - -// FileLayout is a pair of platform layouts: one for 32-bit and one for 64-bit. 
-class FileLayout { - public: - FileLayout(const protobuf::FileDescriptor* fd) - : descriptor_(fd), - layout32_(fd, kUpb_MiniTablePlatform_32Bit), - layout64_(fd, kUpb_MiniTablePlatform_64Bit) {} - - const protobuf::FileDescriptor* descriptor() const { return descriptor_; } - - const upb_MiniTable* GetMiniTable32(const protobuf::Descriptor* m) const { - return layout32_.GetMiniTable(m); - } - - const upb_MiniTable* GetMiniTable64(const protobuf::Descriptor* m) const { - return layout64_.GetMiniTable(m); - } - - const upb_MiniTable_Enum* GetEnumTable( - const protobuf::EnumDescriptor* d) const { - return layout64_.GetEnumTable(d); - } - - std::string GetFieldOffset(const protobuf::FieldDescriptor* f) const { - const upb_MiniTable_Field* f_32 = upb_MiniTable_FindFieldByNumber( - GetMiniTable32(f->containing_type()), f->number()); - const upb_MiniTable_Field* f_64 = upb_MiniTable_FindFieldByNumber( - GetMiniTable64(f->containing_type()), f->number()); - return absl::Substitute("UPB_SIZE($0, $1)", f_32->offset, f_64->offset); - } - - std::string GetOneofCaseOffset(const protobuf::OneofDescriptor* o) const { - const protobuf::FieldDescriptor* f = o->field(0); - const upb_MiniTable_Field* f_32 = upb_MiniTable_FindFieldByNumber( - GetMiniTable32(f->containing_type()), f->number()); - const upb_MiniTable_Field* f_64 = upb_MiniTable_FindFieldByNumber( - GetMiniTable64(f->containing_type()), f->number()); - return absl::Substitute("UPB_SIZE($0, $1)", ~f_32->presence, - ~f_64->presence); - } - - std::string GetMessageSize(const protobuf::Descriptor* d) const { - return absl::Substitute("UPB_SIZE($0, $1)", GetMiniTable32(d)->size, - GetMiniTable64(d)->size); - } - - int GetHasbitIndex(const protobuf::FieldDescriptor* f) const { - const upb_MiniTable_Field* f_64 = upb_MiniTable_FindFieldByNumber( - GetMiniTable64(f->containing_type()), f->number()); - return f_64->presence; - } - - bool HasHasbit(const protobuf::FieldDescriptor* f) const { - return GetHasbitIndex(f) > 0; - } - 
- const upb_MiniTable_Extension* GetExtension( - const protobuf::FieldDescriptor* f) const { - return layout64_.GetExtension(f); - } - - private: - const protobuf::FileDescriptor* descriptor_; - FilePlatformLayout layout32_; - FilePlatformLayout layout64_; -}; - void DumpEnumValues(const protobuf::EnumDescriptor* desc, Output& output) { std::vector values; for (int i = 0; i < desc->value_count(); i++) {