From 60a3edb87f63aa865ed7554dac9bd4cce131c4cb Mon Sep 17 00:00:00 2001 From: Joshua Haberman Date: Sat, 19 Feb 2022 16:20:31 -0800 Subject: [PATCH] Code is in place to generate from upbc, but segv results. --- BUILD | 5 +- cmake/CMakeLists.txt | 3 +- upb/mini_table_test.cc | 99 +++++++++------------------------- upbc/message_layout.h | 2 + upbc/protoc-gen-upb.cc | 120 +++++++++++++++++++++++++++++++++++++++++ 5 files changed, 152 insertions(+), 77 deletions(-) diff --git a/BUILD b/BUILD index 8cdddf36ec..421a21ad3c 100644 --- a/BUILD +++ b/BUILD @@ -112,7 +112,10 @@ cc_library( cc_library( name = "mini_table", srcs = ["upb/mini_table.c"], - hdrs = ["upb/mini_table.h"], + hdrs = [ + "upb/mini_table.h", + "upb/mini_table.hpp", + ], copts = UPB_DEFAULT_COPTS, visibility = ["//visibility:public"], deps = [":upb"], diff --git a/cmake/CMakeLists.txt b/cmake/CMakeLists.txt index 4fa31df835..88c6563bf9 100644 --- a/cmake/CMakeLists.txt +++ b/cmake/CMakeLists.txt @@ -83,7 +83,8 @@ target_link_libraries(upb /third_party/utf8_range) add_library(mini_table ../upb/mini_table.c - ../upb/mini_table.h) + ../upb/mini_table.h + ../upb/mini_table.hpp) target_link_libraries(mini_table upb) add_library(fastdecode diff --git a/upb/mini_table_test.cc b/upb/mini_table_test.cc index 8f69699d05..32adc2c50a 100644 --- a/upb/mini_table_test.cc +++ b/upb/mini_table_test.cc @@ -25,7 +25,7 @@ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ -#include "upb/mini_table.h" +#include "upb/mini_table.hpp" #include "absl/container/flat_hash_set.h" #include "gmock/gmock.h" @@ -33,53 +33,6 @@ #include "upb/msg_internal.h" #include "upb/upb.hpp" -class StringAppender { - public: - StringAppender(upb_MtDataEncoder* e, std::string* str) : str_(str) { - e->end = buf_ + sizeof(buf_); - } - - template - bool operator()(T&& func) { - char* end = func(buf_); - if (!end) return false; - str_->append(buf_, end - buf_); - return true; - } - - private: - char buf_[kUpb_MtDataEncoder_MinSize]; - std::string* str_; -}; - -// We can consider putting these in a standard upb .hpp header. -bool StartMessage(upb_MtDataEncoder* e, uint64_t msg_mod, std::string* out) { - StringAppender appender(e, out); - return appender([=](char* buf) { - return upb_MtDataEncoder_StartMessage(e, buf, msg_mod); - }); -} - -bool PutField(upb_MtDataEncoder* e, upb_FieldType type, uint32_t field_num, - uint64_t field_mod, std::string* out) { - StringAppender appender(e, out); - return appender([=](char* buf) { - return upb_MtDataEncoder_PutField(e, buf, type, field_num, field_mod); - }); -} - -bool StartOneof(upb_MtDataEncoder* e, std::string* out) { - StringAppender appender(e, out); - return appender([=](char* buf) { return upb_MtDataEncoder_StartOneof(e, buf); }); -} - -bool PutOneofField(upb_MtDataEncoder* e, uint32_t field_num, std::string* out) { - StringAppender appender(e, out); - return appender([=](char* buf) { - return upb_MtDataEncoder_PutOneofField(e, buf, field_num); - }); -} - class MiniTableTest : public testing::TestWithParam {}; TEST_P(MiniTableTest, Empty) { @@ -94,18 +47,17 @@ TEST_P(MiniTableTest, Empty) { TEST_P(MiniTableTest, AllScalarTypes) { upb::Arena arena; - std::string input; - upb_MtDataEncoder e; - ASSERT_TRUE(StartMessage(&e, 0, &input)); + upb::MtDataEncoder e; + ASSERT_TRUE(e.StartMessage(0)); int count = 0; for (int i = kUpb_FieldType_Double ; i < kUpb_FieldType_SInt64; i++) { - ASSERT_TRUE(PutField(&e, static_cast(i), i, 0, &input)); + ASSERT_TRUE(e.PutField(static_cast(i), i, 0)); count++; } - fprintf(stderr, "YO: %s\n", input.c_str()); + fprintf(stderr, "YO: %s\n", e.data().c_str()); upb::Status status; upb_MiniTable* table = upb_MiniTable_Build( - input.data(), input.size(), GetParam(), arena.ptr(), status.ptr()); + e.data().data(), e.data().size(), GetParam(), arena.ptr(), status.ptr()); ASSERT_NE(nullptr, table); EXPECT_EQ(count, table->field_count); absl::flat_hash_set offsets; @@ -122,19 +74,18 @@ TEST_P(MiniTableTest, AllScalarTypes) { TEST_P(MiniTableTest, AllRepeatedTypes) { upb::Arena arena; - std::string input; - upb_MtDataEncoder e; - ASSERT_TRUE(StartMessage(&e, 0, &input)); + upb::MtDataEncoder e; + ASSERT_TRUE(e.StartMessage(0)); int count = 0; for (int i = kUpb_FieldType_Double ; i < kUpb_FieldType_SInt64; i++) { - ASSERT_TRUE(PutField(&e, static_cast(i), i, - kUpb_FieldModifier_IsRepeated, &input)); + ASSERT_TRUE(e.PutField(static_cast(i), i, + kUpb_FieldModifier_IsRepeated)); count++; } - fprintf(stderr, "YO: %s\n", input.c_str()); + fprintf(stderr, "YO: %s\n", e.data().c_str()); upb::Status status; upb_MiniTable* table = upb_MiniTable_Build( - input.data(), input.size(), GetParam(), arena.ptr(), status.ptr()); + e.data().data(), e.data().size(), GetParam(), arena.ptr(), status.ptr()); ASSERT_NE(nullptr, table); EXPECT_EQ(count, table->field_count); absl::flat_hash_set offsets; @@ -151,21 +102,20 @@ TEST_P(MiniTableTest, AllRepeatedTypes) { TEST_P(MiniTableTest, Skips) { upb::Arena arena; - std::string input; - upb_MtDataEncoder e; - ASSERT_TRUE(StartMessage(&e, 0, &input)); + upb::MtDataEncoder e; + ASSERT_TRUE(e.StartMessage(0)); int count = 0; std::vector field_numbers; for (int i = 0; i < 25; i++) { int field_number = 1 << i; field_numbers.push_back(field_number); - ASSERT_TRUE(PutField(&e, kUpb_FieldType_Float, field_number, 0, &input)); + ASSERT_TRUE(e.PutField(kUpb_FieldType_Float, field_number, 0)); count++; } - fprintf(stderr, "YO: %s, %zu\n", input.c_str(), input.size()); + fprintf(stderr, "YO: %s, %zu\n", e.data().c_str(), e.data().size()); upb::Status status; upb_MiniTable* table = upb_MiniTable_Build( - input.data(), input.size(), GetParam(), arena.ptr(), status.ptr()); + e.data().data(), e.data().size(), GetParam(), arena.ptr(), status.ptr()); ASSERT_NE(nullptr, table); EXPECT_EQ(count, table->field_count); absl::flat_hash_set offsets; @@ -182,22 +132,21 @@ TEST_P(MiniTableTest, Skips) { TEST_P(MiniTableTest, AllScalarTypesOneof) { upb::Arena arena; - std::string input; - upb_MtDataEncoder e; - ASSERT_TRUE(StartMessage(&e, 0, &input)); + upb::MtDataEncoder e; + ASSERT_TRUE(e.StartMessage(0)); int count = 0; for (int i = kUpb_FieldType_Double ; i < kUpb_FieldType_SInt64; i++) { - ASSERT_TRUE(PutField(&e, static_cast(i), i, 0, &input)); + ASSERT_TRUE(e.PutField(static_cast(i), i, 0)); count++; } - ASSERT_TRUE(StartOneof(&e, &input)); + ASSERT_TRUE(e.StartOneof()); for (int i = kUpb_FieldType_Double ; i < kUpb_FieldType_SInt64; i++) { - ASSERT_TRUE(PutOneofField(&e, i, &input)); + ASSERT_TRUE(e.PutOneofField(i)); } - fprintf(stderr, "YO: %s\n", input.c_str()); + fprintf(stderr, "YO: %s\n", e.data().c_str()); upb::Status status; upb_MiniTable* table = upb_MiniTable_Build( - input.data(), input.size(), GetParam(), arena.ptr(), status.ptr()); + e.data().data(), e.data().size(), GetParam(), arena.ptr(), status.ptr()); ASSERT_NE(nullptr, table); EXPECT_EQ(count, table->field_count); absl::flat_hash_set offsets; diff --git a/upbc/message_layout.h b/upbc/message_layout.h index fffa78db4e..31cff45eae 100644 --- a/upbc/message_layout.h +++ b/upbc/message_layout.h @@ -31,6 +31,8 @@ #include "absl/base/macros.h" #include "absl/container/flat_hash_map.h" #include "google/protobuf/descriptor.h" +#include "upb/upb.hpp" +#include "upb/mini_table.h" namespace upbc { diff --git a/upbc/protoc-gen-upb.cc b/upbc/protoc-gen-upb.cc index de135a5c30..7ad5eb419d 100644 --- a/upbc/protoc-gen-upb.cc +++ b/upbc/protoc-gen-upb.cc @@ -34,6 +34,7 @@ #include "google/protobuf/descriptor.h" #include "google/protobuf/descriptor.pb.h" #include "google/protobuf/wire_format.h" +#include "upb/mini_table.hpp" #include "upbc/common.h" #include "upbc/message_layout.h" @@ -353,6 +354,123 @@ std::string CTypeConst(const protobuf::FieldDescriptor* field) { return CTypeInternal(field, true); } +// TODO: shared function defined by minitable +upb_MiniTable_Field* FindField(upb_MiniTable* mt, uint32_t field_number) { + int n = mt->field_count; + for (int i = 0; i < n; i++) { + if (mt->fields[i].number == field_number) { + return const_cast(&mt->fields[i]); + } + } + assert(false); + return NULL; +} + +class FileLayout { + public: + FileLayout(const protobuf::FileDescriptor* fd, upb_MiniTablePlatform platform) + : platform_(platform) { + ComputeLayout(fd); + } + + upb_MiniTable* GetTable(const protobuf::Descriptor* m) { + auto it = table_map_.find(m); + assert(it != table_map_.end()); + return it->second; + } + + private: + void ComputeLayout(const protobuf::FileDescriptor* fd) { + for (const auto& m : SortedMessages(fd)) { + table_map_[m] = MakeMiniTable(m); + } + for (const auto& m : SortedMessages(fd)) { + upb_MiniTable* mt = GetTable(m); + for (const auto& f : FieldNumberOrder(m)) { + if (f->message_type() && f->message_type()->file() == f->file()) { + upb_MiniTable_Field* mt_f = FindField(mt, f->number()); + upb_MiniTable* sub_mt = GetTable(f->message_type()); + upb_MiniTable_SetSubMessage(mt, mt_f, sub_mt); + } + // TODO: proto2 enums. + } + } + } + + upb_MiniTable* MakeMiniTable(const protobuf::Descriptor* m) { + if (m->options().message_set_wire_format()) { + return upb_MiniTable_BuildMessageSet(platform_, arena_.ptr()); + } else if (m->options().map_entry()) { + return upb_MiniTable_BuildMapEntry( + static_cast(m->map_key()->type()), + static_cast(m->map_value()->type()), platform_, + arena_.ptr()); + } else { + return MakeRegularMiniTable(m); + } + } + + upb_MiniTable* MakeRegularMiniTable(const protobuf::Descriptor* m) { + upb::MtDataEncoder e; + e.StartMessage(GetMessageModifiers(m)); + for (const auto& f : FieldNumberOrder(m)) { + e.PutField(static_cast(f->type()), f->number(), + GetFieldModifiers(f)); + } + for (int i = 0; i < m->real_oneof_decl_count(); i++) { + const protobuf::OneofDescriptor* oneof = m->oneof_decl(i); + e.StartOneof(); + for (int j = 0; j < oneof->field_count(); j++) { + const protobuf::FieldDescriptor* f = oneof->field(i); + e.PutOneofField(f->number()); + } + } + const auto& str = e.data(); + upb::Status status; + upb_MiniTable* ret = upb_MiniTable_Build(str.data(), str.size(), platform_, + arena_.ptr(), status.ptr()); + assert(ret); + return ret; + } + + uint64_t GetMessageModifiers(const protobuf::Descriptor* m) { + uint64_t ret = 0; + + if (m->file()->syntax() == protobuf::FileDescriptor::SYNTAX_PROTO2) { + ret |= kUpb_MessageModifier_HasClosedEnums; + } else { + ret |= kUpb_MessageModifier_DefaultIsPacked; + } + + if (m->extension_range_count() > 0) { + ret |= kUpb_MessageModifier_IsExtendable; + } + + assert(!m->options().map_entry()); + return ret; + } + + uint64_t GetFieldModifiers(const protobuf::FieldDescriptor* f) { + uint64_t ret = 0; + + if (f->is_repeated()) ret |= kUpb_FieldModifier_IsRepeated; + if (f->is_required()) ret |= kUpb_FieldModifier_IsRequired; + if (f->is_packed()) ret |= kUpb_FieldModifier_IsPacked; // TODO + if (f->is_optional() && !f->has_presence()) { + ret |= kUpb_FieldModifier_IsProto3Singular; + } + + return ret; + } + + private: + typedef absl::flat_hash_map + TableMap; + upb::Arena arena_; + TableMap table_map_; + upb_MiniTablePlatform platform_; +}; + void DumpEnumValues(const protobuf::EnumDescriptor* desc, Output& output) { std::vector values; for (int i = 0; i < desc->value_count(); i++) { @@ -1479,6 +1597,8 @@ bool Generator::Generate(const protobuf::FileDescriptor* file, } } + FileLayout layout(file, kUpb_MiniTablePlatform_64Bit); + Output h_output(context->Open(HeaderFilename(file))); WriteHeader(file, h_output);