From 273398ca5f90a39912aa758e8053b10961ee4cb0 Mon Sep 17 00:00:00 2001 From: Joshua Haberman Date: Sun, 13 Mar 2022 14:13:35 -0700 Subject: [PATCH] Removed MessageLayout from upbc. --- upb/mini_table.c | 13 ++- upbc/BUILD | 6 +- upbc/message_layout.cc | 233 ----------------------------------------- upbc/message_layout.h | 163 ---------------------------- upbc/protoc-gen-upb.cc | 63 ++++++----- 5 files changed, 49 insertions(+), 429 deletions(-) delete mode 100644 upbc/message_layout.cc delete mode 100644 upbc/message_layout.h diff --git a/upb/mini_table.c b/upb/mini_table.c index b0953368c6..4442e7df8c 100644 --- a/upb/mini_table.c +++ b/upb/mini_table.c @@ -848,7 +848,18 @@ static void upb_MtDecoder_AssignOffsets(upb_MtDecoder* d) { } } - d->table->size = UPB_ALIGN_UP(d->table->size, 16); + if (d->platform == kUpb_MiniTablePlatform_64Bit) { + // For compatibility with fast table parsing, we have to align this up to a + // multiple of 16 + 8. This is because arena alloc size must be a multiple + // of 16, but we will add sizeof(upb_Message_Internal) at runtime, as the + // table size does not include this value. + // + // This is a bit convoluted and should probably be simplified. 
+ d->table->size = UPB_ALIGN_UP(d->table->size, 8); + if (UPB_ALIGN_UP(d->table->size, 16) == d->table->size) { + d->table->size += 8; + } + } } upb_MiniTable* upb_MiniTable_BuildWithBuf(const char* data, size_t len, diff --git a/upbc/BUILD b/upbc/BUILD index 1fd5443bf6..23b26214b9 100644 --- a/upbc/BUILD +++ b/upbc/BUILD @@ -43,11 +43,7 @@ cc_library( cc_binary( name = "protoc-gen-upb", - srcs = [ - "protoc-gen-upb.cc", - "message_layout.cc", - "message_layout.h", - ], + srcs = ["protoc-gen-upb.cc"], copts = UPB_DEFAULT_CPPOPTS, visibility = ["//visibility:public"], deps = [ diff --git a/upbc/message_layout.cc b/upbc/message_layout.cc deleted file mode 100644 index e72dd6cfb2..0000000000 --- a/upbc/message_layout.cc +++ /dev/null @@ -1,233 +0,0 @@ -// Copyright (c) 2009-2021, Google LLC -// All rights reserved. -// -// Redistribution and use in source and binary forms, with or without -// modification, are permitted provided that the following conditions are met: -// * Redistributions of source code must retain the above copyright -// notice, this list of conditions and the following disclaimer. -// * Redistributions in binary form must reproduce the above copyright -// notice, this list of conditions and the following disclaimer in the -// documentation and/or other materials provided with the distribution. -// * Neither the name of Google LLC nor the -// names of its contributors may be used to endorse or promote products -// derived from this software without specific prior written permission. -// -// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" -// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE -// ARE DISCLAIMED. 
IN NO EVENT SHALL Google LLC BE LIABLE FOR ANY DIRECT, -// INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES -// (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; -// LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND -// ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS -// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - -#include "upbc/message_layout.h" - -#include "google/protobuf/descriptor.pb.h" - -namespace upbc { - -namespace protobuf = ::google::protobuf; - -static int64_t DivRoundUp(int64_t a, int64_t b) { - ABSL_ASSERT(a >= 0); - ABSL_ASSERT(b > 0); - return (a + b - 1) / b; -} - -MessageLayout::Size MessageLayout::Place( - MessageLayout::SizeAndAlign size_and_align) { - Size offset = size_; - offset.AlignUp(size_and_align.align); - size_ = offset; - size_.Add(size_and_align.size); - ABSL_ASSERT(IsPowerOfTwo(size_and_align.align.size32)); - ABSL_ASSERT(IsPowerOfTwo(size_and_align.align.size64)); - maxalign_.MaxFrom(size_and_align.align); - return offset; -} - -bool MessageLayout::HasHasbit(const protobuf::FieldDescriptor* field) { - return field->has_presence() && !field->real_containing_oneof() && - !field->containing_type()->options().map_entry(); -} - -MessageLayout::SizeAndAlign MessageLayout::SizeOf( - const protobuf::FieldDescriptor* field) { - if (field->is_repeated()) { - return {{4, 8}, {4, 8}}; // Pointer to array object. - } else { - return SizeOfUnwrapped(field); - } -} - -MessageLayout::SizeAndAlign MessageLayout::SizeOfUnwrapped( - const protobuf::FieldDescriptor* field) { - switch (field->cpp_type()) { - case protobuf::FieldDescriptor::CPPTYPE_MESSAGE: - return {{4, 8}, {4, 8}}; // Pointer to message. 
- case protobuf::FieldDescriptor::CPPTYPE_STRING: - return {{8, 16}, {4, 8}}; // upb_StringView - case protobuf::FieldDescriptor::CPPTYPE_BOOL: - return {{1, 1}, {1, 1}}; - case protobuf::FieldDescriptor::CPPTYPE_FLOAT: - case protobuf::FieldDescriptor::CPPTYPE_INT32: - case protobuf::FieldDescriptor::CPPTYPE_UINT32: - case protobuf::FieldDescriptor::CPPTYPE_ENUM: - return {{4, 4}, {4, 4}}; - case protobuf::FieldDescriptor::CPPTYPE_INT64: - case protobuf::FieldDescriptor::CPPTYPE_UINT64: - case protobuf::FieldDescriptor::CPPTYPE_DOUBLE: - return {{8, 8}, {8, 8}}; - } - assert(false); - return {{-1, -1}, {-1, -1}}; -} - -int64_t MessageLayout::FieldLayoutRank(const protobuf::FieldDescriptor* field) { - // Order: - // 1, 2, 3. primitive fields (8, 4, 1 byte) - // 4. string fields - // 5. submessage fields - // 6. repeated fields - // - // This has the following nice properties: - // - // 1. padding alignment is (nearly) minimized. - // 2. fields that might have defaults (1-4) are segregated - // from fields that are always zero-initialized (5-7). - // - // We skip oneof fields, because they are emitted in a separate pass. - int64_t rank; - if (field->containing_oneof()) { - fprintf(stderr, "shouldn't have oneofs here.\n"); - abort(); - } else if (field->label() == protobuf::FieldDescriptor::LABEL_REPEATED) { - rank = 6; - } else { - switch (field->cpp_type()) { - case protobuf::FieldDescriptor::CPPTYPE_MESSAGE: - rank = 5; - break; - case protobuf::FieldDescriptor::CPPTYPE_STRING: - rank = 4; - break; - case protobuf::FieldDescriptor::CPPTYPE_BOOL: - rank = 3; - break; - case protobuf::FieldDescriptor::CPPTYPE_FLOAT: - case protobuf::FieldDescriptor::CPPTYPE_INT32: - case protobuf::FieldDescriptor::CPPTYPE_UINT32: - rank = 2; - break; - default: - rank = 1; - break; - } - } - - // Break ties with field number. 
- return (rank << 29) | field->number(); -} - -void MessageLayout::ComputeLayout(const protobuf::Descriptor* descriptor) { - size_ = Size{0, 0}; - maxalign_ = Size{8, 8}; - - if (descriptor->options().map_entry()) { - // Map entries aren't actually stored, they are only used during parsing. - // For parsing, it helps a lot if all map entry messages have the same - // layout. - SizeAndAlign size{{8, 16}, {4, 8}}; // upb_StringView - field_offsets_[descriptor->FindFieldByNumber(1)] = Place(size); - field_offsets_[descriptor->FindFieldByNumber(2)] = Place(size); - } else { - PlaceNonOneofFields(descriptor); - PlaceOneofFields(descriptor); - } - - // Align overall size up to max size. - size_.AlignUp(maxalign_); -} - -void MessageLayout::PlaceNonOneofFields( - const protobuf::Descriptor* descriptor) { - std::vector field_order; - for (int i = 0; i < descriptor->field_count(); i++) { - const protobuf::FieldDescriptor* field = descriptor->field(i); - if (!field->containing_oneof()) { - field_order.push_back(descriptor->field(i)); - } - } - std::sort(field_order.begin(), field_order.end(), - [](const protobuf::FieldDescriptor* a, - const protobuf::FieldDescriptor* b) { - return FieldLayoutRank(a) < FieldLayoutRank(b); - }); - - // Place/count hasbits. - hasbit_count_ = 0; - required_count_ = 0; - for (auto field : FieldHotnessOrder(descriptor)) { - if (HasHasbit(field)) { - // We don't use hasbit 0, so that 0 can indicate "no presence" in the - // table. This wastes one hasbit, but we don't worry about it for now. - int index = ++hasbit_count_; - hasbit_indexes_[field] = index; - if (field->is_required()) { - if (index > 63) { - // This could be fixed in the decoder without too much trouble. But - // we expect this to be so rare that we don't worry about it for now. - std::cerr << "upb does not support messages with more than 63 " - "required fields: " - << field->full_name() << "\n"; - exit(1); - } - required_count_++; - } - } - } - - // Place hasbits at the beginning. 
- hasbit_bytes_ = hasbit_count_ ? DivRoundUp(hasbit_count_ + 1, 8) : 0; - Place(SizeAndAlign{{hasbit_bytes_, hasbit_bytes_}, {1, 1}}); - - // Place non-oneof fields. - for (auto field : field_order) { - field_offsets_[field] = Place(SizeOf(field)); - } -} - -void MessageLayout::PlaceOneofFields(const protobuf::Descriptor* descriptor) { - std::vector oneof_order; - for (int i = 0; i < descriptor->oneof_decl_count(); i++) { - oneof_order.push_back(descriptor->oneof_decl(i)); - } - std::sort(oneof_order.begin(), oneof_order.end(), - [](const protobuf::OneofDescriptor* a, - const protobuf::OneofDescriptor* b) { - return a->full_name() < b->full_name(); - }); - - for (auto oneof : oneof_order) { - SizeAndAlign oneof_maxsize{{0, 0}, {0, 0}}; - // Calculate max size. - for (int i = 0; i < oneof->field_count(); i++) { - oneof_maxsize.MaxFrom(SizeOf(oneof->field(i))); - } - - // Place discriminator enum and data. - Size data = Place(oneof_maxsize); - Size discriminator = Place(SizeAndAlign{{4, 4}, {4, 4}}); - - oneof_case_offsets_[oneof] = discriminator; - - for (int i = 0; i < oneof->field_count(); i++) { - field_offsets_[oneof->field(i)] = data; - } - } -} - -} // namespace upbc diff --git a/upbc/message_layout.h b/upbc/message_layout.h deleted file mode 100644 index 6c3d58a89c..0000000000 --- a/upbc/message_layout.h +++ /dev/null @@ -1,163 +0,0 @@ -/* - * Copyright (c) 2009-2021, Google LLC - * All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are met: - * * Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * * Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. 
- * * Neither the name of Google LLC nor the - * names of its contributors may be used to endorse or promote products - * derived from this software without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" - * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL Google LLC BE LIABLE FOR ANY DIRECT, - * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES - * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; - * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND - * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS - * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - */ - -#ifndef UPBC_MESSAGE_LAYOUT_H -#define UPBC_MESSAGE_LAYOUT_H - -#include "absl/base/macros.h" -#include "absl/container/flat_hash_map.h" -#include "google/protobuf/descriptor.h" -#include "upb/upb.hpp" -#include "upb/mini_table.h" - -namespace upbc { - -class MessageLayout { - public: - struct Size { - void Add(const Size& other) { - size32 += other.size32; - size64 += other.size64; - } - - void MaxFrom(const Size& other) { - size32 = std::max(size32, other.size32); - size64 = std::max(size64, other.size64); - } - - void AlignUp(const Size& align) { - size32 = Align(size32, align.size32); - size64 = Align(size64, align.size64); - } - - int64_t size32; - int64_t size64; - }; - - struct SizeAndAlign { - Size size; - Size align; - - void MaxFrom(const SizeAndAlign& other) { - size.MaxFrom(other.size); - align.MaxFrom(other.align); - } - }; - - MessageLayout(const google::protobuf::Descriptor* descriptor) { - ComputeLayout(descriptor); - } - - Size GetFieldOffset(const google::protobuf::FieldDescriptor* field) 
const { - return GetMapValue(field_offsets_, field); - } - - Size GetOneofCaseOffset( - const google::protobuf::OneofDescriptor* oneof) const { - return GetMapValue(oneof_case_offsets_, oneof); - } - - int GetHasbitIndex(const google::protobuf::FieldDescriptor* field) const { - return GetMapValue(hasbit_indexes_, field); - } - - Size message_size() const { return size_; } - - int hasbit_count() const { return hasbit_count_; } - int hasbit_bytes() const { return hasbit_bytes_; } - - // Required fields always have the lowest hasbits. - int required_count() const { return required_count_; } - - static bool HasHasbit(const google::protobuf::FieldDescriptor* field); - static SizeAndAlign SizeOfUnwrapped( - const google::protobuf::FieldDescriptor* field); - - private: - void ComputeLayout(const google::protobuf::Descriptor* descriptor); - void PlaceNonOneofFields(const google::protobuf::Descriptor* descriptor); - void PlaceOneofFields(const google::protobuf::Descriptor* descriptor); - Size Place(SizeAndAlign size_and_align); - - template - static V GetMapValue(const absl::flat_hash_map& map, K key) { - auto iter = map.find(key); - if (iter == map.end()) { - fprintf(stderr, "No value for field.\n"); - abort(); - } - return iter->second; - } - - static bool IsPowerOfTwo(size_t val) { return (val & (val - 1)) == 0; } - - static size_t Align(size_t val, size_t align) { - if (!IsPowerOfTwo(align)) { - fprintf(stderr, "YO! Align is: %d\n", (int)align); - } - ABSL_ASSERT(IsPowerOfTwo(align)); - return (val + align - 1) & ~(align - 1); - } - - static SizeAndAlign SizeOf(const google::protobuf::FieldDescriptor* field); - static int64_t FieldLayoutRank( - const google::protobuf::FieldDescriptor* field); - - absl::flat_hash_map - field_offsets_; - absl::flat_hash_map - hasbit_indexes_; - absl::flat_hash_map - oneof_case_offsets_; - Size maxalign_; - Size size_; - int hasbit_count_; - int hasbit_bytes_; - int required_count_ = 0; -}; - -// Returns fields in order of "hotness", eg. 
how frequently they appear in -// serialized payloads. Ideally this will use a profile. When we don't have -// that, we assume that fields with smaller numbers are used more frequently. -inline std::vector FieldHotnessOrder( - const google::protobuf::Descriptor* message) { - std::vector fields; - for (int i = 0; i < message->field_count(); i++) { - fields.push_back(message->field(i)); - } - std::sort(fields.begin(), fields.end(), - [](const google::protobuf::FieldDescriptor* a, - const google::protobuf::FieldDescriptor* b) { - return std::make_pair(!a->is_required(), a->number()) < - std::make_pair(!b->is_required(), b->number()); - }); - return fields; -} - -} // namespace upbc - -#endif // UPBC_MESSAGE_LAYOUT_H diff --git a/upbc/protoc-gen-upb.cc b/upbc/protoc-gen-upb.cc index 09f6701f84..734f65f6a9 100644 --- a/upbc/protoc-gen-upb.cc +++ b/upbc/protoc-gen-upb.cc @@ -35,8 +35,8 @@ #include "google/protobuf/descriptor.pb.h" #include "google/protobuf/wire_format.h" #include "upb/mini_table.hpp" +#include "upb/upb.hpp" #include "upbc/common.h" -#include "upbc/message_layout.h" namespace upbc { namespace { @@ -44,6 +44,24 @@ namespace { namespace protoc = ::google::protobuf::compiler; namespace protobuf = ::google::protobuf; +// Returns fields in order of "hotness", eg. how frequently they appear in +// serialized payloads. Ideally this will use a profile. When we don't have +// that, we assume that fields with smaller numbers are used more frequently. 
+inline std::vector<const google::protobuf::FieldDescriptor*> FieldHotnessOrder( + const google::protobuf::Descriptor* message) { + std::vector<const google::protobuf::FieldDescriptor*> fields; + for (int i = 0; i < message->field_count(); i++) { + fields.push_back(message->field(i)); + } + std::sort(fields.begin(), fields.end(), + [](const google::protobuf::FieldDescriptor* a, + const google::protobuf::FieldDescriptor* b) { + return std::make_pair(!a->is_required(), a->number()) < + std::make_pair(!b->is_required(), b->number()); + }); + return fields; +} + std::string SourceFilename(const google::protobuf::FileDescriptor* file) { return StripExtension(file->name()) + ".upb.c"; } @@ -209,10 +227,6 @@ std::string EnumValueSymbol(const protobuf::EnumValueDescriptor* value) { return ToCIdent(value->full_name()); } -std::string GetSizeInit(const MessageLayout::Size& size) { - return absl::Substitute("UPB_SIZE($0, $1)", size.size32, size.size64); -} - std::string CTypeInternal(const protobuf::FieldDescriptor* field, bool is_const) { std::string maybe_const = is_const ? "const " : ""; @@ -1171,33 +1185,28 @@ void WriteHeader(const FileLayout& layout, Output& output) { // This is gratuitously inefficient with how many times it rebuilds // MessageLayout objects for the same message. But we only do this for one // proto (descriptor.proto) so we don't worry about it. 
- const protobuf::Descriptor* max32 = nullptr; - const protobuf::Descriptor* max64 = nullptr; + const protobuf::Descriptor* max32_message = nullptr; + const protobuf::Descriptor* max64_message = nullptr; + size_t max32 = 0; + size_t max64 = 0; for (const auto* message : this_file_messages) { if (absl::EndsWith(message->name(), "Options")) { - MessageLayout layout(message); - if (max32 == nullptr) { - max32 = message; - max64 = message; - } else { - if (layout.message_size().size32 > - MessageLayout(max32).message_size().size32) { - max32 = message; - } - if (layout.message_size().size64 > - MessageLayout(max64).message_size().size64) { - max64 = message; - } + size_t size32 = layout.GetMiniTable32(message)->size; + size_t size64 = layout.GetMiniTable64(message)->size; + if (size32 > max32) { + max32 = size32; + max32_message = message; + } + if (size64 > max64) { + max64 = size64; + max64_message = message; } } } - output("/* Max size 32 is $0 */\n", max32->full_name()); - output("/* Max size 64 is $0 */\n", max64->full_name()); - MessageLayout::Size size; - size.size32 = MessageLayout(max32).message_size().size32; - size.size64 = MessageLayout(max32).message_size().size64; - output("#define _UPB_MAXOPT_SIZE $0\n\n", GetSizeInit(size)); + output("/* Max size 32 is $0 */\n", max32_message->full_name()); + output("/* Max size 64 is $0 */\n", max64_message->full_name()); + output("#define _UPB_MAXOPT_SIZE UPB_SIZE($0, $1)\n\n", max32, max64); } output( @@ -1236,6 +1245,7 @@ int GetTableSlot(const protobuf::FieldDescriptor* field) { return (tag & 0xf8) >> 3; } +#include "upb/port_def.inc" bool TryFillTableEntry(const FileLayout& layout, const protobuf::FieldDescriptor* field, TableEntry& ent) { @@ -1481,7 +1491,6 @@ void WriteMessage(const protobuf::Descriptor* message, const FileLayout& layout, std::string subenums_array_ref = "NULL"; const upb_MiniTable* mt_32 = layout.GetMiniTable32(message); const upb_MiniTable* mt_64 = layout.GetMiniTable64(message); - 
MessageLayout msg_layout(message); std::vector subs; for (int i = 0; i < mt_64->field_count; i++) {