Removed MessageLayout from upbc.

pull/13171/head
Joshua Haberman 3 years ago
parent a2004150f8
commit 273398ca5f
  1. 13
      upb/mini_table.c
  2. 6
      upbc/BUILD
  3. 233
      upbc/message_layout.cc
  4. 163
      upbc/message_layout.h
  5. 63
      upbc/protoc-gen-upb.cc

@ -848,7 +848,18 @@ static void upb_MtDecoder_AssignOffsets(upb_MtDecoder* d) {
}
}
d->table->size = UPB_ALIGN_UP(d->table->size, 16);
if (d->platform == kUpb_MiniTablePlatform_64Bit) {
// For compatibility with fast table parsing, we have to align this up to a
// multiple of 16 + 8. This is because arena alloc size must be a multiple
// of 16, but we will add sizeof(upb_Message_Internal) at runtime, as the
// table size does not include this value.
//
// This is a bit convoluted and should probably be simplified.
d->table->size = UPB_ALIGN_UP(d->table->size, 8);
if (UPB_ALIGN_UP(d->table->size, 16) == d->table->size) {
d->table->size += 8;
}
}
}
upb_MiniTable* upb_MiniTable_BuildWithBuf(const char* data, size_t len,

@ -43,11 +43,7 @@ cc_library(
cc_binary(
name = "protoc-gen-upb",
srcs = [
"protoc-gen-upb.cc",
"message_layout.cc",
"message_layout.h",
],
srcs = ["protoc-gen-upb.cc"],
copts = UPB_DEFAULT_CPPOPTS,
visibility = ["//visibility:public"],
deps = [

@ -1,233 +0,0 @@
// Copyright (c) 2009-2021, Google LLC
// All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are met:
// * Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
// * Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
// * Neither the name of Google LLC nor the
// names of its contributors may be used to endorse or promote products
// derived from this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
// ARE DISCLAIMED. IN NO EVENT SHALL Google LLC BE LIABLE FOR ANY DIRECT,
// INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
// (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
// LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
// ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#include "upbc/message_layout.h"
#include "google/protobuf/descriptor.pb.h"
namespace upbc {
namespace protobuf = ::google::protobuf;
// Returns a divided by b, rounded up to the next integer.
//
// Preconditions (checked with ABSL_ASSERT): a >= 0 and b > 0.
//
// The result is computed as the truncated quotient plus one when there is a
// remainder, instead of (a + b - 1) / b, so that the intermediate sum cannot
// overflow int64_t when a is close to the maximum representable value.
static int64_t DivRoundUp(int64_t a, int64_t b) {
  ABSL_ASSERT(a >= 0);
  ABSL_ASSERT(b > 0);
  return a / b + (a % b != 0 ? 1 : 0);
}
// Appends a value of the given size and alignment to the end of the layout.
//
// The running message size is first rounded up to the requested alignment;
// that rounded position is the value's offset.  The running size then grows
// by the value's size, and the largest alignment seen so far is updated.
//
// Returns the offset at which the value was placed.
MessageLayout::Size MessageLayout::Place(
    MessageLayout::SizeAndAlign size_and_align) {
  const Size& alignment = size_and_align.align;

  // Round the current end of the message up to the required alignment.
  size_.AlignUp(alignment);
  const Size placed_at = size_;

  // Advance past the newly placed value.
  size_.Add(size_and_align.size);

  ABSL_ASSERT(IsPowerOfTwo(alignment.size32));
  ABSL_ASSERT(IsPowerOfTwo(alignment.size64));
  maxalign_.MaxFrom(alignment);
  return placed_at;
}
// Returns true if `field` gets a hasbit in the message layout.
//
// A field gets a hasbit only when all of the following hold:
//   * it has presence,
//   * it is not a member of a real oneof, and
//   * its containing message is not a map entry.
bool MessageLayout::HasHasbit(const protobuf::FieldDescriptor* field) {
  if (!field->has_presence()) return false;
  if (field->real_containing_oneof()) return false;
  return !field->containing_type()->options().map_entry();
}
// Returns the size and alignment of the storage slot for `field`.
//
// Repeated fields are stored as a pointer to an array object; every other
// field uses the size of its unwrapped C++ type.
MessageLayout::SizeAndAlign MessageLayout::SizeOf(
    const protobuf::FieldDescriptor* field) {
  if (!field->is_repeated()) {
    return SizeOfUnwrapped(field);
  }
  // Pointer to array object: {size{4, 8}, align{4, 8}}.
  return {{4, 8}, {4, 8}};
}
// Returns the size and alignment of a single (non-repeated) value of the
// field's C++ type.
//
// Each SizeAndAlign below is written as {size{size32, size64},
// align{size32, size64}}.  If the C++ type is not one of the handled cases,
// the function asserts and returns a sentinel of all -1 values.
MessageLayout::SizeAndAlign MessageLayout::SizeOfUnwrapped(
    const protobuf::FieldDescriptor* field) {
  using FD = protobuf::FieldDescriptor;

  const SizeAndAlign one_byte{{1, 1}, {1, 1}};
  const SizeAndAlign four_bytes{{4, 4}, {4, 4}};
  const SizeAndAlign eight_bytes{{8, 8}, {8, 8}};
  const SizeAndAlign message_pointer{{4, 8}, {4, 8}};  // Pointer to message.
  const SizeAndAlign string_view{{8, 16}, {4, 8}};     // upb_StringView

  switch (field->cpp_type()) {
    case FD::CPPTYPE_BOOL:
      return one_byte;
    case FD::CPPTYPE_ENUM:
    case FD::CPPTYPE_FLOAT:
    case FD::CPPTYPE_INT32:
    case FD::CPPTYPE_UINT32:
      return four_bytes;
    case FD::CPPTYPE_DOUBLE:
    case FD::CPPTYPE_INT64:
    case FD::CPPTYPE_UINT64:
      return eight_bytes;
    case FD::CPPTYPE_STRING:
      return string_view;
    case FD::CPPTYPE_MESSAGE:
      return message_pointer;
  }
  assert(false);
  return {{-1, -1}, {-1, -1}};
}
// Returns a sort key that determines where a non-oneof field is placed.
//
// Fields are grouped by rank, lowest first:
//   1. all remaining singular types (the switch's default case; this is
//      where 64-bit integers, doubles and enums land)
//   2. float / int32 / uint32
//   3. bool
//   4. string
//   5. submessage
//   6. repeated (of any type)
//
// Grouping this way keeps alignment padding (nearly) minimal and separates
// fields that might have defaults (ranks 1-4) from fields that are always
// zero-initialized (ranks 5-6).
//
// Oneof members must never be passed here, because they are laid out in a
// separate pass; doing so prints a message to stderr and aborts.
//
// The rank occupies the bits above bit 29 and the field number the bits
// below, so ties within a rank are broken by field number.
int64_t MessageLayout::FieldLayoutRank(const protobuf::FieldDescriptor* field) {
  using FD = protobuf::FieldDescriptor;

  if (field->containing_oneof()) {
    fprintf(stderr, "shouldn't have oneofs here.\n");
    abort();
  }

  int64_t rank = 1;  // Default for every singular type not listed below.
  if (field->label() == FD::LABEL_REPEATED) {
    rank = 6;
  } else {
    switch (field->cpp_type()) {
      case FD::CPPTYPE_FLOAT:
      case FD::CPPTYPE_INT32:
      case FD::CPPTYPE_UINT32:
        rank = 2;
        break;
      case FD::CPPTYPE_BOOL:
        rank = 3;
        break;
      case FD::CPPTYPE_STRING:
        rank = 4;
        break;
      case FD::CPPTYPE_MESSAGE:
        rank = 5;
        break;
      default:
        break;
    }
  }

  // Break ties with field number.
  return (rank << 29) | field->number();
}
// Computes offsets for every field of `descriptor` and the total message size.
//
// The layout starts empty with a minimum alignment of 8 for both size32 and
// size64.  Regular messages place their non-oneof fields first, then their
// oneofs.  Map entries instead get a fixed layout: fields number 1 and 2 are
// each given a slot with the size and alignment of a upb_StringView.
//
// Finally the total size is rounded up to the largest alignment seen.
void MessageLayout::ComputeLayout(const protobuf::Descriptor* descriptor) {
  size_ = Size{0, 0};
  maxalign_ = Size{8, 8};

  if (!descriptor->options().map_entry()) {
    PlaceNonOneofFields(descriptor);
    PlaceOneofFields(descriptor);
  } else {
    // Map entries aren't actually stored, they are only used during parsing.
    // For parsing, it helps a lot if all map entry messages have the same
    // layout.
    const SizeAndAlign entry_slot{{8, 16}, {4, 8}};  // upb_StringView
    const Size first_offset = Place(entry_slot);
    field_offsets_[descriptor->FindFieldByNumber(1)] = first_offset;
    const Size second_offset = Place(entry_slot);
    field_offsets_[descriptor->FindFieldByNumber(2)] = second_offset;
  }

  // Round the overall size up to the maximum alignment.
  size_.AlignUp(maxalign_);
}
// Assigns hasbits and offsets to every field that is not part of a oneof.
//
// Steps:
//   1. Collect the non-oneof fields and sort them by FieldLayoutRank().
//   2. Walk all fields in FieldHotnessOrder() and hand out hasbit indexes,
//      starting at 1, to those for which HasHasbit() is true.  Required
//      fields are counted along the way; a required field whose hasbit index
//      exceeds 63 is a fatal error (message on stderr, exit(1)).
//   3. Reserve the hasbit bytes at the current end of the layout.
//   4. Place the sorted fields after the hasbits.
void MessageLayout::PlaceNonOneofFields(
    const protobuf::Descriptor* descriptor) {
  std::vector<const protobuf::FieldDescriptor*> ranked_fields;
  const int field_total = descriptor->field_count();
  for (int idx = 0; idx < field_total; ++idx) {
    const protobuf::FieldDescriptor* candidate = descriptor->field(idx);
    if (candidate->containing_oneof()) continue;
    ranked_fields.push_back(candidate);
  }
  std::sort(ranked_fields.begin(), ranked_fields.end(),
            [](const protobuf::FieldDescriptor* lhs,
               const protobuf::FieldDescriptor* rhs) {
              return FieldLayoutRank(lhs) < FieldLayoutRank(rhs);
            });

  // Place/count hasbits.
  hasbit_count_ = 0;
  required_count_ = 0;
  for (const protobuf::FieldDescriptor* field :
       FieldHotnessOrder(descriptor)) {
    if (!HasHasbit(field)) continue;

    // We don't use hasbit 0, so that 0 can indicate "no presence" in the
    // table.  This wastes one hasbit, but we don't worry about it for now.
    hasbit_count_ += 1;
    const int index = hasbit_count_;
    hasbit_indexes_[field] = index;

    if (!field->is_required()) continue;
    if (index > 63) {
      // This could be fixed in the decoder without too much trouble.  But
      // we expect this to be so rare that we don't worry about it for now.
      std::cerr << "upb does not support messages with more than 63 "
                   "required fields: "
                << field->full_name() << "\n";
      exit(1);
    }
    required_count_++;
  }

  // Place hasbits at the beginning.  The "+ 1" accounts for the unused
  // hasbit 0.
  if (hasbit_count_ != 0) {
    hasbit_bytes_ = DivRoundUp(hasbit_count_ + 1, 8);
  } else {
    hasbit_bytes_ = 0;
  }
  Place(SizeAndAlign{{hasbit_bytes_, hasbit_bytes_}, {1, 1}});

  // Place non-oneof fields in rank order.
  for (const protobuf::FieldDescriptor* field : ranked_fields) {
    field_offsets_[field] = Place(SizeOf(field));
  }
}
// Assigns offsets to every oneof of `descriptor`.
//
// Oneofs are processed in order of their full name.  For each oneof, one data
// slot large enough (in both size and alignment) for any of its members is
// placed first, followed by a 4-byte discriminator.  All members of the oneof
// share the data slot's offset; the discriminator's offset is recorded as the
// oneof's case offset.
void MessageLayout::PlaceOneofFields(const protobuf::Descriptor* descriptor) {
  std::vector<const protobuf::OneofDescriptor*> sorted_oneofs;
  const int oneof_total = descriptor->oneof_decl_count();
  for (int idx = 0; idx < oneof_total; ++idx) {
    sorted_oneofs.push_back(descriptor->oneof_decl(idx));
  }
  std::sort(sorted_oneofs.begin(), sorted_oneofs.end(),
            [](const protobuf::OneofDescriptor* lhs,
               const protobuf::OneofDescriptor* rhs) {
              return lhs->full_name() < rhs->full_name();
            });

  for (const protobuf::OneofDescriptor* oneof : sorted_oneofs) {
    const int member_total = oneof->field_count();

    // The shared slot must fit the largest member.
    SizeAndAlign widest{{0, 0}, {0, 0}};
    for (int idx = 0; idx < member_total; ++idx) {
      widest.MaxFrom(SizeOf(oneof->field(idx)));
    }

    // Place the data slot, then the discriminator enum.
    const Size data_offset = Place(widest);
    oneof_case_offsets_[oneof] = Place(SizeAndAlign{{4, 4}, {4, 4}});

    for (int idx = 0; idx < member_total; ++idx) {
      field_offsets_[oneof->field(idx)] = data_offset;
    }
  }
}
} // namespace upbc

@ -1,163 +0,0 @@
/*
* Copyright (c) 2009-2021, Google LLC
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
* * Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* * Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* * Neither the name of Google LLC nor the
* names of its contributors may be used to endorse or promote products
* derived from this software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL Google LLC BE LIABLE FOR ANY DIRECT,
* INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
* ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
* SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#ifndef UPBC_MESSAGE_LAYOUT_H
#define UPBC_MESSAGE_LAYOUT_H
#include "absl/base/macros.h"
#include "absl/container/flat_hash_map.h"
#include "google/protobuf/descriptor.h"
#include "upb/upb.hpp"
#include "upb/mini_table.h"
namespace upbc {
// Computes and stores the memory layout of a single protobuf message: the
// offset of every field, the offset of every oneof's case discriminator, the
// hasbit index of every field that has one, and the total message size.
//
// All sizes and offsets are tracked as a pair of values (size32, size64).
// The layout is computed once, in the constructor.
class MessageLayout {
 public:
  // A pair of byte quantities that are always updated together.
  struct Size {
    // Adds `other` component-wise.
    void Add(const Size& other) {
      size32 += other.size32;
      size64 += other.size64;
    }
    // Replaces each component with the larger of itself and `other`'s.
    void MaxFrom(const Size& other) {
      size32 = std::max(size32, other.size32);
      size64 = std::max(size64, other.size64);
    }
    // Rounds each component up to a multiple of the matching component of
    // `align`, which must be a power of two.
    void AlignUp(const Size& align) {
      size32 = Align(size32, align.size32);
      size64 = Align(size64, align.size64);
    }
    int64_t size32;
    int64_t size64;
  };

  // The size of a value together with its required alignment.
  struct SizeAndAlign {
    Size size;
    Size align;
    // Component-wise maximum of both size and alignment.
    void MaxFrom(const SizeAndAlign& other) {
      size.MaxFrom(other.size);
      align.MaxFrom(other.align);
    }
  };

  // Computes the full layout for `descriptor`.
  MessageLayout(const google::protobuf::Descriptor* descriptor) {
    ComputeLayout(descriptor);
  }

  // Returns the offset of `field`.  Aborts if the field has no recorded
  // offset.
  Size GetFieldOffset(const google::protobuf::FieldDescriptor* field) const {
    return GetMapValue(field_offsets_, field);
  }

  // Returns the offset of the case discriminator of `oneof`.  Aborts if the
  // oneof has no recorded offset.
  Size GetOneofCaseOffset(
      const google::protobuf::OneofDescriptor* oneof) const {
    return GetMapValue(oneof_case_offsets_, oneof);
  }

  // Returns the hasbit index of `field`.  Aborts if the field has no hasbit.
  int GetHasbitIndex(const google::protobuf::FieldDescriptor* field) const {
    return GetMapValue(hasbit_indexes_, field);
  }

  Size message_size() const { return size_; }
  int hasbit_count() const { return hasbit_count_; }
  int hasbit_bytes() const { return hasbit_bytes_; }

  // Required fields always have the lowest hasbits.
  int required_count() const { return required_count_; }

  static bool HasHasbit(const google::protobuf::FieldDescriptor* field);
  static SizeAndAlign SizeOfUnwrapped(
      const google::protobuf::FieldDescriptor* field);

 private:
  void ComputeLayout(const google::protobuf::Descriptor* descriptor);
  void PlaceNonOneofFields(const google::protobuf::Descriptor* descriptor);
  void PlaceOneofFields(const google::protobuf::Descriptor* descriptor);
  Size Place(SizeAndAlign size_and_align);

  // Looks up `key` in `map` and returns a copy of its value.  A missing key
  // is treated as a fatal error: a message is printed and the process aborts.
  template <class K, class V>
  static V GetMapValue(const absl::flat_hash_map<K, V>& map, K key) {
    auto iter = map.find(key);
    if (iter == map.end()) {
      fprintf(stderr, "No value for field.\n");
      abort();
    }
    return iter->second;
  }

  // Note: this also returns true for 0.
  static bool IsPowerOfTwo(size_t val) { return (val & (val - 1)) == 0; }

  // Rounds `val` up to the next multiple of `align`, which must be a power of
  // two.  A bad alignment is reported on stderr before the assertion fires.
  static size_t Align(size_t val, size_t align) {
    if (!IsPowerOfTwo(align)) {
      fprintf(stderr, "YO! Align is: %d\n", (int)align);
    }
    ABSL_ASSERT(IsPowerOfTwo(align));
    return (val + align - 1) & ~(align - 1);
  }

  static SizeAndAlign SizeOf(const google::protobuf::FieldDescriptor* field);
  static int64_t FieldLayoutRank(
      const google::protobuf::FieldDescriptor* field);

  absl::flat_hash_map<const google::protobuf::FieldDescriptor*, Size>
      field_offsets_;
  absl::flat_hash_map<const google::protobuf::FieldDescriptor*, int>
      hasbit_indexes_;
  absl::flat_hash_map<const google::protobuf::OneofDescriptor*, Size>
      oneof_case_offsets_;
  Size maxalign_;
  Size size_;
  // The hasbit counters are zero-initialized because the map-entry layout
  // path never runs the hasbit pass; without initializers the accessors above
  // would read indeterminate values for map-entry messages.
  int hasbit_count_ = 0;
  int hasbit_bytes_ = 0;
  int required_count_ = 0;
};
// Returns the fields of `message` in order of "hotness", i.e. how frequently
// they are expected to appear in serialized payloads.  Ideally this would use
// a profile.  Without one, required fields come first, and within each group
// fields with smaller numbers are assumed to be used more frequently.
inline std::vector<const google::protobuf::FieldDescriptor*> FieldHotnessOrder(
    const google::protobuf::Descriptor* message) {
  std::vector<const google::protobuf::FieldDescriptor*> ordered;
  const int field_total = message->field_count();
  for (int idx = 0; idx < field_total; ++idx) {
    ordered.push_back(message->field(idx));
  }
  std::sort(ordered.begin(), ordered.end(),
            [](const google::protobuf::FieldDescriptor* lhs,
               const google::protobuf::FieldDescriptor* rhs) {
              const bool lhs_required = lhs->is_required();
              const bool rhs_required = rhs->is_required();
              // Required fields sort ahead of non-required ones.
              if (lhs_required != rhs_required) return lhs_required;
              return lhs->number() < rhs->number();
            });
  return ordered;
}
} // namespace upbc
#endif // UPBC_MESSAGE_LAYOUT_H

@ -35,8 +35,8 @@
#include "google/protobuf/descriptor.pb.h"
#include "google/protobuf/wire_format.h"
#include "upb/mini_table.hpp"
#include "upb/upb.hpp"
#include "upbc/common.h"
#include "upbc/message_layout.h"
namespace upbc {
namespace {
@ -44,6 +44,24 @@ namespace {
namespace protoc = ::google::protobuf::compiler;
namespace protobuf = ::google::protobuf;
// Returns the fields of `message` in order of "hotness", i.e. how frequently
// they are expected to appear in serialized payloads.  Ideally this would use
// a profile.  Without one, required fields come first, and within each group
// fields with smaller numbers are assumed to be used more frequently.
inline std::vector<const google::protobuf::FieldDescriptor*> FieldHotnessOrder(
    const google::protobuf::Descriptor* message) {
  std::vector<const google::protobuf::FieldDescriptor*> ordered;
  const int field_total = message->field_count();
  for (int idx = 0; idx < field_total; ++idx) {
    ordered.push_back(message->field(idx));
  }
  std::sort(ordered.begin(), ordered.end(),
            [](const google::protobuf::FieldDescriptor* lhs,
               const google::protobuf::FieldDescriptor* rhs) {
              const bool lhs_required = lhs->is_required();
              const bool rhs_required = rhs->is_required();
              // Required fields sort ahead of non-required ones.
              if (lhs_required != rhs_required) return lhs_required;
              return lhs->number() < rhs->number();
            });
  return ordered;
}
// Returns the name of the generated source file for `file`: the file's name
// with its extension stripped and ".upb.c" appended.
std::string SourceFilename(const google::protobuf::FileDescriptor* file) {
  const auto stem = StripExtension(file->name());
  return stem + ".upb.c";
}
@ -209,10 +227,6 @@ std::string EnumValueSymbol(const protobuf::EnumValueDescriptor* value) {
return ToCIdent(value->full_name());
}
// Formats `size` as a "UPB_SIZE(size32, size64)" expression.
std::string GetSizeInit(const MessageLayout::Size& size) {
  const auto bytes32 = size.size32;
  const auto bytes64 = size.size64;
  return absl::Substitute("UPB_SIZE($0, $1)", bytes32, bytes64);
}
std::string CTypeInternal(const protobuf::FieldDescriptor* field,
bool is_const) {
std::string maybe_const = is_const ? "const " : "";
@ -1171,33 +1185,28 @@ void WriteHeader(const FileLayout& layout, Output& output) {
// This is gratuitously inefficient with how many times it rebuilds
// MessageLayout objects for the same message. But we only do this for one
// proto (descriptor.proto) so we don't worry about it.
const protobuf::Descriptor* max32 = nullptr;
const protobuf::Descriptor* max64 = nullptr;
const protobuf::Descriptor* max32_message = nullptr;
const protobuf::Descriptor* max64_message = nullptr;
size_t max32 = 0;
size_t max64 = 0;
for (const auto* message : this_file_messages) {
if (absl::EndsWith(message->name(), "Options")) {
MessageLayout layout(message);
if (max32 == nullptr) {
max32 = message;
max64 = message;
} else {
if (layout.message_size().size32 >
MessageLayout(max32).message_size().size32) {
max32 = message;
}
if (layout.message_size().size64 >
MessageLayout(max64).message_size().size64) {
max64 = message;
}
size_t size32 = layout.GetMiniTable32(message)->size;
size_t size64 = layout.GetMiniTable64(message)->size;
if (size32 > max32) {
max32 = size32;
max32_message = message;
}
if (size64 > max64) {
max64 = size64;
max64_message = message;
}
}
}
output("/* Max size 32 is $0 */\n", max32->full_name());
output("/* Max size 64 is $0 */\n", max64->full_name());
MessageLayout::Size size;
size.size32 = MessageLayout(max32).message_size().size32;
size.size64 = MessageLayout(max32).message_size().size64;
output("#define _UPB_MAXOPT_SIZE $0\n\n", GetSizeInit(size));
output("/* Max size 32 is $0 */\n", max32_message->full_name());
output("/* Max size 64 is $0 */\n", max64_message->full_name());
output("#define _UPB_MAXOPT_SIZE UPB_SIZE($0, $1)\n\n", max32, max64);
}
output(
@ -1236,6 +1245,7 @@ int GetTableSlot(const protobuf::FieldDescriptor* field) {
return (tag & 0xf8) >> 3;
}
#include "upb/port_def.inc"
bool TryFillTableEntry(const FileLayout& layout,
const protobuf::FieldDescriptor* field,
TableEntry& ent) {
@ -1481,7 +1491,6 @@ void WriteMessage(const protobuf::Descriptor* message, const FileLayout& layout,
std::string subenums_array_ref = "NULL";
const upb_MiniTable* mt_32 = layout.GetMiniTable32(message);
const upb_MiniTable* mt_64 = layout.GetMiniTable64(message);
MessageLayout msg_layout(message);
std::vector<std::string> subs;
for (int i = 0; i < mt_64->field_count; i++) {

Loading…
Cancel
Save