#include "upbc/message_layout.h" #include "google/protobuf/descriptor.pb.h" namespace upbc { namespace protobuf = ::google::protobuf; static int64_t DivRoundUp(int64_t a, int64_t b) { ABSL_ASSERT(a >= 0); ABSL_ASSERT(b > 0); return (a + b - 1) / b; } MessageLayout::Size MessageLayout::Place( MessageLayout::SizeAndAlign size_and_align) { Size offset = size_; offset.AlignUp(size_and_align.align); size_ = offset; size_.Add(size_and_align.size); //maxalign_.MaxFrom(size_and_align.align); maxalign_.MaxFrom(size_and_align.size); return offset; } bool MessageLayout::HasHasbit(const protobuf::FieldDescriptor* field) { return field->file()->syntax() == protobuf::FileDescriptor::SYNTAX_PROTO2 && field->label() != protobuf::FieldDescriptor::LABEL_REPEATED && !field->containing_oneof() && !field->containing_type()->options().map_entry(); } MessageLayout::SizeAndAlign MessageLayout::SizeOf( const protobuf::FieldDescriptor* field) { if (field->containing_type()->options().map_entry()) { // Map entries aren't actually stored, they are only used during parsing. // For parsing, it helps a lot if all map entry messages have the same // layout. return {{8, 16}, {4, 8}}; // upb_stringview } else if (field->is_repeated()) { return {{4, 8}, {4, 8}}; // Pointer to array object. } else { return SizeOfUnwrapped(field); } } MessageLayout::SizeAndAlign MessageLayout::SizeOfUnwrapped( const protobuf::FieldDescriptor* field) { switch (field->cpp_type()) { case protobuf::FieldDescriptor::CPPTYPE_MESSAGE: return {{4, 8}, {4, 8}}; // Pointer to message. case protobuf::FieldDescriptor::CPPTYPE_STRING: return {{8, 16}, {4, 8}}; // upb_stringview case protobuf::FieldDescriptor::CPPTYPE_BOOL: return {{1, 1}, {1, 1}}; case protobuf::FieldDescriptor::CPPTYPE_FLOAT: case protobuf::FieldDescriptor::CPPTYPE_INT32: case protobuf::FieldDescriptor::CPPTYPE_UINT32: return {{4, 4}, {4, 4}}; default: return {{8, 8}, {8, 8}}; } } int64_t MessageLayout::FieldLayoutRank(const protobuf::FieldDescriptor* field) { // Order: // 1, 2, 3. primitive fields (8, 4, 1 byte) // 4. string fields // 5. submessage fields // 6. repeated fields // // This has the following nice properties: // // 1. padding alignment is (nearly) minimized. // 2. fields that might have defaults (1-4) are segregated // from fields that are always zero-initialized (5-7). // // We skip oneof fields, because they are emitted in a separate pass. int64_t rank; if (field->containing_oneof()) { fprintf(stderr, "shouldn't have oneofs here.\n"); abort(); } else if (field->label() == protobuf::FieldDescriptor::LABEL_REPEATED) { rank = 6; } else { switch (field->cpp_type()) { case protobuf::FieldDescriptor::CPPTYPE_MESSAGE: rank = 5; break; case protobuf::FieldDescriptor::CPPTYPE_STRING: rank = 4; break; case protobuf::FieldDescriptor::CPPTYPE_BOOL: rank = 3; break; case protobuf::FieldDescriptor::CPPTYPE_FLOAT: case protobuf::FieldDescriptor::CPPTYPE_INT32: case protobuf::FieldDescriptor::CPPTYPE_UINT32: rank = 2; break; default: rank = 1; break; } } // Break ties with field number. return (rank << 29) | field->number(); } void MessageLayout::ComputeLayout(const protobuf::Descriptor* descriptor) { size_ = Size{0, 0}; maxalign_ = Size{0, 0}; PlaceNonOneofFields(descriptor); PlaceOneofFields(descriptor); // Align overall size up to max size. size_.AlignUp(maxalign_); } void MessageLayout::PlaceNonOneofFields( const protobuf::Descriptor* descriptor) { std::vector field_order; for (int i = 0; i < descriptor->field_count(); i++) { const protobuf::FieldDescriptor* field = descriptor->field(i); if (!field->containing_oneof()) { field_order.push_back(descriptor->field(i)); } } std::sort(field_order.begin(), field_order.end(), [](const protobuf::FieldDescriptor* a, const protobuf::FieldDescriptor* b) { return FieldLayoutRank(a) < FieldLayoutRank(b); }); // Place/count hasbits. int hasbit_count = 0; for (auto field : field_order) { if (HasHasbit(field)) { // We don't use hasbit 0, so that 0 can indicate "no presence" in the // table. This wastes one hasbit, but we don't worry about it for now. hasbit_indexes_[field] = ++hasbit_count; } } // Place hasbits at the beginning. int64_t hasbit_bytes = DivRoundUp(hasbit_count, 8); Place(SizeAndAlign{{hasbit_bytes, hasbit_bytes}, {1, 1}}); // Place non-oneof fields. for (auto field : field_order) { field_offsets_[field] = Place(SizeOf(field)); } } void MessageLayout::PlaceOneofFields(const protobuf::Descriptor* descriptor) { std::vector oneof_order; for (int i = 0; i < descriptor->oneof_decl_count(); i++) { oneof_order.push_back(descriptor->oneof_decl(i)); } std::sort(oneof_order.begin(), oneof_order.end(), [](const protobuf::OneofDescriptor* a, const protobuf::OneofDescriptor* b) { return a->full_name() < b->full_name(); }); for (auto oneof : oneof_order) { SizeAndAlign oneof_maxsize{{0, 0}, {0, 0}}; // Calculate max size. for (int i = 0; i < oneof->field_count(); i++) { oneof_maxsize.MaxFrom(SizeOf(oneof->field(i))); } // Place discriminator enum and data. Size data = Place(oneof_maxsize); Size discriminator = Place(SizeAndAlign{{4, 4}, {4, 4}}); oneof_case_offsets_[oneof] = discriminator; for (int i = 0; i < oneof->field_count(); i++) { field_offsets_[oneof->field(i)] = data; } } } } // namespace upbc