From d22d6d71ed54bc83db0d99a0d007e35bf918f3a1 Mon Sep 17 00:00:00 2001 From: Joshua Haberman Date: Mon, 21 Nov 2022 10:33:56 -0800 Subject: [PATCH] Refactored message accessors to share a common set of functions instead of duplicating logic. Prior to this CL, there were several different code paths for reading/writing message data. Generated code, MiniTable accessors, and reflection all performed direct manipulation of the bits and bytes in a message, but they all had distinct implementations that shared little, if any, code. This divergence meant that they could easily have different behavior, bugs could creep into one but not another, and we would need three different sets of tests to get full test coverage. This also made it very difficult to change the internal representation in any way, since it would require updating many places in the code. With this CL, the three different APIs for accessing message data now all share a common set of functions. The common functions all take a `upb_MiniTableField` as the canonical description of a field's type and layout. The lowest-level functions are very branchy, as they must test for every possible variation in the field type (field vs oneof, hasbit vs no-hasbit, different field sizes, whether a nonzero default value exists, extension vs. regular field); however, these functions are declared inline and designed to be very optimizable when values are known at compile time. In generated accessors, for example, we can declare constant `upb_MiniTableField` instances so that all values can constant-propagate, and we can get fully specialized code even though we are calling a generic function. On the other hand, when we use the generic functions from reflection, we get runtime branches since values are not known at compile time. But even then, the functions are written to be as efficient as possible when used from reflection. 
For example, we use memcpy() calls with constant length so that the compiler can optimize these into inline loads/stores without having to make an out-of-line call to memcpy(). In this way, this CL should be a benefit to both correctness and performance. It will also make it easier to change the message representation, for example to optimize the encoder by giving hasbits to all fields. Note that we have not completely consolidated all access in this CL: 1. Some functions outside of get/set such as clear and hazzers are not yet unified. 2. The encoder and decoder still touch the message without going through the common functions. The encoder and decoder require a bit more specialized code to get good performance when reading/writing fields en masse. PiperOrigin-RevId: 490016095 --- upb/message/accessors.h | 281 ++++++++++++++++++++++++++------ upb/message/internal.h | 1 + upb/message/message.c | 1 + upb/mini_table/common.h | 5 + upb/mini_table/decode.c | 61 +++---- upb/mini_table/decode.h | 18 +- upb/mini_table/field_internal.h | 8 +- upb/port/def.inc | 5 - upb/reflection/field_def.c | 19 ++- upb/reflection/message.c | 44 +---- upb/wire/encode.c | 2 +- upbc/file_layout.cc | 5 +- upbc/file_layout.h | 19 ++- upbc/protoc-gen-upb.cc | 281 +++++++++++++------------------- 14 files changed, 436 insertions(+), 314 deletions(-) diff --git a/upb/message/accessors.h b/upb/message/accessors.h index 8c41dc969d..8d461e0717 100644 --- a/upb/message/accessors.h +++ b/upb/message/accessors.h @@ -29,6 +29,7 @@ #define UPB_MESSAGE_ACCESSORS_H_ #include "upb/collections/array.h" +#include "upb/message/extension_internal.h" #include "upb/message/internal.h" #include "upb/mini_table/common.h" #include "upb/mini_table/enum_internal.h" @@ -45,6 +46,16 @@ UPB_INLINE bool _upb_MiniTableField_InOneOf(const upb_MiniTableField* field) { return field->presence < 0; } +UPB_INLINE void* _upb_MiniTableField_GetPtr(upb_Message* msg, + const upb_MiniTableField* field) { + return (char*)msg + 
field->offset; +} + +UPB_INLINE const void* _upb_MiniTableField_GetConstPtr( + const upb_Message* msg, const upb_MiniTableField* field) { + return (char*)msg + field->offset; +} + UPB_INLINE void _upb_MiniTable_SetPresence(upb_Message* msg, const upb_MiniTableField* field) { if (field->presence > 0) { @@ -54,6 +65,142 @@ UPB_INLINE void _upb_MiniTable_SetPresence(upb_Message* msg, } } +UPB_INLINE bool upb_MiniTable_HasField(const upb_Message* msg, + const upb_MiniTableField* field); + +UPB_INLINE bool _upb_MiniTable_DefaultIsNonZero( + const void* default_val, const upb_MiniTableField* field) { + char zero[16] = {0}; + switch (_upb_MiniTableField_GetRep(field)) { + case kUpb_FieldRep_1Byte: + return memcmp(&zero, default_val, 1) != 0; + case kUpb_FieldRep_4Byte: + return memcmp(&zero, default_val, 4) != 0; + case kUpb_FieldRep_8Byte: + return memcmp(&zero, default_val, 8) != 0; + case kUpb_FieldRep_StringView: { + const upb_StringView* sv = (const upb_StringView*)default_val; + return sv->size != 0; + } + } + UPB_UNREACHABLE(); +} + +UPB_INLINE void _upb_MiniTable_CopyFieldData(void* to, const void* from, + const upb_MiniTableField* field) { + switch (_upb_MiniTableField_GetRep(field)) { + case kUpb_FieldRep_1Byte: + memcpy(to, from, 1); + return; + case kUpb_FieldRep_4Byte: + memcpy(to, from, 4); + return; + case kUpb_FieldRep_8Byte: + memcpy(to, from, 8); + return; + case kUpb_FieldRep_StringView: { + memcpy(to, from, sizeof(upb_StringView)); + return; + } + } + UPB_UNREACHABLE(); +} + +// Here we define universal getter/setter functions for message fields. +// These look very branchy and inefficient, but as long as the MiniTableField +// values are known at compile time, all the branches are optimized away and +// we are left with ideal code. This can happen either through through +// literals or UPB_ASSUME(): +// +// // Via string literals. 
+// bool FooMessage_set_bool_field(const upb_Message* msg, bool val) { +// const upb_MiniTableField field = {1, 0, 0, /* etc... */}; +// // All value in "field" are compile-time known. +// _upb_MiniTable_SetNonExtensionField(msg, &field, &value); +// } +// +// // Via UPB_ASSUME(). +// UPB_INLINE void upb_MiniTable_SetBool(upb_Message* msg, +// const upb_MiniTableField* field, +// bool value) { +// UPB_ASSUME(field->descriptortype == kUpb_FieldType_Bool); +// UPB_ASSUME(!upb_IsRepeatedOrMap(field)); +// UPB_ASSUME(_upb_MiniTableField_GetRep(field) == kUpb_FieldRep_1Byte); +// _upb_MiniTable_SetNonExtensionField(msg, field, &value); +// } +// +// As a result, we can use these universal getters/setters for *all* message +// accessors: generated code, MiniTable accessors, and reflection. The only +// exception is the binary encoder/decoder, which need to be a bit more clever +// about how the read/write the message data, for efficiency. + +static UPB_FORCEINLINE void _upb_MiniTable_GetNonExtensionField( + const upb_Message* msg, const upb_MiniTableField* field, + const void* default_val, void* val) { + UPB_ASSUME(!upb_MiniTableField_IsExtension(field)); + if ((_upb_MiniTableField_InOneOf(field) || + _upb_MiniTable_DefaultIsNonZero(default_val, field)) && + !upb_MiniTable_HasField(msg, field)) { + _upb_MiniTable_CopyFieldData(val, default_val, field); + return; + } + _upb_MiniTable_CopyFieldData(val, _upb_MiniTableField_GetConstPtr(msg, field), + field); +} + +UPB_INLINE void _upb_MiniTable_GetExtensionField( + const upb_Message* msg, const upb_MiniTableExtension* mt_ext, + const void* default_val, void* val) { + UPB_ASSUME(upb_MiniTableField_IsExtension(&mt_ext->field)); + const upb_Message_Extension* ext = _upb_Message_Getext(msg, mt_ext); + if (ext) { + _upb_MiniTable_CopyFieldData(val, &ext->data, &mt_ext->field); + } else { + _upb_MiniTable_CopyFieldData(val, default_val, &mt_ext->field); + } +} + +UPB_INLINE void _upb_MiniTable_GetField(const upb_Message* msg, + 
const upb_MiniTableField* field, + const void* default_val, void* val) { + if (upb_MiniTableField_IsExtension(field)) { + _upb_MiniTable_GetExtensionField(msg, (upb_MiniTableExtension*)field, + default_val, val); + } else { + _upb_MiniTable_GetNonExtensionField(msg, field, default_val, val); + } +} + +UPB_INLINE void _upb_MiniTable_SetNonExtensionField( + upb_Message* msg, const upb_MiniTableField* field, const void* val) { + UPB_ASSUME(!upb_MiniTableField_IsExtension(field)); + _upb_MiniTable_SetPresence(msg, field); + _upb_MiniTable_CopyFieldData(_upb_MiniTableField_GetPtr(msg, field), val, + field); +} + +UPB_INLINE bool _upb_MiniTable_SetExtensionField( + upb_Message* msg, const upb_MiniTableExtension* mt_ext, const void* val, + upb_Arena* a) { + upb_Message_Extension* ext = + _upb_Message_GetOrCreateExtension(msg, mt_ext, a); + if (!ext) return false; + _upb_MiniTable_CopyFieldData(&ext->data, val, &mt_ext->field); + return true; +} + +UPB_INLINE bool _upb_MiniTable_SetField(upb_Message* msg, + const upb_MiniTableField* field, + const void* val, upb_Arena* a) { + if (upb_MiniTableField_IsExtension(field)) { + return _upb_MiniTable_SetExtensionField( + msg, (const upb_MiniTableExtension*)field, val, a); + } else { + _upb_MiniTable_SetNonExtensionField(msg, field, val); + return true; + } +} + // EVERYTHING ABOVE THIS LINE IS INTERNAL - DO NOT USE ///////////////////////// void upb_MiniTable_ClearField(upb_Message* msg, @@ -72,56 +219,68 @@ UPB_INLINE bool upb_MiniTable_HasField(const upb_Message* msg, UPB_INLINE bool upb_MiniTable_GetBool(const upb_Message* msg, const upb_MiniTableField* field, bool default_val) { - UPB_ASSERT(field->descriptortype == kUpb_FieldType_Bool); - if (default_val && !upb_MiniTable_HasField(msg, field)) return default_val; - return *UPB_PTR_AT(msg, field->offset, bool); + UPB_ASSUME(field->descriptortype == kUpb_FieldType_Bool); + UPB_ASSUME(!upb_IsRepeatedOrMap(field)); + UPB_ASSUME(_upb_MiniTableField_GetRep(field) == 
kUpb_FieldRep_1Byte); + bool ret; + _upb_MiniTable_GetNonExtensionField(msg, field, &default_val, &ret); + return ret; } UPB_INLINE void upb_MiniTable_SetBool(upb_Message* msg, const upb_MiniTableField* field, bool value) { - UPB_ASSERT(field->descriptortype == kUpb_FieldType_Bool); - _upb_MiniTable_SetPresence(msg, field); - *UPB_PTR_AT(msg, field->offset, bool) = value; + UPB_ASSUME(field->descriptortype == kUpb_FieldType_Bool); + UPB_ASSUME(!upb_IsRepeatedOrMap(field)); + UPB_ASSUME(_upb_MiniTableField_GetRep(field) == kUpb_FieldRep_1Byte); + _upb_MiniTable_SetNonExtensionField(msg, field, &value); } UPB_INLINE int32_t upb_MiniTable_GetInt32(const upb_Message* msg, const upb_MiniTableField* field, int32_t default_val) { - UPB_ASSERT(field->descriptortype == kUpb_FieldType_Int32 || + UPB_ASSUME(field->descriptortype == kUpb_FieldType_Int32 || field->descriptortype == kUpb_FieldType_SInt32 || field->descriptortype == kUpb_FieldType_SFixed32 || field->descriptortype == kUpb_FieldType_Enum); - if (default_val && !upb_MiniTable_HasField(msg, field)) return default_val; - return *UPB_PTR_AT(msg, field->offset, int32_t); + UPB_ASSUME(!upb_IsRepeatedOrMap(field)); + UPB_ASSUME(_upb_MiniTableField_GetRep(field) == kUpb_FieldRep_4Byte); + int32_t ret; + _upb_MiniTable_GetNonExtensionField(msg, field, &default_val, &ret); + return ret; } UPB_INLINE void upb_MiniTable_SetInt32(upb_Message* msg, const upb_MiniTableField* field, int32_t value) { - UPB_ASSERT(field->descriptortype == kUpb_FieldType_Int32 || + UPB_ASSUME(field->descriptortype == kUpb_FieldType_Int32 || field->descriptortype == kUpb_FieldType_SInt32 || field->descriptortype == kUpb_FieldType_SFixed32); - _upb_MiniTable_SetPresence(msg, field); - *UPB_PTR_AT(msg, field->offset, int32_t) = value; + UPB_ASSUME(!upb_IsRepeatedOrMap(field)); + UPB_ASSUME(_upb_MiniTableField_GetRep(field) == kUpb_FieldRep_4Byte); + _upb_MiniTable_SetNonExtensionField(msg, field, &value); } UPB_INLINE uint32_t 
upb_MiniTable_GetUInt32(const upb_Message* msg, const upb_MiniTableField* field, uint32_t default_val) { - UPB_ASSERT(field->descriptortype == kUpb_FieldType_UInt32 || + UPB_ASSUME(field->descriptortype == kUpb_FieldType_UInt32 || field->descriptortype == kUpb_FieldType_Fixed32); - if (default_val && !upb_MiniTable_HasField(msg, field)) return default_val; - return *UPB_PTR_AT(msg, field->offset, uint32_t); + UPB_ASSUME(!upb_IsRepeatedOrMap(field)); + UPB_ASSUME(_upb_MiniTableField_GetRep(field) == kUpb_FieldRep_4Byte); + uint32_t ret; + _upb_MiniTable_GetNonExtensionField(msg, field, &default_val, &ret); + return ret; } UPB_INLINE void upb_MiniTable_SetUInt32(upb_Message* msg, const upb_MiniTableField* field, uint32_t value) { - UPB_ASSERT(field->descriptortype == kUpb_FieldType_UInt32 || + UPB_ASSUME(field->descriptortype == kUpb_FieldType_UInt32 || field->descriptortype == kUpb_FieldType_Fixed32); - _upb_MiniTable_SetPresence(msg, field); - *UPB_PTR_AT(msg, field->offset, uint32_t) = value; + UPB_ASSUME(!upb_IsRepeatedOrMap(field)); + UPB_ASSUME(_upb_MiniTableField_GetRep(field) == kUpb_FieldRep_4Byte); + _upb_MiniTable_SetNonExtensionField(msg, field, &value); } UPB_INLINE void upb_MiniTable_SetEnumProto2(upb_Message* msg, @@ -129,10 +288,11 @@ UPB_INLINE void upb_MiniTable_SetEnumProto2(upb_Message* msg, const upb_MiniTableField* field, int32_t value) { UPB_ASSERT(field->descriptortype == kUpb_FieldType_Enum); + UPB_ASSUME(!upb_IsRepeatedOrMap(field)); + UPB_ASSUME(_upb_MiniTableField_GetRep(field) == kUpb_FieldRep_4Byte); UPB_ASSERT(upb_MiniTableEnum_CheckValue( upb_MiniTable_GetSubEnumTable(msg_mini_table, field), value)); - _upb_MiniTable_SetPresence(msg, field); - *UPB_PTR_AT(msg, field->offset, int32_t) = value; + _upb_MiniTable_SetNonExtensionField(msg, field, &value); } UPB_INLINE int64_t upb_MiniTable_GetInt64(const upb_Message* msg, @@ -141,8 +301,11 @@ UPB_INLINE int64_t upb_MiniTable_GetInt64(const upb_Message* msg, UPB_ASSERT(field->descriptortype 
== kUpb_FieldType_Int64 || field->descriptortype == kUpb_FieldType_SInt64 || field->descriptortype == kUpb_FieldType_SFixed64); - if (default_val && !upb_MiniTable_HasField(msg, field)) return default_val; - return *UPB_PTR_AT(msg, field->offset, int64_t); + UPB_ASSUME(!upb_IsRepeatedOrMap(field)); + UPB_ASSUME(_upb_MiniTableField_GetRep(field) == kUpb_FieldRep_8Byte); + int64_t ret; + _upb_MiniTable_GetNonExtensionField(msg, field, &default_val, &ret); + return ret; } UPB_INLINE void upb_MiniTable_SetInt64(upb_Message* msg, @@ -151,8 +314,9 @@ UPB_INLINE void upb_MiniTable_SetInt64(upb_Message* msg, UPB_ASSERT(field->descriptortype == kUpb_FieldType_Int64 || field->descriptortype == kUpb_FieldType_SInt64 || field->descriptortype == kUpb_FieldType_SFixed64); - _upb_MiniTable_SetPresence(msg, field); - *UPB_PTR_AT(msg, field->offset, int64_t) = value; + UPB_ASSUME(!upb_IsRepeatedOrMap(field)); + UPB_ASSUME(_upb_MiniTableField_GetRep(field) == kUpb_FieldRep_8Byte); + _upb_MiniTable_SetNonExtensionField(msg, field, &value); } UPB_INLINE uint64_t upb_MiniTable_GetUInt64(const upb_Message* msg, @@ -160,8 +324,11 @@ UPB_INLINE uint64_t upb_MiniTable_GetUInt64(const upb_Message* msg, uint64_t default_val) { UPB_ASSERT(field->descriptortype == kUpb_FieldType_UInt64 || field->descriptortype == kUpb_FieldType_Fixed64); - if (default_val && !upb_MiniTable_HasField(msg, field)) return default_val; - return *UPB_PTR_AT(msg, field->offset, uint64_t); + UPB_ASSUME(!upb_IsRepeatedOrMap(field)); + UPB_ASSUME(_upb_MiniTableField_GetRep(field) == kUpb_FieldRep_8Byte); + uint64_t ret; + _upb_MiniTable_GetNonExtensionField(msg, field, &default_val, &ret); + return ret; } UPB_INLINE void upb_MiniTable_SetUInt64(upb_Message* msg, @@ -169,40 +336,49 @@ UPB_INLINE void upb_MiniTable_SetUInt64(upb_Message* msg, uint64_t value) { UPB_ASSERT(field->descriptortype == kUpb_FieldType_UInt64 || field->descriptortype == kUpb_FieldType_Fixed64); - _upb_MiniTable_SetPresence(msg, field); - 
*UPB_PTR_AT(msg, field->offset, uint64_t) = value; + UPB_ASSUME(!upb_IsRepeatedOrMap(field)); + UPB_ASSUME(_upb_MiniTableField_GetRep(field) == kUpb_FieldRep_8Byte); + _upb_MiniTable_SetNonExtensionField(msg, field, &value); } UPB_INLINE float upb_MiniTable_GetFloat(const upb_Message* msg, const upb_MiniTableField* field, float default_val) { UPB_ASSERT(field->descriptortype == kUpb_FieldType_Float); - if (default_val && !upb_MiniTable_HasField(msg, field)) return default_val; - return *UPB_PTR_AT(msg, field->offset, float); + UPB_ASSUME(!upb_IsRepeatedOrMap(field)); + UPB_ASSUME(_upb_MiniTableField_GetRep(field) == kUpb_FieldRep_4Byte); + float ret; + _upb_MiniTable_GetNonExtensionField(msg, field, &default_val, &ret); + return ret; } UPB_INLINE void upb_MiniTable_SetFloat(upb_Message* msg, const upb_MiniTableField* field, float value) { UPB_ASSERT(field->descriptortype == kUpb_FieldType_Float); - _upb_MiniTable_SetPresence(msg, field); - *UPB_PTR_AT(msg, field->offset, float) = value; + UPB_ASSUME(!upb_IsRepeatedOrMap(field)); + UPB_ASSUME(_upb_MiniTableField_GetRep(field) == kUpb_FieldRep_4Byte); + _upb_MiniTable_SetNonExtensionField(msg, field, &value); } UPB_INLINE double upb_MiniTable_GetDouble(const upb_Message* msg, const upb_MiniTableField* field, double default_val) { UPB_ASSERT(field->descriptortype == kUpb_FieldType_Double); - if (default_val && !upb_MiniTable_HasField(msg, field)) return default_val; - return *UPB_PTR_AT(msg, field->offset, double); + UPB_ASSUME(!upb_IsRepeatedOrMap(field)); + UPB_ASSUME(_upb_MiniTableField_GetRep(field) == kUpb_FieldRep_8Byte); + double ret; + _upb_MiniTable_GetNonExtensionField(msg, field, &default_val, &ret); + return ret; } UPB_INLINE void upb_MiniTable_SetDouble(upb_Message* msg, const upb_MiniTableField* field, double value) { UPB_ASSERT(field->descriptortype == kUpb_FieldType_Double); - _upb_MiniTable_SetPresence(msg, field); - *UPB_PTR_AT(msg, field->offset, double) = value; + 
UPB_ASSUME(!upb_IsRepeatedOrMap(field)); + UPB_ASSUME(_upb_MiniTableField_GetRep(field) == kUpb_FieldRep_8Byte); + _upb_MiniTable_SetNonExtensionField(msg, field, &value); } UPB_INLINE upb_StringView @@ -210,8 +386,11 @@ upb_MiniTable_GetString(const upb_Message* msg, const upb_MiniTableField* field, upb_StringView def_val) { UPB_ASSERT(field->descriptortype == kUpb_FieldType_Bytes || field->descriptortype == kUpb_FieldType_String); - if (def_val.size && !upb_MiniTable_HasField(msg, field)) return def_val; - return *UPB_PTR_AT(msg, field->offset, upb_StringView); + UPB_ASSUME(!upb_IsRepeatedOrMap(field)); + UPB_ASSUME(_upb_MiniTableField_GetRep(field) == kUpb_FieldRep_StringView); + upb_StringView ret; + _upb_MiniTable_GetNonExtensionField(msg, field, &def_val, &ret); + return ret; } UPB_INLINE void upb_MiniTable_SetString(upb_Message* msg, @@ -219,8 +398,9 @@ UPB_INLINE void upb_MiniTable_SetString(upb_Message* msg, upb_StringView value) { UPB_ASSERT(field->descriptortype == kUpb_FieldType_Bytes || field->descriptortype == kUpb_FieldType_String); - _upb_MiniTable_SetPresence(msg, field); - *UPB_PTR_AT(msg, field->offset, upb_StringView) = value; + UPB_ASSUME(!upb_IsRepeatedOrMap(field)); + UPB_ASSUME(_upb_MiniTableField_GetRep(field) == kUpb_FieldRep_StringView); + _upb_MiniTable_SetNonExtensionField(msg, field, &value); } UPB_INLINE const upb_Message* upb_MiniTable_GetMessage( @@ -228,8 +408,12 @@ UPB_INLINE const upb_Message* upb_MiniTable_GetMessage( upb_Message* default_val) { UPB_ASSERT(field->descriptortype == kUpb_FieldType_Message || field->descriptortype == kUpb_FieldType_Group); - if (default_val && !upb_MiniTable_HasField(msg, field)) return default_val; - return *UPB_PTR_AT(msg, field->offset, const upb_Message*); + UPB_ASSUME(!upb_IsRepeatedOrMap(field)); + UPB_ASSUME(_upb_MiniTableField_GetRep(field) == + UPB_SIZE(kUpb_FieldRep_4Byte, kUpb_FieldRep_8Byte)); + upb_Message* ret; + _upb_MiniTable_GetNonExtensionField(msg, field, &default_val, &ret); + 
return ret; } UPB_INLINE void upb_MiniTable_SetMessage(upb_Message* msg, @@ -238,9 +422,11 @@ UPB_INLINE void upb_MiniTable_SetMessage(upb_Message* msg, upb_Message* sub_message) { UPB_ASSERT(field->descriptortype == kUpb_FieldType_Message || field->descriptortype == kUpb_FieldType_Group); + UPB_ASSUME(!upb_IsRepeatedOrMap(field)); + UPB_ASSUME(_upb_MiniTableField_GetRep(field) == + UPB_SIZE(kUpb_FieldRep_4Byte, kUpb_FieldRep_8Byte)); UPB_ASSERT(mini_table->subs[field->submsg_index].submsg); - _upb_MiniTable_SetPresence(msg, field); - *UPB_PTR_AT(msg, field->offset, const upb_Message*) = sub_message; + _upb_MiniTable_SetNonExtensionField(msg, field, &sub_message); } UPB_INLINE upb_Message* upb_MiniTable_GetMutableMessage( @@ -262,12 +448,15 @@ UPB_INLINE upb_Message* upb_MiniTable_GetMutableMessage( UPB_INLINE const upb_Array* upb_MiniTable_GetArray( const upb_Message* msg, const upb_MiniTableField* field) { - return (const upb_Array*)*UPB_PTR_AT(msg, field->offset, upb_Array*); + const upb_Array* ret; + const upb_Array* default_val = NULL; + _upb_MiniTable_GetNonExtensionField(msg, field, &default_val, &ret); + return ret; } UPB_INLINE upb_Array* upb_MiniTable_GetMutableArray( upb_Message* msg, const upb_MiniTableField* field) { - return (upb_Array*)*UPB_PTR_AT(msg, field->offset, upb_Array*); + return (upb_Array*)upb_MiniTable_GetArray(msg, field); } void* upb_MiniTable_ResizeArray(upb_Message* msg, diff --git a/upb/message/internal.h b/upb/message/internal.h index 7a9dcd8b41..36f10e7a5a 100644 --- a/upb/message/internal.h +++ b/upb/message/internal.h @@ -55,6 +55,7 @@ extern "C" { extern const float kUpb_FltInfinity; extern const double kUpb_Infinity; +extern const double kUpb_NaN; /* Internal members of a upb_Message that track unknown fields and/or * extensions. We can change this without breaking binary compatibility. 
We put diff --git a/upb/message/message.c b/upb/message/message.c index 7d9001f494..c1b0d36363 100644 --- a/upb/message/message.c +++ b/upb/message/message.c @@ -37,6 +37,7 @@ const float kUpb_FltInfinity = INFINITY; const double kUpb_Infinity = INFINITY; +const double kUpb_NaN = NAN; static const size_t overhead = sizeof(upb_Message_InternalData); diff --git a/upb/mini_table/common.h b/upb/mini_table/common.h index 19f4f2a34e..91643b2559 100644 --- a/upb/mini_table/common.h +++ b/upb/mini_table/common.h @@ -58,6 +58,11 @@ const upb_MiniTableField* upb_MiniTable_FindFieldByNumber( upb_FieldType upb_MiniTableField_Type(const upb_MiniTableField* field); +UPB_INLINE bool upb_MiniTableField_IsExtension( + const upb_MiniTableField* field) { + return field->mode & kUpb_LabelFlags_IsExtension; +} + UPB_INLINE const upb_MiniTable* upb_MiniTable_GetSubMessageTable( const upb_MiniTable* mini_table, const upb_MiniTableField* field) { return mini_table->subs[field->submsg_index].submsg; diff --git a/upb/mini_table/decode.c b/upb/mini_table/decode.c index a6edf8dd8c..9c8b4175de 100644 --- a/upb/mini_table/decode.c +++ b/upb/mini_table/decode.c @@ -663,46 +663,30 @@ static void upb_MtDecoder_AssignOffsets(upb_MtDecoder* d) { d->table->size = UPB_ALIGN_UP(d->table->size, 8); } -static void upb_MiniTable_BuildMapEntry(upb_MtDecoder* d, - upb_FieldType key_type, - upb_FieldType value_type, - bool value_is_proto3_enum) { +static void upb_MiniTable_BuildMapEntry(upb_MtDecoder* d, char key_type, + char val_type) { upb_MiniTableField* fields = upb_Arena_Malloc(d->arena, sizeof(*fields) * 2); if (!fields) { upb_MtDecoder_ErrorFormat(d, "OOM while building map mini table field"); UPB_UNREACHABLE(); } - upb_MiniTableSub* subs = NULL; - if (value_is_proto3_enum) { - UPB_ASSERT(value_type == kUpb_FieldType_Enum); - // No sub needed. 
- } else if (value_type == kUpb_FieldType_Message || - value_type == kUpb_FieldType_Group || - value_type == kUpb_FieldType_Enum) { - subs = upb_Arena_Malloc(d->arena, sizeof(*subs)); - if (!subs) { - upb_MtDecoder_ErrorFormat(d, "OOM while building map mini table sub"); - UPB_UNREACHABLE(); - } - } - size_t field_size = upb_MtDecoder_SizeOfRep(kUpb_FieldRep_StringView, d->platform); + uint32_t sub_count = 0; fields[0].number = 1; fields[1].number = 2; - fields[0].mode = kUpb_FieldMode_Scalar; - fields[1].mode = kUpb_FieldMode_Scalar; + upb_MiniTable_SetField(d, key_type, &fields[0], 0, &sub_count); + upb_MiniTable_SetField(d, val_type, &fields[1], 0, &sub_count); + upb_MtDecoder_AllocateSubs(d, sub_count); + + // Map entries have a pre-determined layout, regardless of types. fields[0].presence = 0; fields[1].presence = 0; fields[0].offset = 0; fields[1].offset = field_size; - upb_MiniTable_SetTypeAndSub(&fields[0], key_type, NULL, 0, false); - upb_MiniTable_SetTypeAndSub(&fields[1], value_type, NULL, 0, - value_is_proto3_enum); - upb_MiniTable* ret = d->table; ret->size = UPB_ALIGN_UP(2 * field_size, 8); ret->field_count = 2; @@ -710,7 +694,6 @@ static void upb_MiniTable_BuildMapEntry(upb_MtDecoder* d, ret->dense_below = 2; ret->table_mask = -1; ret->required_count = 0; - ret->subs = subs; ret->fields = fields; } @@ -720,9 +703,8 @@ static void upb_MtDecoder_ParseMap(upb_MtDecoder* d, const char* data, upb_MtDecoder_ErrorFormat(d, "Invalid map encode length: %zu", len); UPB_UNREACHABLE(); } - const upb_EncodedType e0 = _upb_FromBase92(data[0]); - const upb_EncodedType e1 = _upb_FromBase92(data[1]); - switch (e0) { + const upb_EncodedType key_type = _upb_FromBase92(data[0]); + switch (key_type) { case kUpb_EncodedType_Fixed32: case kUpb_EncodedType_Fixed64: case kUpb_EncodedType_SFixed32: @@ -738,17 +720,10 @@ static void upb_MtDecoder_ParseMap(upb_MtDecoder* d, const char* data, break; default: - upb_MtDecoder_ErrorFormat(d, "Invalid map key field type: %d", e0); 
+ upb_MtDecoder_ErrorFormat(d, "Invalid map key field type: %d", key_type); UPB_UNREACHABLE(); } - if (e1 >= sizeof(kUpb_EncodedToType)) { - upb_MtDecoder_ErrorFormat(d, "Invalid map value field type: %d", e1); - UPB_UNREACHABLE(); - } - const upb_FieldType key_type = kUpb_EncodedToType[e0]; - const upb_FieldType val_type = kUpb_EncodedToType[e1]; - const bool value_is_proto3_enum = (e1 == kUpb_EncodedType_OpenEnum); - upb_MiniTable_BuildMapEntry(d, key_type, val_type, value_is_proto3_enum); + upb_MiniTable_BuildMapEntry(d, data[0], data[1]); } static void upb_MtDecoder_ParseMessageSet(upb_MtDecoder* d, const char* data, @@ -926,15 +901,17 @@ upb_MiniTableEnum* upb_MiniTable_BuildEnum(const char* data, size_t len, return decoder.enum_table; } -const char* upb_MiniTable_BuildExtension(const char* data, size_t len, - upb_MiniTableExtension* ext, - const upb_MiniTable* extendee, - upb_MiniTableSub sub, - upb_Status* status) { +const char* _upb_MiniTable_BuildExtension(const char* data, size_t len, + upb_MiniTableExtension* ext, + const upb_MiniTable* extendee, + upb_MiniTableSub sub, + upb_MiniTablePlatform platform, + upb_Status* status) { upb_MtDecoder decoder = { .arena = NULL, .status = status, .table = NULL, + .platform = platform, }; if (UPB_SETJMP(decoder.err)) return NULL; diff --git a/upb/mini_table/decode.h b/upb/mini_table/decode.h index 8ac4baf2fb..9f79ed7b42 100644 --- a/upb/mini_table/decode.h +++ b/upb/mini_table/decode.h @@ -69,11 +69,19 @@ void upb_MiniTable_SetSubMessage(upb_MiniTable* table, void upb_MiniTable_SetSubEnum(upb_MiniTable* table, upb_MiniTableField* field, const upb_MiniTableEnum* sub); -const char* upb_MiniTable_BuildExtension(const char* data, size_t len, - upb_MiniTableExtension* ext, - const upb_MiniTable* extendee, - upb_MiniTableSub sub, - upb_Status* status); +const char* _upb_MiniTable_BuildExtension(const char* data, size_t len, + upb_MiniTableExtension* ext, + const upb_MiniTable* extendee, + upb_MiniTableSub sub, + 
upb_MiniTablePlatform platform, + upb_Status* status); + +UPB_INLINE const char* upb_MiniTable_BuildExtension( + const char* data, size_t len, upb_MiniTableExtension* ext, + const upb_MiniTable* extendee, upb_MiniTableSub sub, upb_Status* status) { + return _upb_MiniTable_BuildExtension(data, len, ext, extendee, sub, + kUpb_MiniTablePlatform_Native, status); +} upb_MiniTableEnum* upb_MiniTable_BuildEnum(const char* data, size_t len, upb_Arena* arena, diff --git a/upb/mini_table/field_internal.h b/upb/mini_table/field_internal.h index 2f0ba7bb40..2db708315e 100644 --- a/upb/mini_table/field_internal.h +++ b/upb/mini_table/field_internal.h @@ -75,10 +75,16 @@ typedef enum { kUpb_FieldRep_StringView = 2, kUpb_FieldRep_8Byte = 3, - kUpb_FieldRep_Shift = 6, // Bit offset of the rep in upb_MiniTableField.mode kUpb_FieldRep_Max = kUpb_FieldRep_8Byte, } upb_FieldRep; +#define kUpb_FieldRep_Shift 6 + +UPB_INLINE upb_FieldRep +_upb_MiniTableField_GetRep(const upb_MiniTableField* field) { + return (upb_FieldRep)(field->mode >> kUpb_FieldRep_Shift); +} + #ifdef __cplusplus extern "C" { #endif diff --git a/upb/port/def.inc b/upb/port/def.inc index 86d16382df..90b9df8651 100644 --- a/upb/port/def.inc +++ b/upb/port/def.inc @@ -80,11 +80,6 @@ */ #define UPB_PTR_AT(msg, ofs, type) ((type*)((char*)(msg) + (ofs))) -#define UPB_READ_ONEOF(msg, fieldtype, offset, case_offset, case_val, default) \ - *UPB_PTR_AT(msg, case_offset, int) == case_val \ - ? 
*UPB_PTR_AT(msg, offset, fieldtype) \ - : default - #define UPB_WRITE_ONEOF(msg, fieldtype, offset, value, case_offset, case_val) \ *UPB_PTR_AT(msg, case_offset, int) = case_val; \ *UPB_PTR_AT(msg, offset, fieldtype) = value; diff --git a/upb/reflection/field_def.c b/upb/reflection/field_def.c index 7c8e5d0339..c1b9e13cd9 100644 --- a/upb/reflection/field_def.c +++ b/upb/reflection/field_def.c @@ -29,6 +29,7 @@ #include #include "upb/mini_table/decode.h" +#include "upb/reflection/def.h" #include "upb/reflection/def_builder_internal.h" #include "upb/reflection/def_pool.h" #include "upb/reflection/def_type.h" @@ -63,6 +64,7 @@ struct upb_FieldDef { float flt; bool boolean; str_t* str; + void* msg; // Always NULL. } defaultval; union { const upb_OneofDef* oneof; @@ -186,9 +188,12 @@ const upb_OneofDef* upb_FieldDef_RealContainingOneof(const upb_FieldDef* f) { } upb_MessageValue upb_FieldDef_Default(const upb_FieldDef* f) { - UPB_ASSERT(!upb_FieldDef_IsSubMessage(f)); upb_MessageValue ret; + if (upb_FieldDef_IsRepeated(f) || upb_FieldDef_IsSubMessage(f)) { + return (upb_MessageValue){.msg_val = NULL}; + } + switch (upb_FieldDef_CType(f)) { case kUpb_CType_Bool: return (upb_MessageValue){.bool_val = f->defaultval.boolean}; @@ -232,9 +237,14 @@ const upb_EnumDef* upb_FieldDef_EnumSubDef(const upb_FieldDef* f) { } const upb_MiniTableField* upb_FieldDef_MiniTable(const upb_FieldDef* f) { - UPB_ASSERT(!upb_FieldDef_IsExtension(f)); - const upb_MiniTable* layout = upb_MessageDef_MiniTable(f->msgdef); - return &layout->fields[f->layout_index]; + if (upb_FieldDef_IsExtension(f)) { + const upb_FileDef* file = upb_FieldDef_File(f); + return (upb_MiniTableField*)_upb_FileDef_ExtensionMiniTable( + file, f->layout_index); + } else { + const upb_MiniTable* layout = upb_MessageDef_MiniTable(f->msgdef); + return &layout->fields[f->layout_index]; + } } const upb_MiniTableExtension* _upb_FieldDef_ExtensionMiniTable( @@ -531,6 +541,7 @@ static void set_default_default(upb_DefBuilder* 
ctx, upb_FieldDef* f) { case kUpb_CType_Enum: { const upb_EnumValueDef* v = upb_EnumDef_Value(f->sub.enumdef, 0); f->defaultval.sint = upb_EnumValueDef_Number(v); + break; } case kUpb_CType_Message: break; diff --git a/upb/reflection/message.c b/upb/reflection/message.c index 3de55eb415..7fad293801 100644 --- a/upb/reflection/message.c +++ b/upb/reflection/message.c @@ -32,6 +32,7 @@ #include "upb/collections/map.h" #include "upb/hash/common.h" #include "upb/message/message.h" +#include "upb/reflection/def.h" #include "upb/reflection/def_pool.h" #include "upb/reflection/def_type.h" #include "upb/reflection/field_def_internal.h" @@ -115,20 +116,10 @@ const upb_FieldDef* upb_Message_WhichOneof(const upb_Message* msg, upb_MessageValue upb_Message_Get(const upb_Message* msg, const upb_FieldDef* f) { - if (upb_FieldDef_IsExtension(f)) { - const upb_Message_Extension* ext = - _upb_Message_Getext(msg, _upb_FieldDef_ExtensionMiniTable(f)); - if (ext) { - upb_MessageValue val; - memcpy(&val, &ext->data, sizeof(val)); - return val; - } else if (upb_FieldDef_IsRepeated(f)) { - return (upb_MessageValue){.array_val = NULL}; - } - } else if (!upb_FieldDef_HasPresence(f) || upb_Message_Has(msg, f)) { - return _upb_Message_Getraw(msg, f); - } - return upb_FieldDef_Default(f); + upb_MessageValue default_val = upb_FieldDef_Default(f); + upb_MessageValue ret; + _upb_MiniTable_GetField(msg, upb_FieldDef_MiniTable(f), &default_val, &ret); + return ret; } upb_MutableMessageValue upb_Message_Mutable(upb_Message* msg, @@ -172,30 +163,7 @@ make: bool upb_Message_Set(upb_Message* msg, const upb_FieldDef* f, upb_MessageValue val, upb_Arena* a) { - if (upb_FieldDef_IsExtension(f)) { - upb_Message_Extension* ext = _upb_Message_GetOrCreateExtension( - msg, _upb_FieldDef_ExtensionMiniTable(f), a); - if (!ext) return false; - memcpy(&ext->data, &val, sizeof(val)); - } else { - const upb_MiniTableField* field = upb_FieldDef_MiniTable(f); - - // Building reflection should always cause all 
sub-message types to be - // linked, but double-check here just for extra assurance. - UPB_ASSERT(!upb_FieldDef_IsSubMessage(f) || - upb_MessageDef_MiniTable(upb_FieldDef_ContainingType(f)) - ->subs[field->submsg_index] - .submsg); - - char* mem = UPB_PTR_AT(msg, field->offset, char); - memcpy(mem, &val, get_field_size(field)); - if (field->presence > 0) { - _upb_sethas_field(msg, field); - } else if (in_oneof(field)) { - *_upb_oneofcase_field(msg, field) = field->number; - } - } - return true; + return _upb_MiniTable_SetField(msg, upb_FieldDef_MiniTable(f), &val, a); } void upb_Message_ClearField(upb_Message* msg, const upb_FieldDef* f) { diff --git a/upb/wire/encode.c b/upb/wire/encode.c index 14eb6aeca7..df826c0e8e 100644 --- a/upb/wire/encode.c +++ b/upb/wire/encode.c @@ -447,7 +447,7 @@ static bool encode_shouldencode(upb_encstate* e, const upb_Message* msg, if (f->presence == 0) { /* Proto3 presence or map/array. */ const void* mem = UPB_PTR_AT(msg, f->offset, void); - switch (f->mode >> kUpb_FieldRep_Shift) { + switch (_upb_MiniTableField_GetRep(f)) { case kUpb_FieldRep_1Byte: { char ch; memcpy(&ch, mem, 1); diff --git a/upbc/file_layout.cc b/upbc/file_layout.cc index fab686f918..f12bcceed1 100644 --- a/upbc/file_layout.cc +++ b/upbc/file_layout.cc @@ -269,8 +269,9 @@ void FilePlatformLayout::BuildExtensions(const protobuf::FileDescriptor* fd) { // for it, just for the purpose of building the extension. // Note, we are not caching so this could use more memory than is necessary. upb_MiniTable* extendee = MakeMiniTable(f->containing_type()); - bool ok = upb_MiniTable_BuildExtension(e.data().data(), e.data().size(), - &ext, extendee, sub, status.ptr()); + bool ok = + _upb_MiniTable_BuildExtension(e.data().data(), e.data().size(), &ext, + extendee, sub, platform_, status.ptr()); if (!ok) { // TODO(haberman): Use ABSL CHECK() when it is available. 
fprintf(stderr, "Error building mini-table: %s\n", diff --git a/upbc/file_layout.h b/upbc/file_layout.h index 0d5f537487..51ea2b72c0 100644 --- a/upbc/file_layout.h +++ b/upbc/file_layout.h @@ -161,6 +161,20 @@ class FileLayout { return layout64_.GetMiniTable(m); } + const upb_MiniTableField* GetField32( + const protobuf::FieldDescriptor* f) const { + if (f->is_extension()) return &layout32_.GetExtension(f)->field; + return upb_MiniTable_FindFieldByNumber(GetMiniTable32(f->containing_type()), + f->number()); + } + + const upb_MiniTableField* GetField64( + const protobuf::FieldDescriptor* f) const { + if (f->is_extension()) return &layout64_.GetExtension(f)->field; + return upb_MiniTable_FindFieldByNumber(GetMiniTable64(f->containing_type()), + f->number()); + } + const upb_MiniTableEnum* GetEnumTable( const protobuf::EnumDescriptor* d) const { return layout64_.GetEnumTable(d); @@ -197,11 +211,6 @@ class FileLayout { return GetHasbitIndex(f) > 0; } - const upb_MiniTableExtension* GetExtension( - const protobuf::FieldDescriptor* f) const { - return layout64_.GetExtension(f); - } - template static std::string UpbSize(T a, T b) { if (a == b) return absl::Substitute("$0", a); diff --git a/upbc/protoc-gen-upb.cc b/upbc/protoc-gen-upb.cc index b01ecd0326..0d301b52f9 100644 --- a/upbc/protoc-gen-upb.cc +++ b/upbc/protoc-gen-upb.cc @@ -23,6 +23,7 @@ // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS // SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+#include #include #include "google/protobuf/descriptor.pb.h" @@ -169,6 +170,8 @@ std::string FloatToCLiteral(float value) { return "kUpb_FltInfinity"; } else if (value == -std::numeric_limits::infinity()) { return "-kUpb_FltInfinity"; + } else if (std::isnan(value)) { + return "kUpb_NaN"; } else { return absl::StrCat(value); } @@ -179,6 +182,8 @@ std::string DoubleToCLiteral(double value) { return "kUpb_Infinity"; } else if (value == -std::numeric_limits::infinity()) { return "-kUpb_Infinity"; + } else if (std::isnan(value)) { + return "kUpb_NaN"; } else { return absl::StrCat(value); } @@ -243,8 +248,11 @@ void DumpEnumValues(const protobuf::EnumDescriptor* desc, Output& output) { } } +std::string GetFieldRep(const FileLayout& layout, + const protobuf::FieldDescriptor* field); + void GenerateExtensionInHeader(const protobuf::FieldDescriptor* ext, - Output& output) { + const FileLayout& layout, Output& output) { output( R"cc( UPB_INLINE bool $0_has_$1(const struct $2* msg) { @@ -264,42 +272,36 @@ void GenerateExtensionInHeader(const protobuf::FieldDescriptor* ext, ExtensionLayout(ext)); if (ext->is_repeated()) { - } else if (ext->message_type()) { + // TODO(b/259861668): We need generated accessors for repeated extensions. 
+ } else { output( R"cc( UPB_INLINE $0 $1_$2(const struct $3* msg) { - const upb_Message_Extension* ext = _upb_Message_Getext(msg, &$4); - UPB_ASSERT(ext); - return *UPB_PTR_AT(&ext->data, 0, $0); - } - )cc", - CTypeConst(ext), ExtensionIdentBase(ext), ext->name(), - MessageName(ext->containing_type()), ExtensionLayout(ext), - FieldDefault(ext)); - output( - R"cc( - UPB_INLINE void $1_set_$2(struct $3* msg, $0 ext, upb_Arena* arena) { - const upb_Message_Extension* msg_ext = - _upb_Message_GetOrCreateExtension(msg, &$4, arena); - UPB_ASSERT(msg_ext); - *UPB_PTR_AT(&msg_ext->data, 0, $0) = ext; + const upb_MiniTableExtension* ext = &$4; + UPB_ASSUME(!upb_IsRepeatedOrMap(&ext->field)); + UPB_ASSUME(_upb_MiniTableField_GetRep(&ext->field) == $5); + $0 default_val = $6; + $0 ret; + _upb_MiniTable_GetExtensionField(msg, ext, &default_val, &ret); + return ret; } )cc", CTypeConst(ext), ExtensionIdentBase(ext), ext->name(), MessageName(ext->containing_type()), ExtensionLayout(ext), - FieldDefault(ext)); - } else { - // Returns default if extension field is not a message. + GetFieldRep(layout, ext), FieldDefault(ext)); output( R"cc( - UPB_INLINE $0 $1_$2(const struct $3* msg) { - const upb_Message_Extension* ext = _upb_Message_Getext(msg, &$4); - return ext ? 
*UPB_PTR_AT(&ext->data, 0, $0) : $5; + UPB_INLINE void $1_set_$2(struct $3* msg, $0 val, upb_Arena* arena) { + const upb_MiniTableExtension* ext = &$4; + UPB_ASSUME(!upb_IsRepeatedOrMap(&ext->field)); + UPB_ASSUME(_upb_MiniTableField_GetRep(&ext->field) == $5); + bool ok = _upb_MiniTable_SetExtensionField(msg, ext, &val, arena); + UPB_ASSERT(ok); } )cc", CTypeConst(ext), ExtensionIdentBase(ext), ext->name(), MessageName(ext->containing_type()), ExtensionLayout(ext), - FieldDefault(ext)); + GetFieldRep(layout, ext)); } } @@ -590,83 +592,26 @@ void GenerateRepeatedGetters(const protobuf::FieldDescriptor* field, layout.GetFieldOffset(field)); } -void GenerateOneofGetters(const protobuf::FieldDescriptor* field, - const FileLayout& layout, absl::string_view msg_name, - const NameToFieldDescriptorMap& field_names, - Output& output) { - output( - R"cc( - UPB_INLINE $0 $1_$2(const $1* msg) { - return UPB_READ_ONEOF(msg, $0, $3, $4, $5, $6); - } - )cc", - CTypeConst(field), msg_name, ResolveFieldName(field, field_names), - layout.GetFieldOffset(field), - layout.GetOneofCaseOffset(field->real_containing_oneof()), - field->number(), FieldDefault(field)); -} - -std::string GetAccessor(const protobuf::FieldDescriptor* field) { - switch (field->cpp_type()) { - case protobuf::FieldDescriptor::CPPTYPE_BOOL: - return "upb_MiniTable_GetBool"; - - case protobuf::FieldDescriptor::CPPTYPE_INT32: - case protobuf::FieldDescriptor::CPPTYPE_ENUM: - return "upb_MiniTable_GetInt32"; - - case protobuf::FieldDescriptor::CPPTYPE_UINT32: - return "upb_MiniTable_GetUInt32"; - - case protobuf::FieldDescriptor::CPPTYPE_INT64: - return "upb_MiniTable_GetInt64"; - - case protobuf::FieldDescriptor::CPPTYPE_UINT64: - return "upb_MiniTable_GetUInt64"; - - case protobuf::FieldDescriptor::CPPTYPE_FLOAT: - return "upb_MiniTable_GetFloat"; - - case protobuf::FieldDescriptor::CPPTYPE_DOUBLE: - return "upb_MiniTable_GetDouble"; - - case protobuf::FieldDescriptor::CPPTYPE_STRING: - return 
"upb_MiniTable_GetString"; - - case protobuf::FieldDescriptor::CPPTYPE_MESSAGE: - return absl::StrCat("(", CTypeConst(field), ")upb_MiniTable_GetMessage"); - - default: - fprintf(stderr, "unexpected type %d\n", field->cpp_type()); - abort(); - } -} - -void WriteField(const upb_MiniTableField* field64, - const upb_MiniTableField* field32, Output& output); +std::string FieldInitializer(const FileLayout& layout, + const protobuf::FieldDescriptor* field); void GenerateScalarGetters(const protobuf::FieldDescriptor* field, const FileLayout& layout, absl::string_view msg_name, const NameToFieldDescriptorMap& field_names, Output& output) { - const protobuf::Descriptor* message = field->containing_type(); - const upb_MiniTable* t32 = layout.GetMiniTable32(message); - const upb_MiniTable* t64 = layout.GetMiniTable64(message); - const upb_MiniTableField* f32 = - upb_MiniTable_FindFieldByNumber(t32, field->number()); - const upb_MiniTableField* f64 = - upb_MiniTable_FindFieldByNumber(t64, field->number()); - - std::string resolved_name = ResolveFieldName(field, field_names); + std::string field_name = ResolveFieldName(field, field_names); output( R"cc( UPB_INLINE $0 $1_$2(const $1* msg) { - const upb_MiniTableField field =)cc", - CTypeConst(field), msg_name, resolved_name); - WriteField(f64, f32, output); - output(";\n"); - output(" return $0(msg, &field, $1);\n}\n", GetAccessor(field), - FieldDefault(field)); + $0 default_val = $3; + $0 ret; + const upb_MiniTableField field = $4; + _upb_MiniTable_GetNonExtensionField(msg, &field, &default_val, &ret); + return ret; + } + )cc", + CTypeConst(field), msg_name, field_name, FieldDefault(field), + FieldInitializer(layout, field)); } void GenerateGetters(const protobuf::FieldDescriptor* field, @@ -679,8 +624,6 @@ void GenerateGetters(const protobuf::FieldDescriptor* field, GenerateMapEntryGetters(field, msg_name, output); } else if (field->is_repeated()) { GenerateRepeatedGetters(field, layout, msg_name, field_names, output); - } 
else if (field->real_containing_oneof()) { - GenerateOneofGetters(field, layout, msg_name, field_names, output); } else { GenerateScalarGetters(field, layout, msg_name, field_names, output); } @@ -787,34 +730,25 @@ void GenerateNonRepeatedSetters(const protobuf::FieldDescriptor* field, // Key cannot be mutated. return; } - std::string resolved_name = ResolveFieldName(field, field_names); - // The common function signature for all setters. Varying - // implementations follow. - output("UPB_INLINE void $0_set_$1($0 *msg, $2 value) {\n", msg_name, - resolved_name, CType(field)); + + std::string field_name = ResolveFieldName(field, field_names); if (field == field->containing_type()->map_value()) { - output( - " _upb_msg_map_set_value(msg, &value, $0);\n" - "}\n", - field->cpp_type() == protobuf::FieldDescriptor::CPPTYPE_STRING - ? "0" - : "sizeof(" + CType(field) + ")"); - } else if (field->real_containing_oneof()) { - output( - " UPB_WRITE_ONEOF(msg, $0, $1, value, $2, $3);\n" - "}\n", - CType(field), layout.GetFieldOffset(field), - layout.GetOneofCaseOffset(field->real_containing_oneof()), - field->number()); + output(R"cc( + UPB_INLINE void $0_set_$1($0 *msg, $2 value) { + _upb_msg_map_set_value(msg, &value, $3); + })cc", + msg_name, field_name, CType(field), + field->cpp_type() == protobuf::FieldDescriptor::CPPTYPE_STRING + ? 
"0" + : "sizeof(" + CType(field) + ")"); } else { - if (layout.HasHasbit(field)) { - output(" _upb_sethas(msg, $0);\n", layout.GetHasbitIndex(field)); - } - output( - " *UPB_PTR_AT(msg, $1, $0) = value;\n" - "}\n", - CType(field), layout.GetFieldOffset(field)); + output(R"cc( + UPB_INLINE void $0_set_$1($0 *msg, $2 value) { + const upb_MiniTableField field = $3; + _upb_MiniTable_SetNonExtensionField(msg, &field, &value); + })cc", + msg_name, field_name, CType(field), FieldInitializer(layout, field)); } // Message fields also have a Msg_mutable_foo() accessor that will create @@ -827,13 +761,12 @@ void GenerateNonRepeatedSetters(const protobuf::FieldDescriptor* field, struct $0* sub = (struct $0*)$1_$2(msg); if (sub == NULL) { sub = (struct $0*)_upb_Message_New(&$3, arena); - if (!sub) return NULL; - $1_set_$2(msg, sub); + if (sub) $1_set_$2(msg, sub); } return sub; } )cc", - MessageName(field->message_type()), msg_name, resolved_name, + MessageName(field->message_type()), msg_name, field_name, MessageInit(field->message_type())); } } @@ -992,7 +925,7 @@ void WriteHeader(const FileLayout& layout, Output& output) { } for (auto ext : this_file_exts) { - GenerateExtensionInHeader(ext, output); + GenerateExtensionInHeader(ext, layout, output); } output("extern const upb_MiniTableFile $0;\n\n", FileLayoutName(file)); @@ -1226,13 +1159,45 @@ std::vector FastDecodeTable(const protobuf::Descriptor* message, return table; } +std::string GetFieldRep(const upb_MiniTableField* field32, + const upb_MiniTableField* field64) { + switch (_upb_MiniTableField_GetRep(field32)) { + case kUpb_FieldRep_1Byte: + return "kUpb_FieldRep_1Byte"; + break; + case kUpb_FieldRep_4Byte: { + if (_upb_MiniTableField_GetRep(field64) == kUpb_FieldRep_4Byte) { + return "kUpb_FieldRep_4Byte"; + } else { + assert(_upb_MiniTableField_GetRep(field64) == kUpb_FieldRep_8Byte); + return "UPB_SIZE(kUpb_FieldRep_4Byte, kUpb_FieldRep_8Byte)"; + } + break; + } + case kUpb_FieldRep_StringView: + return 
"kUpb_FieldRep_StringView"; + break; + case kUpb_FieldRep_8Byte: + return "kUpb_FieldRep_8Byte"; + break; + } + UPB_UNREACHABLE(); +} + +std::string GetFieldRep(const FileLayout& layout, + const protobuf::FieldDescriptor* field) { + return GetFieldRep(layout.GetField32(field), layout.GetField64(field)); +} + // Returns the field mode as a string initializer. // // We could just emit this as a number (and we may yet go in that direction) but // for now emitting symbolic constants gives this better readability and // debuggability. -std::string GetModeInit(uint8_t mode32, uint8_t mode64) { +std::string GetModeInit(const upb_MiniTableField* field32, + const upb_MiniTableField* field64) { std::string ret; + uint8_t mode32 = field32->mode; switch (mode32 & kUpb_FieldMode_Mask) { case kUpb_FieldMode_Map: ret = "kUpb_FieldMode_Map"; @@ -1259,48 +1224,32 @@ std::string GetModeInit(uint8_t mode32, uint8_t mode64) { absl::StrAppend(&ret, " | kUpb_LabelFlags_IsAlternate"); } - std::string rep; - switch (mode32 >> kUpb_FieldRep_Shift) { - case kUpb_FieldRep_1Byte: - rep = "kUpb_FieldRep_1Byte"; - break; - case kUpb_FieldRep_4Byte: - if (mode64 >> kUpb_FieldRep_Shift == kUpb_FieldRep_4Byte) { - rep = "kUpb_FieldRep_4Byte"; - } else { - assert(mode64 >> kUpb_FieldRep_Shift == kUpb_FieldRep_8Byte); - rep = "UPB_SIZE(kUpb_FieldRep_4Byte, kUpb_FieldRep_8Byte)"; - } - break; - case kUpb_FieldRep_StringView: - rep = "kUpb_FieldRep_StringView"; - break; - case kUpb_FieldRep_8Byte: - rep = "kUpb_FieldRep_8Byte"; - break; - } - - absl::StrAppend(&ret, " | (", rep, " << kUpb_FieldRep_Shift)"); + absl::StrAppend(&ret, " | (", GetFieldRep(field32, field64), + " << kUpb_FieldRep_Shift)"); return ret; } -void WriteField(const upb_MiniTableField* field64, - const upb_MiniTableField* field32, Output& output) { - output("{$0, $1, $2, $3, $4, $5}", field64->number, - FileLayout::UpbSize(field32->offset, field64->offset), - FileLayout::UpbSize(field32->presence, field64->presence), - 
field64->submsg_index == kUpb_NoSub - ? "kUpb_NoSub" - : absl::StrCat(field64->submsg_index).c_str(), - field64->descriptortype, GetModeInit(field32->mode, field64->mode)); +std::string FieldInitializer(const upb_MiniTableField* field64, + const upb_MiniTableField* field32) { + return absl::Substitute( + "{$0, $1, $2, $3, $4, $5}", field64->number, + FileLayout::UpbSize(field32->offset, field64->offset), + FileLayout::UpbSize(field32->presence, field64->presence), + field64->submsg_index == kUpb_NoSub + ? "kUpb_NoSub" + : absl::StrCat(field64->submsg_index).c_str(), + field64->descriptortype, GetModeInit(field32, field64)); +} + +std::string FieldInitializer(const FileLayout& layout, + const protobuf::FieldDescriptor* field) { + return FieldInitializer(layout.GetField64(field), layout.GetField32(field)); } // Writes a single field into a .upb.c source file. void WriteMessageField(const upb_MiniTableField* field64, const upb_MiniTableField* field32, Output& output) { - output(" "); - WriteField(field64, field32, output); - output(",\n"); + output(" $0,\n", FieldInitializer(field64, field32)); } // Writes a single message into a .upb.c source file. 
@@ -1454,11 +1403,13 @@ int WriteMessages(const FileLayout& layout, Output& output, return file_messages.size(); } -void WriteExtension(const upb_MiniTableExtension* ext, Output& output) { - WriteField(&ext->field, &ext->field, output); - output(",\n"); - output(" &$0,\n", reinterpret_cast(ext->extendee)); - output(" $0,\n", FilePlatformLayout::GetSub(ext->sub)); +void WriteExtension(const protobuf::FieldDescriptor* ext, + const FileLayout& layout, Output& output) { + output("$0,\n", FieldInitializer(layout, ext)); + const upb_MiniTableExtension* mt_ext = + reinterpret_cast(layout.GetField32(ext)); + output(" &$0,\n", reinterpret_cast(mt_ext->extendee)); + output(" $0,\n", FilePlatformLayout::GetSub(mt_ext->sub)); } int WriteExtensions(const FileLayout& layout, Output& output) { @@ -1484,7 +1435,7 @@ int WriteExtensions(const FileLayout& layout, Output& output) { for (auto ext : exts) { output("const upb_MiniTableExtension $0 = {\n ", ExtensionLayout(ext)); - WriteExtension(layout.GetExtension(ext), output); + WriteExtension(ext, layout, output); output("\n};\n"); }