From 45dfd77d8799000a55840b1af150db6629a3e972 Mon Sep 17 00:00:00 2001 From: Joshua Haberman Date: Thu, 22 Dec 2022 14:16:23 -0800 Subject: [PATCH 01/21] Unified accessor API for map getters and setters. This is part of the ongoing effort to remove any hard-coding of layout offsets into the generated code (except via `upb_MiniTableField` values). PiperOrigin-RevId: 497238313 --- protos_generator/tests/test_generated.cc | 2 + upb/collections/map.c | 31 ++++--- upb/collections/map_gencode_util.h | 44 ---------- upb/collections/map_internal.h | 7 ++ upb/message/accessors.h | 53 ++++++++---- upb/mini_table/common.c | 35 -------- upb/mini_table/common.h | 35 +++++++- upbc/protoc-gen-upb.cc | 103 +++++++++++++++-------- 8 files changed, 159 insertions(+), 151 deletions(-) diff --git a/protos_generator/tests/test_generated.cc b/protos_generator/tests/test_generated.cc index 558476c60a..416d9f466c 100644 --- a/protos_generator/tests/test_generated.cc +++ b/protos_generator/tests/test_generated.cc @@ -425,7 +425,9 @@ TEST(CppGeneratedCode, MessageMapStringKeyAndInt32Value) { test_model.clear_str_to_int_map(); EXPECT_EQ(0, test_model.str_to_int_map_size()); test_model.set_str_to_int_map("first", 10); + EXPECT_EQ(1, test_model.str_to_int_map_size()); test_model.set_str_to_int_map("second", 20); + EXPECT_EQ(2, test_model.str_to_int_map_size()); auto result = test_model.get_str_to_int_map("second"); EXPECT_EQ(true, result.ok()); EXPECT_EQ(20, result.value()); diff --git a/upb/collections/map.c b/upb/collections/map.c index e27e8b8980..23bbc8cec1 100644 --- a/upb/collections/map.c +++ b/upb/collections/map.c @@ -35,25 +35,24 @@ // Must be last. #include "upb/port/def.inc" -/* Strings/bytes are special-cased in maps. */ -static char _upb_CTypeo_mapsize[12] = { - 0, - 1, /* kUpb_CType_Bool */ - 4, /* kUpb_CType_Float */ - 4, /* kUpb_CType_Int32 */ - 4, /* kUpb_CType_UInt32 */ - 4, /* kUpb_CType_Enum */ - sizeof(void*), /* kUpb_CType_Message */ - 8, /* kUpb_CType_Double */ - 8, /* kUpb_CType_Int64 */ - 8, /* kUpb_CType_UInt64 */ - 0, /* kUpb_CType_String */ - 0, /* kUpb_CType_Bytes */ +// Strings/bytes are special-cased in maps. +char _upb_Map_CTypeSizeTable[12] = { + [kUpb_CType_Bool] = 1, + [kUpb_CType_Float] = 4, + [kUpb_CType_Int32] = 4, + [kUpb_CType_UInt32] = 4, + [kUpb_CType_Enum] = 4, + [kUpb_CType_Message] = sizeof(void*), + [kUpb_CType_Double] = 8, + [kUpb_CType_Int64] = 8, + [kUpb_CType_UInt64] = 8, + [kUpb_CType_String] = UPB_MAPTYPE_STRING, + [kUpb_CType_Bytes] = UPB_MAPTYPE_STRING, }; upb_Map* upb_Map_New(upb_Arena* a, upb_CType key_type, upb_CType value_type) { - return _upb_Map_New(a, _upb_CTypeo_mapsize[key_type], - _upb_CTypeo_mapsize[value_type]); + return _upb_Map_New(a, _upb_Map_CTypeSize(key_type), + _upb_Map_CTypeSize(value_type)); } size_t upb_Map_Size(const upb_Map* map) { return _upb_Map_Size(map); } diff --git a/upb/collections/map_gencode_util.h b/upb/collections/map_gencode_util.h index 59764c96b2..794d76b9f4 100644 --- a/upb/collections/map_gencode_util.h +++ b/upb/collections/map_gencode_util.h @@ -41,50 +41,6 @@ extern "C" { // Message map operations, these get the map from the message first. -UPB_INLINE size_t _upb_msg_map_size(const upb_Message* msg, size_t ofs) { - upb_Map* map = *UPB_PTR_AT(msg, ofs, upb_Map*); - return map ? _upb_Map_Size(map) : 0; -} - -UPB_INLINE bool _upb_msg_map_get(const upb_Message* msg, size_t ofs, - const void* key, size_t key_size, void* val, - size_t val_size) { - upb_Map* map = *UPB_PTR_AT(msg, ofs, upb_Map*); - if (!map) return false; - return _upb_Map_Get(map, key, key_size, val, val_size); -} - -UPB_INLINE void* _upb_msg_map_next(const upb_Message* msg, size_t ofs, - size_t* iter) { - upb_Map* map = *UPB_PTR_AT(msg, ofs, upb_Map*); - if (!map) return NULL; - return _upb_map_next(map, iter); -} - -UPB_INLINE bool _upb_msg_map_set(upb_Message* msg, size_t ofs, const void* key, - size_t key_size, void* val, size_t val_size, - upb_Arena* arena) { - upb_Map** map = UPB_PTR_AT(msg, ofs, upb_Map*); - if (!*map) { - *map = _upb_Map_New(arena, key_size, val_size); - } - return _upb_Map_Insert(*map, key, key_size, val, val_size, arena) != - kUpb_MapInsertStatus_OutOfMemory; -} - -UPB_INLINE bool _upb_msg_map_delete(upb_Message* msg, size_t ofs, - const void* key, size_t key_size) { - upb_Map* map = *UPB_PTR_AT(msg, ofs, upb_Map*); - if (!map) return false; - return _upb_Map_Delete(map, key, key_size); -} - -UPB_INLINE void _upb_msg_map_clear(upb_Message* msg, size_t ofs) { - upb_Map* map = *UPB_PTR_AT(msg, ofs, upb_Map*); - if (!map) return; - _upb_Map_Clear(map); -} - UPB_INLINE void _upb_msg_map_key(const void* msg, void* key, size_t size) { const upb_tabent* ent = (const upb_tabent*)msg; uint32_t u32len; diff --git a/upb/collections/map_internal.h b/upb/collections/map_internal.h index f235cb3ecd..51a169eeba 100644 --- a/upb/collections/map_internal.h +++ b/upb/collections/map_internal.h @@ -151,6 +151,13 @@ UPB_INLINE size_t _upb_Map_Size(const upb_Map* map) { return map->table.t.count; } +// Strings/bytes are special-cased in maps. +extern char _upb_Map_CTypeSizeTable[12]; + +UPB_INLINE size_t _upb_Map_CTypeSize(upb_CType ctype) { + return _upb_Map_CTypeSizeTable[ctype]; +} + // Creates a new map on the given arena with this key/value type. upb_Map* _upb_Map_New(upb_Arena* a, size_t key_size, size_t value_size); diff --git a/upb/message/accessors.h b/upb/message/accessors.h index 697b5ed777..cafd0f8c18 100644 --- a/upb/message/accessors.h +++ b/upb/message/accessors.h @@ -251,6 +251,20 @@ UPB_INLINE void _upb_Message_ClearNonExtensionField( field); } +UPB_INLINE upb_Map* _upb_MiniTable_GetOrCreateMutableMap( + upb_Message* msg, const upb_MiniTableField* field, size_t key_size, + size_t val_size, upb_Arena* arena) { + UPB_ASSUME(upb_IsRepeatedOrMap(field)); + upb_Map* map = NULL; + upb_Map* default_map_value = NULL; + _upb_Message_GetNonExtensionField(msg, field, &default_map_value, &map); + if (!map) { + map = _upb_Map_New(arena, key_size, val_size); + _upb_Message_SetNonExtensionField(msg, field, &map); + } + return map; +} + // EVERYTHING ABOVE THIS LINE IS INTERNAL - DO NOT USE ///////////////////////// UPB_API_INLINE void upb_Message_ClearField(upb_Message* msg, @@ -540,27 +554,30 @@ UPB_API_INLINE bool upb_MiniTableField_IsClosedEnum( return field->descriptortype == kUpb_FieldType_Enum; } +UPB_API_INLINE const upb_Map* upb_Message_GetMap( + const upb_Message* msg, const upb_MiniTableField* field) { + UPB_ASSUME(upb_FieldMode_Get(field) == kUpb_FieldMode_Map); + const upb_Map* ret; + const upb_Map* default_val = NULL; + _upb_Message_GetNonExtensionField(msg, field, &default_val, &ret); + return ret; +} + +// TODO: rename to GetOrCreateMutableMap UPB_API_INLINE upb_Map* upb_MiniTable_GetMutableMap( upb_Message* msg, const upb_MiniTable* map_entry_mini_table, const upb_MiniTableField* field, upb_Arena* arena) { - UPB_ASSERT(map_entry_mini_table != NULL); - UPB_ASSUME(upb_IsRepeatedOrMap(field)); - upb_Map* map = NULL; - upb_Map* default_map_value = NULL; - _upb_Message_GetNonExtensionField(msg, field, &default_map_value, &map); - if (!map) { - // Allocate map. - UPB_ASSERT(field->descriptortype == kUpb_FieldType_Message || - field->descriptortype == kUpb_FieldType_Group); - const upb_MiniTableField* map_entry_key_field = - &map_entry_mini_table->fields[0]; - const upb_MiniTableField* map_entry_value_field = - &map_entry_mini_table->fields[1]; - map = upb_Map_New(arena, upb_MiniTableField_CType(map_entry_key_field), - upb_MiniTableField_CType(map_entry_value_field)); - _upb_Message_SetNonExtensionField(msg, field, &map); - } - return map; + UPB_ASSERT(field->descriptortype == kUpb_FieldType_Message || + field->descriptortype == kUpb_FieldType_Group); + const upb_MiniTableField* map_entry_key_field = + &map_entry_mini_table->fields[0]; + const upb_MiniTableField* map_entry_value_field = + &map_entry_mini_table->fields[1]; + return _upb_MiniTable_GetOrCreateMutableMap( + msg, field, + _upb_Map_CTypeSize(upb_MiniTableField_CType(map_entry_key_field)), + _upb_Map_CTypeSize(upb_MiniTableField_CType(map_entry_value_field)), + arena); } // Updates a map entry given an entry message. diff --git a/upb/mini_table/common.c b/upb/mini_table/common.c index 2b84af1a7f..818bfa2a7c 100644 --- a/upb/mini_table/common.c +++ b/upb/mini_table/common.c @@ -76,38 +76,3 @@ upb_FieldType upb_MiniTableField_Type(const upb_MiniTableField* field) { } return field->descriptortype; } - -upb_CType upb_MiniTableField_CType(const upb_MiniTableField* f) { - switch (f->descriptortype) { - case kUpb_FieldType_Double: - return kUpb_CType_Double; - case kUpb_FieldType_Float: - return kUpb_CType_Float; - case kUpb_FieldType_Int64: - case kUpb_FieldType_SInt64: - case kUpb_FieldType_SFixed64: - return kUpb_CType_Int64; - case kUpb_FieldType_Int32: - case kUpb_FieldType_SFixed32: - case kUpb_FieldType_SInt32: - return kUpb_CType_Int32; - case kUpb_FieldType_UInt64: - case kUpb_FieldType_Fixed64: - return kUpb_CType_UInt64; - case kUpb_FieldType_UInt32: - case kUpb_FieldType_Fixed32: - return kUpb_CType_UInt32; - case kUpb_FieldType_Enum: - return kUpb_CType_Enum; - case kUpb_FieldType_Bool: - return kUpb_CType_Bool; - case kUpb_FieldType_String: - return kUpb_CType_String; - case kUpb_FieldType_Bytes: - return kUpb_CType_Bytes; - case kUpb_FieldType_Group: - case kUpb_FieldType_Message: - return kUpb_CType_Message; - } - UPB_UNREACHABLE(); -} diff --git a/upb/mini_table/common.h b/upb/mini_table/common.h index 0f94cbc2fd..bb632e3bed 100644 --- a/upb/mini_table/common.h +++ b/upb/mini_table/common.h @@ -58,7 +58,40 @@ UPB_API const upb_MiniTableField* upb_MiniTable_FindFieldByNumber( UPB_API upb_FieldType upb_MiniTableField_Type(const upb_MiniTableField* field); -UPB_API upb_CType upb_MiniTableField_CType(const upb_MiniTableField* field); +UPB_API_INLINE upb_CType upb_MiniTableField_CType(const upb_MiniTableField* f) { + switch (f->descriptortype) { + case kUpb_FieldType_Double: + return kUpb_CType_Double; + case kUpb_FieldType_Float: + return kUpb_CType_Float; + case kUpb_FieldType_Int64: + case kUpb_FieldType_SInt64: + case kUpb_FieldType_SFixed64: + return kUpb_CType_Int64; + case kUpb_FieldType_Int32: + case kUpb_FieldType_SFixed32: + case kUpb_FieldType_SInt32: + return kUpb_CType_Int32; + case kUpb_FieldType_UInt64: + case kUpb_FieldType_Fixed64: + return kUpb_CType_UInt64; + case kUpb_FieldType_UInt32: + case kUpb_FieldType_Fixed32: + return kUpb_CType_UInt32; + case kUpb_FieldType_Enum: + return kUpb_CType_Enum; + case kUpb_FieldType_Bool: + return kUpb_CType_Bool; + case kUpb_FieldType_String: + return kUpb_CType_String; + case kUpb_FieldType_Bytes: + return kUpb_CType_Bytes; + case kUpb_FieldType_Group: + case kUpb_FieldType_Message: + return kUpb_CType_Message; + } + UPB_UNREACHABLE(); +} UPB_API_INLINE bool upb_MiniTableField_IsExtension( const upb_MiniTableField* field) { diff --git a/upbc/protoc-gen-upb.cc b/upbc/protoc-gen-upb.cc index 6ce24beb8f..c0d9aa7ff7 100644 --- a/upbc/protoc-gen-upb.cc +++ b/upbc/protoc-gen-upb.cc @@ -227,6 +227,30 @@ std::string CTypeConst(const protobuf::FieldDescriptor* field) { return CTypeInternal(field, true); } +std::string MapKeyCType(const protobuf::FieldDescriptor* map_field) { + return CType(map_field->message_type()->map_key()); +} + +std::string MapValueCType(const protobuf::FieldDescriptor* map_field) { + return CType(map_field->message_type()->map_value()); +} + +std::string MapKeySize(const protobuf::FieldDescriptor* map_field, + absl::string_view expr) { + return map_field->message_type()->map_key()->cpp_type() == + protobuf::FieldDescriptor::CPPTYPE_STRING + ? "0" + : absl::StrCat("sizeof(", expr, ")"); +} + +std::string MapValueSize(const protobuf::FieldDescriptor* map_field, + absl::string_view expr) { + return map_field->message_type()->map_value()->cpp_type() == + protobuf::FieldDescriptor::CPPTYPE_STRING + ? "0" + : absl::StrCat("sizeof(", expr, ")"); +} + std::string FieldInitializer(const FileLayout& layout, const protobuf::FieldDescriptor* field); @@ -436,38 +460,39 @@ void GenerateMapGetters(const protobuf::FieldDescriptor* field, const FileLayout& layout, absl::string_view msg_name, const NameToFieldDescriptorMap& field_names, Output& output) { - const protobuf::Descriptor* entry = field->message_type(); - const protobuf::FieldDescriptor* key = entry->FindFieldByNumber(1); - const protobuf::FieldDescriptor* val = entry->FindFieldByNumber(2); std::string resolved_name = ResolveFieldName(field, field_names); output( R"cc( UPB_INLINE size_t $0_$1_size(const $0* msg) { - return _upb_msg_map_size(msg, $2); + const upb_MiniTableField field = $2; + const upb_Map* map = upb_Message_GetMap(msg, &field); + return map ? _upb_Map_Size(map) : 0; } )cc", - msg_name, resolved_name, layout.GetFieldOffset(field)); + msg_name, resolved_name, FieldInitializer(layout, field)); output( R"cc( UPB_INLINE bool $0_$1_get(const $0* msg, $2 key, $3* val) { - return _upb_msg_map_get(msg, $4, &key, $5, val, $6); + const upb_MiniTableField field = $4; + const upb_Map* map = upb_Message_GetMap(msg, &field); + if (!map) return false; + return _upb_Map_Get(map, &key, $5, val, $6); } )cc", - msg_name, resolved_name, CType(key), CType(val), - layout.GetFieldOffset(field), - key->cpp_type() == protobuf::FieldDescriptor::CPPTYPE_STRING - ? "0" - : "sizeof(key)", - val->cpp_type() == protobuf::FieldDescriptor::CPPTYPE_STRING - ? "0" - : "sizeof(*val)"); + msg_name, resolved_name, MapKeyCType(field), MapValueCType(field), + FieldInitializer(layout, field), MapKeySize(field, "key"), + MapValueSize(field, "*val")); output( R"cc( UPB_INLINE $0 $1_$2_next(const $1* msg, size_t* iter) { - return ($0)_upb_msg_map_next(msg, $3, iter); + const upb_MiniTableField field = $3; + const upb_Map* map = upb_Message_GetMap(msg, &field); + if (!map) return NULL; + return ($0)_upb_map_next(map, iter); } )cc", - CTypeConst(field), msg_name, resolved_name, layout.GetFieldOffset(field)); + CTypeConst(field), msg_name, resolved_name, + FieldInitializer(layout, field)); } void GenerateMapEntryGetters(const protobuf::FieldDescriptor* field, @@ -539,46 +564,50 @@ void GenerateMapSetters(const protobuf::FieldDescriptor* field, const FileLayout& layout, absl::string_view msg_name, const NameToFieldDescriptorMap& field_names, Output& output) { - const protobuf::Descriptor* entry = field->message_type(); - const protobuf::FieldDescriptor* key = entry->FindFieldByNumber(1); - const protobuf::FieldDescriptor* val = entry->FindFieldByNumber(2); std::string resolved_name = ResolveFieldName(field, field_names); output( R"cc( - UPB_INLINE void $0_$1_clear($0* msg) { _upb_msg_map_clear(msg, $2); } + UPB_INLINE void $0_$1_clear($0* msg) { + const upb_MiniTableField field = $2; + upb_Map* map = (upb_Map*)upb_Message_GetMap(msg, &field); + if (!map) return; + _upb_Map_Clear(map); + } )cc", - msg_name, resolved_name, layout.GetFieldOffset(field)); + msg_name, resolved_name, FieldInitializer(layout, field)); output( R"cc( UPB_INLINE bool $0_$1_set($0* msg, $2 key, $3 val, upb_Arena* a) { - return _upb_msg_map_set(msg, $4, &key, $5, &val, $6, a); + const upb_MiniTableField field = $4; + upb_Map* map = _upb_MiniTable_GetOrCreateMutableMap(msg, &field, $5, $6, a); + return _upb_Map_Insert(map, &key, $5, &val, $6, a) != + kUpb_MapInsertStatus_OutOfMemory; } )cc", - msg_name, resolved_name, CType(key), CType(val), - layout.GetFieldOffset(field), - key->cpp_type() == protobuf::FieldDescriptor::CPPTYPE_STRING - ? "0" - : "sizeof(key)", - val->cpp_type() == protobuf::FieldDescriptor::CPPTYPE_STRING - ? "0" - : "sizeof(val)"); + msg_name, resolved_name, MapKeyCType(field), MapValueCType(field), + FieldInitializer(layout, field), MapKeySize(field, "key"), + MapValueSize(field, "val")); output( R"cc( UPB_INLINE bool $0_$1_delete($0* msg, $2 key) { - return _upb_msg_map_delete(msg, $3, &key, $4); + const upb_MiniTableField field = $3; + upb_Map* map = (upb_Map*)upb_Message_GetMap(msg, &field); + if (!map) return false; + return _upb_Map_Delete(map, &key, $4); } )cc", - msg_name, resolved_name, CType(key), layout.GetFieldOffset(field), - key->cpp_type() == protobuf::FieldDescriptor::CPPTYPE_STRING - ? "0" - : "sizeof(key)"); + msg_name, resolved_name, MapKeyCType(field), + FieldInitializer(layout, field), MapKeySize(field, "key")); output( R"cc( UPB_INLINE $0 $1_$2_nextmutable($1* msg, size_t* iter) { - return ($0)_upb_msg_map_next(msg, $3, iter); + const upb_MiniTableField field = $3; + upb_Map* map = (upb_Map*)upb_Message_GetMap(msg, &field); + if (!map) return NULL; + return ($0)_upb_map_next(map, iter); } )cc", - CType(field), msg_name, resolved_name, layout.GetFieldOffset(field)); + CType(field), msg_name, resolved_name, FieldInitializer(layout, field)); } void GenerateRepeatedSetters(const protobuf::FieldDescriptor* field, From e34e3bd32855d0132815e9110bfee71721090dc6 Mon Sep 17 00:00:00 2001 From: Joshua Haberman Date: Thu, 22 Dec 2022 17:08:29 -0800 Subject: [PATCH 02/21] Unified accessor API for map getters and setters. This is part of the ongoing effort to remove any hard-coding of layout offsets into the generated code (except via `upb_MiniTableField` values). PiperOrigin-RevId: 497266785 --- protos_generator/tests/test_generated.cc | 2 - upb/collections/map.c | 31 +++---- upb/collections/map_gencode_util.h | 44 ++++++++++ upb/collections/map_internal.h | 7 -- upb/message/accessors.h | 53 ++++-------- upb/mini_table/common.c | 35 ++++++++ upb/mini_table/common.h | 35 +------- upbc/protoc-gen-upb.cc | 103 ++++++++--------------- 8 files changed, 151 insertions(+), 159 deletions(-) diff --git a/protos_generator/tests/test_generated.cc b/protos_generator/tests/test_generated.cc index 416d9f466c..558476c60a 100644 --- a/protos_generator/tests/test_generated.cc +++ b/protos_generator/tests/test_generated.cc @@ -425,9 +425,7 @@ TEST(CppGeneratedCode, MessageMapStringKeyAndInt32Value) { test_model.clear_str_to_int_map(); EXPECT_EQ(0, test_model.str_to_int_map_size()); test_model.set_str_to_int_map("first", 10); - EXPECT_EQ(1, test_model.str_to_int_map_size()); test_model.set_str_to_int_map("second", 20); - EXPECT_EQ(2, test_model.str_to_int_map_size()); auto result = test_model.get_str_to_int_map("second"); EXPECT_EQ(true, result.ok()); EXPECT_EQ(20, result.value()); diff --git a/upb/collections/map.c b/upb/collections/map.c index 23bbc8cec1..e27e8b8980 100644 --- a/upb/collections/map.c +++ b/upb/collections/map.c @@ -35,24 +35,25 @@ // Must be last. #include "upb/port/def.inc" -// Strings/bytes are special-cased in maps. -char _upb_Map_CTypeSizeTable[12] = { - [kUpb_CType_Bool] = 1, - [kUpb_CType_Float] = 4, - [kUpb_CType_Int32] = 4, - [kUpb_CType_UInt32] = 4, - [kUpb_CType_Enum] = 4, - [kUpb_CType_Message] = sizeof(void*), - [kUpb_CType_Double] = 8, - [kUpb_CType_Int64] = 8, - [kUpb_CType_UInt64] = 8, - [kUpb_CType_String] = UPB_MAPTYPE_STRING, - [kUpb_CType_Bytes] = UPB_MAPTYPE_STRING, +/* Strings/bytes are special-cased in maps. */ +static char _upb_CTypeo_mapsize[12] = { + 0, + 1, /* kUpb_CType_Bool */ + 4, /* kUpb_CType_Float */ + 4, /* kUpb_CType_Int32 */ + 4, /* kUpb_CType_UInt32 */ + 4, /* kUpb_CType_Enum */ + sizeof(void*), /* kUpb_CType_Message */ + 8, /* kUpb_CType_Double */ + 8, /* kUpb_CType_Int64 */ + 8, /* kUpb_CType_UInt64 */ + 0, /* kUpb_CType_String */ + 0, /* kUpb_CType_Bytes */ }; upb_Map* upb_Map_New(upb_Arena* a, upb_CType key_type, upb_CType value_type) { - return _upb_Map_New(a, _upb_Map_CTypeSize(key_type), - _upb_Map_CTypeSize(value_type)); + return _upb_Map_New(a, _upb_CTypeo_mapsize[key_type], + _upb_CTypeo_mapsize[value_type]); } size_t upb_Map_Size(const upb_Map* map) { return _upb_Map_Size(map); } diff --git a/upb/collections/map_gencode_util.h b/upb/collections/map_gencode_util.h index 794d76b9f4..59764c96b2 100644 --- a/upb/collections/map_gencode_util.h +++ b/upb/collections/map_gencode_util.h @@ -41,6 +41,50 @@ extern "C" { // Message map operations, these get the map from the message first. +UPB_INLINE size_t _upb_msg_map_size(const upb_Message* msg, size_t ofs) { + upb_Map* map = *UPB_PTR_AT(msg, ofs, upb_Map*); + return map ? _upb_Map_Size(map) : 0; +} + +UPB_INLINE bool _upb_msg_map_get(const upb_Message* msg, size_t ofs, + const void* key, size_t key_size, void* val, + size_t val_size) { + upb_Map* map = *UPB_PTR_AT(msg, ofs, upb_Map*); + if (!map) return false; + return _upb_Map_Get(map, key, key_size, val, val_size); +} + +UPB_INLINE void* _upb_msg_map_next(const upb_Message* msg, size_t ofs, + size_t* iter) { + upb_Map* map = *UPB_PTR_AT(msg, ofs, upb_Map*); + if (!map) return NULL; + return _upb_map_next(map, iter); +} + +UPB_INLINE bool _upb_msg_map_set(upb_Message* msg, size_t ofs, const void* key, + size_t key_size, void* val, size_t val_size, + upb_Arena* arena) { + upb_Map** map = UPB_PTR_AT(msg, ofs, upb_Map*); + if (!*map) { + *map = _upb_Map_New(arena, key_size, val_size); + } + return _upb_Map_Insert(*map, key, key_size, val, val_size, arena) != + kUpb_MapInsertStatus_OutOfMemory; +} + +UPB_INLINE bool _upb_msg_map_delete(upb_Message* msg, size_t ofs, + const void* key, size_t key_size) { + upb_Map* map = *UPB_PTR_AT(msg, ofs, upb_Map*); + if (!map) return false; + return _upb_Map_Delete(map, key, key_size); +} + +UPB_INLINE void _upb_msg_map_clear(upb_Message* msg, size_t ofs) { + upb_Map* map = *UPB_PTR_AT(msg, ofs, upb_Map*); + if (!map) return; + _upb_Map_Clear(map); +} + UPB_INLINE void _upb_msg_map_key(const void* msg, void* key, size_t size) { const upb_tabent* ent = (const upb_tabent*)msg; uint32_t u32len; diff --git a/upb/collections/map_internal.h b/upb/collections/map_internal.h index 51a169eeba..f235cb3ecd 100644 --- a/upb/collections/map_internal.h +++ b/upb/collections/map_internal.h @@ -151,13 +151,6 @@ UPB_INLINE size_t _upb_Map_Size(const upb_Map* map) { return map->table.t.count; } -// Strings/bytes are special-cased in maps. -extern char _upb_Map_CTypeSizeTable[12]; - -UPB_INLINE size_t _upb_Map_CTypeSize(upb_CType ctype) { - return _upb_Map_CTypeSizeTable[ctype]; -} - // Creates a new map on the given arena with this key/value type. upb_Map* _upb_Map_New(upb_Arena* a, size_t key_size, size_t value_size); diff --git a/upb/message/accessors.h b/upb/message/accessors.h index cafd0f8c18..697b5ed777 100644 --- a/upb/message/accessors.h +++ b/upb/message/accessors.h @@ -251,20 +251,6 @@ UPB_INLINE void _upb_Message_ClearNonExtensionField( field); } -UPB_INLINE upb_Map* _upb_MiniTable_GetOrCreateMutableMap( - upb_Message* msg, const upb_MiniTableField* field, size_t key_size, - size_t val_size, upb_Arena* arena) { - UPB_ASSUME(upb_IsRepeatedOrMap(field)); - upb_Map* map = NULL; - upb_Map* default_map_value = NULL; - _upb_Message_GetNonExtensionField(msg, field, &default_map_value, &map); - if (!map) { - map = _upb_Map_New(arena, key_size, val_size); - _upb_Message_SetNonExtensionField(msg, field, &map); - } - return map; -} - // EVERYTHING ABOVE THIS LINE IS INTERNAL - DO NOT USE ///////////////////////// UPB_API_INLINE void upb_Message_ClearField(upb_Message* msg, @@ -554,30 +540,27 @@ UPB_API_INLINE bool upb_MiniTableField_IsClosedEnum( return field->descriptortype == kUpb_FieldType_Enum; } -UPB_API_INLINE const upb_Map* upb_Message_GetMap( - const upb_Message* msg, const upb_MiniTableField* field) { - UPB_ASSUME(upb_FieldMode_Get(field) == kUpb_FieldMode_Map); - const upb_Map* ret; - const upb_Map* default_val = NULL; - _upb_Message_GetNonExtensionField(msg, field, &default_val, &ret); - return ret; -} - -// TODO: rename to GetOrCreateMutableMap UPB_API_INLINE upb_Map* upb_MiniTable_GetMutableMap( upb_Message* msg, const upb_MiniTable* map_entry_mini_table, const upb_MiniTableField* field, upb_Arena* arena) { - UPB_ASSERT(field->descriptortype == kUpb_FieldType_Message || - field->descriptortype == kUpb_FieldType_Group); - const upb_MiniTableField* map_entry_key_field = - &map_entry_mini_table->fields[0]; - const upb_MiniTableField* map_entry_value_field = - &map_entry_mini_table->fields[1]; - return _upb_MiniTable_GetOrCreateMutableMap( - msg, field, - _upb_Map_CTypeSize(upb_MiniTableField_CType(map_entry_key_field)), - _upb_Map_CTypeSize(upb_MiniTableField_CType(map_entry_value_field)), - arena); + UPB_ASSERT(map_entry_mini_table != NULL); + UPB_ASSUME(upb_IsRepeatedOrMap(field)); + upb_Map* map = NULL; + upb_Map* default_map_value = NULL; + _upb_Message_GetNonExtensionField(msg, field, &default_map_value, &map); + if (!map) { + // Allocate map. + UPB_ASSERT(field->descriptortype == kUpb_FieldType_Message || + field->descriptortype == kUpb_FieldType_Group); + const upb_MiniTableField* map_entry_key_field = + &map_entry_mini_table->fields[0]; + const upb_MiniTableField* map_entry_value_field = + &map_entry_mini_table->fields[1]; + map = upb_Map_New(arena, upb_MiniTableField_CType(map_entry_key_field), + upb_MiniTableField_CType(map_entry_value_field)); + _upb_Message_SetNonExtensionField(msg, field, &map); + } + return map; } // Updates a map entry given an entry message. diff --git a/upb/mini_table/common.c b/upb/mini_table/common.c index 818bfa2a7c..2b84af1a7f 100644 --- a/upb/mini_table/common.c +++ b/upb/mini_table/common.c @@ -76,3 +76,38 @@ upb_FieldType upb_MiniTableField_Type(const upb_MiniTableField* field) { } return field->descriptortype; } + +upb_CType upb_MiniTableField_CType(const upb_MiniTableField* f) { + switch (f->descriptortype) { + case kUpb_FieldType_Double: + return kUpb_CType_Double; + case kUpb_FieldType_Float: + return kUpb_CType_Float; + case kUpb_FieldType_Int64: + case kUpb_FieldType_SInt64: + case kUpb_FieldType_SFixed64: + return kUpb_CType_Int64; + case kUpb_FieldType_Int32: + case kUpb_FieldType_SFixed32: + case kUpb_FieldType_SInt32: + return kUpb_CType_Int32; + case kUpb_FieldType_UInt64: + case kUpb_FieldType_Fixed64: + return kUpb_CType_UInt64; + case kUpb_FieldType_UInt32: + case kUpb_FieldType_Fixed32: + return kUpb_CType_UInt32; + case kUpb_FieldType_Enum: + return kUpb_CType_Enum; + case kUpb_FieldType_Bool: + return kUpb_CType_Bool; + case kUpb_FieldType_String: + return kUpb_CType_String; + case kUpb_FieldType_Bytes: + return kUpb_CType_Bytes; + case kUpb_FieldType_Group: + case kUpb_FieldType_Message: + return kUpb_CType_Message; + } + UPB_UNREACHABLE(); +} diff --git a/upb/mini_table/common.h b/upb/mini_table/common.h index bb632e3bed..0f94cbc2fd 100644 --- a/upb/mini_table/common.h +++ b/upb/mini_table/common.h @@ -58,40 +58,7 @@ UPB_API const upb_MiniTableField* upb_MiniTable_FindFieldByNumber( UPB_API upb_FieldType upb_MiniTableField_Type(const upb_MiniTableField* field); -UPB_API_INLINE upb_CType upb_MiniTableField_CType(const upb_MiniTableField* f) { - switch (f->descriptortype) { - case kUpb_FieldType_Double: - return kUpb_CType_Double; - case kUpb_FieldType_Float: - return kUpb_CType_Float; - case kUpb_FieldType_Int64: - case kUpb_FieldType_SInt64: - case kUpb_FieldType_SFixed64: - return kUpb_CType_Int64; - case kUpb_FieldType_Int32: - case kUpb_FieldType_SFixed32: - case kUpb_FieldType_SInt32: - return kUpb_CType_Int32; - case kUpb_FieldType_UInt64: - case kUpb_FieldType_Fixed64: - return kUpb_CType_UInt64; - case kUpb_FieldType_UInt32: - case kUpb_FieldType_Fixed32: - return kUpb_CType_UInt32; - case kUpb_FieldType_Enum: - return kUpb_CType_Enum; - case kUpb_FieldType_Bool: - return kUpb_CType_Bool; - case kUpb_FieldType_String: - return kUpb_CType_String; - case kUpb_FieldType_Bytes: - return kUpb_CType_Bytes; - case kUpb_FieldType_Group: - case kUpb_FieldType_Message: - return kUpb_CType_Message; - } - UPB_UNREACHABLE(); -} +UPB_API upb_CType upb_MiniTableField_CType(const upb_MiniTableField* field); UPB_API_INLINE bool upb_MiniTableField_IsExtension( const upb_MiniTableField* field) { diff --git a/upbc/protoc-gen-upb.cc b/upbc/protoc-gen-upb.cc index c0d9aa7ff7..6ce24beb8f 100644 --- a/upbc/protoc-gen-upb.cc +++ b/upbc/protoc-gen-upb.cc @@ -227,30 +227,6 @@ std::string CTypeConst(const protobuf::FieldDescriptor* field) { return CTypeInternal(field, true); } -std::string MapKeyCType(const protobuf::FieldDescriptor* map_field) { - return CType(map_field->message_type()->map_key()); -} - -std::string MapValueCType(const protobuf::FieldDescriptor* map_field) { - return CType(map_field->message_type()->map_value()); -} - -std::string MapKeySize(const protobuf::FieldDescriptor* map_field, - absl::string_view expr) { - return map_field->message_type()->map_key()->cpp_type() == - protobuf::FieldDescriptor::CPPTYPE_STRING - ? "0" - : absl::StrCat("sizeof(", expr, ")"); -} - -std::string MapValueSize(const protobuf::FieldDescriptor* map_field, - absl::string_view expr) { - return map_field->message_type()->map_value()->cpp_type() == - protobuf::FieldDescriptor::CPPTYPE_STRING - ? "0" - : absl::StrCat("sizeof(", expr, ")"); -} - std::string FieldInitializer(const FileLayout& layout, const protobuf::FieldDescriptor* field); @@ -460,39 +436,38 @@ void GenerateMapGetters(const protobuf::FieldDescriptor* field, const FileLayout& layout, absl::string_view msg_name, const NameToFieldDescriptorMap& field_names, Output& output) { + const protobuf::Descriptor* entry = field->message_type(); + const protobuf::FieldDescriptor* key = entry->FindFieldByNumber(1); + const protobuf::FieldDescriptor* val = entry->FindFieldByNumber(2); std::string resolved_name = ResolveFieldName(field, field_names); output( R"cc( UPB_INLINE size_t $0_$1_size(const $0* msg) { - const upb_MiniTableField field = $2; - const upb_Map* map = upb_Message_GetMap(msg, &field); - return map ? _upb_Map_Size(map) : 0; + return _upb_msg_map_size(msg, $2); } )cc", - msg_name, resolved_name, FieldInitializer(layout, field)); + msg_name, resolved_name, layout.GetFieldOffset(field)); output( R"cc( UPB_INLINE bool $0_$1_get(const $0* msg, $2 key, $3* val) { - const upb_MiniTableField field = $4; - const upb_Map* map = upb_Message_GetMap(msg, &field); - if (!map) return false; - return _upb_Map_Get(map, &key, $5, val, $6); + return _upb_msg_map_get(msg, $4, &key, $5, val, $6); } )cc", - msg_name, resolved_name, MapKeyCType(field), MapValueCType(field), - FieldInitializer(layout, field), MapKeySize(field, "key"), - MapValueSize(field, "*val")); + msg_name, resolved_name, CType(key), CType(val), + layout.GetFieldOffset(field), + key->cpp_type() == protobuf::FieldDescriptor::CPPTYPE_STRING + ? "0" + : "sizeof(key)", + val->cpp_type() == protobuf::FieldDescriptor::CPPTYPE_STRING + ? "0" + : "sizeof(*val)"); output( R"cc( UPB_INLINE $0 $1_$2_next(const $1* msg, size_t* iter) { - const upb_MiniTableField field = $3; - const upb_Map* map = upb_Message_GetMap(msg, &field); - if (!map) return NULL; - return ($0)_upb_map_next(map, iter); + return ($0)_upb_msg_map_next(msg, $3, iter); } )cc", - CTypeConst(field), msg_name, resolved_name, - FieldInitializer(layout, field)); + CTypeConst(field), msg_name, resolved_name, layout.GetFieldOffset(field)); } void GenerateMapEntryGetters(const protobuf::FieldDescriptor* field, @@ -564,50 +539,46 @@ void GenerateMapSetters(const protobuf::FieldDescriptor* field, const FileLayout& layout, absl::string_view msg_name, const NameToFieldDescriptorMap& field_names, Output& output) { + const protobuf::Descriptor* entry = field->message_type(); + const protobuf::FieldDescriptor* key = entry->FindFieldByNumber(1); + const protobuf::FieldDescriptor* val = entry->FindFieldByNumber(2); std::string resolved_name = ResolveFieldName(field, field_names); output( R"cc( - UPB_INLINE void $0_$1_clear($0* msg) { - const upb_MiniTableField field = $2; - upb_Map* map = (upb_Map*)upb_Message_GetMap(msg, &field); - if (!map) return; - _upb_Map_Clear(map); - } + UPB_INLINE void $0_$1_clear($0* msg) { _upb_msg_map_clear(msg, $2); } )cc", - msg_name, resolved_name, FieldInitializer(layout, field)); + msg_name, resolved_name, layout.GetFieldOffset(field)); output( R"cc( UPB_INLINE bool $0_$1_set($0* msg, $2 key, $3 val, upb_Arena* a) { - const upb_MiniTableField field = $4; - upb_Map* map = _upb_MiniTable_GetOrCreateMutableMap(msg, &field, $5, $6, a); - return _upb_Map_Insert(map, &key, $5, &val, $6, a) != - kUpb_MapInsertStatus_OutOfMemory; + return _upb_msg_map_set(msg, $4, &key, $5, &val, $6, a); } )cc", - msg_name, resolved_name, MapKeyCType(field), MapValueCType(field), - FieldInitializer(layout, field), MapKeySize(field, "key"), - MapValueSize(field, "val")); + msg_name, resolved_name, CType(key), CType(val), + layout.GetFieldOffset(field), + key->cpp_type() == protobuf::FieldDescriptor::CPPTYPE_STRING + ? "0" + : "sizeof(key)", + val->cpp_type() == protobuf::FieldDescriptor::CPPTYPE_STRING + ? "0" + : "sizeof(val)"); output( R"cc( UPB_INLINE bool $0_$1_delete($0* msg, $2 key) { - const upb_MiniTableField field = $3; - upb_Map* map = (upb_Map*)upb_Message_GetMap(msg, &field); - if (!map) return false; - return _upb_Map_Delete(map, &key, $4); + return _upb_msg_map_delete(msg, $3, &key, $4); } )cc", - msg_name, resolved_name, MapKeyCType(field), - FieldInitializer(layout, field), MapKeySize(field, "key")); + msg_name, resolved_name, CType(key), layout.GetFieldOffset(field), + key->cpp_type() == protobuf::FieldDescriptor::CPPTYPE_STRING + ? "0" + : "sizeof(key)"); output( R"cc( UPB_INLINE $0 $1_$2_nextmutable($1* msg, size_t* iter) { - const upb_MiniTableField field = $3; - upb_Map* map = (upb_Map*)upb_Message_GetMap(msg, &field); - if (!map) return NULL; - return ($0)_upb_map_next(map, iter); + return ($0)_upb_msg_map_next(msg, $3, iter); } )cc", - CType(field), msg_name, resolved_name, FieldInitializer(layout, field)); + CType(field), msg_name, resolved_name, layout.GetFieldOffset(field)); } void GenerateRepeatedSetters(const protobuf::FieldDescriptor* field, From 0623093498b2d0abff4fce7df845d7e61e3f68e6 Mon Sep 17 00:00:00 2001 From: Eric Salo Date: Thu, 22 Dec 2022 17:58:26 -0800 Subject: [PATCH 03/21] speed up upb_MiniTable_FindFieldByNumber() PiperOrigin-RevId: 497272353 --- upb/mini_table/common.c | 28 +++++++++++++++++++++++----- 1 file changed, 23 insertions(+), 5 deletions(-) diff --git a/upb/mini_table/common.c b/upb/mini_table/common.c index 2b84af1a7f..258d7aa040 100644 --- a/upb/mini_table/common.c +++ b/upb/mini_table/common.c @@ -54,12 +54,30 @@ const int8_t _kUpb_FromBase92[] = { }; const upb_MiniTableField* upb_MiniTable_FindFieldByNumber( - const upb_MiniTable* table, uint32_t number) { - int n = table->field_count; - for (int i = 0; i < n; i++) { - if (table->fields[i].number == number) { - return &table->fields[i]; + const upb_MiniTable* t, uint32_t number) { + const size_t i = ((size_t)number) - 1; // 0 wraps to SIZE_MAX + + // Ideal case: index into dense fields + if (i < t->dense_below) { + UPB_ASSERT(t->fields[i].number == number); + return &t->fields[i]; + } + + // Slow case: binary search + int lo = t->dense_below; + int hi = t->field_count - 1; + while (lo <= hi) { + int mid = (lo + hi) / 2; + int num = t->fields[mid].number; + if (num < number) { + lo = mid + 1; + continue; + } + if (num > number) { + hi = mid - 1; + continue; } + return &t->fields[mid]; } return NULL; } From 3f173c4b810dd36c1ed24f4ffc82c1b2fb3eb60c Mon Sep 17 00:00:00 2001 From: Eric Salo Date: Thu, 22 Dec 2022 18:26:48 -0800 Subject: [PATCH 04/21] add upb_Map_Delete2() We would like for upb_Map_Delete() to optionally return the deleted value. Unfortunately this will require several steps since we are crossing repos. Step #1: Add a new version of the function and point all local uses at it. PiperOrigin-RevId: 497275930 --- lua/msg.c | 2 +- python/map.c | 2 +- upb/collections/map.c | 8 ++++++-- upb/collections/map.h | 9 ++++++++- upb/collections/map_gencode_util.h | 2 +- upb/collections/map_internal.h | 6 +++--- 6 files changed, 20 insertions(+), 9 deletions(-) diff --git a/lua/msg.c b/lua/msg.c index 010671b597..8a6f5cc04c 100644 --- a/lua/msg.c +++ b/lua/msg.c @@ -579,7 +579,7 @@ static int lupb_Map_Newindex(lua_State* L) { upb_MessageValue key = lupb_tomsgval(L, lmap->key_type, 2, 1, LUPB_REF); if (lua_isnil(L, 3)) { - upb_Map_Delete(map, key); + upb_Map_Delete2(map, key, NULL); } else { upb_MessageValue val = lupb_tomsgval(L, lmap->value_type, 3, 1, LUPB_COPY); upb_Map_Set(map, key, val, lupb_Arenaget(L, 1)); diff --git a/python/map.c b/python/map.c index b3a8fff11f..a058856016 100644 --- a/python/map.c +++ b/python/map.c @@ -182,7 +182,7 @@ int PyUpb_MapContainer_AssignSubscript(PyObject* _self, PyObject* key, if (!PyUpb_PyToUpb(val, val_f, &u_val, arena)) return -1; if (!PyUpb_MapContainer_Set(self, map, u_key, u_val, arena)) return -1; } else { - if (!upb_Map_Delete(map, u_key)) { + if (!upb_Map_Delete2(map, u_key, NULL)) { PyErr_Format(PyExc_KeyError, "Key not present in map"); return -1; } diff --git a/upb/collections/map.c b/upb/collections/map.c index e27e8b8980..3f5eb3b709 100644 --- a/upb/collections/map.c +++ b/upb/collections/map.c @@ -71,8 +71,12 @@ upb_MapInsertStatus upb_Map_Insert(upb_Map* map, upb_MessageValue key, map->val_size, arena); } -bool upb_Map_Delete(upb_Map* map, upb_MessageValue key) { - return _upb_Map_Delete(map, &key, map->key_size); +bool upb_Map_Delete2(upb_Map* map, upb_MessageValue key, + upb_MessageValue* val) { + upb_value v; + const bool ok = _upb_Map_Delete(map, &key, map->key_size, &v); + if (val) val->uint64_val = v.val; + return ok; } bool upb_Map_Next(const upb_Map* map, upb_MessageValue* key, diff --git a/upb/collections/map.h b/upb/collections/map.h index ae2cf05d23..4e398dc554 100644 --- a/upb/collections/map.h +++ b/upb/collections/map.h @@ -76,7 +76,14 @@ UPB_INLINE bool upb_Map_Set(upb_Map* map, upb_MessageValue key, } // Deletes this key from the table. Returns true if the key was present. -bool upb_Map_Delete(upb_Map* map, upb_MessageValue key); +// If present and |val| is non-NULL, stores the deleted value. +bool upb_Map_Delete2(upb_Map* map, upb_MessageValue key, upb_MessageValue* val); + +// Deletes this key from the table. Returns true if the key was present. +// (DEPRECATED and going away soon. Do not use.) +UPB_INLINE bool upb_Map_Delete(upb_Map* map, upb_MessageValue key) { + return upb_Map_Delete2(map, key, NULL); +} // Map iteration: // diff --git a/upb/collections/map_gencode_util.h b/upb/collections/map_gencode_util.h index 59764c96b2..ca764c7557 100644 --- a/upb/collections/map_gencode_util.h +++ b/upb/collections/map_gencode_util.h @@ -76,7 +76,7 @@ UPB_INLINE bool _upb_msg_map_delete(upb_Message* msg, size_t ofs, const void* key, size_t key_size) { upb_Map* map = *UPB_PTR_AT(msg, ofs, upb_Map*); if (!map) return false; - return _upb_Map_Delete(map, key, key_size); + return _upb_Map_Delete(map, key, key_size, NULL); } UPB_INLINE void _upb_msg_map_clear(upb_Message* msg, size_t ofs) { diff --git a/upb/collections/map_internal.h b/upb/collections/map_internal.h index f235cb3ecd..d7ca1fc48f 100644 --- a/upb/collections/map_internal.h +++ b/upb/collections/map_internal.h @@ -111,10 +111,10 @@ UPB_INLINE void _upb_Map_Clear(upb_Map* map) { upb_strtable_clear(&map->table); } -UPB_INLINE bool _upb_Map_Delete(upb_Map* map, const void* key, - size_t key_size) { +UPB_INLINE bool _upb_Map_Delete(upb_Map* map, const void* key, size_t key_size, + upb_value* val) { upb_StringView k = _upb_map_tokey(key, key_size); - return upb_strtable_remove2(&map->table, k.data, k.size, NULL); + return upb_strtable_remove2(&map->table, k.data, k.size, val); } UPB_INLINE bool _upb_Map_Get(const upb_Map* map, const void* key, From 0f938ec4df5290ad8bd3d42a4b22a7fef9788396 Mon Sep 17 00:00:00 2001 From: Joshua Haberman Date: Thu, 22 Dec 2022 19:11:07 -0800 Subject: [PATCH 05/21] Unified accessor API for map getters and setters. This is part of the ongoing effort to remove any hard-coding of layout offsets into the generated code (except via `upb_MiniTableField` values). PiperOrigin-RevId: 497281306 --- protos_generator/tests/test_generated.cc | 2 + upb/collections/map.c | 31 ++++--- upb/collections/map_gencode_util.h | 44 ---------- upb/collections/map_internal.h | 7 ++ upb/message/accessors.h | 60 +++++++++---- upb/mini_table/common.c | 35 -------- upb/mini_table/common.h | 35 +++++++- upbc/protoc-gen-upb.cc | 103 +++++++++++++++-------- 8 files changed, 166 insertions(+), 151 deletions(-) diff --git a/protos_generator/tests/test_generated.cc b/protos_generator/tests/test_generated.cc index 558476c60a..416d9f466c 100644 --- a/protos_generator/tests/test_generated.cc +++ b/protos_generator/tests/test_generated.cc @@ -425,7 +425,9 @@ TEST(CppGeneratedCode, MessageMapStringKeyAndInt32Value) { test_model.clear_str_to_int_map(); EXPECT_EQ(0, test_model.str_to_int_map_size()); test_model.set_str_to_int_map("first", 10); + EXPECT_EQ(1, test_model.str_to_int_map_size()); test_model.set_str_to_int_map("second", 20); + EXPECT_EQ(2, test_model.str_to_int_map_size()); auto result = test_model.get_str_to_int_map("second"); EXPECT_EQ(true, result.ok()); EXPECT_EQ(20, result.value()); diff --git a/upb/collections/map.c b/upb/collections/map.c index 3f5eb3b709..5724351c86 100644 --- a/upb/collections/map.c +++ b/upb/collections/map.c @@ -35,25 +35,24 @@ // Must be last. #include "upb/port/def.inc" -/* Strings/bytes are special-cased in maps. */ -static char _upb_CTypeo_mapsize[12] = { - 0, - 1, /* kUpb_CType_Bool */ - 4, /* kUpb_CType_Float */ - 4, /* kUpb_CType_Int32 */ - 4, /* kUpb_CType_UInt32 */ - 4, /* kUpb_CType_Enum */ - sizeof(void*), /* kUpb_CType_Message */ - 8, /* kUpb_CType_Double */ - 8, /* kUpb_CType_Int64 */ - 8, /* kUpb_CType_UInt64 */ - 0, /* kUpb_CType_String */ - 0, /* kUpb_CType_Bytes */ +// Strings/bytes are special-cased in maps. +char _upb_Map_CTypeSizeTable[12] = { + [kUpb_CType_Bool] = 1, + [kUpb_CType_Float] = 4, + [kUpb_CType_Int32] = 4, + [kUpb_CType_UInt32] = 4, + [kUpb_CType_Enum] = 4, + [kUpb_CType_Message] = sizeof(void*), + [kUpb_CType_Double] = 8, + [kUpb_CType_Int64] = 8, + [kUpb_CType_UInt64] = 8, + [kUpb_CType_String] = UPB_MAPTYPE_STRING, + [kUpb_CType_Bytes] = UPB_MAPTYPE_STRING, }; upb_Map* upb_Map_New(upb_Arena* a, upb_CType key_type, upb_CType value_type) { - return _upb_Map_New(a, _upb_CTypeo_mapsize[key_type], - _upb_CTypeo_mapsize[value_type]); + return _upb_Map_New(a, _upb_Map_CTypeSize(key_type), + _upb_Map_CTypeSize(value_type)); } size_t upb_Map_Size(const upb_Map* map) { return _upb_Map_Size(map); } diff --git a/upb/collections/map_gencode_util.h b/upb/collections/map_gencode_util.h index ca764c7557..794d76b9f4 100644 --- a/upb/collections/map_gencode_util.h +++ b/upb/collections/map_gencode_util.h @@ -41,50 +41,6 @@ extern "C" { // Message map operations, these get the map from the message first. -UPB_INLINE size_t _upb_msg_map_size(const upb_Message* msg, size_t ofs) { - upb_Map* map = *UPB_PTR_AT(msg, ofs, upb_Map*); - return map ? _upb_Map_Size(map) : 0; -} - -UPB_INLINE bool _upb_msg_map_get(const upb_Message* msg, size_t ofs, - const void* key, size_t key_size, void* val, - size_t val_size) { - upb_Map* map = *UPB_PTR_AT(msg, ofs, upb_Map*); - if (!map) return false; - return _upb_Map_Get(map, key, key_size, val, val_size); -} - -UPB_INLINE void* _upb_msg_map_next(const upb_Message* msg, size_t ofs, - size_t* iter) { - upb_Map* map = *UPB_PTR_AT(msg, ofs, upb_Map*); - if (!map) return NULL; - return _upb_map_next(map, iter); -} - -UPB_INLINE bool _upb_msg_map_set(upb_Message* msg, size_t ofs, const void* key, - size_t key_size, void* val, size_t val_size, - upb_Arena* arena) { - upb_Map** map = UPB_PTR_AT(msg, ofs, upb_Map*); - if (!*map) { - *map = _upb_Map_New(arena, key_size, val_size); - } - return _upb_Map_Insert(*map, key, key_size, val, val_size, arena) != - kUpb_MapInsertStatus_OutOfMemory; -} - -UPB_INLINE bool _upb_msg_map_delete(upb_Message* msg, size_t ofs, - const void* key, size_t key_size) { - upb_Map* map = *UPB_PTR_AT(msg, ofs, upb_Map*); - if (!map) return false; - return _upb_Map_Delete(map, key, key_size, NULL); -} - -UPB_INLINE void _upb_msg_map_clear(upb_Message* msg, size_t ofs) { - upb_Map* map = *UPB_PTR_AT(msg, ofs, upb_Map*); - if (!map) return; - _upb_Map_Clear(map); -} - UPB_INLINE void _upb_msg_map_key(const void* msg, void* key, size_t size) { const upb_tabent* ent = (const upb_tabent*)msg; uint32_t u32len; diff --git a/upb/collections/map_internal.h b/upb/collections/map_internal.h index d7ca1fc48f..ceadf83359 100644 --- a/upb/collections/map_internal.h +++ b/upb/collections/map_internal.h @@ -151,6 +151,13 @@ UPB_INLINE size_t _upb_Map_Size(const upb_Map* map) { return map->table.t.count; } +// Strings/bytes are special-cased in maps. +extern char _upb_Map_CTypeSizeTable[12]; + +UPB_INLINE size_t _upb_Map_CTypeSize(upb_CType ctype) { + return _upb_Map_CTypeSizeTable[ctype]; +} + // Creates a new map on the given arena with this key/value type. upb_Map* _upb_Map_New(upb_Arena* a, size_t key_size, size_t value_size); diff --git a/upb/message/accessors.h b/upb/message/accessors.h index 697b5ed777..c40b9ddca8 100644 --- a/upb/message/accessors.h +++ b/upb/message/accessors.h @@ -71,6 +71,9 @@ UPB_INLINE void _upb_Message_SetPresence(upb_Message* msg, UPB_INLINE bool _upb_MiniTable_ValueIsNonZero(const void* default_val, const upb_MiniTableField* field) { char zero[16] = {0}; + if (upb_IsRepeatedOrMap(field)) { + return memcmp(&zero, default_val, sizeof(void*)); + } switch (_upb_MiniTableField_GetRep(field)) { case kUpb_FieldRep_1Byte: return memcmp(&zero, default_val, 1) != 0; @@ -88,6 +91,10 @@ UPB_INLINE bool _upb_MiniTable_ValueIsNonZero(const void* default_val, UPB_INLINE void _upb_MiniTable_CopyFieldData(void* to, const void* from, const upb_MiniTableField* field) { + if (upb_IsRepeatedOrMap(field)) { + memcpy(to, from, sizeof(void*)); + return; + } switch (_upb_MiniTableField_GetRep(field)) { case kUpb_FieldRep_1Byte: memcpy(to, from, 1); @@ -251,6 +258,20 @@ UPB_INLINE void _upb_Message_ClearNonExtensionField( field); } +UPB_INLINE upb_Map* _upb_MiniTable_GetOrCreateMutableMap( + upb_Message* msg, const upb_MiniTableField* field, size_t key_size, + size_t val_size, upb_Arena* arena) { + UPB_ASSUME(upb_IsRepeatedOrMap(field)); + upb_Map* map = NULL; + upb_Map* default_map_value = NULL; + _upb_Message_GetNonExtensionField(msg, field, &default_map_value, &map); + if (!map) { + map = _upb_Map_New(arena, key_size, val_size); + _upb_Message_SetNonExtensionField(msg, field, &map); + } + return map; +} + // EVERYTHING ABOVE THIS LINE IS INTERNAL - DO NOT USE ///////////////////////// UPB_API_INLINE void upb_Message_ClearField(upb_Message* msg, @@ -540,27 +561,30 @@ UPB_API_INLINE bool upb_MiniTableField_IsClosedEnum( return field->descriptortype == kUpb_FieldType_Enum; } +UPB_API_INLINE const upb_Map* upb_Message_GetMap( + const upb_Message* msg, const upb_MiniTableField* field) { + UPB_ASSUME(upb_FieldMode_Get(field) == kUpb_FieldMode_Map); + const upb_Map* ret; + const upb_Map* default_val = NULL; + _upb_Message_GetNonExtensionField(msg, field, &default_val, &ret); + return ret; +} + +// TODO: rename to GetOrCreateMutableMap UPB_API_INLINE upb_Map* upb_MiniTable_GetMutableMap( upb_Message* msg, const upb_MiniTable* map_entry_mini_table, const upb_MiniTableField* field, upb_Arena* arena) { - UPB_ASSERT(map_entry_mini_table != NULL); - UPB_ASSUME(upb_IsRepeatedOrMap(field)); - upb_Map* map = NULL; - upb_Map* default_map_value = NULL; - _upb_Message_GetNonExtensionField(msg, field, &default_map_value, &map); - if (!map) { - // Allocate map. - UPB_ASSERT(field->descriptortype == kUpb_FieldType_Message || - field->descriptortype == kUpb_FieldType_Group); - const upb_MiniTableField* map_entry_key_field = - &map_entry_mini_table->fields[0]; - const upb_MiniTableField* map_entry_value_field = - &map_entry_mini_table->fields[1]; - map = upb_Map_New(arena, upb_MiniTableField_CType(map_entry_key_field), - upb_MiniTableField_CType(map_entry_value_field)); - _upb_Message_SetNonExtensionField(msg, field, &map); - } - return map; + UPB_ASSERT(field->descriptortype == kUpb_FieldType_Message || + field->descriptortype == kUpb_FieldType_Group); + const upb_MiniTableField* map_entry_key_field = + &map_entry_mini_table->fields[0]; + const upb_MiniTableField* map_entry_value_field = + &map_entry_mini_table->fields[1]; + return _upb_MiniTable_GetOrCreateMutableMap( + msg, field, + _upb_Map_CTypeSize(upb_MiniTableField_CType(map_entry_key_field)), + _upb_Map_CTypeSize(upb_MiniTableField_CType(map_entry_value_field)), + arena); } // Updates a map entry given an entry message. diff --git a/upb/mini_table/common.c b/upb/mini_table/common.c index 258d7aa040..19e9006b01 100644 --- a/upb/mini_table/common.c +++ b/upb/mini_table/common.c @@ -94,38 +94,3 @@ upb_FieldType upb_MiniTableField_Type(const upb_MiniTableField* field) { } return field->descriptortype; } - -upb_CType upb_MiniTableField_CType(const upb_MiniTableField* f) { - switch (f->descriptortype) { - case kUpb_FieldType_Double: - return kUpb_CType_Double; - case kUpb_FieldType_Float: - return kUpb_CType_Float; - case kUpb_FieldType_Int64: - case kUpb_FieldType_SInt64: - case kUpb_FieldType_SFixed64: - return kUpb_CType_Int64; - case kUpb_FieldType_Int32: - case kUpb_FieldType_SFixed32: - case kUpb_FieldType_SInt32: - return kUpb_CType_Int32; - case kUpb_FieldType_UInt64: - case kUpb_FieldType_Fixed64: - return kUpb_CType_UInt64; - case kUpb_FieldType_UInt32: - case kUpb_FieldType_Fixed32: - return kUpb_CType_UInt32; - case kUpb_FieldType_Enum: - return kUpb_CType_Enum; - case kUpb_FieldType_Bool: - return kUpb_CType_Bool; - case kUpb_FieldType_String: - return kUpb_CType_String; - case kUpb_FieldType_Bytes: - return kUpb_CType_Bytes; - case kUpb_FieldType_Group: - case kUpb_FieldType_Message: - return kUpb_CType_Message; - } - UPB_UNREACHABLE(); -} diff --git a/upb/mini_table/common.h b/upb/mini_table/common.h index 0f94cbc2fd..bb632e3bed 100644 --- a/upb/mini_table/common.h +++ b/upb/mini_table/common.h @@ -58,7 +58,40 @@ UPB_API const upb_MiniTableField* upb_MiniTable_FindFieldByNumber( UPB_API upb_FieldType upb_MiniTableField_Type(const upb_MiniTableField* field); -UPB_API upb_CType upb_MiniTableField_CType(const upb_MiniTableField* field); +UPB_API_INLINE upb_CType upb_MiniTableField_CType(const upb_MiniTableField* f) { + switch (f->descriptortype) { + case kUpb_FieldType_Double: + return kUpb_CType_Double; + case kUpb_FieldType_Float: + return kUpb_CType_Float; + case kUpb_FieldType_Int64: + case kUpb_FieldType_SInt64: + case kUpb_FieldType_SFixed64: + return kUpb_CType_Int64; + case kUpb_FieldType_Int32: + case kUpb_FieldType_SFixed32: + case kUpb_FieldType_SInt32: + return kUpb_CType_Int32; + case kUpb_FieldType_UInt64: + case kUpb_FieldType_Fixed64: + return kUpb_CType_UInt64; + case kUpb_FieldType_UInt32: + case kUpb_FieldType_Fixed32: + return kUpb_CType_UInt32; + case kUpb_FieldType_Enum: + return kUpb_CType_Enum; + case kUpb_FieldType_Bool: + return kUpb_CType_Bool; + case kUpb_FieldType_String: + return kUpb_CType_String; + case kUpb_FieldType_Bytes: + return kUpb_CType_Bytes; + case kUpb_FieldType_Group: + case kUpb_FieldType_Message: + return kUpb_CType_Message; + } + UPB_UNREACHABLE(); +} UPB_API_INLINE bool upb_MiniTableField_IsExtension( const upb_MiniTableField* field) { diff --git a/upbc/protoc-gen-upb.cc b/upbc/protoc-gen-upb.cc index 6ce24beb8f..71d34963e0 100644 --- a/upbc/protoc-gen-upb.cc +++ b/upbc/protoc-gen-upb.cc @@ -227,6 +227,30 @@ std::string CTypeConst(const protobuf::FieldDescriptor* field) { return CTypeInternal(field, true); } +std::string MapKeyCType(const protobuf::FieldDescriptor* map_field) { + return CType(map_field->message_type()->map_key()); +} + +std::string MapValueCType(const protobuf::FieldDescriptor* map_field) { + return CType(map_field->message_type()->map_value()); +} + +std::string MapKeySize(const protobuf::FieldDescriptor* map_field, + absl::string_view expr) { + return map_field->message_type()->map_key()->cpp_type() == + protobuf::FieldDescriptor::CPPTYPE_STRING + ? "0" + : absl::StrCat("sizeof(", expr, ")"); +} + +std::string MapValueSize(const protobuf::FieldDescriptor* map_field, + absl::string_view expr) { + return map_field->message_type()->map_value()->cpp_type() == + protobuf::FieldDescriptor::CPPTYPE_STRING + ? "0" + : absl::StrCat("sizeof(", expr, ")"); +} + std::string FieldInitializer(const FileLayout& layout, const protobuf::FieldDescriptor* field); @@ -436,38 +460,39 @@ void GenerateMapGetters(const protobuf::FieldDescriptor* field, const FileLayout& layout, absl::string_view msg_name, const NameToFieldDescriptorMap& field_names, Output& output) { - const protobuf::Descriptor* entry = field->message_type(); - const protobuf::FieldDescriptor* key = entry->FindFieldByNumber(1); - const protobuf::FieldDescriptor* val = entry->FindFieldByNumber(2); std::string resolved_name = ResolveFieldName(field, field_names); output( R"cc( UPB_INLINE size_t $0_$1_size(const $0* msg) { - return _upb_msg_map_size(msg, $2); + const upb_MiniTableField field = $2; + const upb_Map* map = upb_Message_GetMap(msg, &field); + return map ? _upb_Map_Size(map) : 0; } )cc", - msg_name, resolved_name, layout.GetFieldOffset(field)); + msg_name, resolved_name, FieldInitializer(layout, field)); output( R"cc( UPB_INLINE bool $0_$1_get(const $0* msg, $2 key, $3* val) { - return _upb_msg_map_get(msg, $4, &key, $5, val, $6); + const upb_MiniTableField field = $4; + const upb_Map* map = upb_Message_GetMap(msg, &field); + if (!map) return false; + return _upb_Map_Get(map, &key, $5, val, $6); } )cc", - msg_name, resolved_name, CType(key), CType(val), - layout.GetFieldOffset(field), - key->cpp_type() == protobuf::FieldDescriptor::CPPTYPE_STRING - ? "0" - : "sizeof(key)", - val->cpp_type() == protobuf::FieldDescriptor::CPPTYPE_STRING - ? "0" - : "sizeof(*val)"); + msg_name, resolved_name, MapKeyCType(field), MapValueCType(field), + FieldInitializer(layout, field), MapKeySize(field, "key"), + MapValueSize(field, "*val")); output( R"cc( UPB_INLINE $0 $1_$2_next(const $1* msg, size_t* iter) { - return ($0)_upb_msg_map_next(msg, $3, iter); + const upb_MiniTableField field = $3; + const upb_Map* map = upb_Message_GetMap(msg, &field); + if (!map) return NULL; + return ($0)_upb_map_next(map, iter); } )cc", - CTypeConst(field), msg_name, resolved_name, layout.GetFieldOffset(field)); + CTypeConst(field), msg_name, resolved_name, + FieldInitializer(layout, field)); } void GenerateMapEntryGetters(const protobuf::FieldDescriptor* field, @@ -539,46 +564,50 @@ void GenerateMapSetters(const protobuf::FieldDescriptor* field, const FileLayout& layout, absl::string_view msg_name, const NameToFieldDescriptorMap& field_names, Output& output) { - const protobuf::Descriptor* entry = field->message_type(); - const protobuf::FieldDescriptor* key = entry->FindFieldByNumber(1); - const protobuf::FieldDescriptor* val = entry->FindFieldByNumber(2); std::string resolved_name = ResolveFieldName(field, field_names); output( R"cc( - UPB_INLINE void $0_$1_clear($0* msg) { _upb_msg_map_clear(msg, $2); } + UPB_INLINE void $0_$1_clear($0* msg) { + const upb_MiniTableField field = $2; + upb_Map* map = (upb_Map*)upb_Message_GetMap(msg, &field); + if (!map) return; + _upb_Map_Clear(map); + } )cc", - msg_name, resolved_name, layout.GetFieldOffset(field)); + msg_name, resolved_name, FieldInitializer(layout, field)); output( R"cc( UPB_INLINE bool $0_$1_set($0* msg, $2 key, $3 val, upb_Arena* a) { - return _upb_msg_map_set(msg, $4, &key, $5, &val, $6, a); + const upb_MiniTableField field = $4; + upb_Map* map = _upb_MiniTable_GetOrCreateMutableMap(msg, &field, $5, $6, a); + return _upb_Map_Insert(map, &key, $5, &val, $6, a) != + kUpb_MapInsertStatus_OutOfMemory; } )cc", - msg_name, resolved_name, CType(key), CType(val), - layout.GetFieldOffset(field), - key->cpp_type() == protobuf::FieldDescriptor::CPPTYPE_STRING - ? "0" - : "sizeof(key)", - val->cpp_type() == protobuf::FieldDescriptor::CPPTYPE_STRING - ? "0" - : "sizeof(val)"); + msg_name, resolved_name, MapKeyCType(field), MapValueCType(field), + FieldInitializer(layout, field), MapKeySize(field, "key"), + MapValueSize(field, "val")); output( R"cc( UPB_INLINE bool $0_$1_delete($0* msg, $2 key) { - return _upb_msg_map_delete(msg, $3, &key, $4); + const upb_MiniTableField field = $3; + upb_Map* map = (upb_Map*)upb_Message_GetMap(msg, &field); + if (!map) return false; + return _upb_Map_Delete(map, &key, $4, NULL); } )cc", - msg_name, resolved_name, CType(key), layout.GetFieldOffset(field), - key->cpp_type() == protobuf::FieldDescriptor::CPPTYPE_STRING - ? "0" - : "sizeof(key)"); + msg_name, resolved_name, MapKeyCType(field), + FieldInitializer(layout, field), MapKeySize(field, "key")); output( R"cc( UPB_INLINE $0 $1_$2_nextmutable($1* msg, size_t* iter) { - return ($0)_upb_msg_map_next(msg, $3, iter); + const upb_MiniTableField field = $3; + upb_Map* map = (upb_Map*)upb_Message_GetMap(msg, &field); + if (!map) return NULL; + return ($0)_upb_map_next(map, iter); } )cc", - CType(field), msg_name, resolved_name, layout.GetFieldOffset(field)); + CType(field), msg_name, resolved_name, FieldInitializer(layout, field)); } void GenerateRepeatedSetters(const protobuf::FieldDescriptor* field, From 00623c3cde8e7d492b68ec9ea21282956c42428d Mon Sep 17 00:00:00 2001 From: Eric Salo Date: Sat, 24 Dec 2022 23:35:27 -0800 Subject: [PATCH 06/21] delete obsolete array wrapper funcs from upbc_so PiperOrigin-RevId: 497607419 --- upbc/upbc_so.c | 58 +------------------------------------------------- 1 file changed, 1 insertion(+), 57 deletions(-) diff --git a/upbc/upbc_so.c b/upbc/upbc_so.c index 16df62e7e1..6e498865be 100644 --- a/upbc/upbc_so.c +++ b/upbc/upbc_so.c @@ -32,6 +32,7 @@ #endif #include "upb/collections/array.h" +#include "upb/collections/map.h" #include "upb/message/accessors.h" #include "upb/message/message.h" #include "upb/mini_table/decode.h" @@ -39,60 +40,3 @@ // Must be last. #include "upb/port/def.inc" - -UPB_API bool upb_Array_AppendBool(upb_Array* array, bool val, - upb_Arena* arena) { - const upb_MessageValue mv = {.bool_val = val}; - return upb_Array_Append(array, mv, arena); -} - -UPB_API bool upb_Array_AppendDouble(upb_Array* array, double val, - upb_Arena* arena) { - const upb_MessageValue mv = {.double_val = val}; - return upb_Array_Append(array, mv, arena); -} - -UPB_API bool upb_Array_AppendFloat(upb_Array* array, float val, - upb_Arena* arena) { - const upb_MessageValue mv = {.float_val = val}; - return upb_Array_Append(array, mv, arena); -} - -UPB_API bool upb_Array_AppendInt32(upb_Array* array, int32_t val, - upb_Arena* arena) { - const upb_MessageValue mv = {.int32_val = val}; - return upb_Array_Append(array, mv, arena); -} - -UPB_API bool upb_Array_AppendUInt32(upb_Array* array, uint32_t val, - upb_Arena* arena) { - const upb_MessageValue mv = {.uint32_val = val}; - return upb_Array_Append(array, mv, arena); -} - -//////////////////////////////////////////////////////////////////////////////// - -UPB_API void upb_Array_SetBool(upb_Array* array, size_t i, bool val) { - const upb_MessageValue mv = {.bool_val = val}; - upb_Array_Set(array, i, mv); -} - -UPB_API void upb_Array_SetDouble(upb_Array* array, size_t i, double val) { - const upb_MessageValue mv = {.double_val = val}; - upb_Array_Set(array, i, mv); -} - -UPB_API void upb_Array_SetFloat(upb_Array* array, size_t i, float val) { - const upb_MessageValue mv = {.float_val = val}; - upb_Array_Set(array, i, mv); -} - -UPB_API void upb_Array_SetInt32(upb_Array* array, size_t i, int32_t val) { - const upb_MessageValue mv = {.int32_val = val}; - upb_Array_Set(array, i, mv); -} - -UPB_API void upb_Array_SetUInt32(upb_Array* array, size_t i, uint32_t val) { - const upb_MessageValue mv = {.uint32_val = val}; - upb_Array_Set(array, i, mv); -} From 48ad4bbd0f0a795723ff9713c1faea2460920c54 Mon Sep 17 00:00:00 2001 From: Joshua Haberman Date: Mon, 26 Dec 2022 17:32:44 -0800 Subject: [PATCH 07/21] Unified accessor API for repeated field getters and setters. This CL eliminates the last remaining callers of GetFieldOffset(), therefore opening the door to a more principled bootstrapping process. PiperOrigin-RevId: 497864910 --- .github/workflows/bazel_tests.yml | 6 +- protos/protos_internal_test.cc | 2 +- upb/collections/array.c | 59 ++++----------- upb/collections/array_internal.h | 122 ++++++------------------------ upb/message/accessors.c | 32 -------- upb/message/accessors.h | 94 +++++++++++++++++++---- upb/mini_table/field_internal.h | 16 ++++ upbc/file_layout.h | 17 ----- upbc/protoc-gen-upb.cc | 87 ++++++++++----------- 9 files changed, 181 insertions(+), 254 deletions(-) diff --git a/.github/workflows/bazel_tests.yml b/.github/workflows/bazel_tests.yml index efe6fe5e69..b0affa1859 100644 --- a/.github/workflows/bazel_tests.yml +++ b/.github/workflows/bazel_tests.yml @@ -24,7 +24,7 @@ jobs: include: - { NAME: "Fastbuild", BAZEL: bazel, CC: clang, os: ubuntu-20.04, flags: "" } - { NAME: "Optmized", BAZEL: bazel, CC: clang, os: ubuntu-20.04, flags: "-c opt" } # Some warnings only fire with -c opt - - { NAME: "GCC Optimized", BAZEL: bazel, CC: gcc, os: ubuntu-20.04, flags: "-c opt" } + - { NAME: "GCC Optimized", BAZEL: bazel, CC: gcc-12, os: ubuntu-22.04, flags: "-c opt" } - { NAME: "FastTable", BAZEL: bazel, CC: clang, os: ubuntu-20.04, flags: "--//:fasttable_enabled=true -- -cmake:test_generated_files" } - { NAME: "ASAN", BAZEL: bazel, CC: clang, os: ubuntu-20.04, flags: "--config=asan -c dbg -- -benchmarks:benchmark -python/..." } - { NAME: "UBSAN", BAZEL: bazel, CC: clang, os: ubuntu-20.04, flags: "--config=ubsan -c dbg -- -benchmarks:benchmark -python/... -lua/...", install: "libunwind-dev" } @@ -53,6 +53,10 @@ jobs: wget -O $FILENAME https://github.com/bazelbuild/bazel/releases/download/$VERSION/${{ matrix.BAZEL }} chmod a+x $FILENAME if: ${{ matrix.BAZEL != 'bazel' }} + - name: Check tool versions + run: | + ${{ matrix.CC }} --version + ${{ matrix.BAZEL }} --version - name: Set up Bazel read/write caching run: echo "BAZEL_CACHE_AUTH=--google_default_credentials" >> $GITHUB_ENV if: ${{ github.event.pull_request.head.repo.full_name == 'protocolbuffers/upb' }} diff --git a/protos/protos_internal_test.cc b/protos/protos_internal_test.cc index 835601c549..99bfaa2402 100644 --- a/protos/protos_internal_test.cc +++ b/protos/protos_internal_test.cc @@ -40,9 +40,9 @@ TEST(CppGeneratedCode, InternalMoveMessage) { upb_Arena* source_arena = upb_Arena_New(); protos_generator_test_TestModel* message = protos_generator_test_TestModel_new(source_arena); + ASSERT_NE(message, nullptr); protos_generator_test_TestModel_set_int_value_with_default(message, 123); - EXPECT_NE(message, nullptr); // Move ownership. TestModel model = protos::internal::MoveMessage(message, source_arena); diff --git a/upb/collections/array.c b/upb/collections/array.c index a957f26ab8..fdb1bd206c 100644 --- a/upb/collections/array.c +++ b/upb/collections/array.c @@ -32,23 +32,22 @@ // Must be last. #include "upb/port/def.inc" -static const char _upb_CTypeo_sizelg2[12] = { - 0, - 0, /* kUpb_CType_Bool */ - 2, /* kUpb_CType_Float */ - 2, /* kUpb_CType_Int32 */ - 2, /* kUpb_CType_UInt32 */ - 2, /* kUpb_CType_Enum */ - UPB_SIZE(2, 3), /* kUpb_CType_Message */ - 3, /* kUpb_CType_Double */ - 3, /* kUpb_CType_Int64 */ - 3, /* kUpb_CType_UInt64 */ - UPB_SIZE(3, 4), /* kUpb_CType_String */ - UPB_SIZE(3, 4), /* kUpb_CType_Bytes */ +const char _upb_Array_CTypeSizeLg2Table[] = { + [kUpb_CType_Bool] = 0, + [kUpb_CType_Float] = 2, + [kUpb_CType_Int32] = 2, + [kUpb_CType_UInt32] = 2, + [kUpb_CType_Enum] = 2, + [kUpb_CType_Message] = UPB_SIZE(2, 3), + [kUpb_CType_Double] = 3, + [kUpb_CType_Int64] = 3, + [kUpb_CType_UInt64] = 3, + [kUpb_CType_String] = UPB_SIZE(3, 4), + [kUpb_CType_Bytes] = UPB_SIZE(3, 4), }; upb_Array* upb_Array_New(upb_Arena* a, upb_CType type) { - return _upb_Array_New(a, 4, _upb_CTypeo_sizelg2[type]); + return _upb_Array_New(a, 4, _upb_Array_CTypeSizeLg2(type)); } size_t upb_Array_Size(const upb_Array* arr) { return arr->size; } @@ -142,35 +141,3 @@ bool _upb_array_realloc(upb_Array* arr, size_t min_capacity, upb_Arena* arena) { arr->capacity = new_capacity; return true; } - -static upb_Array* getorcreate_array(upb_Array** arr_ptr, int elem_size_lg2, - upb_Arena* arena) { - upb_Array* arr = *arr_ptr; - if (!arr) { - arr = _upb_Array_New(arena, 4, elem_size_lg2); - if (!arr) return NULL; - *arr_ptr = arr; - } - return arr; -} - -void* _upb_Array_Resize_fallback(upb_Array** arr_ptr, size_t size, - int elem_size_lg2, upb_Arena* arena) { - upb_Array* arr = getorcreate_array(arr_ptr, elem_size_lg2, arena); - return arr && _upb_Array_ResizeUninitialized(arr, size, arena) - ? _upb_array_ptr(arr) - : NULL; -} - -bool _upb_Array_Append_fallback(upb_Array** arr_ptr, const void* value, - int elem_size_lg2, upb_Arena* arena) { - upb_Array* arr = getorcreate_array(arr_ptr, elem_size_lg2, arena); - if (!arr) return false; - - size_t elems = arr->size; - if (!_upb_Array_ResizeUninitialized(arr, elems + 1, arena)) return false; - - char* data = _upb_array_ptr(arr); - memcpy(data + (elems << elem_size_lg2), value, 1 << elem_size_lg2); - return true; -} diff --git a/upb/collections/array_internal.h b/upb/collections/array_internal.h index e1a4cefde2..0c126eb642 100644 --- a/upb/collections/array_internal.h +++ b/upb/collections/array_internal.h @@ -48,8 +48,14 @@ struct upb_Array { }; // LINT.ThenChange(GoogleInternalName1) +UPB_INLINE size_t _upb_Array_ElementSizeLg2(const upb_Array* arr) { + size_t ret = arr->data & 7; + UPB_ASSERT(ret <= 4); + return ret; +} + UPB_INLINE const void* _upb_array_constptr(const upb_Array* arr) { - UPB_ASSERT((arr->data & 7) <= 4); + _upb_Array_ElementSizeLg2(arr); // Check assertion. return (void*)(arr->data & ~(uintptr_t)7); } @@ -68,8 +74,15 @@ UPB_INLINE uintptr_t _upb_tag_arrptr(void* ptr, int elem_size_lg2) { return (uintptr_t)ptr | (unsigned)elem_size_lg2; } +extern const char _upb_Array_CTypeSizeLg2Table[]; + +UPB_INLINE size_t _upb_Array_CTypeSizeLg2(upb_CType ctype) { + return _upb_Array_CTypeSizeLg2Table[ctype]; +} + UPB_INLINE upb_Array* _upb_Array_New(upb_Arena* a, size_t init_capacity, int elem_size_lg2) { + UPB_ASSERT(elem_size_lg2 <= 4); const size_t arr_size = UPB_ALIGN_UP(sizeof(upb_Array), UPB_MALLOC_ALIGN); const size_t bytes = arr_size + (init_capacity << elem_size_lg2); upb_Array* arr = (upb_Array*)upb_Arena_Malloc(a, bytes); @@ -83,12 +96,6 @@ UPB_INLINE upb_Array* _upb_Array_New(upb_Arena* a, size_t init_capacity, // Resizes the capacity of the array to be at least min_size. bool _upb_array_realloc(upb_Array* arr, size_t min_size, upb_Arena* arena); -// Fallback functions for when the accessors require a resize. -void* _upb_Array_Resize_fallback(upb_Array** arr_ptr, size_t size, - int elem_size_lg2, upb_Arena* arena); -bool _upb_Array_Append_fallback(upb_Array** arr_ptr, const void* value, - int elem_size_lg2, upb_Arena* arena); - UPB_INLINE bool _upb_array_reserve(upb_Array* arr, size_t size, upb_Arena* arena) { if (arr->capacity < size) return _upb_array_realloc(arr, size, arena); @@ -103,98 +110,19 @@ UPB_INLINE bool _upb_Array_ResizeUninitialized(upb_Array* arr, size_t size, return true; } -UPB_INLINE void _upb_array_detach(const void* msg, size_t ofs) { - *UPB_PTR_AT(msg, ofs, upb_Array*) = NULL; -} - -UPB_INLINE const void* _upb_array_accessor(const void* msg, size_t ofs, - size_t* size) { - const upb_Array* arr = *UPB_PTR_AT(msg, ofs, const upb_Array*); - if (arr) { - if (size) *size = arr->size; - return _upb_array_constptr(arr); - } else { - if (size) *size = 0; - return NULL; - } -} - -UPB_INLINE void* _upb_array_mutable_accessor(void* msg, size_t ofs, - size_t* size) { - upb_Array* arr = *UPB_PTR_AT(msg, ofs, upb_Array*); - if (arr) { - if (size) *size = arr->size; - return _upb_array_ptr(arr); - } else { - if (size) *size = 0; - return NULL; - } +// This function is intended for situations where elem_size is compile-time +// constant or a known expression of the form (1 << lg2), so that the expression +// i*elem_size does not result in an actual multiplication. +UPB_INLINE void _upb_Array_Set(upb_Array* arr, size_t i, const void* data, + size_t elem_size) { + UPB_ASSERT(i < arr->size); + UPB_ASSERT(elem_size == 1U << _upb_Array_ElementSizeLg2(arr)); + char* arr_data = (char*)_upb_array_ptr(arr); + memcpy(arr_data + (i * elem_size), data, elem_size); } -UPB_INLINE void* _upb_Array_Resize_accessor2(void* msg, size_t ofs, size_t size, - int elem_size_lg2, - upb_Arena* arena) { - upb_Array** arr_ptr = UPB_PTR_AT(msg, ofs, upb_Array*); - upb_Array* arr = *arr_ptr; - if (!arr || arr->capacity < size) { - return _upb_Array_Resize_fallback(arr_ptr, size, elem_size_lg2, arena); - } - arr->size = size; - return _upb_array_ptr(arr); -} - -UPB_INLINE bool _upb_Array_Append_accessor2(void* msg, size_t ofs, - int elem_size_lg2, - const void* value, - upb_Arena* arena) { - upb_Array** arr_ptr = UPB_PTR_AT(msg, ofs, upb_Array*); - size_t elem_size = 1 << elem_size_lg2; - upb_Array* arr = *arr_ptr; - void* ptr; - if (!arr || arr->size == arr->capacity) { - return _upb_Array_Append_fallback(arr_ptr, value, elem_size_lg2, arena); - } - ptr = _upb_array_ptr(arr); - memcpy(UPB_PTR_AT(ptr, arr->size * elem_size, char), value, elem_size); - arr->size++; - return true; -} - -// Used by old generated code, remove once all code has been regenerated. -UPB_INLINE int _upb_sizelg2(upb_CType type) { - switch (type) { - case kUpb_CType_Bool: - return 0; - case kUpb_CType_Float: - case kUpb_CType_Int32: - case kUpb_CType_UInt32: - case kUpb_CType_Enum: - return 2; - case kUpb_CType_Message: - return UPB_SIZE(2, 3); - case kUpb_CType_Double: - case kUpb_CType_Int64: - case kUpb_CType_UInt64: - return 3; - case kUpb_CType_String: - case kUpb_CType_Bytes: - return UPB_SIZE(3, 4); - } - UPB_UNREACHABLE(); -} - -UPB_INLINE void* _upb_Array_Resize_accessor(void* msg, size_t ofs, size_t size, - upb_CType type, upb_Arena* arena) { - return _upb_Array_Resize_accessor2(msg, ofs, size, _upb_sizelg2(type), arena); -} - -UPB_INLINE bool _upb_Array_Append_accessor(void* msg, size_t ofs, - size_t elem_size, upb_CType type, - const void* value, - upb_Arena* arena) { - (void)elem_size; - return _upb_Array_Append_accessor2(msg, ofs, _upb_sizelg2(type), value, - arena); +UPB_INLINE void _upb_array_detach(const void* msg, size_t ofs) { + *UPB_PTR_AT(msg, ofs, upb_Array*) = NULL; } #ifdef __cplusplus diff --git a/upb/message/accessors.c b/upb/message/accessors.c index 83ca4040ec..4359441a33 100644 --- a/upb/message/accessors.c +++ b/upb/message/accessors.c @@ -38,38 +38,6 @@ // Must be last. #include "upb/port/def.inc" -// Maps descriptor type to elem_size_lg2. -static int _upb_MiniTableField_CTypeLg2Size(const upb_MiniTableField* f) { - static const uint8_t sizes[] = { - -1, /* invalid descriptor type */ - 3, /* DOUBLE */ - 2, /* FLOAT */ - 3, /* INT64 */ - 3, /* UINT64 */ - 2, /* INT32 */ - 3, /* FIXED64 */ - 2, /* FIXED32 */ - 0, /* BOOL */ - UPB_SIZE(3, 4), /* STRING */ - UPB_SIZE(2, 3), /* GROUP */ - UPB_SIZE(2, 3), /* MESSAGE */ - UPB_SIZE(3, 4), /* BYTES */ - 2, /* UINT32 */ - 2, /* ENUM */ - 2, /* SFIXED32 */ - 3, /* SFIXED64 */ - 2, /* SINT32 */ - 3, /* SINT64 */ - }; - return sizes[f->descriptortype]; -} - -void* upb_Message_ResizeArray(upb_Message* msg, const upb_MiniTableField* field, - size_t len, upb_Arena* arena) { - return _upb_Array_Resize_accessor2( - msg, field->offset, len, _upb_MiniTableField_CTypeLg2Size(field), arena); -} - typedef struct { const char* ptr; uint64_t val; diff --git a/upb/message/accessors.h b/upb/message/accessors.h index c40b9ddca8..94da35b231 100644 --- a/upb/message/accessors.h +++ b/upb/message/accessors.h @@ -30,6 +30,7 @@ #include "upb/base/descriptor_constants.h" #include "upb/collections/array.h" +#include "upb/collections/array_internal.h" #include "upb/collections/map.h" #include "upb/collections/map_internal.h" #include "upb/message/extension_internal.h" @@ -68,12 +69,25 @@ UPB_INLINE void _upb_Message_SetPresence(upb_Message* msg, } } +#if defined(__GNUC__) && !defined(__clang__) +// GCC raises incorrect warnings in these functions. It thinks that we are +// overrunning buffers, but we carefully write the functions in this file to +// guarantee that this is impossible. GCC gets this wrong due it its failure +// to perform constant propagation as we expect: +// - https://gcc.gnu.org/bugzilla/show_bug.cgi?id=108217 +// - https://gcc.gnu.org/bugzilla/show_bug.cgi?id=108226 +// +// Unfortunately this also indicates that GCC is not optimizing away the +// switch() in cases where it should be, compromising the performance. +#pragma GCC diagnostic push +#pragma GCC diagnostic ignored "-Warray-bounds" +#pragma GCC diagnostic ignored "-Wstringop-overflow" +#pragma GCC diagnostic ignored "-Wstringop-overread" +#endif + UPB_INLINE bool _upb_MiniTable_ValueIsNonZero(const void* default_val, const upb_MiniTableField* field) { char zero[16] = {0}; - if (upb_IsRepeatedOrMap(field)) { - return memcmp(&zero, default_val, sizeof(void*)); - } switch (_upb_MiniTableField_GetRep(field)) { case kUpb_FieldRep_1Byte: return memcmp(&zero, default_val, 1) != 0; @@ -91,10 +105,6 @@ UPB_INLINE bool _upb_MiniTable_ValueIsNonZero(const void* default_val, UPB_INLINE void _upb_MiniTable_CopyFieldData(void* to, const void* from, const upb_MiniTableField* field) { - if (upb_IsRepeatedOrMap(field)) { - memcpy(to, from, sizeof(void*)); - return; - } switch (_upb_MiniTableField_GetRep(field)) { case kUpb_FieldRep_1Byte: memcpy(to, from, 1); @@ -113,6 +123,36 @@ UPB_INLINE void _upb_MiniTable_CopyFieldData(void* to, const void* from, UPB_UNREACHABLE(); } +#if defined(__GNUC__) && !defined(__clang__) +#pragma GCC diagnostic pop +#endif + +UPB_INLINE size_t +_upb_MiniTable_ElementSizeLg2(const upb_MiniTableField* field) { + const unsigned char table[] = { + 0, + 3, // kUpb_FieldType_Double = 1, + 2, // kUpb_FieldType_Float = 2, + 3, // kUpb_FieldType_Int64 = 3, + 3, // kUpb_FieldType_UInt64 = 4, + 2, // kUpb_FieldType_Int32 = 5, + 3, // kUpb_FieldType_Fixed64 = 6, + 2, // kUpb_FieldType_Fixed32 = 7, + 0, // kUpb_FieldType_Bool = 8, + UPB_SIZE(3, 4), // kUpb_FieldType_String = 9, + UPB_SIZE(2, 3), // kUpb_FieldType_Group = 10, + UPB_SIZE(2, 3), // kUpb_FieldType_Message = 11, + UPB_SIZE(3, 4), // kUpb_FieldType_Bytes = 12, + 2, // kUpb_FieldType_UInt32 = 13, + 2, // kUpb_FieldType_Enum = 14, + 2, // kUpb_FieldType_SFixed32 = 15, + 3, // kUpb_FieldType_SFixed64 = 16, + 2, // kUpb_FieldType_SInt32 = 17, + 3, // kUpb_FieldType_SInt64 = 18, + }; + return table[field->descriptortype]; +} + // Here we define universal getter/setter functions for message fields. // These look very branchy and inefficient, but as long as the MiniTableField // values are known at compile time, all the branches are optimized away and @@ -261,12 +301,14 @@ UPB_INLINE void _upb_Message_ClearNonExtensionField( UPB_INLINE upb_Map* _upb_MiniTable_GetOrCreateMutableMap( upb_Message* msg, const upb_MiniTableField* field, size_t key_size, size_t val_size, upb_Arena* arena) { - UPB_ASSUME(upb_IsRepeatedOrMap(field)); + _upb_MiniTableField_CheckIsMap(field); upb_Map* map = NULL; upb_Map* default_map_value = NULL; _upb_Message_GetNonExtensionField(msg, field, &default_map_value, &map); if (!map) { map = _upb_Map_New(arena, key_size, val_size); + // Check again due to: https://godbolt.org/z/7WfaoKG1r + _upb_MiniTableField_CheckIsMap(field); _upb_Message_SetNonExtensionField(msg, field, &map); } return map; @@ -531,6 +573,7 @@ UPB_API_INLINE upb_Message* upb_MiniTable_GetMutableMessage( UPB_API_INLINE const upb_Array* upb_Message_GetArray( const upb_Message* msg, const upb_MiniTableField* field) { + _upb_MiniTableField_CheckIsArray(field); const upb_Array* ret; const upb_Array* default_val = NULL; _upb_Message_GetNonExtensionField(msg, field, &default_val, &ret); @@ -539,22 +582,43 @@ UPB_API_INLINE const upb_Array* upb_Message_GetArray( UPB_API_INLINE upb_Array* upb_Message_GetMutableArray( upb_Message* msg, const upb_MiniTableField* field) { + _upb_MiniTableField_CheckIsArray(field); return (upb_Array*)upb_Message_GetArray(msg, field); } -UPB_API_INLINE upb_Array* upb_Message_GetOrCreateMutableArray( - upb_Message* msg, const upb_MiniTableField* field, upb_CType ctype, - upb_Arena* arena) { +UPB_INLINE upb_Array* upb_Message_GetOrCreateMutableArray( + upb_Message* msg, const upb_MiniTableField* field, upb_Arena* arena) { + _upb_MiniTableField_CheckIsArray(field); upb_Array* array = upb_Message_GetMutableArray(msg, field); if (!array) { - array = upb_Array_New(arena, ctype); + array = _upb_Array_New(arena, 4, _upb_MiniTable_ElementSizeLg2(field)); + // Check again due to: https://godbolt.org/z/7WfaoKG1r + _upb_MiniTableField_CheckIsArray(field); _upb_Message_SetField(msg, field, &array, arena); } return array; } -void* upb_Message_ResizeArray(upb_Message* msg, const upb_MiniTableField* field, - size_t len, upb_Arena* arena); +UPB_INLINE upb_Array* upb_Message_ResizeArrayUninitialized( + upb_Message* msg, const upb_MiniTableField* field, size_t size, + upb_Arena* arena) { + _upb_MiniTableField_CheckIsArray(field); + upb_Array* arr = upb_Message_GetOrCreateMutableArray(msg, field, arena); + if (!arr || !_upb_Array_ResizeUninitialized(arr, size, arena)) return NULL; + return arr; +} + +// TODO: remove, migrate users to upb_Message_ResizeArrayUninitialized(), which +// has the same semantics but a clearer name. Alternatively, if users want an +// initialized variant, we can also offer that. +UPB_API_INLINE void* upb_Message_ResizeArray(upb_Message* msg, + const upb_MiniTableField* field, + size_t size, upb_Arena* arena) { + _upb_MiniTableField_CheckIsArray(field); + upb_Array* arr = + upb_Message_ResizeArrayUninitialized(msg, field, size, arena); + return _upb_array_ptr(arr); +} UPB_API_INLINE bool upb_MiniTableField_IsClosedEnum( const upb_MiniTableField* field) { @@ -563,7 +627,7 @@ UPB_API_INLINE bool upb_MiniTableField_IsClosedEnum( UPB_API_INLINE const upb_Map* upb_Message_GetMap( const upb_Message* msg, const upb_MiniTableField* field) { - UPB_ASSUME(upb_FieldMode_Get(field) == kUpb_FieldMode_Map); + _upb_MiniTableField_CheckIsMap(field); const upb_Map* ret; const upb_Map* default_val = NULL; _upb_Message_GetNonExtensionField(msg, field, &default_val, &ret); diff --git a/upb/mini_table/field_internal.h b/upb/mini_table/field_internal.h index f58531a5a6..e2b8f6bcc2 100644 --- a/upb/mini_table/field_internal.h +++ b/upb/mini_table/field_internal.h @@ -75,6 +75,8 @@ typedef enum { kUpb_FieldRep_StringView = 2, kUpb_FieldRep_8Byte = 3, + kUpb_FieldRep_NativePointer = + UPB_SIZE(kUpb_FieldRep_4Byte, kUpb_FieldRep_8Byte), kUpb_FieldRep_Max = kUpb_FieldRep_8Byte, } upb_FieldRep; @@ -93,6 +95,20 @@ UPB_INLINE upb_FieldMode upb_FieldMode_Get(const upb_MiniTableField* field) { return (upb_FieldMode)(field->mode & 3); } +UPB_INLINE void _upb_MiniTableField_CheckIsArray( + const upb_MiniTableField* field) { + UPB_ASSUME(_upb_MiniTableField_GetRep(field) == kUpb_FieldRep_NativePointer); + UPB_ASSUME(upb_FieldMode_Get(field) == kUpb_FieldMode_Array); + UPB_ASSUME(field->presence == 0); +} + +UPB_INLINE void _upb_MiniTableField_CheckIsMap( + const upb_MiniTableField* field) { + UPB_ASSUME(_upb_MiniTableField_GetRep(field) == kUpb_FieldRep_NativePointer); + UPB_ASSUME(upb_FieldMode_Get(field) == kUpb_FieldMode_Map); + UPB_ASSUME(field->presence == 0); +} + UPB_INLINE bool upb_IsRepeatedOrMap(const upb_MiniTableField* field) { // This works because upb_FieldMode has no value 3. return !(field->mode & kUpb_FieldMode_Scalar); diff --git a/upbc/file_layout.h b/upbc/file_layout.h index 51ea2b72c0..265dfe3c83 100644 --- a/upbc/file_layout.h +++ b/upbc/file_layout.h @@ -180,23 +180,6 @@ class FileLayout { return layout64_.GetEnumTable(d); } - std::string GetFieldOffset(const protobuf::FieldDescriptor* f) const { - const upb_MiniTableField* f_32 = upb_MiniTable_FindFieldByNumber( - GetMiniTable32(f->containing_type()), f->number()); - const upb_MiniTableField* f_64 = upb_MiniTable_FindFieldByNumber( - GetMiniTable64(f->containing_type()), f->number()); - return UpbSize(f_32->offset, f_64->offset); - } - - std::string GetOneofCaseOffset(const protobuf::OneofDescriptor* o) const { - const protobuf::FieldDescriptor* f = o->field(0); - const upb_MiniTableField* f_32 = upb_MiniTable_FindFieldByNumber( - GetMiniTable32(f->containing_type()), f->number()); - const upb_MiniTableField* f_64 = upb_MiniTable_FindFieldByNumber( - GetMiniTable64(f->containing_type()), f->number()); - return UpbSize(~f_32->presence, ~f_64->presence); - } - std::string GetMessageSize(const protobuf::Descriptor* d) const { return UpbSize(GetMiniTable32(d)->size, GetMiniTable64(d)->size); } diff --git a/upbc/protoc-gen-upb.cc b/upbc/protoc-gen-upb.cc index 71d34963e0..4c27f53233 100644 --- a/upbc/protoc-gen-upb.cc +++ b/upbc/protoc-gen-upb.cc @@ -137,34 +137,6 @@ std::string CTypeInternal(const protobuf::FieldDescriptor* field, } } -std::string SizeLg2(const protobuf::FieldDescriptor* field) { - switch (field->cpp_type()) { - case protobuf::FieldDescriptor::CPPTYPE_MESSAGE: - return "UPB_SIZE(2, 3)"; - case protobuf::FieldDescriptor::CPPTYPE_ENUM: - return std::to_string(2); - case protobuf::FieldDescriptor::CPPTYPE_BOOL: - return std::to_string(0); - case protobuf::FieldDescriptor::CPPTYPE_FLOAT: - return std::to_string(2); - case protobuf::FieldDescriptor::CPPTYPE_INT32: - return std::to_string(2); - case protobuf::FieldDescriptor::CPPTYPE_UINT32: - return std::to_string(2); - case protobuf::FieldDescriptor::CPPTYPE_DOUBLE: - return std::to_string(3); - case protobuf::FieldDescriptor::CPPTYPE_INT64: - return std::to_string(3); - case protobuf::FieldDescriptor::CPPTYPE_UINT64: - return std::to_string(3); - case protobuf::FieldDescriptor::CPPTYPE_STRING: - return "UPB_SIZE(3, 4)"; - default: - fprintf(stderr, "Unexpected type"); - abort(); - } -} - std::string FloatToCLiteral(float value) { if (value == std::numeric_limits::infinity()) { return "kUpb_FltInfinity"; @@ -518,12 +490,20 @@ void GenerateRepeatedGetters(const protobuf::FieldDescriptor* field, Output& output) { output( R"cc( - UPB_INLINE $0 const* $1_$2(const $1* msg, size_t* len) { - return ($0 const*)_upb_array_accessor(msg, $3, len); + UPB_INLINE $0 const* $1_$2(const $1* msg, size_t* size) { + const upb_MiniTableField field = $3; + const upb_Array* arr = upb_Message_GetArray(msg, &field); + if (arr) { + if (size) *size = arr->size; + return ($0 const*)_upb_array_constptr(arr); + } else { + if (size) *size = 0; + return NULL; + } } )cc", CTypeConst(field), msg_name, ResolveFieldName(field, field_names), - layout.GetFieldOffset(field)); + FieldInitializer(layout, field)); } void GenerateScalarGetters(const protobuf::FieldDescriptor* field, @@ -618,41 +598,58 @@ void GenerateRepeatedSetters(const protobuf::FieldDescriptor* field, std::string resolved_name = ResolveFieldName(field, field_names); output( R"cc( - UPB_INLINE $0* $1_mutable_$2($1* msg, size_t* len) { - return ($0*)_upb_array_mutable_accessor(msg, $3, len); + UPB_INLINE $0* $1_mutable_$2($1* msg, size_t* size) { + upb_MiniTableField field = $3; + upb_Array* arr = upb_Message_GetMutableArray(msg, &field); + if (arr) { + if (size) *size = arr->size; + return ($0*)_upb_array_ptr(arr); + } else { + if (size) *size = 0; + return NULL; + } } )cc", - CType(field), msg_name, resolved_name, layout.GetFieldOffset(field)); + CType(field), msg_name, resolved_name, FieldInitializer(layout, field)); output( R"cc( - UPB_INLINE $0* $1_resize_$2($1* msg, size_t len, upb_Arena* arena) { - return ($0*)_upb_Array_Resize_accessor2(msg, $3, len, $4, arena); + UPB_INLINE $0* $1_resize_$2($1* msg, size_t size, upb_Arena* arena) { + upb_MiniTableField field = $3; + return ($0*)upb_Message_ResizeArray(msg, &field, size, arena); } )cc", - CType(field), msg_name, resolved_name, layout.GetFieldOffset(field), - SizeLg2(field)); + CType(field), msg_name, resolved_name, FieldInitializer(layout, field)); if (field->cpp_type() == protobuf::FieldDescriptor::CPPTYPE_MESSAGE) { output( R"cc( UPB_INLINE struct $0* $1_add_$2($1* msg, upb_Arena* arena) { + upb_MiniTableField field = $4; + upb_Array* arr = upb_Message_GetOrCreateMutableArray(msg, &field, arena); + if (!arr || !_upb_Array_ResizeUninitialized(arr, arr->size + 1, arena)) { + return NULL; + } struct $0* sub = (struct $0*)_upb_Message_New(&$3, arena); - bool ok = _upb_Array_Append_accessor2(msg, $4, $5, &sub, arena); - if (!ok) return NULL; + if (!arr || !sub) return NULL; + _upb_Array_Set(arr, arr->size - 1, &sub, sizeof(sub)); return sub; } )cc", MessageName(field->message_type()), msg_name, resolved_name, - MessageInit(field->message_type()), layout.GetFieldOffset(field), - SizeLg2(field)); + MessageInit(field->message_type()), FieldInitializer(layout, field)); } else { output( R"cc( UPB_INLINE bool $1_add_$2($1* msg, $0 val, upb_Arena* arena) { - return _upb_Array_Append_accessor2(msg, $3, $4, &val, arena); + upb_MiniTableField field = $3; + upb_Array* arr = upb_Message_GetOrCreateMutableArray(msg, &field, arena); + if (!arr || !_upb_Array_ResizeUninitialized(arr, arr->size + 1, arena)) { + return false; + } + _upb_Array_Set(arr, arr->size - 1, &val, sizeof(val)); + return true; } )cc", - CType(field), msg_name, resolved_name, layout.GetFieldOffset(field), - SizeLg2(field)); + CType(field), msg_name, resolved_name, FieldInitializer(layout, field)); } } From 565b28c0d1d04008f3af87629e059a9f96ed73e1 Mon Sep 17 00:00:00 2001 From: Protobuf Team Bot Date: Mon, 26 Dec 2022 18:44:24 -0800 Subject: [PATCH 08/21] Unified accessor API for repeated field getters and setters. This CL eliminates the last remaining callers of GetFieldOffset(), therefore opening the door to a more principled bootstrapping process. PiperOrigin-RevId: 497871886 --- .github/workflows/bazel_tests.yml | 6 +- protos/protos_internal_test.cc | 2 +- upb/collections/array.c | 59 +++++++++++---- upb/collections/array_internal.h | 122 ++++++++++++++++++++++++------ upb/message/accessors.c | 32 ++++++++ upb/message/accessors.h | 94 ++++------------------- upb/mini_table/field_internal.h | 16 ---- upbc/file_layout.h | 17 +++++ upbc/protoc-gen-upb.cc | 87 +++++++++++---------- 9 files changed, 254 insertions(+), 181 deletions(-) diff --git a/.github/workflows/bazel_tests.yml b/.github/workflows/bazel_tests.yml index b0affa1859..efe6fe5e69 100644 --- a/.github/workflows/bazel_tests.yml +++ b/.github/workflows/bazel_tests.yml @@ -24,7 +24,7 @@ jobs: include: - { NAME: "Fastbuild", BAZEL: bazel, CC: clang, os: ubuntu-20.04, flags: "" } - { NAME: "Optmized", BAZEL: bazel, CC: clang, os: ubuntu-20.04, flags: "-c opt" } # Some warnings only fire with -c opt - - { NAME: "GCC Optimized", BAZEL: bazel, CC: gcc-12, os: ubuntu-22.04, flags: "-c opt" } + - { NAME: "GCC Optimized", BAZEL: bazel, CC: gcc, os: ubuntu-20.04, flags: "-c opt" } - { NAME: "FastTable", BAZEL: bazel, CC: clang, os: ubuntu-20.04, flags: "--//:fasttable_enabled=true -- -cmake:test_generated_files" } - { NAME: "ASAN", BAZEL: bazel, CC: clang, os: ubuntu-20.04, flags: "--config=asan -c dbg -- -benchmarks:benchmark -python/..." } - { NAME: "UBSAN", BAZEL: bazel, CC: clang, os: ubuntu-20.04, flags: "--config=ubsan -c dbg -- -benchmarks:benchmark -python/... -lua/...", install: "libunwind-dev" } @@ -53,10 +53,6 @@ jobs: wget -O $FILENAME https://github.com/bazelbuild/bazel/releases/download/$VERSION/${{ matrix.BAZEL }} chmod a+x $FILENAME if: ${{ matrix.BAZEL != 'bazel' }} - - name: Check tool versions - run: | - ${{ matrix.CC }} --version - ${{ matrix.BAZEL }} --version - name: Set up Bazel read/write caching run: echo "BAZEL_CACHE_AUTH=--google_default_credentials" >> $GITHUB_ENV if: ${{ github.event.pull_request.head.repo.full_name == 'protocolbuffers/upb' }} diff --git a/protos/protos_internal_test.cc b/protos/protos_internal_test.cc index 99bfaa2402..835601c549 100644 --- a/protos/protos_internal_test.cc +++ b/protos/protos_internal_test.cc @@ -40,9 +40,9 @@ TEST(CppGeneratedCode, InternalMoveMessage) { upb_Arena* source_arena = upb_Arena_New(); protos_generator_test_TestModel* message = protos_generator_test_TestModel_new(source_arena); - ASSERT_NE(message, nullptr); protos_generator_test_TestModel_set_int_value_with_default(message, 123); + EXPECT_NE(message, nullptr); // Move ownership. TestModel model = protos::internal::MoveMessage(message, source_arena); diff --git a/upb/collections/array.c b/upb/collections/array.c index fdb1bd206c..a957f26ab8 100644 --- a/upb/collections/array.c +++ b/upb/collections/array.c @@ -32,22 +32,23 @@ // Must be last. #include "upb/port/def.inc" -const char _upb_Array_CTypeSizeLg2Table[] = { - [kUpb_CType_Bool] = 0, - [kUpb_CType_Float] = 2, - [kUpb_CType_Int32] = 2, - [kUpb_CType_UInt32] = 2, - [kUpb_CType_Enum] = 2, - [kUpb_CType_Message] = UPB_SIZE(2, 3), - [kUpb_CType_Double] = 3, - [kUpb_CType_Int64] = 3, - [kUpb_CType_UInt64] = 3, - [kUpb_CType_String] = UPB_SIZE(3, 4), - [kUpb_CType_Bytes] = UPB_SIZE(3, 4), +static const char _upb_CTypeo_sizelg2[12] = { + 0, + 0, /* kUpb_CType_Bool */ + 2, /* kUpb_CType_Float */ + 2, /* kUpb_CType_Int32 */ + 2, /* kUpb_CType_UInt32 */ + 2, /* kUpb_CType_Enum */ + UPB_SIZE(2, 3), /* kUpb_CType_Message */ + 3, /* kUpb_CType_Double */ + 3, /* kUpb_CType_Int64 */ + 3, /* kUpb_CType_UInt64 */ + UPB_SIZE(3, 4), /* kUpb_CType_String */ + UPB_SIZE(3, 4), /* kUpb_CType_Bytes */ }; upb_Array* upb_Array_New(upb_Arena* a, upb_CType type) { - return _upb_Array_New(a, 4, _upb_Array_CTypeSizeLg2(type)); + return _upb_Array_New(a, 4, _upb_CTypeo_sizelg2[type]); } size_t upb_Array_Size(const upb_Array* arr) { return arr->size; } @@ -141,3 +142,35 @@ bool _upb_array_realloc(upb_Array* arr, size_t min_capacity, upb_Arena* arena) { arr->capacity = new_capacity; return true; } + +static upb_Array* getorcreate_array(upb_Array** arr_ptr, int elem_size_lg2, + upb_Arena* arena) { + upb_Array* arr = *arr_ptr; + if (!arr) { + arr = _upb_Array_New(arena, 4, elem_size_lg2); + if (!arr) return NULL; + *arr_ptr = arr; + } + return arr; +} + +void* _upb_Array_Resize_fallback(upb_Array** arr_ptr, size_t size, + int elem_size_lg2, upb_Arena* arena) { + upb_Array* arr = getorcreate_array(arr_ptr, elem_size_lg2, arena); + return arr && _upb_Array_ResizeUninitialized(arr, size, arena) + ? _upb_array_ptr(arr) + : NULL; +} + +bool _upb_Array_Append_fallback(upb_Array** arr_ptr, const void* value, + int elem_size_lg2, upb_Arena* arena) { + upb_Array* arr = getorcreate_array(arr_ptr, elem_size_lg2, arena); + if (!arr) return false; + + size_t elems = arr->size; + if (!_upb_Array_ResizeUninitialized(arr, elems + 1, arena)) return false; + + char* data = _upb_array_ptr(arr); + memcpy(data + (elems << elem_size_lg2), value, 1 << elem_size_lg2); + return true; +} diff --git a/upb/collections/array_internal.h b/upb/collections/array_internal.h index 0c126eb642..e1a4cefde2 100644 --- a/upb/collections/array_internal.h +++ b/upb/collections/array_internal.h @@ -48,14 +48,8 @@ struct upb_Array { }; // LINT.ThenChange(GoogleInternalName1) -UPB_INLINE size_t _upb_Array_ElementSizeLg2(const upb_Array* arr) { - size_t ret = arr->data & 7; - UPB_ASSERT(ret <= 4); - return ret; -} - UPB_INLINE const void* _upb_array_constptr(const upb_Array* arr) { - _upb_Array_ElementSizeLg2(arr); // Check assertion. + UPB_ASSERT((arr->data & 7) <= 4); return (void*)(arr->data & ~(uintptr_t)7); } @@ -74,15 +68,8 @@ UPB_INLINE uintptr_t _upb_tag_arrptr(void* ptr, int elem_size_lg2) { return (uintptr_t)ptr | (unsigned)elem_size_lg2; } -extern const char _upb_Array_CTypeSizeLg2Table[]; - -UPB_INLINE size_t _upb_Array_CTypeSizeLg2(upb_CType ctype) { - return _upb_Array_CTypeSizeLg2Table[ctype]; -} - UPB_INLINE upb_Array* _upb_Array_New(upb_Arena* a, size_t init_capacity, int elem_size_lg2) { - UPB_ASSERT(elem_size_lg2 <= 4); const size_t arr_size = UPB_ALIGN_UP(sizeof(upb_Array), UPB_MALLOC_ALIGN); const size_t bytes = arr_size + (init_capacity << elem_size_lg2); upb_Array* arr = (upb_Array*)upb_Arena_Malloc(a, bytes); @@ -96,6 +83,12 @@ UPB_INLINE upb_Array* _upb_Array_New(upb_Arena* a, size_t init_capacity, // Resizes the capacity of the array to be at least min_size. bool _upb_array_realloc(upb_Array* arr, size_t min_size, upb_Arena* arena); +// Fallback functions for when the accessors require a resize. +void* _upb_Array_Resize_fallback(upb_Array** arr_ptr, size_t size, + int elem_size_lg2, upb_Arena* arena); +bool _upb_Array_Append_fallback(upb_Array** arr_ptr, const void* value, + int elem_size_lg2, upb_Arena* arena); + UPB_INLINE bool _upb_array_reserve(upb_Array* arr, size_t size, upb_Arena* arena) { if (arr->capacity < size) return _upb_array_realloc(arr, size, arena); @@ -110,21 +103,100 @@ UPB_INLINE bool _upb_Array_ResizeUninitialized(upb_Array* arr, size_t size, return true; } -// This function is intended for situations where elem_size is compile-time -// constant or a known expression of the form (1 << lg2), so that the expression -// i*elem_size does not result in an actual multiplication. -UPB_INLINE void _upb_Array_Set(upb_Array* arr, size_t i, const void* data, - size_t elem_size) { - UPB_ASSERT(i < arr->size); - UPB_ASSERT(elem_size == 1U << _upb_Array_ElementSizeLg2(arr)); - char* arr_data = (char*)_upb_array_ptr(arr); - memcpy(arr_data + (i * elem_size), data, elem_size); -} - UPB_INLINE void _upb_array_detach(const void* msg, size_t ofs) { *UPB_PTR_AT(msg, ofs, upb_Array*) = NULL; } +UPB_INLINE const void* _upb_array_accessor(const void* msg, size_t ofs, + size_t* size) { + const upb_Array* arr = *UPB_PTR_AT(msg, ofs, const upb_Array*); + if (arr) { + if (size) *size = arr->size; + return _upb_array_constptr(arr); + } else { + if (size) *size = 0; + return NULL; + } +} + +UPB_INLINE void* _upb_array_mutable_accessor(void* msg, size_t ofs, + size_t* size) { + upb_Array* arr = *UPB_PTR_AT(msg, ofs, upb_Array*); + if (arr) { + if (size) *size = arr->size; + return _upb_array_ptr(arr); + } else { + if (size) *size = 0; + return NULL; + } +} + +UPB_INLINE void* _upb_Array_Resize_accessor2(void* msg, size_t ofs, size_t size, + int elem_size_lg2, + upb_Arena* arena) { + upb_Array** arr_ptr = UPB_PTR_AT(msg, ofs, upb_Array*); + upb_Array* arr = *arr_ptr; + if (!arr || arr->capacity < size) { + return _upb_Array_Resize_fallback(arr_ptr, size, elem_size_lg2, arena); + } + arr->size = size; + return _upb_array_ptr(arr); +} + +UPB_INLINE bool _upb_Array_Append_accessor2(void* msg, size_t ofs, + int elem_size_lg2, + const void* value, + upb_Arena* arena) { + upb_Array** arr_ptr = UPB_PTR_AT(msg, ofs, upb_Array*); + size_t elem_size = 1 << elem_size_lg2; + upb_Array* arr = *arr_ptr; + void* ptr; + if (!arr || arr->size == arr->capacity) { + return _upb_Array_Append_fallback(arr_ptr, value, elem_size_lg2, arena); + } + ptr = _upb_array_ptr(arr); + memcpy(UPB_PTR_AT(ptr, arr->size * elem_size, char), value, elem_size); + arr->size++; + return true; +} + +// Used by old generated code, remove once all code has been regenerated. +UPB_INLINE int _upb_sizelg2(upb_CType type) { + switch (type) { + case kUpb_CType_Bool: + return 0; + case kUpb_CType_Float: + case kUpb_CType_Int32: + case kUpb_CType_UInt32: + case kUpb_CType_Enum: + return 2; + case kUpb_CType_Message: + return UPB_SIZE(2, 3); + case kUpb_CType_Double: + case kUpb_CType_Int64: + case kUpb_CType_UInt64: + return 3; + case kUpb_CType_String: + case kUpb_CType_Bytes: + return UPB_SIZE(3, 4); + } + UPB_UNREACHABLE(); +} + +UPB_INLINE void* _upb_Array_Resize_accessor(void* msg, size_t ofs, size_t size, + upb_CType type, upb_Arena* arena) { + return _upb_Array_Resize_accessor2(msg, ofs, size, _upb_sizelg2(type), arena); +} + +UPB_INLINE bool _upb_Array_Append_accessor(void* msg, size_t ofs, + size_t elem_size, upb_CType type, + const void* value, + upb_Arena* arena) { + (void)elem_size; + return _upb_Array_Append_accessor2(msg, ofs, _upb_sizelg2(type), value, + arena); +} + #ifdef __cplusplus } /* extern "C" */ #endif diff --git a/upb/message/accessors.c b/upb/message/accessors.c index 4359441a33..83ca4040ec 100644 --- a/upb/message/accessors.c +++ b/upb/message/accessors.c @@ -38,6 +38,38 @@ // Must be last. #include "upb/port/def.inc" +// Maps descriptor type to elem_size_lg2. +static int _upb_MiniTableField_CTypeLg2Size(const upb_MiniTableField* f) { + static const uint8_t sizes[] = { + -1, /* invalid descriptor type */ + 3, /* DOUBLE */ + 2, /* FLOAT */ + 3, /* INT64 */ + 3, /* UINT64 */ + 2, /* INT32 */ + 3, /* FIXED64 */ + 2, /* FIXED32 */ + 0, /* BOOL */ + UPB_SIZE(3, 4), /* STRING */ + UPB_SIZE(2, 3), /* GROUP */ + UPB_SIZE(2, 3), /* MESSAGE */ + UPB_SIZE(3, 4), /* BYTES */ + 2, /* UINT32 */ + 2, /* ENUM */ + 2, /* SFIXED32 */ + 3, /* SFIXED64 */ + 2, /* SINT32 */ + 3, /* SINT64 */ + }; + return sizes[f->descriptortype]; +} + +void* upb_Message_ResizeArray(upb_Message* msg, const upb_MiniTableField* field, + size_t len, upb_Arena* arena) { + return _upb_Array_Resize_accessor2( + msg, field->offset, len, _upb_MiniTableField_CTypeLg2Size(field), arena); +} + typedef struct { const char* ptr; uint64_t val; diff --git a/upb/message/accessors.h b/upb/message/accessors.h index 94da35b231..c40b9ddca8 100644 --- a/upb/message/accessors.h +++ b/upb/message/accessors.h @@ -30,7 +30,6 @@ #include "upb/base/descriptor_constants.h" #include "upb/collections/array.h" -#include "upb/collections/array_internal.h" #include "upb/collections/map.h" #include "upb/collections/map_internal.h" #include "upb/message/extension_internal.h" @@ -69,25 +68,12 @@ UPB_INLINE void _upb_Message_SetPresence(upb_Message* msg, } } -#if defined(__GNUC__) && !defined(__clang__) -// GCC raises incorrect warnings in these functions. It thinks that we are -// overrunning buffers, but we carefully write the functions in this file to -// guarantee that this is impossible. GCC gets this wrong due it its failure -// to perform constant propagation as we expect: -// - https://gcc.gnu.org/bugzilla/show_bug.cgi?id=108217 -// - https://gcc.gnu.org/bugzilla/show_bug.cgi?id=108226 -// -// Unfortunately this also indicates that GCC is not optimizing away the -// switch() in cases where it should be, compromising the performance. -#pragma GCC diagnostic push -#pragma GCC diagnostic ignored "-Warray-bounds" -#pragma GCC diagnostic ignored "-Wstringop-overflow" -#pragma GCC diagnostic ignored "-Wstringop-overread" -#endif - UPB_INLINE bool _upb_MiniTable_ValueIsNonZero(const void* default_val, const upb_MiniTableField* field) { char zero[16] = {0}; + if (upb_IsRepeatedOrMap(field)) { + return memcmp(&zero, default_val, sizeof(void*)); + } switch (_upb_MiniTableField_GetRep(field)) { case kUpb_FieldRep_1Byte: return memcmp(&zero, default_val, 1) != 0; @@ -105,6 +91,10 @@ UPB_INLINE bool _upb_MiniTable_ValueIsNonZero(const void* default_val, UPB_INLINE void _upb_MiniTable_CopyFieldData(void* to, const void* from, const upb_MiniTableField* field) { + if (upb_IsRepeatedOrMap(field)) { + memcpy(to, from, sizeof(void*)); + return; + } switch (_upb_MiniTableField_GetRep(field)) { case kUpb_FieldRep_1Byte: memcpy(to, from, 1); @@ -123,36 +113,6 @@ UPB_INLINE void _upb_MiniTable_CopyFieldData(void* to, const void* from, UPB_UNREACHABLE(); } -#if defined(__GNUC__) && !defined(__clang__) -#pragma GCC diagnostic pop -#endif - -UPB_INLINE size_t -_upb_MiniTable_ElementSizeLg2(const upb_MiniTableField* field) { - const unsigned char table[] = { - 0, - 3, // kUpb_FieldType_Double = 1, - 2, // kUpb_FieldType_Float = 2, - 3, // kUpb_FieldType_Int64 = 3, - 3, // kUpb_FieldType_UInt64 = 4, - 2, // kUpb_FieldType_Int32 = 5, - 3, // kUpb_FieldType_Fixed64 = 6, - 2, // kUpb_FieldType_Fixed32 = 7, - 0, // kUpb_FieldType_Bool = 8, - UPB_SIZE(3, 4), // kUpb_FieldType_String = 9, - UPB_SIZE(2, 3), // kUpb_FieldType_Group = 10, - UPB_SIZE(2, 3), // kUpb_FieldType_Message = 11, - UPB_SIZE(3, 4), // kUpb_FieldType_Bytes = 12, - 2, // kUpb_FieldType_UInt32 = 13, - 2, // kUpb_FieldType_Enum = 14, - 2, // kUpb_FieldType_SFixed32 = 15, - 3, // kUpb_FieldType_SFixed64 = 16, - 2, // kUpb_FieldType_SInt32 = 17, - 3, // kUpb_FieldType_SInt64 = 18, - }; - return table[field->descriptortype]; -} - // Here we define universal getter/setter functions for message fields. // These look very branchy and inefficient, but as long as the MiniTableField // values are known at compile time, all the branches are optimized away and @@ -301,14 +261,12 @@ UPB_INLINE void _upb_Message_ClearNonExtensionField( UPB_INLINE upb_Map* _upb_MiniTable_GetOrCreateMutableMap( upb_Message* msg, const upb_MiniTableField* field, size_t key_size, size_t val_size, upb_Arena* arena) { - _upb_MiniTableField_CheckIsMap(field); + UPB_ASSUME(upb_IsRepeatedOrMap(field)); upb_Map* map = NULL; upb_Map* default_map_value = NULL; _upb_Message_GetNonExtensionField(msg, field, &default_map_value, &map); if (!map) { map = _upb_Map_New(arena, key_size, val_size); - // Check again due to: https://godbolt.org/z/7WfaoKG1r - _upb_MiniTableField_CheckIsMap(field); _upb_Message_SetNonExtensionField(msg, field, &map); } return map; @@ -573,7 +531,6 @@ UPB_API_INLINE upb_Message* upb_MiniTable_GetMutableMessage( UPB_API_INLINE const upb_Array* upb_Message_GetArray( const upb_Message* msg, const upb_MiniTableField* field) { - _upb_MiniTableField_CheckIsArray(field); const upb_Array* ret; const upb_Array* default_val = NULL; _upb_Message_GetNonExtensionField(msg, field, &default_val, &ret); @@ -582,43 +539,22 @@ UPB_API_INLINE const upb_Array* upb_Message_GetArray( UPB_API_INLINE upb_Array* upb_Message_GetMutableArray( upb_Message* msg, const upb_MiniTableField* field) { - _upb_MiniTableField_CheckIsArray(field); return (upb_Array*)upb_Message_GetArray(msg, field); } -UPB_INLINE upb_Array* upb_Message_GetOrCreateMutableArray( - upb_Message* msg, const upb_MiniTableField* field, upb_Arena* arena) { - _upb_MiniTableField_CheckIsArray(field); +UPB_API_INLINE upb_Array* upb_Message_GetOrCreateMutableArray( + upb_Message* msg, const upb_MiniTableField* field, upb_CType ctype, + upb_Arena* arena) { upb_Array* array = upb_Message_GetMutableArray(msg, field); if (!array) { - array = _upb_Array_New(arena, 4, _upb_MiniTable_ElementSizeLg2(field)); - // Check again due to: https://godbolt.org/z/7WfaoKG1r - _upb_MiniTableField_CheckIsArray(field); + array = upb_Array_New(arena, ctype); _upb_Message_SetField(msg, field, &array, arena); } return array; } -UPB_INLINE upb_Array* upb_Message_ResizeArrayUninitialized( - upb_Message* msg, const upb_MiniTableField* field, size_t size, - upb_Arena* arena) { - _upb_MiniTableField_CheckIsArray(field); - upb_Array* arr = upb_Message_GetOrCreateMutableArray(msg, field, arena); - if (!arr || !_upb_Array_ResizeUninitialized(arr, size, arena)) return NULL; - return arr; -} - -// TODO: remove, migrate users to upb_Message_ResizeArrayUninitialized(), which -// has the same semantics but a clearer name. Alternatively, if users want an -// initialized variant, we can also offer that. -UPB_API_INLINE void* upb_Message_ResizeArray(upb_Message* msg, - const upb_MiniTableField* field, - size_t size, upb_Arena* arena) { - _upb_MiniTableField_CheckIsArray(field); - upb_Array* arr = - upb_Message_ResizeArrayUninitialized(msg, field, size, arena); - return _upb_array_ptr(arr); -} +void* upb_Message_ResizeArray(upb_Message* msg, const upb_MiniTableField* field, + size_t len, upb_Arena* arena); UPB_API_INLINE bool upb_MiniTableField_IsClosedEnum( const upb_MiniTableField* field) { @@ -627,7 +563,7 @@ UPB_API_INLINE bool upb_MiniTableField_IsClosedEnum( UPB_API_INLINE const upb_Map* upb_Message_GetMap( const upb_Message* msg, const upb_MiniTableField* field) { - _upb_MiniTableField_CheckIsMap(field); + UPB_ASSUME(upb_FieldMode_Get(field) == kUpb_FieldMode_Map); const upb_Map* ret; const upb_Map* default_val = NULL; _upb_Message_GetNonExtensionField(msg, field, &default_val, &ret); diff --git a/upb/mini_table/field_internal.h b/upb/mini_table/field_internal.h index e2b8f6bcc2..f58531a5a6 100644 --- a/upb/mini_table/field_internal.h +++ b/upb/mini_table/field_internal.h @@ -75,8 +75,6 @@ typedef enum { kUpb_FieldRep_StringView = 2, kUpb_FieldRep_8Byte = 3, - kUpb_FieldRep_NativePointer = - UPB_SIZE(kUpb_FieldRep_4Byte, kUpb_FieldRep_8Byte), kUpb_FieldRep_Max = kUpb_FieldRep_8Byte, } upb_FieldRep; @@ -95,20 +93,6 @@ UPB_INLINE upb_FieldMode upb_FieldMode_Get(const upb_MiniTableField* field) { return (upb_FieldMode)(field->mode & 3); } -UPB_INLINE void _upb_MiniTableField_CheckIsArray( - const upb_MiniTableField* field) { - UPB_ASSUME(_upb_MiniTableField_GetRep(field) == kUpb_FieldRep_NativePointer); - UPB_ASSUME(upb_FieldMode_Get(field) == kUpb_FieldMode_Array); - UPB_ASSUME(field->presence == 0); -} - -UPB_INLINE void _upb_MiniTableField_CheckIsMap( - const upb_MiniTableField* field) { - UPB_ASSUME(_upb_MiniTableField_GetRep(field) == kUpb_FieldRep_NativePointer); - UPB_ASSUME(upb_FieldMode_Get(field) == kUpb_FieldMode_Map); - UPB_ASSUME(field->presence == 0); -} - UPB_INLINE bool upb_IsRepeatedOrMap(const upb_MiniTableField* field) { // This works because upb_FieldMode has no value 3. return !(field->mode & kUpb_FieldMode_Scalar); diff --git a/upbc/file_layout.h b/upbc/file_layout.h index 265dfe3c83..51ea2b72c0 100644 --- a/upbc/file_layout.h +++ b/upbc/file_layout.h @@ -180,6 +180,23 @@ class FileLayout { return layout64_.GetEnumTable(d); } + std::string GetFieldOffset(const protobuf::FieldDescriptor* f) const { + const upb_MiniTableField* f_32 = upb_MiniTable_FindFieldByNumber( + GetMiniTable32(f->containing_type()), f->number()); + const upb_MiniTableField* f_64 = upb_MiniTable_FindFieldByNumber( + GetMiniTable64(f->containing_type()), f->number()); + return UpbSize(f_32->offset, f_64->offset); + } + + std::string GetOneofCaseOffset(const protobuf::OneofDescriptor* o) const { + const protobuf::FieldDescriptor* f = o->field(0); + const upb_MiniTableField* f_32 = upb_MiniTable_FindFieldByNumber( + GetMiniTable32(f->containing_type()), f->number()); + const upb_MiniTableField* f_64 = upb_MiniTable_FindFieldByNumber( + GetMiniTable64(f->containing_type()), f->number()); + return UpbSize(~f_32->presence, ~f_64->presence); + } + std::string GetMessageSize(const protobuf::Descriptor* d) const { return UpbSize(GetMiniTable32(d)->size, GetMiniTable64(d)->size); } diff --git a/upbc/protoc-gen-upb.cc b/upbc/protoc-gen-upb.cc index 4c27f53233..71d34963e0 100644 --- a/upbc/protoc-gen-upb.cc +++ b/upbc/protoc-gen-upb.cc @@ -137,6 +137,34 @@ std::string CTypeInternal(const protobuf::FieldDescriptor* field, } } +std::string SizeLg2(const protobuf::FieldDescriptor* field) { + switch (field->cpp_type()) { + case protobuf::FieldDescriptor::CPPTYPE_MESSAGE: + return "UPB_SIZE(2, 3)"; + case protobuf::FieldDescriptor::CPPTYPE_ENUM: + return std::to_string(2); + case protobuf::FieldDescriptor::CPPTYPE_BOOL: + return std::to_string(0); + case protobuf::FieldDescriptor::CPPTYPE_FLOAT: + return std::to_string(2); + case protobuf::FieldDescriptor::CPPTYPE_INT32: + return std::to_string(2); + case protobuf::FieldDescriptor::CPPTYPE_UINT32: + return std::to_string(2); + case protobuf::FieldDescriptor::CPPTYPE_DOUBLE: + return std::to_string(3); + case protobuf::FieldDescriptor::CPPTYPE_INT64: + return std::to_string(3); + case protobuf::FieldDescriptor::CPPTYPE_UINT64: + return std::to_string(3); + case protobuf::FieldDescriptor::CPPTYPE_STRING: + return "UPB_SIZE(3, 4)"; + default: + fprintf(stderr, "Unexpected type"); + abort(); + } +} + std::string FloatToCLiteral(float value) { if (value == std::numeric_limits::infinity()) { return "kUpb_FltInfinity"; @@ -490,20 +518,12 @@ void GenerateRepeatedGetters(const protobuf::FieldDescriptor* field, Output& output) { output( R"cc( - UPB_INLINE $0 const* $1_$2(const $1* msg, size_t* size) { - const upb_MiniTableField field = $3; - const upb_Array* arr = upb_Message_GetArray(msg, &field); - if (arr) { - if (size) *size = arr->size; - return ($0 const*)_upb_array_constptr(arr); - } else { - if (size) *size = 0; - return NULL; - } + UPB_INLINE $0 const* $1_$2(const $1* msg, size_t* len) { + return ($0 const*)_upb_array_accessor(msg, $3, len); } )cc", CTypeConst(field), msg_name, ResolveFieldName(field, field_names), - FieldInitializer(layout, field)); + layout.GetFieldOffset(field)); } void GenerateScalarGetters(const protobuf::FieldDescriptor* field, @@ -598,58 +618,41 @@ void GenerateRepeatedSetters(const protobuf::FieldDescriptor* field, std::string resolved_name = ResolveFieldName(field, field_names); output( R"cc( - UPB_INLINE $0* $1_mutable_$2($1* msg, size_t* size) { - upb_MiniTableField field = $3; - upb_Array* arr = upb_Message_GetMutableArray(msg, &field); - if (arr) { - if (size) *size = arr->size; - return ($0*)_upb_array_ptr(arr); - } else { - if (size) *size = 0; - return NULL; - } + UPB_INLINE $0* $1_mutable_$2($1* msg, size_t* len) { + return ($0*)_upb_array_mutable_accessor(msg, $3, len); } )cc", - CType(field), msg_name, resolved_name, FieldInitializer(layout, field)); + CType(field), msg_name, resolved_name, layout.GetFieldOffset(field)); output( R"cc( - UPB_INLINE $0* $1_resize_$2($1* msg, size_t size, upb_Arena* arena) { - upb_MiniTableField field = $3; - return ($0*)upb_Message_ResizeArray(msg, &field, size, arena); + UPB_INLINE $0* $1_resize_$2($1* msg, size_t len, upb_Arena* arena) { + return ($0*)_upb_Array_Resize_accessor2(msg, $3, len, $4, arena); } )cc", - CType(field), msg_name, resolved_name, FieldInitializer(layout, field)); + CType(field), msg_name, resolved_name, layout.GetFieldOffset(field), + SizeLg2(field)); if (field->cpp_type() == protobuf::FieldDescriptor::CPPTYPE_MESSAGE) { output( R"cc( UPB_INLINE struct $0* $1_add_$2($1* msg, upb_Arena* arena) { - upb_MiniTableField field = $4; - upb_Array* arr = upb_Message_GetOrCreateMutableArray(msg, &field, arena); - if (!arr || !_upb_Array_ResizeUninitialized(arr, arr->size + 1, arena)) { - return NULL; - } struct $0* sub = (struct $0*)_upb_Message_New(&$3, arena); - if (!arr || !sub) return NULL; - _upb_Array_Set(arr, arr->size - 1, &sub, sizeof(sub)); + bool ok = _upb_Array_Append_accessor2(msg, $4, $5, &sub, arena); + if (!ok) return NULL; return sub; } )cc", MessageName(field->message_type()), msg_name, resolved_name, - MessageInit(field->message_type()), FieldInitializer(layout, field)); + MessageInit(field->message_type()), layout.GetFieldOffset(field), + SizeLg2(field)); } else { output( R"cc( UPB_INLINE bool $1_add_$2($1* msg, $0 val, upb_Arena* arena) { - upb_MiniTableField field = $3; - upb_Array* arr = upb_Message_GetOrCreateMutableArray(msg, &field, arena); - if (!arr || !_upb_Array_ResizeUninitialized(arr, arr->size + 1, arena)) { - return false; - } - _upb_Array_Set(arr, arr->size - 1, &val, sizeof(val)); - return true; + return _upb_Array_Append_accessor2(msg, $3, $4, &val, arena); } )cc", - CType(field), msg_name, resolved_name, FieldInitializer(layout, field)); + CType(field), msg_name, resolved_name, layout.GetFieldOffset(field), + SizeLg2(field)); } } From 003ecb3125a6f186d85814d06ca03cec15c0f093 Mon Sep 17 00:00:00 2001 From: Joshua Haberman Date: Tue, 27 Dec 2022 09:26:17 -0800 Subject: [PATCH 09/21] Rolling forward: expanded GCC diagnostic pragmas to cover all of accessors.h. PiperOrigin-RevId: 497983863 --- .github/workflows/bazel_tests.yml | 6 +- protos/protos_internal_test.cc | 2 +- upb/collections/array.c | 59 ++++----------- upb/collections/array_internal.h | 122 ++++++------------------------ upb/message/accessors.c | 32 -------- upb/message/accessors.h | 94 +++++++++++++++++++---- upb/mini_table/field_internal.h | 16 ++++ upbc/file_layout.h | 17 ----- upbc/protoc-gen-upb.cc | 87 ++++++++++----------- 9 files changed, 181 insertions(+), 254 deletions(-) diff --git a/.github/workflows/bazel_tests.yml b/.github/workflows/bazel_tests.yml index efe6fe5e69..b0affa1859 100644 --- a/.github/workflows/bazel_tests.yml +++ b/.github/workflows/bazel_tests.yml @@ -24,7 +24,7 @@ jobs: include: - { NAME: "Fastbuild", BAZEL: bazel, CC: clang, os: ubuntu-20.04, flags: "" } - { NAME: "Optmized", BAZEL: bazel, CC: clang, os: ubuntu-20.04, flags: "-c opt" } # Some warnings only fire with -c opt - - { NAME: "GCC Optimized", BAZEL: bazel, CC: gcc, os: ubuntu-20.04, flags: "-c opt" } + - { NAME: "GCC Optimized", BAZEL: bazel, CC: gcc-12, os: ubuntu-22.04, flags: "-c opt" } - { NAME: "FastTable", BAZEL: bazel, CC: clang, os: ubuntu-20.04, flags: "--//:fasttable_enabled=true -- -cmake:test_generated_files" } - { NAME: "ASAN", BAZEL: bazel, CC: clang, os: ubuntu-20.04, flags: "--config=asan -c dbg -- -benchmarks:benchmark -python/..." } - { NAME: "UBSAN", BAZEL: bazel, CC: clang, os: ubuntu-20.04, flags: "--config=ubsan -c dbg -- -benchmarks:benchmark -python/... -lua/...", install: "libunwind-dev" } @@ -53,6 +53,10 @@ jobs: wget -O $FILENAME https://github.com/bazelbuild/bazel/releases/download/$VERSION/${{ matrix.BAZEL }} chmod a+x $FILENAME if: ${{ matrix.BAZEL != 'bazel' }} + - name: Check tool versions + run: | + ${{ matrix.CC }} --version + ${{ matrix.BAZEL }} --version - name: Set up Bazel read/write caching run: echo "BAZEL_CACHE_AUTH=--google_default_credentials" >> $GITHUB_ENV if: ${{ github.event.pull_request.head.repo.full_name == 'protocolbuffers/upb' }} diff --git a/protos/protos_internal_test.cc b/protos/protos_internal_test.cc index 835601c549..99bfaa2402 100644 --- a/protos/protos_internal_test.cc +++ b/protos/protos_internal_test.cc @@ -40,9 +40,9 @@ TEST(CppGeneratedCode, InternalMoveMessage) { upb_Arena* source_arena = upb_Arena_New(); protos_generator_test_TestModel* message = protos_generator_test_TestModel_new(source_arena); + ASSERT_NE(message, nullptr); protos_generator_test_TestModel_set_int_value_with_default(message, 123); - EXPECT_NE(message, nullptr); // Move ownership. TestModel model = protos::internal::MoveMessage(message, source_arena); diff --git a/upb/collections/array.c b/upb/collections/array.c index a957f26ab8..fdb1bd206c 100644 --- a/upb/collections/array.c +++ b/upb/collections/array.c @@ -32,23 +32,22 @@ // Must be last. #include "upb/port/def.inc" -static const char _upb_CTypeo_sizelg2[12] = { - 0, - 0, /* kUpb_CType_Bool */ - 2, /* kUpb_CType_Float */ - 2, /* kUpb_CType_Int32 */ - 2, /* kUpb_CType_UInt32 */ - 2, /* kUpb_CType_Enum */ - UPB_SIZE(2, 3), /* kUpb_CType_Message */ - 3, /* kUpb_CType_Double */ - 3, /* kUpb_CType_Int64 */ - 3, /* kUpb_CType_UInt64 */ - UPB_SIZE(3, 4), /* kUpb_CType_String */ - UPB_SIZE(3, 4), /* kUpb_CType_Bytes */ +const char _upb_Array_CTypeSizeLg2Table[] = { + [kUpb_CType_Bool] = 0, + [kUpb_CType_Float] = 2, + [kUpb_CType_Int32] = 2, + [kUpb_CType_UInt32] = 2, + [kUpb_CType_Enum] = 2, + [kUpb_CType_Message] = UPB_SIZE(2, 3), + [kUpb_CType_Double] = 3, + [kUpb_CType_Int64] = 3, + [kUpb_CType_UInt64] = 3, + [kUpb_CType_String] = UPB_SIZE(3, 4), + [kUpb_CType_Bytes] = UPB_SIZE(3, 4), }; upb_Array* upb_Array_New(upb_Arena* a, upb_CType type) { - return _upb_Array_New(a, 4, _upb_CTypeo_sizelg2[type]); + return _upb_Array_New(a, 4, _upb_Array_CTypeSizeLg2(type)); } size_t upb_Array_Size(const upb_Array* arr) { return arr->size; } @@ -142,35 +141,3 @@ bool _upb_array_realloc(upb_Array* arr, size_t min_capacity, upb_Arena* arena) { arr->capacity = new_capacity; return true; } - -static upb_Array* getorcreate_array(upb_Array** arr_ptr, int elem_size_lg2, - upb_Arena* arena) { - upb_Array* arr = *arr_ptr; - if (!arr) { - arr = _upb_Array_New(arena, 4, elem_size_lg2); - if (!arr) return NULL; - *arr_ptr = arr; - } - return arr; -} - -void* _upb_Array_Resize_fallback(upb_Array** arr_ptr, size_t size, - int elem_size_lg2, upb_Arena* arena) { - upb_Array* arr = getorcreate_array(arr_ptr, elem_size_lg2, arena); - return arr && _upb_Array_ResizeUninitialized(arr, size, arena) - ? _upb_array_ptr(arr) - : NULL; -} - -bool _upb_Array_Append_fallback(upb_Array** arr_ptr, const void* value, - int elem_size_lg2, upb_Arena* arena) { - upb_Array* arr = getorcreate_array(arr_ptr, elem_size_lg2, arena); - if (!arr) return false; - - size_t elems = arr->size; - if (!_upb_Array_ResizeUninitialized(arr, elems + 1, arena)) return false; - - char* data = _upb_array_ptr(arr); - memcpy(data + (elems << elem_size_lg2), value, 1 << elem_size_lg2); - return true; -} diff --git a/upb/collections/array_internal.h b/upb/collections/array_internal.h index e1a4cefde2..0c126eb642 100644 --- a/upb/collections/array_internal.h +++ b/upb/collections/array_internal.h @@ -48,8 +48,14 @@ struct upb_Array { }; // LINT.ThenChange(GoogleInternalName1) +UPB_INLINE size_t _upb_Array_ElementSizeLg2(const upb_Array* arr) { + size_t ret = arr->data & 7; + UPB_ASSERT(ret <= 4); + return ret; +} + UPB_INLINE const void* _upb_array_constptr(const upb_Array* arr) { - UPB_ASSERT((arr->data & 7) <= 4); + _upb_Array_ElementSizeLg2(arr); // Check assertion. return (void*)(arr->data & ~(uintptr_t)7); } @@ -68,8 +74,15 @@ UPB_INLINE uintptr_t _upb_tag_arrptr(void* ptr, int elem_size_lg2) { return (uintptr_t)ptr | (unsigned)elem_size_lg2; } +extern const char _upb_Array_CTypeSizeLg2Table[]; + +UPB_INLINE size_t _upb_Array_CTypeSizeLg2(upb_CType ctype) { + return _upb_Array_CTypeSizeLg2Table[ctype]; +} + UPB_INLINE upb_Array* _upb_Array_New(upb_Arena* a, size_t init_capacity, int elem_size_lg2) { + UPB_ASSERT(elem_size_lg2 <= 4); const size_t arr_size = UPB_ALIGN_UP(sizeof(upb_Array), UPB_MALLOC_ALIGN); const size_t bytes = arr_size + (init_capacity << elem_size_lg2); upb_Array* arr = (upb_Array*)upb_Arena_Malloc(a, bytes); @@ -83,12 +96,6 @@ UPB_INLINE upb_Array* _upb_Array_New(upb_Arena* a, size_t init_capacity, // Resizes the capacity of the array to be at least min_size. bool _upb_array_realloc(upb_Array* arr, size_t min_size, upb_Arena* arena); -// Fallback functions for when the accessors require a resize. -void* _upb_Array_Resize_fallback(upb_Array** arr_ptr, size_t size, - int elem_size_lg2, upb_Arena* arena); -bool _upb_Array_Append_fallback(upb_Array** arr_ptr, const void* value, - int elem_size_lg2, upb_Arena* arena); - UPB_INLINE bool _upb_array_reserve(upb_Array* arr, size_t size, upb_Arena* arena) { if (arr->capacity < size) return _upb_array_realloc(arr, size, arena); @@ -103,98 +110,19 @@ UPB_INLINE bool _upb_Array_ResizeUninitialized(upb_Array* arr, size_t size, return true; } -UPB_INLINE void _upb_array_detach(const void* msg, size_t ofs) { - *UPB_PTR_AT(msg, ofs, upb_Array*) = NULL; -} - -UPB_INLINE const void* _upb_array_accessor(const void* msg, size_t ofs, - size_t* size) { - const upb_Array* arr = *UPB_PTR_AT(msg, ofs, const upb_Array*); - if (arr) { - if (size) *size = arr->size; - return _upb_array_constptr(arr); - } else { - if (size) *size = 0; - return NULL; - } -} - -UPB_INLINE void* _upb_array_mutable_accessor(void* msg, size_t ofs, - size_t* size) { - upb_Array* arr = *UPB_PTR_AT(msg, ofs, upb_Array*); - if (arr) { - if (size) *size = arr->size; - return _upb_array_ptr(arr); - } else { - if (size) *size = 0; - return NULL; - } +// This function is intended for situations where elem_size is compile-time +// constant or a known expression of the form (1 << lg2), so that the expression +// i*elem_size does not result in an actual multiplication. +UPB_INLINE void _upb_Array_Set(upb_Array* arr, size_t i, const void* data, + size_t elem_size) { + UPB_ASSERT(i < arr->size); + UPB_ASSERT(elem_size == 1U << _upb_Array_ElementSizeLg2(arr)); + char* arr_data = (char*)_upb_array_ptr(arr); + memcpy(arr_data + (i * elem_size), data, elem_size); } -UPB_INLINE void* _upb_Array_Resize_accessor2(void* msg, size_t ofs, size_t size, - int elem_size_lg2, - upb_Arena* arena) { - upb_Array** arr_ptr = UPB_PTR_AT(msg, ofs, upb_Array*); - upb_Array* arr = *arr_ptr; - if (!arr || arr->capacity < size) { - return _upb_Array_Resize_fallback(arr_ptr, size, elem_size_lg2, arena); - } - arr->size = size; - return _upb_array_ptr(arr); -} - -UPB_INLINE bool _upb_Array_Append_accessor2(void* msg, size_t ofs, - int elem_size_lg2, - const void* value, - upb_Arena* arena) { - upb_Array** arr_ptr = UPB_PTR_AT(msg, ofs, upb_Array*); - size_t elem_size = 1 << elem_size_lg2; - upb_Array* arr = *arr_ptr; - void* ptr; - if (!arr || arr->size == arr->capacity) { - return _upb_Array_Append_fallback(arr_ptr, value, elem_size_lg2, arena); - } - ptr = _upb_array_ptr(arr); - memcpy(UPB_PTR_AT(ptr, arr->size * elem_size, char), value, elem_size); - arr->size++; - return true; -} - -// Used by old generated code, remove once all code has been regenerated. -UPB_INLINE int _upb_sizelg2(upb_CType type) { - switch (type) { - case kUpb_CType_Bool: - return 0; - case kUpb_CType_Float: - case kUpb_CType_Int32: - case kUpb_CType_UInt32: - case kUpb_CType_Enum: - return 2; - case kUpb_CType_Message: - return UPB_SIZE(2, 3); - case kUpb_CType_Double: - case kUpb_CType_Int64: - case kUpb_CType_UInt64: - return 3; - case kUpb_CType_String: - case kUpb_CType_Bytes: - return UPB_SIZE(3, 4); - } - UPB_UNREACHABLE(); -} - -UPB_INLINE void* _upb_Array_Resize_accessor(void* msg, size_t ofs, size_t size, - upb_CType type, upb_Arena* arena) { - return _upb_Array_Resize_accessor2(msg, ofs, size, _upb_sizelg2(type), arena); -} - -UPB_INLINE bool _upb_Array_Append_accessor(void* msg, size_t ofs, - size_t elem_size, upb_CType type, - const void* value, - upb_Arena* arena) { - (void)elem_size; - return _upb_Array_Append_accessor2(msg, ofs, _upb_sizelg2(type), value, - arena); +UPB_INLINE void _upb_array_detach(const void* msg, size_t ofs) { + *UPB_PTR_AT(msg, ofs, upb_Array*) = NULL; } #ifdef __cplusplus diff --git a/upb/message/accessors.c b/upb/message/accessors.c index 83ca4040ec..4359441a33 100644 --- a/upb/message/accessors.c +++ b/upb/message/accessors.c @@ -38,38 +38,6 @@ // Must be last. #include "upb/port/def.inc" -// Maps descriptor type to elem_size_lg2. -static int _upb_MiniTableField_CTypeLg2Size(const upb_MiniTableField* f) { - static const uint8_t sizes[] = { - -1, /* invalid descriptor type */ - 3, /* DOUBLE */ - 2, /* FLOAT */ - 3, /* INT64 */ - 3, /* UINT64 */ - 2, /* INT32 */ - 3, /* FIXED64 */ - 2, /* FIXED32 */ - 0, /* BOOL */ - UPB_SIZE(3, 4), /* STRING */ - UPB_SIZE(2, 3), /* GROUP */ - UPB_SIZE(2, 3), /* MESSAGE */ - UPB_SIZE(3, 4), /* BYTES */ - 2, /* UINT32 */ - 2, /* ENUM */ - 2, /* SFIXED32 */ - 3, /* SFIXED64 */ - 2, /* SINT32 */ - 3, /* SINT64 */ - }; - return sizes[f->descriptortype]; -} - -void* upb_Message_ResizeArray(upb_Message* msg, const upb_MiniTableField* field, - size_t len, upb_Arena* arena) { - return _upb_Array_Resize_accessor2( - msg, field->offset, len, _upb_MiniTableField_CTypeLg2Size(field), arena); -} - typedef struct { const char* ptr; uint64_t val; diff --git a/upb/message/accessors.h b/upb/message/accessors.h index c40b9ddca8..521078c157 100644 --- a/upb/message/accessors.h +++ b/upb/message/accessors.h @@ -30,6 +30,7 @@ #include "upb/base/descriptor_constants.h" #include "upb/collections/array.h" +#include "upb/collections/array_internal.h" #include "upb/collections/map.h" #include "upb/collections/map_internal.h" #include "upb/message/extension_internal.h" @@ -41,6 +42,22 @@ // Must be last. #include "upb/port/def.inc" +#if defined(__GNUC__) && !defined(__clang__) +// GCC raises incorrect warnings in these functions. It thinks that we are +// overrunning buffers, but we carefully write the functions in this file to +// guarantee that this is impossible. GCC gets this wrong due it its failure +// to perform constant propagation as we expect: +// - https://gcc.gnu.org/bugzilla/show_bug.cgi?id=108217 +// - https://gcc.gnu.org/bugzilla/show_bug.cgi?id=108226 +// +// Unfortunately this also indicates that GCC is not optimizing away the +// switch() in cases where it should be, compromising the performance. +#pragma GCC diagnostic push +#pragma GCC diagnostic ignored "-Warray-bounds" +#pragma GCC diagnostic ignored "-Wstringop-overflow" +#pragma GCC diagnostic ignored "-Wstringop-overread" +#endif + #ifdef __cplusplus extern "C" { #endif @@ -71,9 +88,6 @@ UPB_INLINE void _upb_Message_SetPresence(upb_Message* msg, UPB_INLINE bool _upb_MiniTable_ValueIsNonZero(const void* default_val, const upb_MiniTableField* field) { char zero[16] = {0}; - if (upb_IsRepeatedOrMap(field)) { - return memcmp(&zero, default_val, sizeof(void*)); - } switch (_upb_MiniTableField_GetRep(field)) { case kUpb_FieldRep_1Byte: return memcmp(&zero, default_val, 1) != 0; @@ -91,10 +105,6 @@ UPB_INLINE bool _upb_MiniTable_ValueIsNonZero(const void* default_val, UPB_INLINE void _upb_MiniTable_CopyFieldData(void* to, const void* from, const upb_MiniTableField* field) { - if (upb_IsRepeatedOrMap(field)) { - memcpy(to, from, sizeof(void*)); - return; - } switch (_upb_MiniTableField_GetRep(field)) { case kUpb_FieldRep_1Byte: memcpy(to, from, 1); @@ -113,6 +123,32 @@ UPB_INLINE void _upb_MiniTable_CopyFieldData(void* to, const void* from, UPB_UNREACHABLE(); } +UPB_INLINE size_t +_upb_MiniTable_ElementSizeLg2(const upb_MiniTableField* field) { + const unsigned char table[] = { + 0, + 3, // kUpb_FieldType_Double = 1, + 2, // kUpb_FieldType_Float = 2, + 3, // kUpb_FieldType_Int64 = 3, + 3, // kUpb_FieldType_UInt64 = 4, + 2, // kUpb_FieldType_Int32 = 5, + 3, // kUpb_FieldType_Fixed64 = 6, + 2, // kUpb_FieldType_Fixed32 = 7, + 0, // kUpb_FieldType_Bool = 8, + UPB_SIZE(3, 4), // kUpb_FieldType_String = 9, + UPB_SIZE(2, 3), // kUpb_FieldType_Group = 10, + UPB_SIZE(2, 3), // kUpb_FieldType_Message = 11, + UPB_SIZE(3, 4), // kUpb_FieldType_Bytes = 12, + 2, // kUpb_FieldType_UInt32 = 13, + 2, // kUpb_FieldType_Enum = 14, + 2, // kUpb_FieldType_SFixed32 = 15, + 3, // kUpb_FieldType_SFixed64 = 16, + 2, // kUpb_FieldType_SInt32 = 17, + 3, // kUpb_FieldType_SInt64 = 18, + }; + return table[field->descriptortype]; +} + // Here we define universal getter/setter functions for message fields. // These look very branchy and inefficient, but as long as the MiniTableField // values are known at compile time, all the branches are optimized away and @@ -261,12 +297,14 @@ UPB_INLINE void _upb_Message_ClearNonExtensionField( UPB_INLINE upb_Map* _upb_MiniTable_GetOrCreateMutableMap( upb_Message* msg, const upb_MiniTableField* field, size_t key_size, size_t val_size, upb_Arena* arena) { - UPB_ASSUME(upb_IsRepeatedOrMap(field)); + _upb_MiniTableField_CheckIsMap(field); upb_Map* map = NULL; upb_Map* default_map_value = NULL; _upb_Message_GetNonExtensionField(msg, field, &default_map_value, &map); if (!map) { map = _upb_Map_New(arena, key_size, val_size); + // Check again due to: https://godbolt.org/z/7WfaoKG1r + _upb_MiniTableField_CheckIsMap(field); _upb_Message_SetNonExtensionField(msg, field, &map); } return map; @@ -531,6 +569,7 @@ UPB_API_INLINE upb_Message* upb_MiniTable_GetMutableMessage( UPB_API_INLINE const upb_Array* upb_Message_GetArray( const upb_Message* msg, const upb_MiniTableField* field) { + _upb_MiniTableField_CheckIsArray(field); const upb_Array* ret; const upb_Array* default_val = NULL; _upb_Message_GetNonExtensionField(msg, field, &default_val, &ret); @@ -539,22 +578,43 @@ UPB_API_INLINE const upb_Array* upb_Message_GetArray( UPB_API_INLINE upb_Array* upb_Message_GetMutableArray( upb_Message* msg, const upb_MiniTableField* field) { + _upb_MiniTableField_CheckIsArray(field); return (upb_Array*)upb_Message_GetArray(msg, field); } -UPB_API_INLINE upb_Array* upb_Message_GetOrCreateMutableArray( - upb_Message* msg, const upb_MiniTableField* field, upb_CType ctype, - upb_Arena* arena) { +UPB_INLINE upb_Array* upb_Message_GetOrCreateMutableArray( + upb_Message* msg, const upb_MiniTableField* field, upb_Arena* arena) { + _upb_MiniTableField_CheckIsArray(field); upb_Array* array = upb_Message_GetMutableArray(msg, field); if (!array) { - array = upb_Array_New(arena, ctype); + array = _upb_Array_New(arena, 4, _upb_MiniTable_ElementSizeLg2(field)); + // Check again due to: https://godbolt.org/z/7WfaoKG1r + _upb_MiniTableField_CheckIsArray(field); _upb_Message_SetField(msg, field, &array, arena); } return array; } -void* upb_Message_ResizeArray(upb_Message* msg, const upb_MiniTableField* field, - size_t len, upb_Arena* arena); +UPB_INLINE upb_Array* upb_Message_ResizeArrayUninitialized( + upb_Message* msg, const upb_MiniTableField* field, size_t size, + upb_Arena* arena) { + _upb_MiniTableField_CheckIsArray(field); + upb_Array* arr = upb_Message_GetOrCreateMutableArray(msg, field, arena); + if (!arr || !_upb_Array_ResizeUninitialized(arr, size, arena)) return NULL; + return arr; +} + +// TODO: remove, migrate users to upb_Message_ResizeArrayUninitialized(), which +// has the same semantics but a clearer name. Alternatively, if users want an +// initialized variant, we can also offer that. +UPB_API_INLINE void* upb_Message_ResizeArray(upb_Message* msg, + const upb_MiniTableField* field, + size_t size, upb_Arena* arena) { + _upb_MiniTableField_CheckIsArray(field); + upb_Array* arr = + upb_Message_ResizeArrayUninitialized(msg, field, size, arena); + return _upb_array_ptr(arr); +} UPB_API_INLINE bool upb_MiniTableField_IsClosedEnum( const upb_MiniTableField* field) { @@ -563,7 +623,7 @@ UPB_API_INLINE bool upb_MiniTableField_IsClosedEnum( UPB_API_INLINE const upb_Map* upb_Message_GetMap( const upb_Message* msg, const upb_MiniTableField* field) { - UPB_ASSUME(upb_FieldMode_Get(field) == kUpb_FieldMode_Map); + _upb_MiniTableField_CheckIsMap(field); const upb_Map* ret; const upb_Map* default_val = NULL; _upb_Message_GetNonExtensionField(msg, field, &default_val, &ret); @@ -689,6 +749,10 @@ upb_UnknownToMessage_Status upb_MiniTable_PromoteUnknownToMap( } /* extern "C" */ #endif +#if defined(__GNUC__) && !defined(__clang__) +#pragma GCC diagnostic pop +#endif + #include "upb/port/undef.inc" #endif // UPB_MESSAGE_ACCESSORS_H_ diff --git a/upb/mini_table/field_internal.h b/upb/mini_table/field_internal.h index f58531a5a6..e2b8f6bcc2 100644 --- a/upb/mini_table/field_internal.h +++ b/upb/mini_table/field_internal.h @@ -75,6 +75,8 @@ typedef enum { kUpb_FieldRep_StringView = 2, kUpb_FieldRep_8Byte = 3, + kUpb_FieldRep_NativePointer = + UPB_SIZE(kUpb_FieldRep_4Byte, kUpb_FieldRep_8Byte), kUpb_FieldRep_Max = kUpb_FieldRep_8Byte, } upb_FieldRep; @@ -93,6 +95,20 @@ UPB_INLINE upb_FieldMode upb_FieldMode_Get(const upb_MiniTableField* field) { return (upb_FieldMode)(field->mode & 3); } +UPB_INLINE void _upb_MiniTableField_CheckIsArray( + const upb_MiniTableField* field) { + UPB_ASSUME(_upb_MiniTableField_GetRep(field) == kUpb_FieldRep_NativePointer); + UPB_ASSUME(upb_FieldMode_Get(field) == kUpb_FieldMode_Array); + UPB_ASSUME(field->presence == 0); +} + +UPB_INLINE void _upb_MiniTableField_CheckIsMap( + const upb_MiniTableField* field) { + UPB_ASSUME(_upb_MiniTableField_GetRep(field) == kUpb_FieldRep_NativePointer); + UPB_ASSUME(upb_FieldMode_Get(field) == kUpb_FieldMode_Map); + UPB_ASSUME(field->presence == 0); +} + UPB_INLINE bool upb_IsRepeatedOrMap(const upb_MiniTableField* field) { // This works because upb_FieldMode has no value 3. return !(field->mode & kUpb_FieldMode_Scalar); diff --git a/upbc/file_layout.h b/upbc/file_layout.h index 51ea2b72c0..265dfe3c83 100644 --- a/upbc/file_layout.h +++ b/upbc/file_layout.h @@ -180,23 +180,6 @@ class FileLayout { return layout64_.GetEnumTable(d); } - std::string GetFieldOffset(const protobuf::FieldDescriptor* f) const { - const upb_MiniTableField* f_32 = upb_MiniTable_FindFieldByNumber( - GetMiniTable32(f->containing_type()), f->number()); - const upb_MiniTableField* f_64 = upb_MiniTable_FindFieldByNumber( - GetMiniTable64(f->containing_type()), f->number()); - return UpbSize(f_32->offset, f_64->offset); - } - - std::string GetOneofCaseOffset(const protobuf::OneofDescriptor* o) const { - const protobuf::FieldDescriptor* f = o->field(0); - const upb_MiniTableField* f_32 = upb_MiniTable_FindFieldByNumber( - GetMiniTable32(f->containing_type()), f->number()); - const upb_MiniTableField* f_64 = upb_MiniTable_FindFieldByNumber( - GetMiniTable64(f->containing_type()), f->number()); - return UpbSize(~f_32->presence, ~f_64->presence); - } - std::string GetMessageSize(const protobuf::Descriptor* d) const { return UpbSize(GetMiniTable32(d)->size, GetMiniTable64(d)->size); } diff --git a/upbc/protoc-gen-upb.cc b/upbc/protoc-gen-upb.cc index 71d34963e0..4c27f53233 100644 --- a/upbc/protoc-gen-upb.cc +++ b/upbc/protoc-gen-upb.cc @@ -137,34 +137,6 @@ std::string CTypeInternal(const protobuf::FieldDescriptor* field, } } -std::string SizeLg2(const protobuf::FieldDescriptor* field) { - switch (field->cpp_type()) { - case protobuf::FieldDescriptor::CPPTYPE_MESSAGE: - return "UPB_SIZE(2, 3)"; - case protobuf::FieldDescriptor::CPPTYPE_ENUM: - return std::to_string(2); - case protobuf::FieldDescriptor::CPPTYPE_BOOL: - return std::to_string(0); - case protobuf::FieldDescriptor::CPPTYPE_FLOAT: - return std::to_string(2); - case protobuf::FieldDescriptor::CPPTYPE_INT32: - return std::to_string(2); - case protobuf::FieldDescriptor::CPPTYPE_UINT32: - return std::to_string(2); - case protobuf::FieldDescriptor::CPPTYPE_DOUBLE: - return std::to_string(3); - case protobuf::FieldDescriptor::CPPTYPE_INT64: - return std::to_string(3); - case protobuf::FieldDescriptor::CPPTYPE_UINT64: - return std::to_string(3); - case protobuf::FieldDescriptor::CPPTYPE_STRING: - return "UPB_SIZE(3, 4)"; - default: - fprintf(stderr, "Unexpected type"); - abort(); - } -} - std::string FloatToCLiteral(float value) { if (value == std::numeric_limits::infinity()) { return "kUpb_FltInfinity"; @@ -518,12 +490,20 @@ void GenerateRepeatedGetters(const protobuf::FieldDescriptor* field, Output& output) { output( R"cc( - UPB_INLINE $0 const* $1_$2(const $1* msg, size_t* len) { - return ($0 const*)_upb_array_accessor(msg, $3, len); + UPB_INLINE $0 const* $1_$2(const $1* msg, size_t* size) { + const upb_MiniTableField field = $3; + const upb_Array* arr = upb_Message_GetArray(msg, &field); + if (arr) { + if (size) *size = arr->size; + return ($0 const*)_upb_array_constptr(arr); + } else { + if (size) *size = 0; + return NULL; + } } )cc", CTypeConst(field), msg_name, ResolveFieldName(field, field_names), - layout.GetFieldOffset(field)); + FieldInitializer(layout, field)); } void GenerateScalarGetters(const protobuf::FieldDescriptor* field, @@ -618,41 +598,58 @@ void GenerateRepeatedSetters(const protobuf::FieldDescriptor* field, std::string resolved_name = ResolveFieldName(field, field_names); output( R"cc( - UPB_INLINE $0* $1_mutable_$2($1* msg, size_t* len) { - return ($0*)_upb_array_mutable_accessor(msg, $3, len); + UPB_INLINE $0* $1_mutable_$2($1* msg, size_t* size) { + upb_MiniTableField field = $3; + upb_Array* arr = upb_Message_GetMutableArray(msg, &field); + if (arr) { + if (size) *size = arr->size; + return ($0*)_upb_array_ptr(arr); + } else { + if (size) *size = 0; + return NULL; + } } )cc", - CType(field), msg_name, resolved_name, layout.GetFieldOffset(field)); + CType(field), msg_name, resolved_name, FieldInitializer(layout, field)); output( R"cc( - UPB_INLINE $0* $1_resize_$2($1* msg, size_t len, upb_Arena* arena) { - return ($0*)_upb_Array_Resize_accessor2(msg, $3, len, $4, arena); + UPB_INLINE $0* $1_resize_$2($1* msg, size_t size, upb_Arena* arena) { + upb_MiniTableField field = $3; + return ($0*)upb_Message_ResizeArray(msg, &field, size, arena); } )cc", - CType(field), msg_name, resolved_name, layout.GetFieldOffset(field), - SizeLg2(field)); + CType(field), msg_name, resolved_name, FieldInitializer(layout, field)); if (field->cpp_type() == protobuf::FieldDescriptor::CPPTYPE_MESSAGE) { output( R"cc( UPB_INLINE struct $0* $1_add_$2($1* msg, upb_Arena* arena) { + upb_MiniTableField field = $4; + upb_Array* arr = upb_Message_GetOrCreateMutableArray(msg, &field, arena); + if (!arr || !_upb_Array_ResizeUninitialized(arr, arr->size + 1, arena)) { + return NULL; + } struct $0* sub = (struct $0*)_upb_Message_New(&$3, arena); - bool ok = _upb_Array_Append_accessor2(msg, $4, $5, &sub, arena); - if (!ok) return NULL; + if (!arr || !sub) return NULL; + _upb_Array_Set(arr, arr->size - 1, &sub, sizeof(sub)); return sub; } )cc", MessageName(field->message_type()), msg_name, resolved_name, - MessageInit(field->message_type()), layout.GetFieldOffset(field), - SizeLg2(field)); + MessageInit(field->message_type()), FieldInitializer(layout, field)); } else { output( R"cc( UPB_INLINE bool $1_add_$2($1* msg, $0 val, upb_Arena* arena) { - return _upb_Array_Append_accessor2(msg, $3, $4, &val, arena); + upb_MiniTableField field = $3; + upb_Array* arr = upb_Message_GetOrCreateMutableArray(msg, &field, arena); + if (!arr || !_upb_Array_ResizeUninitialized(arr, arr->size + 1, arena)) { + return false; + } + _upb_Array_Set(arr, arr->size - 1, &val, sizeof(val)); + return true; } )cc", - CType(field), msg_name, resolved_name, layout.GetFieldOffset(field), - SizeLg2(field)); + CType(field), msg_name, resolved_name, FieldInitializer(layout, field)); } } From 1e28a99baa0bffd9439a9cf7f8ac36b64b7d1e43 Mon Sep 17 00:00:00 2001 From: Eric Salo Date: Tue, 27 Dec 2022 12:54:49 -0800 Subject: [PATCH 10/21] fix broken Dart unit tests Mark upb_Message_GetOrCreateMutableArray() as UPB_API_INLINE Update the kernel ffi code to reflect the new function signature Rerun ffigen PiperOrigin-RevId: 498019021 --- upb/message/accessors.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/upb/message/accessors.h b/upb/message/accessors.h index 521078c157..215f3558f3 100644 --- a/upb/message/accessors.h +++ b/upb/message/accessors.h @@ -582,7 +582,7 @@ UPB_API_INLINE upb_Array* upb_Message_GetMutableArray( return (upb_Array*)upb_Message_GetArray(msg, field); } -UPB_INLINE upb_Array* upb_Message_GetOrCreateMutableArray( +UPB_API_INLINE upb_Array* upb_Message_GetOrCreateMutableArray( upb_Message* msg, const upb_MiniTableField* field, upb_Arena* arena) { _upb_MiniTableField_CheckIsArray(field); upb_Array* array = upb_Message_GetMutableArray(msg, field); From f50e8b221b9d33d2bd8e6eb07fdbef6a46ad9016 Mon Sep 17 00:00:00 2001 From: Adam Cozzette Date: Tue, 27 Dec 2022 18:18:55 -0800 Subject: [PATCH 11/21] Update staleness_test() error message to include diff PiperOrigin-RevId: 498069301 --- cmake/staleness_test_lib.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/cmake/staleness_test_lib.py b/cmake/staleness_test_lib.py index 171d2be0fc..4887f24855 100644 --- a/cmake/staleness_test_lib.py +++ b/cmake/staleness_test_lib.py @@ -33,6 +33,7 @@ This code is used by test scripts generated from staleness_test() rules. from __future__ import absolute_import from __future__ import print_function +import difflib import sys import os from shutil import copyfile @@ -171,7 +172,10 @@ def CheckFilesMatch(config): continue for pair in stale_files: - diff_errors.append("File %s is out of date" % pair.target) + with open(pair.generated) as g, open(pair.target) as t: + diff = ''.join(difflib.unified_diff(g.read().splitlines(keepends=True), + t.read().splitlines(keepends=True))) + diff_errors.append("File %s is out of date:\n%s" % (pair.target, diff)) if diff_errors: error_msg = "Files out of date!\n\n" From a48af3f824cd86ca0e99d11cd92b7a20cd08f159 Mon Sep 17 00:00:00 2001 From: Joshua Haberman Date: Tue, 27 Dec 2022 20:43:42 -0800 Subject: [PATCH 12/21] Moved aliasing logic for string field parsing into EpsCopyInputStream. Moving the logic down to EpsCopyInputStream makes it easier to test and reuse this functionality. We also implement aliasing for the final bytes of the patch buffer, which has never been supported before. We used to always force a copy for any data parsed out of the patch buffer at the end of the stream. Much of this logic is ported directly from the C++ EpsCopyInputStream class. PiperOrigin-RevId: 498091644 --- BUILD | 1 + upb/wire/decode.c | 19 ++- upb/wire/decode_fast.c | 163 +++++++++++-------------- upb/wire/decode_internal.h | 2 - upb/wire/eps_copy_input_stream.h | 81 +++++++++++- upb/wire/eps_copy_input_stream_test.cc | 118 +++++++++++++++--- 6 files changed, 262 insertions(+), 122 deletions(-) diff --git a/BUILD b/BUILD index fba37786e9..462f15e465 100644 --- a/BUILD +++ b/BUILD @@ -956,6 +956,7 @@ cc_test( name = "eps_copy_input_stream_test", srcs = ["upb/wire/eps_copy_input_stream_test.cc"], deps = [ + ":upb", ":wire_internal", "@com_google_googletest//:gtest_main", ], diff --git a/upb/wire/decode.c b/upb/wire/decode.c index 67979f807d..e4c78bc307 100644 --- a/upb/wire/decode.c +++ b/upb/wire/decode.c @@ -221,16 +221,12 @@ static upb_Message* _upb_Decoder_NewSubMessage( static const char* _upb_Decoder_ReadString(upb_Decoder* d, const char* ptr, int size, upb_StringView* str) { - if (d->options & kUpb_DecodeOption_AliasString) { - str->data = ptr; - } else { - char* data = upb_Arena_Malloc(&d->arena, size); - if (!data) _upb_Decoder_ErrorJmp(d, kUpb_DecodeStatus_OutOfMemory); - memcpy(data, ptr, size); - str->data = data; - } + const char* str_ptr = ptr; + ptr = upb_EpsCopyInputStream_ReadString(&d->input, &str_ptr, size, &d->arena); + if (!ptr) _upb_Decoder_ErrorJmp(d, kUpb_DecodeStatus_OutOfMemory); + str->data = str_ptr; str->size = size; - return ptr + size; + return ptr; } UPB_FORCEINLINE @@ -1264,9 +1260,8 @@ upb_DecodeStatus upb_Decode(const char* buf, size_t size, void* msg, upb_Decoder state; unsigned depth = (unsigned)options >> 16; - if (upb_EpsCopyInputStream_Init(&state.input, &buf, size)) { - options &= ~kUpb_DecodeOption_AliasString; // Can't alias patch buf. - } + upb_EpsCopyInputStream_Init(&state.input, &buf, size, + options & kUpb_DecodeOption_AliasString); state.extreg = extreg; state.unknown = NULL; diff --git a/upb/wire/decode_fast.c b/upb/wire/decode_fast.c index 690f364fdd..6b048a0937 100644 --- a/upb/wire/decode_fast.c +++ b/upb/wire/decode_fast.c @@ -639,26 +639,17 @@ static const char* fastdecode_verifyutf8(upb_Decoder* d, const char* ptr, ptr = fastdecode_longsize(ptr, &size); \ } \ \ - if (UPB_UNLIKELY(!upb_EpsCopyInputStream_CheckDataSizeAvailable( \ - &d->input, ptr, size))) { \ + if (UPB_UNLIKELY(!upb_EpsCopyInputStream_CheckSize(&d->input, ptr, size))) { \ dst->size = 0; \ _upb_FastDecoder_ErrorJmp(d, kUpb_DecodeStatus_Malformed); \ } \ \ - if (d->options & kUpb_DecodeOption_AliasString) { \ - dst->data = ptr; \ - dst->size = size; \ - } else { \ - char* data = upb_Arena_Malloc(&d->arena, size); \ - if (!data) { \ - _upb_FastDecoder_ErrorJmp(d, kUpb_DecodeStatus_OutOfMemory); \ - } \ - memcpy(data, ptr, size); \ - dst->data = data; \ - dst->size = size; \ - } \ + const char* s_ptr = ptr; \ + ptr = upb_EpsCopyInputStream_ReadString(&d->input, &s_ptr, size, &d->arena); \ + if (!ptr) _upb_FastDecoder_ErrorJmp(d, kUpb_DecodeStatus_OutOfMemory); \ + dst->data = s_ptr; \ + dst->size = size; \ \ - ptr += size; \ if (validate_utf8) { \ data = (uint64_t)dst; \ UPB_MUSTTAIL return fastdecode_verifyutf8(UPB_PARSE_ARGS); \ @@ -702,7 +693,7 @@ static void fastdecode_docopy(upb_Decoder* d, const char* ptr, uint32_t size, size_t common_has; \ char* buf; \ \ - UPB_ASSERT((d->options & kUpb_DecodeOption_AliasString) == 0); \ + UPB_ASSERT(!upb_EpsCopyInputStream_AliasingAvailable(&d->input, ptr, 0)); \ UPB_ASSERT(fastdecode_checktag(data, tagbytes)); \ \ dst = fastdecode_getfield(d, ptr, msg, &data, &hasbits, &farr, \ @@ -781,79 +772,73 @@ static void fastdecode_docopy(upb_Decoder* d, const char* ptr, uint32_t size, hasbits, (uint64_t)dst); \ } -#define FASTDECODE_STRING(d, ptr, msg, table, hasbits, data, tagbytes, card, \ - copyfunc, validate_utf8) \ - upb_StringView* dst; \ - fastdecode_arr farr; \ - int64_t size; \ - \ - if (UPB_UNLIKELY(!fastdecode_checktag(data, tagbytes))) { \ - RETURN_GENERIC("string field tag mismatch\n"); \ - } \ - \ - if (UPB_UNLIKELY((d->options & kUpb_DecodeOption_AliasString) == 0)) { \ - UPB_MUSTTAIL return copyfunc(UPB_PARSE_ARGS); \ - } \ - \ - dst = fastdecode_getfield(d, ptr, msg, &data, &hasbits, &farr, \ - sizeof(upb_StringView), card); \ - \ - again: \ - if (card == CARD_r) { \ - dst = fastdecode_resizearr(d, dst, &farr, sizeof(upb_StringView)); \ - } \ - \ - size = (int8_t)ptr[tagbytes]; \ - ptr += tagbytes + 1; \ - dst->data = ptr; \ - dst->size = size; \ - \ - if (UPB_UNLIKELY(!upb_EpsCopyInputStream_CheckDataSizeAvailable( \ - &d->input, ptr, size))) { \ - ptr--; \ - if (validate_utf8) { \ - return fastdecode_longstring_utf8(d, ptr, msg, table, hasbits, \ - (uint64_t)dst); \ - } else { \ - return fastdecode_longstring_noutf8(d, ptr, msg, table, hasbits, \ - (uint64_t)dst); \ - } \ - } \ - \ - ptr += size; \ - \ - if (card == CARD_r) { \ - if (validate_utf8 && \ - !_upb_Decoder_VerifyUtf8Inline(dst->data, dst->size)) { \ - _upb_FastDecoder_ErrorJmp(d, kUpb_DecodeStatus_BadUtf8); \ - } \ - fastdecode_nextret ret = fastdecode_nextrepeated( \ - d, dst, &ptr, &farr, data, tagbytes, sizeof(upb_StringView)); \ - switch (ret.next) { \ - case FD_NEXT_SAMEFIELD: \ - dst = ret.dst; \ - if (UPB_UNLIKELY((d->options & kUpb_DecodeOption_AliasString) == 0)) { \ - /* Buffer flipped and we can't alias any more. Bounce to */ \ - /* copyfunc(), but via dispatch since we need to reload table */ \ - /* data also. */ \ - fastdecode_commitarr(dst, &farr, sizeof(upb_StringView)); \ - data = ret.tag; \ - UPB_MUSTTAIL return _upb_FastDecoder_TagDispatch(UPB_PARSE_ARGS); \ - } \ - goto again; \ - case FD_NEXT_OTHERFIELD: \ - data = ret.tag; \ - UPB_MUSTTAIL return _upb_FastDecoder_TagDispatch(UPB_PARSE_ARGS); \ - case FD_NEXT_ATLIMIT: \ - return ptr; \ - } \ - } \ - \ - if (card != CARD_r && validate_utf8) { \ - data = (uint64_t)dst; \ - UPB_MUSTTAIL return fastdecode_verifyutf8(UPB_PARSE_ARGS); \ - } \ - \ +#define FASTDECODE_STRING(d, ptr, msg, table, hasbits, data, tagbytes, card, \ + copyfunc, validate_utf8) \ + upb_StringView* dst; \ + fastdecode_arr farr; \ + int64_t size; \ + \ + if (UPB_UNLIKELY(!fastdecode_checktag(data, tagbytes))) { \ + RETURN_GENERIC("string field tag mismatch\n"); \ + } \ + \ + if (UPB_UNLIKELY( \ + !upb_EpsCopyInputStream_AliasingAvailable(&d->input, ptr, 0))) { \ + UPB_MUSTTAIL return copyfunc(UPB_PARSE_ARGS); \ + } \ + \ + dst = fastdecode_getfield(d, ptr, msg, &data, &hasbits, &farr, \ + sizeof(upb_StringView), card); \ + \ + again: \ + if (card == CARD_r) { \ + dst = fastdecode_resizearr(d, dst, &farr, sizeof(upb_StringView)); \ + } \ + \ + size = (int8_t)ptr[tagbytes]; \ + ptr += tagbytes + 1; \ + \ + if (UPB_UNLIKELY( \ + !upb_EpsCopyInputStream_AliasingAvailable(&d->input, ptr, size))) { \ + ptr--; \ + if (validate_utf8) { \ + return fastdecode_longstring_utf8(d, ptr, msg, table, hasbits, \ + (uint64_t)dst); \ + } else { \ + return fastdecode_longstring_noutf8(d, ptr, msg, table, hasbits, \ + (uint64_t)dst); \ + } \ + } \ + \ + dst->data = ptr; \ + dst->size = size; \ + ptr = upb_EpsCopyInputStream_ReadStringAliased(&d->input, &dst->data, \ + dst->size); \ + \ + if (card == CARD_r) { \ + if (validate_utf8 && \ + !_upb_Decoder_VerifyUtf8Inline(dst->data, dst->size)) { \ + _upb_FastDecoder_ErrorJmp(d, kUpb_DecodeStatus_BadUtf8); \ + } \ + fastdecode_nextret ret = fastdecode_nextrepeated( \ + d, dst, &ptr, &farr, data, tagbytes, sizeof(upb_StringView)); \ + switch (ret.next) { \ + case FD_NEXT_SAMEFIELD: \ + dst = ret.dst; \ + goto again; \ + case FD_NEXT_OTHERFIELD: \ + data = ret.tag; \ + UPB_MUSTTAIL return _upb_FastDecoder_TagDispatch(UPB_PARSE_ARGS); \ + case FD_NEXT_ATLIMIT: \ + return ptr; \ + } \ + } \ + \ + if (card != CARD_r && validate_utf8) { \ + data = (uint64_t)dst; \ + UPB_MUSTTAIL return fastdecode_verifyutf8(UPB_PARSE_ARGS); \ + } \ + \ UPB_MUSTTAIL return fastdecode_dispatch(UPB_PARSE_ARGS); /* Generate all combinations: diff --git a/upb/wire/decode_internal.h b/upb/wire/decode_internal.h index e1941b275f..03d2a05b2f 100644 --- a/upb/wire/decode_internal.h +++ b/upb/wire/decode_internal.h @@ -131,8 +131,6 @@ UPB_INLINE const char* _upb_Decoder_BufferFlipCallback( } d->unknown = new_start; } - - d->options &= ~kUpb_DecodeOption_AliasString; return new_start; } diff --git a/upb/wire/eps_copy_input_stream.h b/upb/wire/eps_copy_input_stream.h index ea6d530e10..234a918d4d 100644 --- a/upb/wire/eps_copy_input_stream.h +++ b/upb/wire/eps_copy_input_stream.h @@ -30,6 +30,8 @@ #include +#include "upb/mem/arena.h" + // Must be last. #include "upb/port/def.inc" @@ -41,9 +43,16 @@ // this invariant. #define kUpb_EpsCopyInputStream_SlopBytes 16 +enum { + kUpb_EpsCopyInputStream_NoAliasing = 0, + kUpb_EpsCopyInputStream_OnPatch = 1, + kUpb_EpsCopyInputStream_NoDelta = 2 +}; + typedef struct { const char* end; // Can read up to SlopBytes bytes beyond this. const char* limit_ptr; // For bounds checks, = end + UPB_MIN(limit, 0) + uintptr_t aliasing; int limit; // Submessage limit relative to end char patch[kUpb_EpsCopyInputStream_SlopBytes * 2]; } upb_EpsCopyInputStream; @@ -56,14 +65,16 @@ typedef const char* upb_EpsCopyInputStream_IsDoneFallbackFunc( // Initializes a upb_EpsCopyInputStream using the contents of the buffer // [*ptr, size]. Updates `*ptr` as necessary to guarantee that at least -// kUpb_EpsCopyInputStream_SlopBytes, and returns true if the pointer has been -// updated. -UPB_INLINE bool upb_EpsCopyInputStream_Init(upb_EpsCopyInputStream* e, - const char** ptr, size_t size) { +// kUpb_EpsCopyInputStream_SlopBytes are available to read. +UPB_INLINE void upb_EpsCopyInputStream_Init(upb_EpsCopyInputStream* e, + const char** ptr, size_t size, + bool enable_aliasing) { bool ret; if (size <= kUpb_EpsCopyInputStream_SlopBytes) { memset(&e->patch, 0, 32); if (size) memcpy(&e->patch, *ptr, size); + e->aliasing = enable_aliasing ? (uintptr_t)*ptr - (uintptr_t)e->patch + : kUpb_EpsCopyInputStream_NoAliasing; *ptr = e->patch; e->end = *ptr + size; e->limit = 0; @@ -71,10 +82,11 @@ UPB_INLINE bool upb_EpsCopyInputStream_Init(upb_EpsCopyInputStream* e, } else { e->end = *ptr + size - kUpb_EpsCopyInputStream_SlopBytes; e->limit = kUpb_EpsCopyInputStream_SlopBytes; + e->aliasing = enable_aliasing ? kUpb_EpsCopyInputStream_NoDelta + : kUpb_EpsCopyInputStream_NoAliasing; ret = false; } e->limit_ptr = e->end; - return ret; } typedef enum { @@ -195,6 +207,62 @@ UPB_INLINE bool upb_EpsCopyInputStream_CheckSubMessageSizeAvailable( return _upb_EpsCopyInputStream_CheckSizeAvailable(e, ptr, size, true); } +// Returns true if aliasing_enabled=true was passed to +// upb_EpsCopyInputStream_Init() when this stream was initialized. +UPB_INLINE bool upb_EpsCopyInputStream_AliasingEnabled( + upb_EpsCopyInputStream* e) { + return e->aliasing != kUpb_EpsCopyInputStream_NoAliasing; +} + +// Returns true if aliasing_enabled=true was passed to +// upb_EpsCopyInputStream_Init() when this stream was initialized *and* we can +// alias into the region [ptr, size] in an input buffer. +UPB_INLINE bool upb_EpsCopyInputStream_AliasingAvailable( + upb_EpsCopyInputStream* e, const char* ptr, size_t size) { + // When EpsCopyInputStream supports streaming, this will need to become a + // runtime check. + return upb_EpsCopyInputStream_CheckDataSizeAvailable(e, ptr, size) && + e->aliasing >= kUpb_EpsCopyInputStream_NoDelta; +} + +UPB_INLINE const char* upb_EpsCopyInputStream_ReadStringAliased( + upb_EpsCopyInputStream* e, const char** ptr, size_t size) { + UPB_ASSUME(upb_EpsCopyInputStream_AliasingAvailable(e, *ptr, size)); + uintptr_t delta = + e->aliasing == kUpb_EpsCopyInputStream_NoDelta ? 0 : e->aliasing; + const char* ret = *ptr + size; + *ptr = (const char*)((uintptr_t)*ptr + delta); + UPB_ASSUME(ret != NULL); + return ret; +} + +// Reads string data from the stream and advances the pointer accordingly. +// If aliasing was enabled when the stream was initialized, then the returned +// pointer will point into the input buffer if possible, otherwise new data +// will be allocated from arena and copied into. We may be forced to copy even +// if aliasing was enabled if the input data spans input buffers. +// +// Returns NULL if memory allocation failed, or we reached a premature EOF. +UPB_INLINE const char* upb_EpsCopyInputStream_ReadString( + upb_EpsCopyInputStream* e, const char** ptr, size_t size, + upb_Arena* arena) { + if (upb_EpsCopyInputStream_AliasingAvailable(e, *ptr, size)) { + return upb_EpsCopyInputStream_ReadStringAliased(e, ptr, size); + } else { + // We need to allocate and copy. + if (!upb_EpsCopyInputStream_CheckDataSizeAvailable(e, *ptr, size)) { + return NULL; + } + char* data = (char*)upb_Arena_Malloc(arena, size); + if (!data) return NULL; + memcpy(data, *ptr, size); + const char* ret = *ptr + size; + *ptr = data; + UPB_ASSUME(ret != NULL); + return ret; + } +} + UPB_INLINE void _upb_EpsCopyInputStream_CheckLimit(upb_EpsCopyInputStream* e) { UPB_ASSERT(e->limit_ptr == e->end + UPB_MIN(0, e->limit)); } @@ -245,6 +313,9 @@ UPB_INLINE const char* _upb_EpsCopyInputStream_IsDoneFallbackInline( e->limit -= kUpb_EpsCopyInputStream_SlopBytes; e->limit_ptr = e->end + e->limit; UPB_ASSERT(ptr < e->limit_ptr); + if (e->aliasing != kUpb_EpsCopyInputStream_NoAliasing) { + e->aliasing = (uintptr_t)old_end - (uintptr_t)new_start; + } return callback(e, old_end, new_start); } else { return callback(e, NULL, NULL); diff --git a/upb/wire/eps_copy_input_stream_test.cc b/upb/wire/eps_copy_input_stream_test.cc index 1249a64dc2..3ccb29db7f 100644 --- a/upb/wire/eps_copy_input_stream_test.cc +++ b/upb/wire/eps_copy_input_stream_test.cc @@ -5,6 +5,7 @@ #include #include "gtest/gtest.h" +#include "upb/upb.hpp" // begin:google_only // #include "testing/fuzzing/fuzztest.h" // end:google_only @@ -14,7 +15,7 @@ namespace { TEST(EpsCopyInputStreamTest, ZeroSize) { upb_EpsCopyInputStream stream; const char* ptr = NULL; - upb_EpsCopyInputStream_Init(&stream, &ptr, 0); + upb_EpsCopyInputStream_Init(&stream, &ptr, 0, false); EXPECT_TRUE(upb_EpsCopyInputStream_IsDone(&stream, &ptr, NULL)); } @@ -81,20 +82,49 @@ TEST(EpsCopyInputStreamTest, ZeroSize) { // // class EpsStream { // public: -// EpsStream(const std::string& data) : data_(data) { +// EpsStream(const std::string& data, bool enable_aliasing) +// : data_(data), enable_aliasing_(enable_aliasing) { // ptr_ = data_.data(); -// upb_EpsCopyInputStream_Init(&eps_, &ptr_, data_.size()); +// upb_EpsCopyInputStream_Init(&eps_, &ptr_, data_.size(), enable_aliasing); // } // // // Returns false at EOF or error. // int ReadData(int n, std::string* data) { +// EXPECT_LE(n, kUpb_EpsCopyInputStream_SlopBytes); // // We want to verify that we can read kUpb_EpsCopyInputStream_SlopBytes // // safely, even if we haven't actually been requested to read that much. // // We copy to a global buffer so the copy can't be optimized away. // memcpy(&tmp_buf, ptr_, kUpb_EpsCopyInputStream_SlopBytes); // data->assign(tmp_buf, n); // ptr_ += n; +// return PopLimits(); +// } +// +// int ReadString(int n, std::string* data) { +// if (!upb_EpsCopyInputStream_CheckSize(&eps_, ptr_, n)) return -1; +// const char* str_data = ptr_; +// ptr_ = upb_EpsCopyInputStream_ReadString(&eps_, &str_data, n, arena_.ptr()); +// if (!ptr_) return -1; +// if (enable_aliasing_ && n) { +// EXPECT_GE(reinterpret_cast(str_data), +// reinterpret_cast(data_.data())); +// EXPECT_LT(reinterpret_cast(str_data), +// reinterpret_cast(data_.data() + data_.size())); +// } +// data->assign(str_data, n); +// return PopLimits(); +// } // +// bool TryPushLimit(int limit) { +// if (!upb_EpsCopyInputStream_CheckSize(&eps_, ptr_, limit)) return false; +// deltas_.push_back(upb_EpsCopyInputStream_PushLimit(&eps_, ptr_, limit)); +// return true; +// } +// +// bool IsEof() const { return eof_; } +// +// private: +// int PopLimits() { // int end_limit_count = 0; // // while (IsAtLimit()) { @@ -110,15 +140,6 @@ TEST(EpsCopyInputStreamTest, ZeroSize) { // return error_ ? -1 : end_limit_count; // } // -// bool TryPushLimit(int limit) { -// if (!upb_EpsCopyInputStream_CheckSize(&eps_, ptr_, limit)) return false; -// deltas_.push_back(upb_EpsCopyInputStream_PushLimit(&eps_, ptr_, limit)); -// return true; -// } -// -// bool IsEof() const { return eof_; } -// -// private: // bool IsAtLimit() { // return upb_EpsCopyInputStream_IsDone(&eps_, &ptr_, // &EpsStream::IsDoneFallback); @@ -150,12 +171,18 @@ TEST(EpsCopyInputStreamTest, ZeroSize) { // std::string data_; // const char* ptr_; // std::vector deltas_; +// upb::Arena arena_; // bool error_ = false; // bool eof_ = false; +// bool enable_aliasing_; // }; // // // Reads N bytes from the given position. // struct ReadOp { +// int bytes; // Must be <= kUpb_EpsCopyInputStream_SlopBytes. +// }; +// +// struct ReadStringOp { // int bytes; // }; // @@ -164,14 +191,16 @@ TEST(EpsCopyInputStreamTest, ZeroSize) { // int bytes; // }; // -// typedef std::variant Op; +// typedef std::variant Op; // // struct EpsCopyTestScript { // int data_size; +// bool enable_aliasing; // std::vector ops; // }; // // auto ArbitraryEpsCopyTestScript() { +// using ::fuzztest::Arbitrary; // using ::fuzztest::InRange; // using ::fuzztest::NonNegative; // using ::fuzztest::StructOf; @@ -182,9 +211,12 @@ TEST(EpsCopyInputStreamTest, ZeroSize) { // // return StructOf( // InRange(0, max_data_size), // data_size +// Arbitrary(), // enable_aliasing // VectorOf(VariantOf( // // ReadOp // StructOf(InRange(0, kUpb_EpsCopyInputStream_SlopBytes)), +// // ReadStringOp +// StructOf(NonNegative()), // // PushLimitOp // StructOf(NonNegative())))); // } @@ -198,7 +230,7 @@ TEST(EpsCopyInputStreamTest, ZeroSize) { // } // // FakeStream fake_stream(data); -// EpsStream eps_stream(data); +// EpsStream eps_stream(data, script.enable_aliasing); // // for (const auto& op : script.ops) { // if (const ReadOp* read_op = std::get_if(&op)) { @@ -211,24 +243,82 @@ TEST(EpsCopyInputStreamTest, ZeroSize) { // EXPECT_EQ(data_fake, data_eps); // EXPECT_EQ(fake_stream.IsEof(), eps_stream.IsEof()); // if (fake_stream.IsEof()) break; +// } else if (const ReadStringOp* read_op = std::get_if(&op)) { +// std::string data_fake; +// std::string data_eps; +// int fake_result = fake_stream.ReadData(read_op->bytes, &data_fake); +// int eps_result = eps_stream.ReadString(read_op->bytes, &data_eps); +// EXPECT_EQ(fake_result, eps_result); +// if (fake_result == -1) break; // Error +// EXPECT_EQ(data_fake, data_eps); +// EXPECT_EQ(fake_stream.IsEof(), eps_stream.IsEof()); +// if (fake_stream.IsEof()) break; // } else if (const PushLimitOp* push = std::get_if(&op)) { // EXPECT_EQ(fake_stream.TryPushLimit(push->bytes), // eps_stream.TryPushLimit(push->bytes)); +// } else { +// EXPECT_TRUE(false); // Unknown op. // } // } // } // +// // Test with: +// // $ blaze run --config=fuzztest third_party/upb:eps_copy_input_stream_test \ +// // -- --gunit_fuzz= // FUZZ_TEST(EpsCopyFuzzTest, TestAgainstFakeStream) // .WithDomains(ArbitraryEpsCopyTestScript()); // // TEST(EpsCopyFuzzTest, TestAgainstFakeStreamRegression) { // TestAgainstFakeStream({299, +// false, // { // PushLimitOp{2}, // ReadOp{14}, // }}); // } // +// TEST(EpsCopyFuzzTest, AliasingEnabledZeroSizeReadString) { +// TestAgainstFakeStream({510, true, {ReadStringOp{0}}}); +// } +// +// TEST(EpsCopyFuzzTest, AliasingDisabledZeroSizeReadString) { +// TestAgainstFakeStream({510, false, {ReadStringOp{0}}}); +// } +// +// TEST(EpsCopyFuzzTest, ReadStringZero) { +// TestAgainstFakeStream({0, true, {ReadStringOp{0}}}); +// } +// +// TEST(EpsCopyFuzzTest, ReadZero) { +// TestAgainstFakeStream({0, true, {ReadOp{0}}}); +// } +// +// TEST(EpsCopyFuzzTest, ReadZeroTwice) { +// TestAgainstFakeStream({0, true, {ReadOp{0}, ReadOp{0}}}); +// } +// +// TEST(EpsCopyFuzzTest, ReadStringZeroThenRead) { +// TestAgainstFakeStream({0, true, {ReadStringOp{0}, ReadOp{0}}}); +// } +// +// TEST(EpsCopyFuzzTest, ReadStringOverflowsBufferButNotLimit) { +// TestAgainstFakeStream({351, +// false, +// { +// ReadOp{7}, +// PushLimitOp{2147483647}, +// ReadStringOp{344}, +// }}); +// } +// +// TEST(EpsCopyFuzzTest, LastBufferAliasing) { +// TestAgainstFakeStream({27, true, {ReadOp{12}, ReadStringOp{3}}}); +// } +// +// TEST(EpsCopyFuzzTest, FirstBufferAliasing) { +// TestAgainstFakeStream({7, true, {ReadStringOp{3}}}); +// } +// // end:google_only } // namespace From 75488a074277bb45ec91fcb774a86f9f76517b19 Mon Sep 17 00:00:00 2001 From: Joshua Haberman Date: Tue, 27 Dec 2022 23:04:44 -0800 Subject: [PATCH 13/21] Created a new upb_WireReader interface for parsing wire data directly. The overall motivation for this interface is to consolidate many places in upb that are parsing wire format data directly. This interface is not yet complete, but this is a good start. We have enough to port the wire format parsing in accessors.c to this interface. We can follow up by porting more places that do wire format parsing. PiperOrigin-RevId: 498109788 --- BUILD | 46 ++++++- upb/message/accessors.c | 182 +++---------------------- upb/util/BUILD | 1 + upb/wire/decode_internal.h | 4 +- upb/wire/eps_copy_input_stream.c | 39 ++++++ upb/wire/eps_copy_input_stream.h | 20 ++- upb/wire/eps_copy_input_stream_test.cc | 6 +- upb/wire/reader.c | 66 +++++++++ upb/wire/reader.h | 171 +++++++++++++++++++++++ 9 files changed, 363 insertions(+), 172 deletions(-) create mode 100644 upb/wire/eps_copy_input_stream.c create mode 100644 upb/wire/reader.c create mode 100644 upb/wire/reader.h diff --git a/BUILD b/BUILD index 462f15e465..a3558edf37 100644 --- a/BUILD +++ b/BUILD @@ -266,12 +266,14 @@ cc_library( visibility = ["//visibility:public"], deps = [ ":collections_internal", + ":eps_copy_input_stream", ":hash", ":message_internal", ":mini_table_internal", ":port", ":upb", ":wire", + ":wire_reader", ], ) @@ -934,30 +936,59 @@ cc_library( "upb/wire/decode_fast.h", "upb/wire/decode_internal.h", "upb/wire/encode.h", - "upb/wire/eps_copy_input_stream.h", "upb/wire/swap_internal.h", - "upb/wire/types.h", ], copts = UPB_DEFAULT_COPTS, visibility = ["//:__subpackages__"], deps = [ ":base", ":collections_internal", + ":eps_copy_input_stream", ":hash", ":mem_internal", ":message_internal", ":mini_table_internal", ":port", + ":wire_types", "@utf8_range", ], ) +cc_library( + name = "wire_types", + hdrs = ["upb/wire/types.h"], + visibility = ["//visibility:public"], +) + +cc_library( + name = "eps_copy_input_stream", + srcs = ["upb/wire/eps_copy_input_stream.c"], + hdrs = ["upb/wire/eps_copy_input_stream.h"], + visibility = ["//visibility:public"], + deps = [ + ":mem", + ":port", + ], +) + +cc_library( + name = "wire_reader", + srcs = ["upb/wire/reader.c"], + hdrs = ["upb/wire/reader.h"], + visibility = ["//visibility:public"], + deps = [ + ":eps_copy_input_stream", + ":port", + ":wire_types", + ], +) + cc_test( name = "eps_copy_input_stream_test", srcs = ["upb/wire/eps_copy_input_stream_test.cc"], deps = [ + ":eps_copy_input_stream", ":upb", - ":wire_internal", "@com_google_googletest//:gtest_main", ], ) @@ -1014,6 +1045,7 @@ upb_amalgamation( ":base", ":collections_internal", ":descriptor_upb_proto", + ":eps_copy_input_stream", ":fastdecode", ":hash", ":lex", @@ -1026,6 +1058,8 @@ upb_amalgamation( ":reflection_internal", ":upb", ":wire_internal", + ":wire_reader", + ":wire_types", ], strip_import_prefix = ["src"], ) @@ -1049,6 +1083,7 @@ upb_amalgamation( ":collections_internal", ":descriptor_upb_proto", ":descriptor_upb_proto_reflection", + ":eps_copy_input_stream", ":fastdecode", ":hash", ":json", @@ -1062,6 +1097,8 @@ upb_amalgamation( ":reflection_internal", ":upb", ":wire_internal", + ":wire_reader", + ":wire_types", ], prefix = "php-", strip_import_prefix = ["src"], @@ -1086,6 +1123,7 @@ upb_amalgamation( ":base", ":collections_internal", ":descriptor_upb_proto", + ":eps_copy_input_stream", ":fastdecode", ":hash", ":json", @@ -1099,6 +1137,8 @@ upb_amalgamation( ":reflection_internal", ":upb", ":wire_internal", + ":wire_reader", + ":wire_types", ], prefix = "ruby-", strip_import_prefix = ["src"], diff --git a/upb/message/accessors.c b/upb/message/accessors.c index 4359441a33..8178e359b8 100644 --- a/upb/message/accessors.c +++ b/upb/message/accessors.c @@ -33,64 +33,12 @@ #include "upb/message/message.h" #include "upb/wire/decode.h" #include "upb/wire/encode.h" -#include "upb/wire/types.h" +#include "upb/wire/eps_copy_input_stream.h" +#include "upb/wire/reader.h" // Must be last. #include "upb/port/def.inc" -typedef struct { - const char* ptr; - uint64_t val; -} decode_vret; - -UPB_NOINLINE -static decode_vret decode_longvarint64(const char* ptr, uint64_t val) { - decode_vret ret = {NULL, 0}; - uint64_t byte; - int i; - for (i = 1; i < 10; i++) { - byte = (uint8_t)ptr[i]; - val += (byte - 1) << (i * 7); - if (!(byte & 0x80)) { - ret.ptr = ptr + i + 1; - ret.val = val; - return ret; - } - } - return ret; -} - -UPB_FORCEINLINE -static const char* decode_varint64(const char* ptr, uint64_t* val) { - uint64_t byte = (uint8_t)*ptr; - if (UPB_LIKELY((byte & 0x80) == 0)) { - *val = byte; - return ptr + 1; - } else { - decode_vret res = decode_longvarint64(ptr, byte); - if (!res.ptr) return NULL; - *val = res.val; - return res.ptr; - } -} - -UPB_FORCEINLINE -static const char* decode_tag(const char* ptr, uint32_t* val) { - uint64_t byte = (uint8_t)*ptr; - if (UPB_LIKELY((byte & 0x80) == 0)) { - *val = (uint32_t)byte; - return ptr + 1; - } else { - const char* start = ptr; - decode_vret res = decode_longvarint64(ptr, byte); - if (!res.ptr || res.ptr - start > 5 || res.val > UINT32_MAX) { - return NULL; // Malformed. - } - *val = (uint32_t)res.val; - return res.ptr; - } -} - // Parses unknown data by merging into existing base_message or creating a // new message usingg mini_table. static upb_UnknownToMessageRet upb_MiniTable_ParseUnknownMessage( @@ -108,8 +56,8 @@ static upb_UnknownToMessageRet upb_MiniTable_ParseUnknownMessage( const char* data = unknown_data; uint32_t tag; uint64_t message_len = 0; - data = decode_tag(data, &tag); - data = decode_varint64(data, &message_len); + data = upb_WireReader_ReadTag(data, &tag); + data = upb_WireReader_ReadVarint(data, &message_len); upb_DecodeStatus status = upb_Decode(data, message_len, ret.message, mini_table, NULL, decode_options, arena); if (status == kUpb_DecodeStatus_OutOfMemory) { @@ -192,131 +140,39 @@ upb_GetExtensionAsBytes_Status upb_MiniTable_GetExtensionAsBytes( const char* data = result.ptr; uint32_t tag; uint64_t message_len = 0; - data = decode_tag(data, &tag); - data = decode_varint64(data, &message_len); + data = upb_WireReader_ReadTag(data, &tag); + data = upb_WireReader_ReadVarint(data, &message_len); *extension_data = data; *len = message_len; return kUpb_GetExtensionAsBytes_Ok; } -static const char* UnknownFieldSet_SkipGroup(const char* ptr, const char* end, - int group_number); - -static const char* UnknownFieldSet_SkipField(const char* ptr, const char* end, - uint32_t tag) { - int field_number = tag >> 3; - int wire_type = tag & 7; - switch (wire_type) { - case kUpb_WireType_Varint: { - uint64_t val; - return decode_varint64(ptr, &val); - } - case kUpb_WireType_64Bit: - if (end - ptr < 8) return NULL; - return ptr + 8; - case kUpb_WireType_32Bit: - if (end - ptr < 4) return NULL; - return ptr + 4; - case kUpb_WireType_Delimited: { - uint64_t size; - ptr = decode_varint64(ptr, &size); - if (!ptr || end - ptr < size) return NULL; - return ptr + size; - } - case kUpb_WireType_StartGroup: - return UnknownFieldSet_SkipGroup(ptr, end, field_number); - case kUpb_WireType_EndGroup: - return NULL; - default: - assert(0); - return NULL; - } -} - -static const char* UnknownFieldSet_SkipGroup(const char* ptr, const char* end, - int group_number) { - uint32_t end_tag = (group_number << 3) | kUpb_WireType_EndGroup; - while (true) { - if (ptr == end) return NULL; - uint64_t tag; - ptr = decode_varint64(ptr, &tag); - if (!ptr) return NULL; - if (tag == end_tag) return ptr; - ptr = UnknownFieldSet_SkipField(ptr, end, (uint32_t)tag); - if (!ptr) return NULL; - } - return ptr; +upb_FindUnknownRet upb_FindUnknownRet_ParseError() { + return (upb_FindUnknownRet){.status = kUpb_FindUnknown_ParseError}; } -enum { - kUpb_MessageSet_StartItemTag = (1 << 3) | kUpb_WireType_StartGroup, - kUpb_MessageSet_EndItemTag = (1 << 3) | kUpb_WireType_EndGroup, - kUpb_MessageSet_TypeIdTag = (2 << 3) | kUpb_WireType_Varint, - kUpb_MessageSet_MessageTag = (3 << 3) | kUpb_WireType_Delimited, -}; - upb_FindUnknownRet upb_MiniTable_FindUnknown(const upb_Message* msg, uint32_t field_number) { + const int depth_limit = 100; // TODO: this should be a parameter size_t size; upb_FindUnknownRet ret; const char* ptr = upb_Message_GetUnknown(msg, &size); - if (size == 0) { - ret.status = kUpb_FindUnknown_NotPresent; - ret.ptr = NULL; - ret.len = 0; - return ret; - } - const char* end = ptr + size; - uint64_t uint64_val; + upb_EpsCopyInputStream stream; + upb_EpsCopyInputStream_Init(&stream, &ptr, size, true); - while (ptr < end) { - uint32_t tag = 0; - int field; - int wire_type; + while (!upb_EpsCopyInputStream_IsDone(&stream, &ptr)) { + uint32_t tag; const char* unknown_begin = ptr; - ptr = decode_tag(ptr, &tag); - field = tag >> 3; - wire_type = tag & 7; - switch (wire_type) { - case kUpb_WireType_EndGroup: - ret.status = kUpb_FindUnknown_ParseError; - return ret; - case kUpb_WireType_Varint: - ptr = decode_varint64(ptr, &uint64_val); - if (!ptr) { - ret.status = kUpb_FindUnknown_ParseError; - return ret; - } - break; - case kUpb_WireType_32Bit: - ptr += 4; - break; - case kUpb_WireType_64Bit: - ptr += 8; - break; - case kUpb_WireType_Delimited: - // Read size. - ptr = decode_varint64(ptr, &uint64_val); - if (uint64_val >= INT32_MAX || !ptr) { - ret.status = kUpb_FindUnknown_ParseError; - return ret; - } - ptr += uint64_val; - break; - case kUpb_WireType_StartGroup: - // tag >> 3 specifies the group number, recurse and skip - // until we see group end tag. - ptr = UnknownFieldSet_SkipGroup(ptr, end, field_number); - break; - default: - ret.status = kUpb_FindUnknown_ParseError; - return ret; - } - if (field_number == field) { + ptr = upb_WireReader_ReadTag(ptr, &tag); + if (!ptr) return upb_FindUnknownRet_ParseError(); + ptr = upb_WireReader_SkipValue(ptr, tag, depth_limit, &stream); + if (!ptr) return upb_FindUnknownRet_ParseError(); + if (field_number == upb_WireReader_GetFieldNumber(tag)) { ret.status = kUpb_FindUnknown_Ok; ret.ptr = unknown_begin; ret.len = ptr - unknown_begin; + upb_EpsCopyInputStream_ReadStringAliased(&stream, &ret.ptr, ret.len); return ret; } } diff --git a/upb/util/BUILD b/upb/util/BUILD index 2776132f78..4221570a5c 100644 --- a/upb/util/BUILD +++ b/upb/util/BUILD @@ -124,6 +124,7 @@ cc_test( deps = [ ":compare", "//:wire_internal", + "//:wire_types", "@com_google_absl//absl/strings", "@com_google_googletest//:gtest_main", ], diff --git a/upb/wire/decode_internal.h b/upb/wire/decode_internal.h index 03d2a05b2f..14cc518c2e 100644 --- a/upb/wire/decode_internal.h +++ b/upb/wire/decode_internal.h @@ -115,8 +115,8 @@ const char* _upb_Decoder_IsDoneFallback(upb_EpsCopyInputStream* e, const char* ptr, int overrun); UPB_INLINE bool _upb_Decoder_IsDone(upb_Decoder* d, const char** ptr) { - return upb_EpsCopyInputStream_IsDone(&d->input, ptr, - &_upb_Decoder_IsDoneFallback); + return upb_EpsCopyInputStream_IsDoneWithCallback( + &d->input, ptr, &_upb_Decoder_IsDoneFallback); } UPB_INLINE const char* _upb_Decoder_BufferFlipCallback( diff --git a/upb/wire/eps_copy_input_stream.c b/upb/wire/eps_copy_input_stream.c new file mode 100644 index 0000000000..ebbe40adf8 --- /dev/null +++ b/upb/wire/eps_copy_input_stream.c @@ -0,0 +1,39 @@ +/* + * Copyright (c) 2009-2021, Google LLC + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * * Neither the name of Google LLC nor the + * names of its contributors may be used to endorse or promote products + * derived from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL Google LLC BE LIABLE FOR ANY DIRECT, + * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES + * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND + * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#include "upb/wire/eps_copy_input_stream.h" + +static const char* _upb_EpsCopyInputStream_NoOpCallback( + upb_EpsCopyInputStream* e, const char* old_end, const char* new_start) { + return new_start; +} + +const char* _upb_EpsCopyInputStream_IsDoneFallbackNoCallback( + upb_EpsCopyInputStream* e, const char* ptr, int overrun) { + return _upb_EpsCopyInputStream_IsDoneFallbackInline( + e, ptr, overrun, _upb_EpsCopyInputStream_NoOpCallback); +} diff --git a/upb/wire/eps_copy_input_stream.h b/upb/wire/eps_copy_input_stream.h index 234a918d4d..98d79d2359 100644 --- a/upb/wire/eps_copy_input_stream.h +++ b/upb/wire/eps_copy_input_stream.h @@ -54,6 +54,7 @@ typedef struct { const char* limit_ptr; // For bounds checks, = end + UPB_MIN(limit, 0) uintptr_t aliasing; int limit; // Submessage limit relative to end + bool error; // To distinguish between EOF and error. char patch[kUpb_EpsCopyInputStream_SlopBytes * 2]; } upb_EpsCopyInputStream; @@ -87,6 +88,7 @@ UPB_INLINE void upb_EpsCopyInputStream_Init(upb_EpsCopyInputStream* e, ret = false; } e->limit_ptr = e->end; + e->error = false; } typedef enum { @@ -122,7 +124,7 @@ UPB_INLINE upb_IsDoneStatus upb_EpsCopyInputStream_IsDoneStatus( // // Postcondition: if the function returns false, there are at least // kUpb_EpsCopyInputStream_SlopBytes of data available to read at *ptr. -UPB_INLINE bool upb_EpsCopyInputStream_IsDone( +UPB_INLINE bool upb_EpsCopyInputStream_IsDoneWithCallback( upb_EpsCopyInputStream* e, const char** ptr, upb_EpsCopyInputStream_IsDoneFallbackFunc* func) { int overrun; @@ -137,6 +139,21 @@ UPB_INLINE bool upb_EpsCopyInputStream_IsDone( } } +const char* _upb_EpsCopyInputStream_IsDoneFallbackNoCallback( + upb_EpsCopyInputStream* e, const char* ptr, int overrun); + +// A simpler version of IsDoneWithCallback() that does not support a buffer flip +// callback. Useful in cases where we do not need to insert custom logic at +// every buffer flip. +// +// If this returns true, the user must call upb_EpsCopyInputStream_IsError() +// to distinguish between EOF and error. +UPB_INLINE bool upb_EpsCopyInputStream_IsDone(upb_EpsCopyInputStream* e, + const char** ptr) { + return upb_EpsCopyInputStream_IsDoneWithCallback( + e, ptr, _upb_EpsCopyInputStream_IsDoneFallbackNoCallback); +} + // Returns the total number of bytes that are safe to read from the current // buffer without reading uninitialized or unallocated memory. // @@ -318,6 +335,7 @@ UPB_INLINE const char* _upb_EpsCopyInputStream_IsDoneFallbackInline( } return callback(e, old_end, new_start); } else { + e->error = true; return callback(e, NULL, NULL); } } diff --git a/upb/wire/eps_copy_input_stream_test.cc b/upb/wire/eps_copy_input_stream_test.cc index 3ccb29db7f..398a09fed7 100644 --- a/upb/wire/eps_copy_input_stream_test.cc +++ b/upb/wire/eps_copy_input_stream_test.cc @@ -16,7 +16,7 @@ TEST(EpsCopyInputStreamTest, ZeroSize) { upb_EpsCopyInputStream stream; const char* ptr = NULL; upb_EpsCopyInputStream_Init(&stream, &ptr, 0, false); - EXPECT_TRUE(upb_EpsCopyInputStream_IsDone(&stream, &ptr, NULL)); + EXPECT_TRUE(upb_EpsCopyInputStream_IsDoneWithCallback(&stream, &ptr, NULL)); } // begin:google_only @@ -141,8 +141,8 @@ TEST(EpsCopyInputStreamTest, ZeroSize) { // } // // bool IsAtLimit() { -// return upb_EpsCopyInputStream_IsDone(&eps_, &ptr_, -// &EpsStream::IsDoneFallback); +// return upb_EpsCopyInputStream_IsDoneWithCallback( +// &eps_, &ptr_, &EpsStream::IsDoneFallback); // } // // // Return false on EOF. diff --git a/upb/wire/reader.c b/upb/wire/reader.c new file mode 100644 index 0000000000..1ba0b479b3 --- /dev/null +++ b/upb/wire/reader.c @@ -0,0 +1,66 @@ +/* + * Copyright (c) 2009-2021, Google LLC + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * * Neither the name of Google LLC nor the + * names of its contributors may be used to endorse or promote products + * derived from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL Google LLC BE LIABLE FOR ANY DIRECT, + * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES + * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND + * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#include "upb/wire/reader.h" + +// Must be last. +#include "upb/port/def.inc" +#include "upb/wire/eps_copy_input_stream.h" +#include "upb/wire/types.h" + +UPB_NOINLINE _upb_WireReader_ReadLongVarintRet +_upb_WireReader_ReadLongVarint(const char* ptr, uint64_t val) { + _upb_WireReader_ReadLongVarintRet ret = {NULL, 0}; + uint64_t byte; + int i; + for (i = 1; i < 10; i++) { + byte = (uint8_t)ptr[i]; + val += (byte - 1) << (i * 7); + if (!(byte & 0x80)) { + ret.ptr = ptr + i + 1; + ret.val = val; + return ret; + } + } + return ret; +} + +const char* upb_WireReader_SkipGroup(const char* ptr, uint32_t tag, + int depth_limit, + upb_EpsCopyInputStream* stream) { + if (--depth_limit == 0) return NULL; + uint32_t end_group_tag = (tag & ~7ULL) | kUpb_WireType_EndGroup; + while (!upb_EpsCopyInputStream_IsDone(stream, &ptr)) { + uint32_t tag; + ptr = upb_WireReader_ReadTag(ptr, &tag); + if (!ptr) return NULL; + if (tag == end_group_tag) return ptr; + ptr = upb_WireReader_SkipValue(ptr, tag, depth_limit, stream); + if (!ptr) return NULL; + } + return ptr; +} diff --git a/upb/wire/reader.h b/upb/wire/reader.h new file mode 100644 index 0000000000..8cd42f6c3e --- /dev/null +++ b/upb/wire/reader.h @@ -0,0 +1,171 @@ +/* + * Copyright (c) 2009-2021, Google LLC + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * * Neither the name of Google LLC nor the + * names of its contributors may be used to endorse or promote products + * derived from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL Google LLC BE LIABLE FOR ANY DIRECT, + * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES + * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND + * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef UPB_WIRE_READER_H_ +#define UPB_WIRE_READER_H_ + +#include "upb/wire/eps_copy_input_stream.h" +#include "upb/wire/types.h" + +// Must be last. +#include "upb/port/def.inc" + +// The upb_WireReader interface is suitable for general-purpose parsing of +// protobuf binary wire format. It is designed to be used along with +// upb_EpsCopyInputStream for buffering, and all parsing routines in this file +// assume that at least kUpb_EpsCopyInputStream_SlopBytes worth of data is +// available to read without any bounds checks. + +typedef struct { + const char* ptr; + uint64_t val; +} _upb_WireReader_ReadLongVarintRet; + +_upb_WireReader_ReadLongVarintRet _upb_WireReader_ReadLongVarint( + const char* ptr, uint64_t val); + +static UPB_FORCEINLINE const char* _upb_WireReader_ReadVarint(const char* ptr, + uint64_t* val, + int maxlen, + uint64_t maxval) { + uint64_t byte = (uint8_t)*ptr; + if (UPB_LIKELY((byte & 0x80) == 0)) { + *val = (uint32_t)byte; + return ptr + 1; + } + const char* start = ptr; + _upb_WireReader_ReadLongVarintRet res = + _upb_WireReader_ReadLongVarint(ptr, byte); + if (!res.ptr || (maxlen < 10 && res.ptr - start > maxlen) || + res.val > maxval) { + return NULL; // Malformed. + } + *val = res.val; + return res.ptr; +} + +// Parses a tag into `tag`, and returns a pointer past the end of the tag, or +// NULL if there was an error in the tag data. +// +// REQUIRES: there must be at least 10 bytes of data available at `ptr`. +// Bounds checks must be performed before calling this function, preferably +// by calling upb_EpsCopyInputStream_IsDone(). +static UPB_FORCEINLINE const char* upb_WireReader_ReadTag(const char* ptr, + uint32_t* tag) { + uint64_t val; + ptr = _upb_WireReader_ReadVarint(ptr, &val, 5, UINT32_MAX); + if (!ptr) return NULL; + *tag = val; + return ptr; +} + +// Given a tag, returns the field number. +UPB_INLINE uint32_t upb_WireReader_GetFieldNumber(uint32_t tag) { + return tag >> 3; +} + +// Given a tag, returns the wire type. +UPB_INLINE uint8_t upb_WireReader_GetWireType(uint32_t tag) { return tag & 3; } + +UPB_INLINE const char* upb_WireReader_ReadVarint(const char* ptr, + uint64_t* val) { + return _upb_WireReader_ReadVarint(ptr, val, 10, UINT64_MAX); +} + +// Skips data for a varint, returning a pointer past the end of the varint, or +// NULL if there was an error in the varint data. +// +// REQUIRES: there must be at least 10 bytes of data available at `ptr`. +// Bounds checks must be performed before calling this function, preferably +// by calling upb_EpsCopyInputStream_IsDone(). +UPB_INLINE const char* upb_WireReader_SkipVarint(const char* ptr) { + uint64_t val; + return upb_WireReader_ReadVarint(ptr, &val); +} + +// Reads a varint indicating the size of a delimited field into `size`, or +// NULL if there was an error in the varint data. +// +// REQUIRES: there must be at least 10 bytes of data available at `ptr`. +// Bounds checks must be performed before calling this function, preferably +// by calling upb_EpsCopyInputStream_IsDone(). +UPB_INLINE const char* upb_WireReader_ReadSize(const char* ptr, int* size) { + uint64_t size64; + ptr = upb_WireReader_ReadVarint(ptr, &size64); + if (!ptr || size64 >= INT32_MAX) return NULL; + *size = size64; + return ptr; +} + +// Skips data for a group, returning a pointer past the end of the group, or +// NULL if there was an error parsing the group. The `tag` argument should be +// the start group tag that begins the group. The `depth_limit` argument +// indicates how many levels of recursion the group is allowed to have before +// reporting a parse error (this limit exists to protect against stack +// overflow). +const char* upb_WireReader_SkipGroup(const char* ptr, uint32_t tag, + int depth_limit, + upb_EpsCopyInputStream* stream); + +// Skips data for a wire value of any type, returning a pointer past the end of +// the data, or NULL if there was an error parsing the group. The `tag` argument +// should be the tag that was just parsed. The `depth_limit` argument indicates +// how many levels of recursion a group is allowed to have before reporting a +// parse error (this limit exists to protect against stack overflow). +// +// REQUIRES: there must be at least 10 bytes of data available at `ptr`. +// Bounds checks must be performed before calling this function, preferably +// by calling upb_EpsCopyInputStream_IsDone(). +UPB_INLINE const char* upb_WireReader_SkipValue( + const char* ptr, uint32_t tag, int depth_limit, + upb_EpsCopyInputStream* stream) { + switch (upb_WireReader_GetWireType(tag)) { + case kUpb_WireType_Varint: + return upb_WireReader_SkipVarint(ptr); + case kUpb_WireType_32Bit: + return ptr + 4; + case kUpb_WireType_64Bit: + return ptr + 8; + case kUpb_WireType_Delimited: { + int size; + ptr = upb_WireReader_ReadSize(ptr, &size); + if (!ptr) return NULL; + ptr += size; + return ptr; + } + case kUpb_WireType_StartGroup: + return upb_WireReader_SkipGroup(ptr, tag, depth_limit, stream); + case kUpb_WireType_EndGroup: + return NULL; // Should be handled before now. + default: + return NULL; // Unknown wire type. + } +} + +#include "upb/port/undef.inc" + +#endif // UPB_WIRE_READER_H_ From 26e9a75294502a412e55c22d1883a24b665e4e94 Mon Sep 17 00:00:00 2001 From: Eric Salo Date: Wed, 28 Dec 2022 06:09:44 -0800 Subject: [PATCH 14/21] remove wire/types.h from the :wire build target PiperOrigin-RevId: 498167993 --- BUILD | 2 +- python/BUILD | 1 + upb/util/BUILD | 1 + 3 files changed, 3 insertions(+), 1 deletion(-) diff --git a/BUILD b/BUILD index a3558edf37..57d2c025dc 100644 --- a/BUILD +++ b/BUILD @@ -582,6 +582,7 @@ cc_library( ":port", ":reflection", ":wire", + ":wire_types", ], ) @@ -910,7 +911,6 @@ cc_library( hdrs = [ "upb/wire/decode.h", "upb/wire/encode.h", - "upb/wire/types.h", ], copts = UPB_DEFAULT_COPTS, visibility = ["//visibility:public"], diff --git a/python/BUILD b/python/BUILD index 31bbae3d77..99b670bd6d 100644 --- a/python/BUILD +++ b/python/BUILD @@ -227,6 +227,7 @@ py_extension( "//:textformat", "//:upb", "//:wire", + "//:wire_types", "//upb/util:compare", "//upb/util:def_to_proto", "//upb/util:required_fields", diff --git a/upb/util/BUILD b/upb/util/BUILD index 4221570a5c..983daf3e58 100644 --- a/upb/util/BUILD +++ b/upb/util/BUILD @@ -110,6 +110,7 @@ cc_library( "//:port", "//:reflection", "//:wire", + "//:wire_types", ], ) From 112f037da6ba2f786bbc60ee2f037a7dd3bbbedb Mon Sep 17 00:00:00 2001 From: Joshua Haberman Date: Wed, 28 Dec 2022 11:55:56 -0800 Subject: [PATCH 15/21] Migrated the text format encoder to upb_WireReader. Also fixed a few bugs and added new functionality to upb_WireReader. PiperOrigin-RevId: 498223970 --- BUILD | 7 +- upb/message/accessors.c | 13 +-- upb/text/encode.c | 105 ++++++++++++------------- upb/wire/eps_copy_input_stream.h | 42 +++++++++- upb/wire/eps_copy_input_stream_test.cc | 10 +++ upb/wire/reader.h | 36 ++++++++- 6 files changed, 146 insertions(+), 67 deletions(-) diff --git a/BUILD b/BUILD index 57d2c025dc..eefb222a62 100644 --- a/BUILD +++ b/BUILD @@ -578,10 +578,12 @@ cc_library( visibility = ["//visibility:public"], deps = [ ":collections_internal", + ":eps_copy_input_stream", ":lex", ":port", ":reflection", ":wire", + ":wire_reader", ":wire_types", ], ) @@ -973,7 +975,10 @@ cc_library( cc_library( name = "wire_reader", - srcs = ["upb/wire/reader.c"], + srcs = [ + "upb/wire/reader.c", + "upb/wire/swap_internal.h", + ], hdrs = ["upb/wire/reader.h"], visibility = ["//visibility:public"], deps = [ diff --git a/upb/message/accessors.c b/upb/message/accessors.c index 8178e359b8..815d66cbe3 100644 --- a/upb/message/accessors.c +++ b/upb/message/accessors.c @@ -166,15 +166,18 @@ upb_FindUnknownRet upb_MiniTable_FindUnknown(const upb_Message* msg, const char* unknown_begin = ptr; ptr = upb_WireReader_ReadTag(ptr, &tag); if (!ptr) return upb_FindUnknownRet_ParseError(); - ptr = upb_WireReader_SkipValue(ptr, tag, depth_limit, &stream); - if (!ptr) return upb_FindUnknownRet_ParseError(); if (field_number == upb_WireReader_GetFieldNumber(tag)) { ret.status = kUpb_FindUnknown_Ok; - ret.ptr = unknown_begin; - ret.len = ptr - unknown_begin; - upb_EpsCopyInputStream_ReadStringAliased(&stream, &ret.ptr, ret.len); + ret.ptr = upb_EpsCopyInputStream_GetAliasedPtr(&stream, unknown_begin); + ptr = upb_WireReader_SkipValue(ptr, tag, depth_limit, &stream); + // Because we know that the input is a flat buffer, it is safe to perform + // pointer arithmetic on aliased pointers. + ret.len = upb_EpsCopyInputStream_GetAliasedPtr(&stream, ptr) - ret.ptr; return ret; } + + ptr = upb_WireReader_SkipValue(ptr, tag, depth_limit, &stream); + if (!ptr) return upb_FindUnknownRet_ParseError(); } ret.status = kUpb_FindUnknown_NotPresent; ret.ptr = NULL; diff --git a/upb/text/encode.c b/upb/text/encode.c index 830f813775..c694088fc2 100644 --- a/upb/text/encode.c +++ b/upb/text/encode.c @@ -38,6 +38,8 @@ #include "upb/lex/round_trip.h" #include "upb/port/vsnprintf_compat.h" #include "upb/reflection/message.h" +#include "upb/wire/eps_copy_input_stream.h" +#include "upb/wire/reader.h" #include "upb/wire/types.h" // Must be last. @@ -310,23 +312,6 @@ static void txtenc_map(txtenc* e, const upb_Map* map, const upb_FieldDef* f) { } \ } while (0) -static const char* txtenc_parsevarint(const char* ptr, const char* limit, - uint64_t* val) { - uint8_t byte; - int bitpos = 0; - *val = 0; - - do { - CHK(bitpos < 70 && ptr < limit); - byte = *ptr; - *val |= (uint64_t)(byte & 0x7F) << bitpos; - ptr++; - bitpos += 7; - } while (byte & 0x80); - - return ptr; -} - /* * Unknown fields are printed by number. * @@ -337,89 +322,95 @@ static const char* txtenc_parsevarint(const char* ptr, const char* limit, * 1: 111 * } */ -static const char* txtenc_unknown(txtenc* e, const char* ptr, const char* end, +static const char* txtenc_unknown(txtenc* e, const char* ptr, + upb_EpsCopyInputStream* stream, int groupnum) { - while (ptr < end) { - uint64_t tag_64; + // We are guaranteed that the unknown data is valid wire format, and will not + // contain tag zero. + uint32_t end_group = groupnum > 0 + ? ((groupnum << kUpb_WireReader_WireTypeBits) | + kUpb_WireType_EndGroup) + : 0; + + while (!upb_EpsCopyInputStream_IsDone(stream, &ptr)) { uint32_t tag; - CHK(ptr = txtenc_parsevarint(ptr, end, &tag_64)); - CHK(tag_64 < UINT32_MAX); - tag = (uint32_t)tag_64; - - if ((tag & 7) == kUpb_WireType_EndGroup) { - CHK((tag >> 3) == (uint32_t)groupnum); - return ptr; - } + CHK(ptr = upb_WireReader_ReadTag(ptr, &tag)); + if (tag == end_group) return ptr; txtenc_indent(e); - txtenc_printf(e, "%d: ", (int)(tag >> 3)); + txtenc_printf(e, "%d: ", (int)upb_WireReader_GetFieldNumber(tag)); - switch (tag & 7) { + switch (upb_WireReader_GetWireType(tag)) { case kUpb_WireType_Varint: { uint64_t val; - CHK(ptr = txtenc_parsevarint(ptr, end, &val)); + CHK(ptr = upb_WireReader_ReadVarint(ptr, &val)); txtenc_printf(e, "%" PRIu64, val); break; } case kUpb_WireType_32Bit: { uint32_t val; - CHK(end - ptr >= 4); - memcpy(&val, ptr, 4); - ptr += 4; + ptr = upb_WireReader_ReadFixed32(ptr, &val); txtenc_printf(e, "0x%08" PRIu32, val); break; } case kUpb_WireType_64Bit: { uint64_t val; - CHK(end - ptr >= 8); - memcpy(&val, ptr, 8); - ptr += 8; + ptr = upb_WireReader_ReadFixed64(ptr, &val); txtenc_printf(e, "0x%016" PRIu64, val); break; } case kUpb_WireType_Delimited: { - uint64_t len; - size_t avail = end - ptr; + int size; char* start = e->ptr; size_t start_overflow = e->overflow; - CHK(ptr = txtenc_parsevarint(ptr, end, &len)); - CHK(avail >= len); + CHK(ptr = upb_WireReader_ReadSize(ptr, &size)); + CHK(upb_EpsCopyInputStream_CheckDataSizeAvailable(stream, ptr, size)); - /* Speculatively try to parse as message. */ + // Speculatively try to parse as message. txtenc_putstr(e, "{"); txtenc_endfield(e); + + // EpsCopyInputStream can't back up, so create a sub-stream for the + // speculative parse. + upb_EpsCopyInputStream sub_stream; + const char* sub_ptr = upb_EpsCopyInputStream_GetAliasedPtr(stream, ptr); + upb_EpsCopyInputStream_Init(&sub_stream, &sub_ptr, size, true); + e->indent_depth++; - if (txtenc_unknown(e, ptr, end, -1)) { + if (txtenc_unknown(e, sub_ptr, &sub_stream, -1)) { + ptr = upb_EpsCopyInputStream_Skip(stream, ptr, size); e->indent_depth--; txtenc_indent(e); txtenc_putstr(e, "}"); } else { - /* Didn't work out, print as raw bytes. */ - upb_StringView str; + // Didn't work out, print as raw bytes. e->indent_depth--; e->ptr = start; e->overflow = start_overflow; - str.data = ptr; - str.size = len; - txtenc_string(e, str, true); + const char* str = ptr; + ptr = upb_EpsCopyInputStream_ReadString(stream, &str, size, NULL); + assert(ptr); + txtenc_string(e, (upb_StringView){.data = str, .size = size}, true); } - ptr += len; break; } case kUpb_WireType_StartGroup: txtenc_putstr(e, "{"); txtenc_endfield(e); e->indent_depth++; - CHK(ptr = txtenc_unknown(e, ptr, end, tag >> 3)); + CHK(ptr = txtenc_unknown(e, ptr, stream, + upb_WireReader_GetFieldNumber(tag))); e->indent_depth--; txtenc_indent(e); txtenc_putstr(e, "}"); break; + default: + return NULL; } txtenc_endfield(e); } - return groupnum == -1 ? ptr : NULL; + return end_group == 0 && !upb_EpsCopyInputStream_IsError(stream) ? ptr : NULL; } #undef CHK @@ -441,11 +432,13 @@ static void txtenc_msg(txtenc* e, const upb_Message* msg, } if ((e->options & UPB_TXTENC_SKIPUNKNOWN) == 0) { - size_t len; - const char* ptr = upb_Message_GetUnknown(msg, &len); - char* start = e->ptr; - if (ptr) { - if (!txtenc_unknown(e, ptr, ptr + len, -1)) { + size_t size; + const char* ptr = upb_Message_GetUnknown(msg, &size); + if (size != 0) { + char* start = e->ptr; + upb_EpsCopyInputStream stream; + upb_EpsCopyInputStream_Init(&stream, &ptr, size, true); + if (!txtenc_unknown(e, ptr, &stream, -1)) { /* Unknown failed to parse, back up and don't print it at all. */ e->ptr = start; } diff --git a/upb/wire/eps_copy_input_stream.h b/upb/wire/eps_copy_input_stream.h index 98d79d2359..92634328c5 100644 --- a/upb/wire/eps_copy_input_stream.h +++ b/upb/wire/eps_copy_input_stream.h @@ -58,6 +58,13 @@ typedef struct { char patch[kUpb_EpsCopyInputStream_SlopBytes * 2]; } upb_EpsCopyInputStream; +// Returns true if the stream is in the error state. A stream enters the error +// state when the user reads past a limit (caught in IsDone()) or the +// ZeroCopyInputStream returns an error. +UPB_INLINE bool upb_EpsCopyInputStream_IsError(upb_EpsCopyInputStream* e) { + return e->error; +} + typedef const char* upb_EpsCopyInputStream_BufferFlipCallback( upb_EpsCopyInputStream* e, const char* old_end, const char* new_start); @@ -242,17 +249,44 @@ UPB_INLINE bool upb_EpsCopyInputStream_AliasingAvailable( e->aliasing >= kUpb_EpsCopyInputStream_NoDelta; } +// Returns a pointer into an input buffer that corresponds to the parsing +// pointer `ptr`. The returned pointer may be the same as `ptr`, but also may +// be different if we are currently parsing out of the patch buffer. +// +// REQUIRES: Aliasing must be available for the given pointer. If the input is a +// flat buffer and aliasing is enabled, then aliasing will always be available. +UPB_INLINE const char* upb_EpsCopyInputStream_GetAliasedPtr( + upb_EpsCopyInputStream* e, const char* ptr) { + UPB_ASSUME(upb_EpsCopyInputStream_AliasingAvailable(e, ptr, 0)); + uintptr_t delta = + e->aliasing == kUpb_EpsCopyInputStream_NoDelta ? 0 : e->aliasing; + return (const char*)((uintptr_t)ptr + delta); +} + +// Reads string data from the input, aliasing into the input buffer instead of +// copying. The parsing pointer is passed in `*ptr`, and will be updated if +// necessary to point to the actual input buffer. Returns the new parsing +// pointer, which will be advanced past the string data. +// +// REQUIRES: Aliasing must be available for this data region (test with +// upb_EpsCopyInputStream_AliasingAvailable(). UPB_INLINE const char* upb_EpsCopyInputStream_ReadStringAliased( upb_EpsCopyInputStream* e, const char** ptr, size_t size) { UPB_ASSUME(upb_EpsCopyInputStream_AliasingAvailable(e, *ptr, size)); - uintptr_t delta = - e->aliasing == kUpb_EpsCopyInputStream_NoDelta ? 0 : e->aliasing; const char* ret = *ptr + size; - *ptr = (const char*)((uintptr_t)*ptr + delta); + *ptr = upb_EpsCopyInputStream_GetAliasedPtr(e, *ptr); UPB_ASSUME(ret != NULL); return ret; } +// Skips `size` bytes of string data from the input and returns a pointer past +// the end. Returns NULL on end of stream or error. +UPB_INLINE const char* upb_EpsCopyInputStream_Skip(upb_EpsCopyInputStream* e, + const char* ptr, int size) { + if (!upb_EpsCopyInputStream_CheckDataSizeAvailable(e, ptr, size)) return NULL; + return ptr + size; +} + // Reads string data from the stream and advances the pointer accordingly. // If aliasing was enabled when the stream was initialized, then the returned // pointer will point into the input buffer if possible, otherwise new data @@ -270,6 +304,7 @@ UPB_INLINE const char* upb_EpsCopyInputStream_ReadString( if (!upb_EpsCopyInputStream_CheckDataSizeAvailable(e, *ptr, size)) { return NULL; } + UPB_ASSERT(arena); char* data = (char*)upb_Arena_Malloc(arena, size); if (!data) return NULL; memcpy(data, *ptr, size); @@ -335,6 +370,7 @@ UPB_INLINE const char* _upb_EpsCopyInputStream_IsDoneFallbackInline( } return callback(e, old_end, new_start); } else { + UPB_ASSERT(overrun > e->limit); e->error = true; return callback(e, NULL, NULL); } diff --git a/upb/wire/eps_copy_input_stream_test.cc b/upb/wire/eps_copy_input_stream_test.cc index 398a09fed7..3b90064229 100644 --- a/upb/wire/eps_copy_input_stream_test.cc +++ b/upb/wire/eps_copy_input_stream_test.cc @@ -91,18 +91,27 @@ TEST(EpsCopyInputStreamTest, ZeroSize) { // // Returns false at EOF or error. // int ReadData(int n, std::string* data) { // EXPECT_LE(n, kUpb_EpsCopyInputStream_SlopBytes); +// if (enable_aliasing_) { +// EXPECT_TRUE(upb_EpsCopyInputStream_AliasingAvailable(&eps_, ptr_, n)); +// } // // We want to verify that we can read kUpb_EpsCopyInputStream_SlopBytes // // safely, even if we haven't actually been requested to read that much. // // We copy to a global buffer so the copy can't be optimized away. // memcpy(&tmp_buf, ptr_, kUpb_EpsCopyInputStream_SlopBytes); // data->assign(tmp_buf, n); // ptr_ += n; +// if (enable_aliasing_) { +// EXPECT_TRUE(upb_EpsCopyInputStream_AliasingAvailable(&eps_, ptr_, 0)); +// } // return PopLimits(); // } // // int ReadString(int n, std::string* data) { // if (!upb_EpsCopyInputStream_CheckSize(&eps_, ptr_, n)) return -1; // const char* str_data = ptr_; +// if (enable_aliasing_) { +// EXPECT_TRUE(upb_EpsCopyInputStream_AliasingAvailable(&eps_, ptr_, n)); +// } // ptr_ = upb_EpsCopyInputStream_ReadString(&eps_, &str_data, n, arena_.ptr()); // if (!ptr_) return -1; // if (enable_aliasing_ && n) { @@ -110,6 +119,7 @@ TEST(EpsCopyInputStreamTest, ZeroSize) { // reinterpret_cast(data_.data())); // EXPECT_LT(reinterpret_cast(str_data), // reinterpret_cast(data_.data() + data_.size())); +// EXPECT_TRUE(upb_EpsCopyInputStream_AliasingAvailable(&eps_, ptr_, 0)); // } // data->assign(str_data, n); // return PopLimits(); diff --git a/upb/wire/reader.h b/upb/wire/reader.h index 8cd42f6c3e..b05b3d07ad 100644 --- a/upb/wire/reader.h +++ b/upb/wire/reader.h @@ -29,6 +29,7 @@ #define UPB_WIRE_READER_H_ #include "upb/wire/eps_copy_input_stream.h" +#include "upb/wire/swap_internal.h" #include "upb/wire/types.h" // Must be last. @@ -40,6 +41,9 @@ // assume that at least kUpb_EpsCopyInputStream_SlopBytes worth of data is // available to read without any bounds checks. +#define kUpb_WireReader_WireTypeMask 7 +#define kUpb_WireReader_WireTypeBits 3 + typedef struct { const char* ptr; uint64_t val; @@ -85,11 +89,13 @@ static UPB_FORCEINLINE const char* upb_WireReader_ReadTag(const char* ptr, // Given a tag, returns the field number. UPB_INLINE uint32_t upb_WireReader_GetFieldNumber(uint32_t tag) { - return tag >> 3; + return tag >> kUpb_WireReader_WireTypeBits; } // Given a tag, returns the wire type. -UPB_INLINE uint8_t upb_WireReader_GetWireType(uint32_t tag) { return tag & 3; } +UPB_INLINE uint8_t upb_WireReader_GetWireType(uint32_t tag) { + return tag & kUpb_WireReader_WireTypeMask; +} UPB_INLINE const char* upb_WireReader_ReadVarint(const char* ptr, uint64_t* val) { @@ -121,6 +127,32 @@ UPB_INLINE const char* upb_WireReader_ReadSize(const char* ptr, int* size) { return ptr; } +// Reads a fixed32 field, performing byte swapping if necessary. +// +// REQUIRES: there must be at least 4 bytes of data available at `ptr`. +// Bounds checks must be performed before calling this function, preferably +// by calling upb_EpsCopyInputStream_IsDone(). +UPB_INLINE const char* upb_WireReader_ReadFixed32(const char* ptr, void* val) { + uint32_t uval; + memcpy(&uval, ptr, 4); + uval = _upb_BigEndian_Swap32(uval); + memcpy(val, &uval, 4); + return ptr + 4; +} + +// Reads a fixed64 field, performing byte swapping if necessary. +// +// REQUIRES: there must be at least 4 bytes of data available at `ptr`. +// Bounds checks must be performed before calling this function, preferably +// by calling upb_EpsCopyInputStream_IsDone(). +UPB_INLINE const char* upb_WireReader_ReadFixed64(const char* ptr, void* val) { + uint64_t uval; + memcpy(&uval, ptr, 8); + uval = _upb_BigEndian_Swap64(uval); + memcpy(val, &uval, 8); + return ptr + 8; +} + // Skips data for a group, returning a pointer past the end of the group, or // NULL if there was an error parsing the group. The `tag` argument should be // the start group tag that begins the group. The `depth_limit` argument From 466408978fe17f7dad2300915d78f4e5bfdf9d0c Mon Sep 17 00:00:00 2001 From: Protobuf Team Bot Date: Wed, 28 Dec 2022 15:52:52 -0800 Subject: [PATCH 16/21] Add extern C blocks to wire_reader and eps_copy_input_stream. PiperOrigin-RevId: 498262528 --- upb/wire/eps_copy_input_stream.h | 8 ++++++++ upb/wire/reader.c | 5 +++-- upb/wire/reader.h | 8 ++++++++ 3 files changed, 19 insertions(+), 2 deletions(-) diff --git a/upb/wire/eps_copy_input_stream.h b/upb/wire/eps_copy_input_stream.h index 92634328c5..5875a827a6 100644 --- a/upb/wire/eps_copy_input_stream.h +++ b/upb/wire/eps_copy_input_stream.h @@ -35,6 +35,10 @@ // Must be last. #include "upb/port/def.inc" +#ifdef __cplusplus +extern "C" { +#endif + // The maximum number of bytes a single protobuf field can take up in the // wire format. We only want to do one bounds check per field, so the input // stream guarantees that after upb_EpsCopyInputStream_IsDone() is called, @@ -405,6 +409,10 @@ static UPB_FORCEINLINE bool upb_EpsCopyInputStream_TryParseDelimitedFast( return true; } +#ifdef __cplusplus +} /* extern "C" */ +#endif + #include "upb/port/undef.inc" #endif // UPB_WIRE_EPS_COPY_INPUT_STREAM_H_ diff --git a/upb/wire/reader.c b/upb/wire/reader.c index 1ba0b479b3..f5b9b0b0ce 100644 --- a/upb/wire/reader.c +++ b/upb/wire/reader.c @@ -27,11 +27,12 @@ #include "upb/wire/reader.h" -// Must be last. -#include "upb/port/def.inc" #include "upb/wire/eps_copy_input_stream.h" #include "upb/wire/types.h" +// Must be last. +#include "upb/port/def.inc" + UPB_NOINLINE _upb_WireReader_ReadLongVarintRet _upb_WireReader_ReadLongVarint(const char* ptr, uint64_t val) { _upb_WireReader_ReadLongVarintRet ret = {NULL, 0}; diff --git a/upb/wire/reader.h b/upb/wire/reader.h index b05b3d07ad..13d6327193 100644 --- a/upb/wire/reader.h +++ b/upb/wire/reader.h @@ -35,6 +35,10 @@ // Must be last. #include "upb/port/def.inc" +#ifdef __cplusplus +extern "C" { +#endif + // The upb_WireReader interface is suitable for general-purpose parsing of // protobuf binary wire format. It is designed to be used along with // upb_EpsCopyInputStream for buffering, and all parsing routines in this file @@ -198,6 +202,10 @@ UPB_INLINE const char* upb_WireReader_SkipValue( } } +#ifdef __cplusplus +} /* extern "C" */ +#endif + #include "upb/port/undef.inc" #endif // UPB_WIRE_READER_H_ From 0c6b72dbf891eafc91050ad60733ea3022fac2b3 Mon Sep 17 00:00:00 2001 From: Eric Salo Date: Thu, 29 Dec 2022 09:05:43 -0800 Subject: [PATCH 17/21] replace upb_Map_Delete() We would like for upb_Map_Delete() to optionally return the deleted value. Unfortunately this will require several steps since we are crossing repos. Step #3: Give the new footprint to the original function and switch back to it. Since we're already touching map.h, also mark UPB_API as appropriate. PiperOrigin-RevId: 498398474 --- lua/msg.c | 2 +- python/map.c | 2 +- upb/collections/map.c | 3 +- upb/collections/map.h | 67 ++++++++++++++++++++++--------------------- 4 files changed, 38 insertions(+), 36 deletions(-) diff --git a/lua/msg.c b/lua/msg.c index 8a6f5cc04c..094ec570d6 100644 --- a/lua/msg.c +++ b/lua/msg.c @@ -579,7 +579,7 @@ static int lupb_Map_Newindex(lua_State* L) { upb_MessageValue key = lupb_tomsgval(L, lmap->key_type, 2, 1, LUPB_REF); if (lua_isnil(L, 3)) { - upb_Map_Delete2(map, key, NULL); + upb_Map_Delete(map, key, NULL); } else { upb_MessageValue val = lupb_tomsgval(L, lmap->value_type, 3, 1, LUPB_COPY); upb_Map_Set(map, key, val, lupb_Arenaget(L, 1)); diff --git a/python/map.c b/python/map.c index a058856016..eea57e63d4 100644 --- a/python/map.c +++ b/python/map.c @@ -182,7 +182,7 @@ int PyUpb_MapContainer_AssignSubscript(PyObject* _self, PyObject* key, if (!PyUpb_PyToUpb(val, val_f, &u_val, arena)) return -1; if (!PyUpb_MapContainer_Set(self, map, u_key, u_val, arena)) return -1; } else { - if (!upb_Map_Delete2(map, u_key, NULL)) { + if (!upb_Map_Delete(map, u_key, NULL)) { PyErr_Format(PyExc_KeyError, "Key not present in map"); return -1; } diff --git a/upb/collections/map.c b/upb/collections/map.c index 5724351c86..fb6aedba4b 100644 --- a/upb/collections/map.c +++ b/upb/collections/map.c @@ -70,8 +70,7 @@ upb_MapInsertStatus upb_Map_Insert(upb_Map* map, upb_MessageValue key, map->val_size, arena); } -bool upb_Map_Delete2(upb_Map* map, upb_MessageValue key, - upb_MessageValue* val) { +bool upb_Map_Delete(upb_Map* map, upb_MessageValue key, upb_MessageValue* val) { upb_value v; const bool ok = _upb_Map_Delete(map, &key, map->key_size, &v); if (val) val->uint64_val = v.val; diff --git a/upb/collections/map.h b/upb/collections/map.h index 4e398dc554..920a5019c6 100644 --- a/upb/collections/map.h +++ b/upb/collections/map.h @@ -39,20 +39,21 @@ extern "C" { #endif -/* Creates a new map on the given arena with the given key/value size. */ -upb_Map* upb_Map_New(upb_Arena* a, upb_CType key_type, upb_CType value_type); +// Creates a new map on the given arena with the given key/value size. +UPB_API upb_Map* upb_Map_New(upb_Arena* a, upb_CType key_type, + upb_CType value_type); -/* Returns the number of entries in the map. */ -size_t upb_Map_Size(const upb_Map* map); +// Returns the number of entries in the map. +UPB_API size_t upb_Map_Size(const upb_Map* map); -/* Stores a value for the given key into |*val| (or the zero value if the key is - * not present). Returns whether the key was present. The |val| pointer may be - * NULL, in which case the function tests whether the given key is present. */ -bool upb_Map_Get(const upb_Map* map, upb_MessageValue key, - upb_MessageValue* val); +// Stores a value for the given key into |*val| (or the zero value if the key is +// not present). Returns whether the key was present. The |val| pointer may be +// NULL, in which case the function tests whether the given key is present. +UPB_API bool upb_Map_Get(const upb_Map* map, upb_MessageValue key, + upb_MessageValue* val); -/* Removes all entries in the map. */ -void upb_Map_Clear(upb_Map* map); +// Removes all entries in the map. +UPB_API void upb_Map_Clear(upb_Map* map); typedef enum { kUpb_MapInsertStatus_Inserted = 0, @@ -60,29 +61,31 @@ typedef enum { kUpb_MapInsertStatus_OutOfMemory = 2, } upb_MapInsertStatus; -/* Sets the given key to the given value, returning whether the key was inserted - * or replaced. If the key was inserted, then any existing iterators will be - * invalidated. */ -upb_MapInsertStatus upb_Map_Insert(upb_Map* map, upb_MessageValue key, - upb_MessageValue val, upb_Arena* arena); - -/* Sets the given key to the given value. Returns false if memory allocation - * failed. If the key is newly inserted, then any existing iterators will be - * invalidated. */ -UPB_INLINE bool upb_Map_Set(upb_Map* map, upb_MessageValue key, - upb_MessageValue val, upb_Arena* arena) { +// Sets the given key to the given value, returning whether the key was inserted +// or replaced. If the key was inserted, then any existing iterators will be +// invalidated. +UPB_API upb_MapInsertStatus upb_Map_Insert(upb_Map* map, upb_MessageValue key, + upb_MessageValue val, + upb_Arena* arena); + +// Sets the given key to the given value. Returns false if memory allocation +// failed. If the key is newly inserted, then any existing iterators will be +// invalidated. +UPB_API_INLINE bool upb_Map_Set(upb_Map* map, upb_MessageValue key, + upb_MessageValue val, upb_Arena* arena) { return upb_Map_Insert(map, key, val, arena) != kUpb_MapInsertStatus_OutOfMemory; } // Deletes this key from the table. Returns true if the key was present. // If present and |val| is non-NULL, stores the deleted value. -bool upb_Map_Delete2(upb_Map* map, upb_MessageValue key, upb_MessageValue* val); +UPB_API bool upb_Map_Delete(upb_Map* map, upb_MessageValue key, + upb_MessageValue* val); -// Deletes this key from the table. Returns true if the key was present. // (DEPRECATED and going away soon. Do not use.) -UPB_INLINE bool upb_Map_Delete(upb_Map* map, upb_MessageValue key) { - return upb_Map_Delete2(map, key, NULL); +UPB_INLINE bool upb_Map_Delete2(upb_Map* map, upb_MessageValue key, + upb_MessageValue* val) { + return upb_Map_Delete(map, key, val); } // Map iteration: @@ -97,8 +100,8 @@ UPB_INLINE bool upb_Map_Delete(upb_Map* map, upb_MessageValue key) { // Advances to the next entry. Returns false if no more entries are present. // Otherwise returns true and populates both *key and *value. -bool upb_Map_Next(const upb_Map* map, upb_MessageValue* key, - upb_MessageValue* val, size_t* iter); +UPB_API bool upb_Map_Next(const upb_Map* map, upb_MessageValue* key, + upb_MessageValue* val, size_t* iter); // DEPRECATED iterator, slated for removal. @@ -114,12 +117,12 @@ bool upb_Map_Next(const upb_Map* map, upb_MessageValue* key, // Advances to the next entry. Returns false if no more entries are present. bool upb_MapIterator_Next(const upb_Map* map, size_t* iter); -/* Returns true if the iterator still points to a valid entry, or false if the - * iterator is past the last element. It is an error to call this function with - * kUpb_Map_Begin (you must call next() at least once first). */ +// Returns true if the iterator still points to a valid entry, or false if the +// iterator is past the last element. It is an error to call this function with +// kUpb_Map_Begin (you must call next() at least once first). bool upb_MapIterator_Done(const upb_Map* map, size_t iter); -/* Returns the key and value for this entry of the map. */ +// Returns the key and value for this entry of the map. upb_MessageValue upb_MapIterator_Key(const upb_Map* map, size_t iter); upb_MessageValue upb_MapIterator_Value(const upb_Map* map, size_t iter); From a9f9bfea75ae4b204d05cf2ebcfba23fbeecfd59 Mon Sep 17 00:00:00 2001 From: Eric Salo Date: Thu, 29 Dec 2022 11:58:09 -0800 Subject: [PATCH 18/21] fix some compiler warnings in ecis PiperOrigin-RevId: 498425946 --- upb/wire/eps_copy_input_stream.h | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/upb/wire/eps_copy_input_stream.h b/upb/wire/eps_copy_input_stream.h index 5875a827a6..eb172f28df 100644 --- a/upb/wire/eps_copy_input_stream.h +++ b/upb/wire/eps_copy_input_stream.h @@ -81,7 +81,6 @@ typedef const char* upb_EpsCopyInputStream_IsDoneFallbackFunc( UPB_INLINE void upb_EpsCopyInputStream_Init(upb_EpsCopyInputStream* e, const char** ptr, size_t size, bool enable_aliasing) { - bool ret; if (size <= kUpb_EpsCopyInputStream_SlopBytes) { memset(&e->patch, 0, 32); if (size) memcpy(&e->patch, *ptr, size); @@ -90,13 +89,11 @@ UPB_INLINE void upb_EpsCopyInputStream_Init(upb_EpsCopyInputStream* e, *ptr = e->patch; e->end = *ptr + size; e->limit = 0; - ret = true; } else { e->end = *ptr + size - kUpb_EpsCopyInputStream_SlopBytes; e->limit = kUpb_EpsCopyInputStream_SlopBytes; e->aliasing = enable_aliasing ? kUpb_EpsCopyInputStream_NoDelta : kUpb_EpsCopyInputStream_NoAliasing; - ret = false; } e->limit_ptr = e->end; e->error = false; @@ -148,6 +145,7 @@ UPB_INLINE bool upb_EpsCopyInputStream_IsDoneWithCallback( *ptr = func(e, *ptr, overrun); return *ptr == NULL; } + UPB_UNREACHABLE(); } const char* _upb_EpsCopyInputStream_IsDoneFallbackNoCallback( From 7cd8f6c9403cbbbf66fa259cd0f0ce7436bf6227 Mon Sep 17 00:00:00 2001 From: Joshua Haberman Date: Thu, 29 Dec 2022 20:54:31 -0800 Subject: [PATCH 19/21] Ported more cases of wire format parsing to upb_WireReader. PiperOrigin-RevId: 498502557 --- BUILD | 1 + python/BUILD | 2 + python/unknown_fields.c | 169 +++++++++++-------------------- upb/message/accessors.c | 4 +- upb/util/BUILD | 3 +- upb/util/compare.c | 61 ++++------- upb/wire/decode.c | 31 ++---- upb/wire/eps_copy_input_stream.h | 18 +++- upb/wire/reader.c | 8 +- upb/wire/reader.h | 44 +++++--- 10 files changed, 144 insertions(+), 197 deletions(-) diff --git a/BUILD b/BUILD index eefb222a62..3289a0640f 100644 --- a/BUILD +++ b/BUILD @@ -951,6 +951,7 @@ cc_library( ":message_internal", ":mini_table_internal", ":port", + ":wire_reader", ":wire_types", "@utf8_range", ], diff --git a/python/BUILD b/python/BUILD index 99b670bd6d..ba0d104bbc 100644 --- a/python/BUILD +++ b/python/BUILD @@ -221,12 +221,14 @@ py_extension( deps = [ "//:collections", "//:descriptor_upb_proto_reflection", + "//:eps_copy_input_stream", "//:hash", "//:port", "//:reflection", "//:textformat", "//:upb", "//:wire", + "//:wire_reader", "//:wire_types", "//upb/util:compare", "//upb/util:def_to_proto", diff --git a/python/unknown_fields.c b/python/unknown_fields.c index 5dc96fb723..fd7716d732 100644 --- a/python/unknown_fields.c +++ b/python/unknown_fields.c @@ -29,21 +29,10 @@ #include "python/message.h" #include "python/protobuf.h" +#include "upb/wire/eps_copy_input_stream.h" +#include "upb/wire/reader.h" #include "upb/wire/types.h" -static const char* PyUpb_DecodeVarint(const char* ptr, const char* end, - uint64_t* val) { - *val = 0; - for (int i = 0; ptr < end && i < 10; i++, ptr++) { - uint64_t byte = (uint8_t)*ptr; - *val |= (byte & 0x7f) << (i * 7); - if ((byte & 0x80) == 0) { - return ptr + 1; - } - } - return NULL; -} - // ----------------------------------------------------------------------------- // UnknownFieldSet // ----------------------------------------------------------------------------- @@ -66,60 +55,6 @@ PyUpb_UnknownFieldSet* PyUpb_UnknownFieldSet_NewBare(void) { return self; } -// Generic functions to skip a value or group. - -static const char* PyUpb_UnknownFieldSet_SkipGroup(const char* ptr, - const char* end, - int group_number); - -static const char* PyUpb_UnknownFieldSet_SkipField(const char* ptr, - const char* end, - uint32_t tag) { - int field_number = tag >> 3; - int wire_type = tag & 7; - switch (wire_type) { - case kUpb_WireType_Varint: { - uint64_t val; - return PyUpb_DecodeVarint(ptr, end, &val); - } - case kUpb_WireType_64Bit: - if (end - ptr < 8) return NULL; - return ptr + 8; - case kUpb_WireType_32Bit: - if (end - ptr < 4) return NULL; - return ptr + 4; - case kUpb_WireType_Delimited: { - uint64_t size; - ptr = PyUpb_DecodeVarint(ptr, end, &size); - if (!ptr || end - ptr < size) return NULL; - return ptr + size; - } - case kUpb_WireType_StartGroup: - return PyUpb_UnknownFieldSet_SkipGroup(ptr, end, field_number); - case kUpb_WireType_EndGroup: - return NULL; - default: - assert(0); - return NULL; - } -} - -static const char* PyUpb_UnknownFieldSet_SkipGroup(const char* ptr, - const char* end, - int group_number) { - uint32_t end_tag = (group_number << 3) | kUpb_WireType_EndGroup; - while (true) { - if (ptr == end) return NULL; - uint64_t tag; - ptr = PyUpb_DecodeVarint(ptr, end, &tag); - if (!ptr) return NULL; - if (tag == end_tag) return ptr; - ptr = PyUpb_UnknownFieldSet_SkipField(ptr, end, tag); - if (!ptr) return NULL; - } - return ptr; -} - // For MessageSet the established behavior is for UnknownFieldSet to interpret // the MessageSet wire format: // message MessageSet { @@ -143,40 +78,43 @@ enum { }; static const char* PyUpb_UnknownFieldSet_BuildMessageSetItem( - PyUpb_UnknownFieldSet* self, const char* ptr, const char* end) { + PyUpb_UnknownFieldSet* self, upb_EpsCopyInputStream* stream, + const char* ptr) { PyUpb_ModuleState* s = PyUpb_ModuleState_Get(); int type_id = 0; PyObject* msg = NULL; - while (true) { - if (ptr == end) goto err; - uint64_t tag; - ptr = PyUpb_DecodeVarint(ptr, end, &tag); + while (!upb_EpsCopyInputStream_IsDone(stream, &ptr)) { + uint32_t tag; + ptr = upb_WireReader_ReadTag(ptr, &tag); if (!ptr) goto err; switch (tag) { case kUpb_MessageSet_EndItemTag: goto done; case kUpb_MessageSet_TypeIdTag: { uint64_t tmp; - ptr = PyUpb_DecodeVarint(ptr, end, &tmp); + ptr = upb_WireReader_ReadVarint(ptr, &tmp); if (!ptr) goto err; if (!type_id) type_id = tmp; break; } case kUpb_MessageSet_MessageTag: { - uint64_t size; - ptr = PyUpb_DecodeVarint(ptr, end, &size); - if (!ptr || end - ptr < size) goto err; + int size; + ptr = upb_WireReader_ReadSize(ptr, &size); + if (!upb_EpsCopyInputStream_CheckDataSizeAvailable(stream, ptr, size)) { + goto err; + } + const char* str = ptr; + ptr = upb_EpsCopyInputStream_ReadStringAliased(stream, &str, size); if (!msg) { - msg = PyBytes_FromStringAndSize(ptr, size); + msg = PyBytes_FromStringAndSize(str, size); if (!msg) goto err; } else { // already saw a message here so deliberately skipping the duplicate } - ptr += size; break; } default: - ptr = PyUpb_UnknownFieldSet_SkipField(ptr, end, tag); + ptr = upb_WireReader_SkipValue(ptr, tag, stream); if (!ptr) goto err; } } @@ -198,19 +136,21 @@ err: } static const char* PyUpb_UnknownFieldSet_BuildMessageSet( - PyUpb_UnknownFieldSet* self, const char* ptr, const char* end) { + PyUpb_UnknownFieldSet* self, upb_EpsCopyInputStream* stream, + const char* ptr) { self->fields = PyList_New(0); - while (ptr < end) { - uint64_t tag; - ptr = PyUpb_DecodeVarint(ptr, end, &tag); + while (!upb_EpsCopyInputStream_IsDone(stream, &ptr)) { + uint32_t tag; + ptr = upb_WireReader_ReadTag(ptr, &tag); if (!ptr) goto err; if (tag == kUpb_MessageSet_StartItemTag) { - ptr = PyUpb_UnknownFieldSet_BuildMessageSetItem(self, ptr, end); + ptr = PyUpb_UnknownFieldSet_BuildMessageSetItem(self, stream, ptr); } else { - ptr = PyUpb_UnknownFieldSet_SkipField(ptr, end, tag); + ptr = upb_WireReader_SkipValue(ptr, tag, stream); } if (!ptr) goto err; } + if (upb_EpsCopyInputStream_IsError(stream)) goto err; return ptr; err: @@ -220,46 +160,50 @@ err: } static const char* PyUpb_UnknownFieldSet_Build(PyUpb_UnknownFieldSet* self, - const char* ptr, const char* end, + upb_EpsCopyInputStream* stream, + const char* ptr, int group_number); static const char* PyUpb_UnknownFieldSet_BuildValue( - PyUpb_UnknownFieldSet* self, const char* ptr, const char* end, - int field_number, int wire_type, int group_number, PyObject** data) { + PyUpb_UnknownFieldSet* self, upb_EpsCopyInputStream* stream, + const char* ptr, int field_number, int wire_type, int group_number, + PyObject** data) { switch (wire_type) { case kUpb_WireType_Varint: { uint64_t val; - ptr = PyUpb_DecodeVarint(ptr, end, &val); + ptr = upb_WireReader_ReadVarint(ptr, &val); if (!ptr) return NULL; *data = PyLong_FromUnsignedLongLong(val); return ptr; } case kUpb_WireType_64Bit: { - if (end - ptr < 8) return NULL; uint64_t val; - memcpy(&val, ptr, 8); + ptr = upb_WireReader_ReadFixed64(ptr, &val); *data = PyLong_FromUnsignedLongLong(val); - return ptr + 8; + return ptr; } case kUpb_WireType_32Bit: { - if (end - ptr < 4) return NULL; uint32_t val; - memcpy(&val, ptr, 4); + ptr = upb_WireReader_ReadFixed32(ptr, &val); *data = PyLong_FromUnsignedLongLong(val); - return ptr + 4; + return ptr; } case kUpb_WireType_Delimited: { - uint64_t size; - ptr = PyUpb_DecodeVarint(ptr, end, &size); - if (!ptr || end - ptr < size) return NULL; - *data = PyBytes_FromStringAndSize(ptr, size); - return ptr + size; + int size; + ptr = upb_WireReader_ReadSize(ptr, &size); + if (!upb_EpsCopyInputStream_CheckDataSizeAvailable(stream, ptr, size)) { + return NULL; + } + const char* str = ptr; + ptr = upb_EpsCopyInputStream_ReadStringAliased(stream, &str, size); + *data = PyBytes_FromStringAndSize(str, size); + return ptr; } case kUpb_WireType_StartGroup: { PyUpb_UnknownFieldSet* sub = PyUpb_UnknownFieldSet_NewBare(); if (!sub) return NULL; *data = &sub->ob_base; - return PyUpb_UnknownFieldSet_Build(sub, ptr, end, field_number); + return PyUpb_UnknownFieldSet_Build(sub, stream, ptr, field_number); } default: assert(0); @@ -271,22 +215,23 @@ static const char* PyUpb_UnknownFieldSet_BuildValue( // For non-MessageSet we just build the unknown fields exactly as they exist on // the wire. static const char* PyUpb_UnknownFieldSet_Build(PyUpb_UnknownFieldSet* self, - const char* ptr, const char* end, + upb_EpsCopyInputStream* stream, + const char* ptr, int group_number) { PyUpb_ModuleState* s = PyUpb_ModuleState_Get(); self->fields = PyList_New(0); - while (ptr < end) { - uint64_t tag; - ptr = PyUpb_DecodeVarint(ptr, end, &tag); + while (!upb_EpsCopyInputStream_IsDone(stream, &ptr)) { + uint32_t tag; + ptr = upb_WireReader_ReadTag(ptr, &tag); if (!ptr) goto err; PyObject* data = NULL; - int field_number = tag >> 3; - int wire_type = tag & 7; + int field_number = upb_WireReader_GetFieldNumber(tag); + int wire_type = upb_WireReader_GetWireType(tag); if (wire_type == kUpb_WireType_EndGroup) { if (field_number != group_number) return NULL; return ptr; } - ptr = PyUpb_UnknownFieldSet_BuildValue(self, ptr, end, field_number, + ptr = PyUpb_UnknownFieldSet_BuildValue(self, stream, ptr, field_number, wire_type, group_number, &data); if (!ptr) { Py_XDECREF(data); @@ -298,6 +243,7 @@ static const char* PyUpb_UnknownFieldSet_Build(PyUpb_UnknownFieldSet* self, PyList_Append(self->fields, field); Py_DECREF(field); } + if (upb_EpsCopyInputStream_IsError(stream)) goto err; return ptr; err: @@ -324,14 +270,15 @@ static PyObject* PyUpb_UnknownFieldSet_New(PyTypeObject* type, PyObject* args, const char* ptr = upb_Message_GetUnknown(msg, &size); if (size == 0) return &self->ob_base; - const char* end = ptr + size; + upb_EpsCopyInputStream stream; + upb_EpsCopyInputStream_Init(&stream, &ptr, size, true); const upb_MessageDef* msgdef = PyUpb_Message_GetMsgdef(py_msg); bool ok; if (upb_MessageDef_IsMessageSet(msgdef)) { - ok = PyUpb_UnknownFieldSet_BuildMessageSet(self, ptr, end) == end; + ok = PyUpb_UnknownFieldSet_BuildMessageSet(self, &stream, ptr) != NULL; } else { - ok = PyUpb_UnknownFieldSet_Build(self, ptr, end, -1) == end; + ok = PyUpb_UnknownFieldSet_Build(self, &stream, ptr, -1) != NULL; } if (!ok) { diff --git a/upb/message/accessors.c b/upb/message/accessors.c index 815d66cbe3..8196c79bfc 100644 --- a/upb/message/accessors.c +++ b/upb/message/accessors.c @@ -169,14 +169,14 @@ upb_FindUnknownRet upb_MiniTable_FindUnknown(const upb_Message* msg, if (field_number == upb_WireReader_GetFieldNumber(tag)) { ret.status = kUpb_FindUnknown_Ok; ret.ptr = upb_EpsCopyInputStream_GetAliasedPtr(&stream, unknown_begin); - ptr = upb_WireReader_SkipValue(ptr, tag, depth_limit, &stream); + ptr = _upb_WireReader_SkipValue(ptr, tag, depth_limit, &stream); // Because we know that the input is a flat buffer, it is safe to perform // pointer arithmetic on aliased pointers. ret.len = upb_EpsCopyInputStream_GetAliasedPtr(&stream, ptr) - ret.ptr; return ret; } - ptr = upb_WireReader_SkipValue(ptr, tag, depth_limit, &stream); + ptr = _upb_WireReader_SkipValue(ptr, tag, depth_limit, &stream); if (!ptr) return upb_FindUnknownRet_ParseError(); } ret.status = kUpb_FindUnknown_NotPresent; diff --git a/upb/util/BUILD b/upb/util/BUILD index 983daf3e58..50417daee3 100644 --- a/upb/util/BUILD +++ b/upb/util/BUILD @@ -107,9 +107,10 @@ cc_library( hdrs = ["compare.h"], visibility = ["//visibility:public"], deps = [ + "//:eps_copy_input_stream", "//:port", "//:reflection", - "//:wire", + "//:wire_reader", "//:wire_types", ], ) diff --git a/upb/util/compare.c b/upb/util/compare.c index 60fd5c12e2..4215411594 100644 --- a/upb/util/compare.c +++ b/upb/util/compare.c @@ -27,6 +27,8 @@ #include "upb/util/compare.h" +#include "upb/wire/eps_copy_input_stream.h" +#include "upb/wire/reader.h" #include "upb/wire/types.h" // Must be last. @@ -53,7 +55,7 @@ struct upb_UnknownFields { }; typedef struct { - const char* end; + upb_EpsCopyInputStream stream; upb_Arena* arena; upb_UnknownField* tmp; size_t tmp_size; @@ -76,25 +78,6 @@ static void upb_UnknownFields_Grow(upb_UnknownField_Context* ctx, *end = *base + new; } -static const char* upb_UnknownFields_ParseVarint(const char* ptr, - const char* limit, - uint64_t* val) { - uint8_t byte; - int bitpos = 0; - *val = 0; - - do { - // Unknown field data must be valid. - UPB_ASSERT(bitpos < 70 && ptr < limit); - byte = *ptr; - *val |= (uint64_t)(byte & 0x7F) << bitpos; - ptr++; - bitpos += 7; - } while (byte & 0x80); - - return ptr; -} - // We have to implement our own sort here, since qsort() is not an in-order // sort. Here we use merge sort, the simplest in-order sort. static void upb_UnknownFields_Merge(upb_UnknownField* arr, size_t start, @@ -151,11 +134,11 @@ static upb_UnknownFields* upb_UnknownFields_DoBuild( const char* ptr = *buf; uint32_t last_tag = 0; bool sorted = true; - while (ptr < ctx->end) { - uint64_t tag; - ptr = upb_UnknownFields_ParseVarint(ptr, ctx->end, &tag); + while (!upb_EpsCopyInputStream_IsDone(&ctx->stream, &ptr)) { + uint32_t tag; + ptr = upb_WireReader_ReadTag(ptr, &tag); UPB_ASSERT(tag <= UINT32_MAX); - int wire_type = tag & 7; + int wire_type = upb_WireReader_GetWireType(tag); if (wire_type == kUpb_WireType_EndGroup) break; if (tag < last_tag) sorted = false; last_tag = tag; @@ -169,25 +152,22 @@ static upb_UnknownFields* upb_UnknownFields_DoBuild( switch (wire_type) { case kUpb_WireType_Varint: - ptr = upb_UnknownFields_ParseVarint(ptr, ctx->end, &field->data.varint); + ptr = upb_WireReader_ReadVarint(ptr, &field->data.varint); break; case kUpb_WireType_64Bit: - UPB_ASSERT(ctx->end - ptr >= 8); - memcpy(&field->data.uint64, ptr, 8); - ptr += 8; + ptr = upb_WireReader_ReadFixed64(ptr, &field->data.uint64); break; case kUpb_WireType_32Bit: - UPB_ASSERT(ctx->end - ptr >= 4); - memcpy(&field->data.uint32, ptr, 4); - ptr += 4; + ptr = upb_WireReader_ReadFixed32(ptr, &field->data.uint32); break; case kUpb_WireType_Delimited: { - uint64_t size; - ptr = upb_UnknownFields_ParseVarint(ptr, ctx->end, &size); - UPB_ASSERT(ctx->end - ptr >= size); - field->data.delimited.data = ptr; + int size; + ptr = upb_WireReader_ReadSize(ptr, &size); + const char* s_ptr = ptr; + ptr = upb_EpsCopyInputStream_ReadStringAliased(&ctx->stream, &s_ptr, + size); + field->data.delimited.data = s_ptr; field->data.delimited.size = size; - ptr += size; break; } case kUpb_WireType_StartGroup: @@ -216,11 +196,12 @@ static upb_UnknownFields* upb_UnknownFields_DoBuild( // Builds a upb_UnknownFields data structure from the binary data in buf. static upb_UnknownFields* upb_UnknownFields_Build(upb_UnknownField_Context* ctx, - const char* buf, + const char* ptr, size_t size) { - ctx->end = buf + size; - upb_UnknownFields* fields = upb_UnknownFields_DoBuild(ctx, &buf); - UPB_ASSERT(buf == ctx->end); + upb_EpsCopyInputStream_Init(&ctx->stream, &ptr, size, true); + upb_UnknownFields* fields = upb_UnknownFields_DoBuild(ctx, &ptr); + UPB_ASSERT(upb_EpsCopyInputStream_IsDone(&ctx->stream, &ptr) && + !upb_EpsCopyInputStream_IsError(&ctx->stream)); return fields; } diff --git a/upb/wire/decode.c b/upb/wire/decode.c index e4c78bc307..6ef1a7aa25 100644 --- a/upb/wire/decode.c +++ b/upb/wire/decode.c @@ -35,6 +35,7 @@ #include "upb/wire/common_internal.h" #include "upb/wire/decode_internal.h" #include "upb/wire/eps_copy_input_stream.h" +#include "upb/wire/reader.h" #include "upb/wire/swap_internal.h" #include "upb/wire/types.h" @@ -370,27 +371,21 @@ static const char* _upb_Decoder_DecodeFixedPacked( // Note: if/when the decoder supports multi-buffer input, we will need to // handle buffer seams here. if (_upb_IsLittleEndian()) { - memcpy(mem, ptr, val->size); - ptr += val->size; + ptr = upb_EpsCopyInputStream_Copy(&d->input, ptr, mem, val->size); } else { - const char* end = ptr + val->size; + int delta = upb_EpsCopyInputStream_PushLimit(&d->input, ptr, val->size); char* dst = mem; - while (ptr < end) { + while (!_upb_Decoder_IsDone(d, &ptr)) { if (lg2 == 2) { - uint32_t val; - memcpy(&val, ptr, sizeof(val)); - val = _upb_BigEndian_Swap32(val); - memcpy(dst, &val, sizeof(val)); + ptr = upb_WireReader_ReadFixed32(ptr, dst); + dst += 4; } else { UPB_ASSERT(lg2 == 3); - uint64_t val; - memcpy(&val, ptr, sizeof(val)); - val = _upb_BigEndian_Swap64(val); - memcpy(dst, &val, sizeof(val)); + ptr = upb_WireReader_ReadFixed64(ptr, dst); + dst += 8; } - ptr += 1 << lg2; - dst += 1 << lg2; } + upb_EpsCopyInputStream_PopLimit(&d->input, ptr, delta); } return ptr; @@ -1027,21 +1022,17 @@ static const char* _upb_Decoder_DecodeWireValue(upb_Decoder* d, const char* ptr, _upb_Decoder_Munge(field->descriptortype, val); return ptr; case kUpb_WireType_32Bit: - memcpy(&val->uint32_val, ptr, 4); - val->uint32_val = _upb_BigEndian_Swap32(val->uint32_val); *op = kUpb_DecodeOp_Scalar4Byte; if (((1 << field->descriptortype) & kFixed32OkMask) == 0) { *op = kUpb_DecodeOp_UnknownField; } - return ptr + 4; + return upb_WireReader_ReadFixed32(ptr, &val->uint32_val); case kUpb_WireType_64Bit: - memcpy(&val->uint64_val, ptr, 8); - val->uint64_val = _upb_BigEndian_Swap64(val->uint64_val); *op = kUpb_DecodeOp_Scalar8Byte; if (((1 << field->descriptortype) & kFixed64OkMask) == 0) { *op = kUpb_DecodeOp_UnknownField; } - return ptr + 8; + return upb_WireReader_ReadFixed64(ptr, &val->uint64_val); case kUpb_WireType_Delimited: ptr = upb_Decoder_DecodeSize(d, ptr, &val->size); *op = _upb_Decoder_GetDelimitedOp(mt, field); diff --git a/upb/wire/eps_copy_input_stream.h b/upb/wire/eps_copy_input_stream.h index eb172f28df..35d66162f4 100644 --- a/upb/wire/eps_copy_input_stream.h +++ b/upb/wire/eps_copy_input_stream.h @@ -281,14 +281,24 @@ UPB_INLINE const char* upb_EpsCopyInputStream_ReadStringAliased( return ret; } -// Skips `size` bytes of string data from the input and returns a pointer past -// the end. Returns NULL on end of stream or error. +// Skips `size` bytes of data from the input and returns a pointer past the end. +// Returns NULL on end of stream or error. UPB_INLINE const char* upb_EpsCopyInputStream_Skip(upb_EpsCopyInputStream* e, const char* ptr, int size) { if (!upb_EpsCopyInputStream_CheckDataSizeAvailable(e, ptr, size)) return NULL; return ptr + size; } +// Copies `size` bytes of data from the input `ptr` into the buffer `to`, and +// returns a pointer past the end. Returns NULL on end of stream or error. +UPB_INLINE const char* upb_EpsCopyInputStream_Copy(upb_EpsCopyInputStream* e, + const char* ptr, void* to, + int size) { + if (!upb_EpsCopyInputStream_CheckDataSizeAvailable(e, ptr, size)) return NULL; + memcpy(to, ptr, size); + return ptr + size; +} + // Reads string data from the stream and advances the pointer accordingly. // If aliasing was enabled when the stream was initialized, then the returned // pointer will point into the input buffer if possible, otherwise new data @@ -309,10 +319,8 @@ UPB_INLINE const char* upb_EpsCopyInputStream_ReadString( UPB_ASSERT(arena); char* data = (char*)upb_Arena_Malloc(arena, size); if (!data) return NULL; - memcpy(data, *ptr, size); - const char* ret = *ptr + size; + const char* ret = upb_EpsCopyInputStream_Copy(e, *ptr, data, size); *ptr = data; - UPB_ASSUME(ret != NULL); return ret; } } diff --git a/upb/wire/reader.c b/upb/wire/reader.c index f5b9b0b0ce..a84fb0b912 100644 --- a/upb/wire/reader.c +++ b/upb/wire/reader.c @@ -50,9 +50,9 @@ _upb_WireReader_ReadLongVarint(const char* ptr, uint64_t val) { return ret; } -const char* upb_WireReader_SkipGroup(const char* ptr, uint32_t tag, - int depth_limit, - upb_EpsCopyInputStream* stream) { +const char* _upb_WireReader_SkipGroup(const char* ptr, uint32_t tag, + int depth_limit, + upb_EpsCopyInputStream* stream) { if (--depth_limit == 0) return NULL; uint32_t end_group_tag = (tag & ~7ULL) | kUpb_WireType_EndGroup; while (!upb_EpsCopyInputStream_IsDone(stream, &ptr)) { @@ -60,7 +60,7 @@ const char* upb_WireReader_SkipGroup(const char* ptr, uint32_t tag, ptr = upb_WireReader_ReadTag(ptr, &tag); if (!ptr) return NULL; if (tag == end_group_tag) return ptr; - ptr = upb_WireReader_SkipValue(ptr, tag, depth_limit, stream); + ptr = _upb_WireReader_SkipValue(ptr, tag, depth_limit, stream); if (!ptr) return NULL; } return ptr; diff --git a/upb/wire/reader.h b/upb/wire/reader.h index 13d6327193..b959744027 100644 --- a/upb/wire/reader.h +++ b/upb/wire/reader.h @@ -157,26 +157,25 @@ UPB_INLINE const char* upb_WireReader_ReadFixed64(const char* ptr, void* val) { return ptr + 8; } +const char* _upb_WireReader_SkipGroup(const char* ptr, uint32_t tag, + int depth_limit, + upb_EpsCopyInputStream* stream); + // Skips data for a group, returning a pointer past the end of the group, or // NULL if there was an error parsing the group. The `tag` argument should be // the start group tag that begins the group. The `depth_limit` argument // indicates how many levels of recursion the group is allowed to have before // reporting a parse error (this limit exists to protect against stack // overflow). -const char* upb_WireReader_SkipGroup(const char* ptr, uint32_t tag, - int depth_limit, - upb_EpsCopyInputStream* stream); - -// Skips data for a wire value of any type, returning a pointer past the end of -// the data, or NULL if there was an error parsing the group. The `tag` argument -// should be the tag that was just parsed. The `depth_limit` argument indicates -// how many levels of recursion a group is allowed to have before reporting a -// parse error (this limit exists to protect against stack overflow). // -// REQUIRES: there must be at least 10 bytes of data available at `ptr`. -// Bounds checks must be performed before calling this function, preferably -// by calling upb_EpsCopyInputStream_IsDone(). -UPB_INLINE const char* upb_WireReader_SkipValue( +// TODO: evaluate how the depth_limit should be specified. Do users need +// control over this? +UPB_INLINE const char* upb_WireReader_SkipGroup( + const char* ptr, uint32_t tag, upb_EpsCopyInputStream* stream) { + return _upb_WireReader_SkipGroup(ptr, tag, 100, stream); +} + +UPB_INLINE const char* _upb_WireReader_SkipValue( const char* ptr, uint32_t tag, int depth_limit, upb_EpsCopyInputStream* stream) { switch (upb_WireReader_GetWireType(tag)) { @@ -194,7 +193,7 @@ UPB_INLINE const char* upb_WireReader_SkipValue( return ptr; } case kUpb_WireType_StartGroup: - return upb_WireReader_SkipGroup(ptr, tag, depth_limit, stream); + return _upb_WireReader_SkipGroup(ptr, tag, depth_limit, stream); case kUpb_WireType_EndGroup: return NULL; // Should be handled before now. default: @@ -202,6 +201,23 @@ UPB_INLINE const char* upb_WireReader_SkipValue( } } +// Skips data for a wire value of any type, returning a pointer past the end of +// the data, or NULL if there was an error parsing the group. The `tag` argument +// should be the tag that was just parsed. The `depth_limit` argument indicates +// how many levels of recursion a group is allowed to have before reporting a +// parse error (this limit exists to protect against stack overflow). +// +// REQUIRES: there must be at least 10 bytes of data available at `ptr`. +// Bounds checks must be performed before calling this function, preferably +// by calling upb_EpsCopyInputStream_IsDone(). +// +// TODO: evaluate how the depth_limit should be specified. Do users need +// control over this? +UPB_INLINE const char* upb_WireReader_SkipValue( + const char* ptr, uint32_t tag, upb_EpsCopyInputStream* stream) { + return _upb_WireReader_SkipValue(ptr, tag, 100, stream); +} + #ifdef __cplusplus } /* extern "C" */ #endif From 143132fa27288f47c49c15963c9a280d7ac2dc01 Mon Sep 17 00:00:00 2001 From: Joshua Haberman Date: Tue, 3 Jan 2023 16:20:37 -0800 Subject: [PATCH 20/21] Make upb's generated code agnostic to fasttable. This simplifies the code generation by making output agnostic to whether fasttables will be used or not. This grows the generated code in the common case, but when fasttables are not being used the preprocessor will strip away the unused tables. PiperOrigin-RevId: 499340805 --- BUILD | 4 ++-- bazel/upb_proto_library.bzl | 28 +--------------------------- cmake/make_cmakelists.py | 3 +++ upb/port/def.inc | 4 ++-- upbc/protoc-gen-upb.cc | 29 ++++++++++------------------- 5 files changed, 18 insertions(+), 50 deletions(-) diff --git a/BUILD b/BUILD index 3289a0640f..c2b4103684 100644 --- a/BUILD +++ b/BUILD @@ -31,11 +31,11 @@ load( ) load( "//bazel:upb_proto_library.bzl", - "upb_fasttable_enabled", "upb_proto_library", "upb_proto_library_copts", "upb_proto_reflection_library", ) +load("@bazel_skylib//rules:common_settings.bzl", "bool_flag") # begin:google_only # load( @@ -69,7 +69,7 @@ config_setting( visibility = ["//visibility:public"], ) -upb_fasttable_enabled( +bool_flag( name = "fasttable_enabled", build_setting_default = False, visibility = ["//visibility:public"], diff --git a/bazel/upb_proto_library.bzl b/bazel/upb_proto_library.bzl index e7ebb72d7f..88ef5a605f 100644 --- a/bazel/upb_proto_library.bzl +++ b/bazel/upb_proto_library.bzl @@ -167,29 +167,6 @@ def _cc_library_func(ctx, name, hdrs, srcs, copts, includes, dep_ccinfos): linking_context = linking_context, ) -# Build setting for whether fasttable code generation is enabled ############### - -_FastTableEnabledInfo = provider( - "Provides fasttable configuration", - fields = { - "enabled": "whether fasttable is enabled", - }, -) - -def fasttable_enabled_impl(ctx): - raw_setting = ctx.build_setting_value - - if raw_setting: - # TODO(haberman): check that the target CPU supports fasttable. - pass - - return _FastTableEnabledInfo(enabled = raw_setting) - -upb_fasttable_enabled = rule( - implementation = fasttable_enabled_impl, - build_setting = config.bool(flag = True), -) - # Dummy rule to expose select() copts to aspects ############################## UpbProtoLibraryCoptsInfo = provider( @@ -235,9 +212,6 @@ def _compile_upb_protos(ctx, generator, proto_info, proto_sources): srcs = [_generate_output_file(ctx, name, ext + ".c") for name in proto_sources] hdrs = [_generate_output_file(ctx, name, ext + ".h") for name in proto_sources] transitive_sets = proto_info.transitive_descriptor_sets.to_list() - fasttable_enabled = (hasattr(ctx.attr, "_fasttable_enabled") and - ctx.attr._fasttable_enabled[_FastTableEnabledInfo].enabled) - codegen_params = "fasttable:" if fasttable_enabled else "" ctx.actions.run( inputs = depset( direct = [proto_info.direct_descriptor_set], @@ -247,7 +221,7 @@ def _compile_upb_protos(ctx, generator, proto_info, proto_sources): outputs = srcs + hdrs, executable = ctx.executable._protoc, arguments = [ - "--" + generator + "_out=" + codegen_params + _get_real_root(ctx, srcs[0]), + "--" + generator + "_out=" + _get_real_root(ctx, srcs[0]), "--plugin=protoc-gen-" + generator + "=" + tool.path, "--descriptor_set_in=" + ctx.configuration.host_path_separator.join([f.path for f in transitive_sets]), ] + diff --git a/cmake/make_cmakelists.py b/cmake/make_cmakelists.py index 47bf85b738..6d2c374ee1 100755 --- a/cmake/make_cmakelists.py +++ b/cmake/make_cmakelists.py @@ -197,6 +197,9 @@ class BuildFileFunctions(object): def package_group(self, **kwargs): pass + def bool_flag(self, **kwargs): + pass + class WorkspaceFileFunctions(object): def __init__(self, converter): diff --git a/upb/port/def.inc b/upb/port/def.inc index b507b18769..b23b406992 100644 --- a/upb/port/def.inc +++ b/upb/port/def.inc @@ -243,8 +243,8 @@ #endif /* UPB_FASTTABLE_INIT() allows protos compiled for fasttable to gracefully - * degrade to non-fasttable if we are using UPB_TRY_ENABLE_FASTTABLE. */ -#if !UPB_FASTTABLE && defined(UPB_TRY_ENABLE_FASTTABLE) + * degrade to non-fasttable if the runtime or platform do not support it. */ +#if !UPB_FASTTABLE #define UPB_FASTTABLE_INIT(...) #else #define UPB_FASTTABLE_INIT(...) __VA_ARGS__ diff --git a/upbc/protoc-gen-upb.cc b/upbc/protoc-gen-upb.cc index 4c27f53233..3c3987d62f 100644 --- a/upbc/protoc-gen-upb.cc +++ b/upbc/protoc-gen-upb.cc @@ -1182,7 +1182,7 @@ void WriteMessageField(const upb_MiniTableField* field64, // Writes a single message into a .upb.c source file. void WriteMessage(const protobuf::Descriptor* message, const FileLayout& layout, - Output& output, bool fasttable_enabled) { + Output& output) { std::string msg_name = ToCIdent(message->full_name()); std::string fields_array_ref = "NULL"; std::string submsgs_array_ref = "NULL"; @@ -1225,9 +1225,7 @@ void WriteMessage(const protobuf::Descriptor* message, const FileLayout& layout, std::vector table; uint8_t table_mask = -1; - if (fasttable_enabled) { - table = FastDecodeTable(message, layout); - } + table = FastDecodeTable(message, layout); if (table.size() > 1) { assert((table.size() & (table.size() - 1)) == 0); @@ -1256,7 +1254,7 @@ void WriteMessage(const protobuf::Descriptor* message, const FileLayout& layout, output(" {0x$1, &$0},\n", ent.first, absl::StrCat(absl::Hex(ent.second, absl::kZeroPad16))); } - output(" }),\n"); + output(" })\n"); } output("};\n\n"); } @@ -1310,15 +1308,14 @@ int WriteEnums(const FileLayout& layout, Output& output) { return this_file_enums.size(); } -int WriteMessages(const FileLayout& layout, Output& output, - bool fasttable_enabled) { +int WriteMessages(const FileLayout& layout, Output& output) { const protobuf::FileDescriptor* file = layout.descriptor(); std::vector file_messages = SortedMessages(file); if (file_messages.empty()) return 0; for (auto message : file_messages) { - WriteMessage(message, layout, output, fasttable_enabled); + WriteMessage(message, layout, output); } output("static const upb_MiniTable *$0[$1] = {\n", kMessagesInit, @@ -1383,8 +1380,7 @@ int WriteExtensions(const FileLayout& layout, Output& output) { } // Writes a .upb.cc source file. -void WriteSource(const FileLayout& layout, Output& output, - bool fasttable_enabled) { +void WriteSource(const FileLayout& layout, Output& output) { const protobuf::FileDescriptor* file = layout.descriptor(); EmitFileWarning(file, output); @@ -1405,7 +1401,7 @@ void WriteSource(const FileLayout& layout, Output& output, "#include \"upb/port/def.inc\"\n" "\n"); - int msg_count = WriteMessages(layout, output, fasttable_enabled); + int msg_count = WriteMessages(layout, output); int ext_count = WriteExtensions(layout, output); int enum_count = WriteEnums(layout, output); @@ -1436,17 +1432,12 @@ bool Generator::Generate(const protobuf::FileDescriptor* file, const std::string& parameter, protoc::GeneratorContext* context, std::string* error) const { - bool fasttable_enabled = false; std::vector> params; google::protobuf::compiler::ParseGeneratorParameter(parameter, ¶ms); for (const auto& pair : params) { - if (pair.first == "fasttable") { - fasttable_enabled = true; - } else { - *error = "Unknown parameter: " + pair.first; - return false; - } + *error = "Unknown parameter: " + pair.first; + return false; } FileLayout layout(file); @@ -1459,7 +1450,7 @@ bool Generator::Generate(const protobuf::FileDescriptor* file, std::unique_ptr c_output_stream( context->Open(SourceFilename(file))); Output c_output(c_output_stream.get()); - WriteSource(layout, c_output, fasttable_enabled); + WriteSource(layout, c_output); return true; } From c628e53dde6f6d2a66120d74f00c942ca27f31db Mon Sep 17 00:00:00 2001 From: Eric Salo Date: Tue, 3 Jan 2023 19:48:26 -0800 Subject: [PATCH 21/21] upb_MiniTable_GetMutableMap() -> upb_Message_GetOrCreateMutableMap() PiperOrigin-RevId: 499371815 --- upb/message/accessors.c | 4 ++-- upb/message/accessors.h | 7 +++---- upb/message/accessors_test.cc | 4 ++-- upbc/protoc-gen-upb.cc | 2 +- 4 files changed, 8 insertions(+), 9 deletions(-) diff --git a/upb/message/accessors.c b/upb/message/accessors.c index 8196c79bfc..43a79b9a9a 100644 --- a/upb/message/accessors.c +++ b/upb/message/accessors.c @@ -313,8 +313,8 @@ upb_UnknownToMessage_Status upb_MiniTable_PromoteUnknownToMap( /* base_message= */ NULL, decode_options, arena); if (ret.status != kUpb_UnknownToMessage_Ok) return ret.status; // Allocate map on demand before append. - upb_Map* map = - upb_MiniTable_GetMutableMap(msg, map_entry_mini_table, field, arena); + upb_Map* map = upb_Message_GetOrCreateMutableMap(msg, map_entry_mini_table, + field, arena); upb_Message* map_entry_message = ret.message; upb_MapInsertStatus insert_status = upb_Message_InsertMapEntry( map, mini_table, field, map_entry_message, arena); diff --git a/upb/message/accessors.h b/upb/message/accessors.h index 215f3558f3..9a26fd9f63 100644 --- a/upb/message/accessors.h +++ b/upb/message/accessors.h @@ -294,7 +294,7 @@ UPB_INLINE void _upb_Message_ClearNonExtensionField( field); } -UPB_INLINE upb_Map* _upb_MiniTable_GetOrCreateMutableMap( +UPB_INLINE upb_Map* _upb_Message_GetOrCreateMutableMap( upb_Message* msg, const upb_MiniTableField* field, size_t key_size, size_t val_size, upb_Arena* arena) { _upb_MiniTableField_CheckIsMap(field); @@ -630,8 +630,7 @@ UPB_API_INLINE const upb_Map* upb_Message_GetMap( return ret; } -// TODO: rename to GetOrCreateMutableMap -UPB_API_INLINE upb_Map* upb_MiniTable_GetMutableMap( +UPB_API_INLINE upb_Map* upb_Message_GetOrCreateMutableMap( upb_Message* msg, const upb_MiniTable* map_entry_mini_table, const upb_MiniTableField* field, upb_Arena* arena) { UPB_ASSERT(field->descriptortype == kUpb_FieldType_Message || @@ -640,7 +639,7 @@ UPB_API_INLINE upb_Map* upb_MiniTable_GetMutableMap( &map_entry_mini_table->fields[0]; const upb_MiniTableField* map_entry_value_field = &map_entry_mini_table->fields[1]; - return _upb_MiniTable_GetOrCreateMutableMap( + return _upb_Message_GetOrCreateMutableMap( msg, field, _upb_Map_CTypeSize(upb_MiniTableField_CType(map_entry_key_field)), _upb_Map_CTypeSize(upb_MiniTableField_CType(map_entry_value_field)), diff --git a/upb/message/accessors_test.cc b/upb/message/accessors_test.cc index a4c21a5ae9..edb9cd8a3a 100644 --- a/upb/message/accessors_test.cc +++ b/upb/message/accessors_test.cc @@ -756,8 +756,8 @@ TEST(GeneratedCode, PromoteUnknownToMap) { decode_options, arena); EXPECT_EQ(promote_result, kUpb_UnknownToMessage_Ok); - upb_Map* map = upb_MiniTable_GetMutableMap(msg, map_entry_mini_table, - &mini_table->fields[1], arena); + upb_Map* map = upb_Message_GetOrCreateMutableMap( + msg, map_entry_mini_table, &mini_table->fields[1], arena); EXPECT_NE(map, nullptr); // Lookup in map. upb_MessageValue key; diff --git a/upbc/protoc-gen-upb.cc b/upbc/protoc-gen-upb.cc index 3c3987d62f..c20d00210d 100644 --- a/upbc/protoc-gen-upb.cc +++ b/upbc/protoc-gen-upb.cc @@ -559,7 +559,7 @@ void GenerateMapSetters(const protobuf::FieldDescriptor* field, R"cc( UPB_INLINE bool $0_$1_set($0* msg, $2 key, $3 val, upb_Arena* a) { const upb_MiniTableField field = $4; - upb_Map* map = _upb_MiniTable_GetOrCreateMutableMap(msg, &field, $5, $6, a); + upb_Map* map = _upb_Message_GetOrCreateMutableMap(msg, &field, $5, $6, a); return _upb_Map_Insert(map, &key, $5, &val, $6, a) != kUpb_MapInsertStatus_OutOfMemory; }