diff --git a/.bazelrc b/.bazelrc index a9a31d2421..23492c03de 100644 --- a/.bazelrc +++ b/.bazelrc @@ -7,6 +7,7 @@ build --extra_toolchains=@system_python//:python_toolchain build:m32 --copt=-m32 --linkopt=-m32 build:asan --copt=-fsanitize=address --linkopt=-fsanitize=address +build:msan --copt=-fsanitize=memory --linkopt=-fsanitize=memory # For Valgrind, we have to disable checks of "possible" leaks because the Python # interpreter does the sorts of things that flag Valgrind "possible" leak checks. diff --git a/upb/mini_table.c b/upb/mini_table.c index 834492e7ac..8bcaf7449c 100644 --- a/upb/mini_table.c +++ b/upb/mini_table.c @@ -152,13 +152,23 @@ static char* upb_MtDataEncoder_PutBase92Varint(upb_MtDataEncoder* e, char* ptr, return ptr; } +char* upb_MtDataEncoder_PutModifier(upb_MtDataEncoder* e, char* ptr, + uint64_t mod) { + if (mod) { + ptr = upb_MtDataEncoder_PutBase92Varint(e, ptr, mod, + kUpb_EncodedValue_MinModifier, + kUpb_EncodedValue_MaxModifier); + } + return ptr; +} + char* upb_MtDataEncoder_StartMessage(upb_MtDataEncoder* e, char* ptr, uint64_t msg_mod) { upb_MtDataEncoderInternal* in = upb_MtDataEncoder_GetInternal(e, ptr); in->msg_mod = msg_mod; in->last_field_num = 0; in->oneof_state = kUpb_OneofState_NotStarted; - return ptr; + return upb_MtDataEncoder_PutModifier(e, ptr, msg_mod); } char* upb_MtDataEncoder_PutField(upb_MtDataEncoder* e, char* ptr, @@ -218,11 +228,7 @@ char* upb_MtDataEncoder_PutField(upb_MtDataEncoder* e, char* ptr, (in->msg_mod & kUpb_MessageModifier_DefaultIsPacked)) { encoded_modifiers |= kUpb_EncodedFieldModifier_IsUnpacked; } - if (encoded_modifiers) { - ptr = upb_MtDataEncoder_PutBase92Varint(e, ptr, encoded_modifiers, - kUpb_EncodedValue_MinModifier, - kUpb_EncodedValue_MaxModifier); - } + upb_MtDataEncoder_PutModifier(e, ptr, encoded_modifiers); return ptr; } @@ -421,6 +427,8 @@ static void upb_MiniTable_SetField(upb_MtDecoder* d, uint8_t ch, field->descriptortype = kUpb_EncodedToType[type]; if (upb_MiniTable_HasSub(type, msg_modifiers)) { field->submsg_index = sub_count ? (*sub_count)++ : 0; + } else { + field->submsg_index = 0; } } @@ -687,6 +695,8 @@ static void upb_MtDecoder_AssignHasbits(upb_MiniTable* ret) { upb_MiniTable_Field* field = (upb_MiniTable_Field*)&ret->fields[i]; if (field->offset == kRequiredPresence) { field->presence = ++last_hasbit; + } else if (field->offset == kNoPresence) { + field->presence = 0; } } ret->required_count = last_hasbit; diff --git a/upbc/protoc-gen-upb.cc b/upbc/protoc-gen-upb.cc index 48f4a884c4..c615d37ffa 100644 --- a/upbc/protoc-gen-upb.cc +++ b/upbc/protoc-gen-upb.cc @@ -74,6 +74,41 @@ const char* kEnumsInit = "enums_layout"; const char* kExtensionsInit = "extensions_layout"; const char* kMessagesInit = "messages_layout"; +enum SubTag { + kNull = 0, + kMessage = 1, + kEnum = 2, + kMask = 3, +}; + +upb_MiniTable_Sub PackSub(const char* data, SubTag tag) { + uintptr_t val = reinterpret_cast(data); + assert((val & kMask) == 0); + upb_MiniTable_Sub sub; + sub.submsg = reinterpret_cast(val | tag); + return sub; +} + +bool IsNull(upb_MiniTable_Sub sub) { + return reinterpret_cast(sub.subenum) == 0; +} + +void WriteSub(upb_MiniTable_Sub sub, Output& output) { + uintptr_t as_int = reinterpret_cast(sub.submsg); + const char* str = reinterpret_cast(as_int & ~SubTag::kMask); + switch (as_int & SubTag::kMask) { + case SubTag::kMessage: + output(" {.submsg = &$0},\n", str); + break; + case SubTag::kEnum: + output(" {.subenum = &$0},\n", str); + break; + default: + output(" {.submsg = NULL},\n", str); + break; + } +} + void AddEnums(const protobuf::Descriptor* message, std::vector* enums) { for (int i = 0; i < message->enum_type_count(); i++) { @@ -276,29 +311,6 @@ std::string SizeLg2(const protobuf::FieldDescriptor* field) { } } -std::string SizeRep(const protobuf::FieldDescriptor* field) { - switch (field->cpp_type()) { - case protobuf::FieldDescriptor::CPPTYPE_MESSAGE: - return "kUpb_FieldRep_Pointer"; - case protobuf::FieldDescriptor::CPPTYPE_ENUM: - case protobuf::FieldDescriptor::CPPTYPE_FLOAT: - case protobuf::FieldDescriptor::CPPTYPE_INT32: - case protobuf::FieldDescriptor::CPPTYPE_UINT32: - return "kUpb_FieldRep_4Byte"; - case protobuf::FieldDescriptor::CPPTYPE_BOOL: - return "kUpb_FieldRep_1Byte"; - case protobuf::FieldDescriptor::CPPTYPE_DOUBLE: - case protobuf::FieldDescriptor::CPPTYPE_INT64: - case protobuf::FieldDescriptor::CPPTYPE_UINT64: - return "kUpb_FieldRep_8Byte"; - case protobuf::FieldDescriptor::CPPTYPE_STRING: - return "kUpb_FieldRep_StringView"; - default: - fprintf(stderr, "Unexpected type"); - abort(); - } -} - bool HasNonZeroDefault(const protobuf::FieldDescriptor* field) { switch (field->cpp_type()) { case protobuf::FieldDescriptor::CPPTYPE_MESSAGE: @@ -435,22 +447,32 @@ class FilePlatformLayout { upb_MiniTable* mt = pair.second; for (const auto& f : FieldNumberOrder(pair.first)) { upb_MiniTable_Field* mt_f = FindField(mt, f->number()); - if (f->message_type()) { - SetCStr(&mt->subs[mt_f->submsg_index], MessageInit(f->message_type())); - } else if (mt_f->descriptortype == kUpb_FieldType_Enum) { - SetCStr(&mt->subs[mt_f->submsg_index], EnumInit(f->enum_type())); - } + assert(mt_f); + upb_MiniTable_Sub sub = PackSubForField(f); + if (IsNull(sub)) continue; + *const_cast(&mt->subs[mt_f->submsg_index]) = sub; } } } - void SetCStr(const upb_MiniTable_Sub* sub, const std::string& str) { + upb_MiniTable_Sub PackSubForField(const protobuf::FieldDescriptor* f) { + if (f->message_type()) { + return PackSub(AllocStr(MessageInit(f->message_type())), + SubTag::kMessage); + } else if (f->enum_type() && f->enum_type()->file()->syntax() == + protobuf::FileDescriptor::SYNTAX_PROTO2) { + return PackSub(AllocStr(EnumInit(f->enum_type())), SubTag::kEnum); + } else { + return PackSub(nullptr, SubTag::kNull); + } + } + + const char* AllocStr(const std::string& str) { char* ret = static_cast(upb_Arena_Malloc(arena_.ptr(), str.size() + 1)); memcpy(ret, str.data(), str.size()); ret[str.size()] = '\0'; - upb_MiniTable_Sub* sub_mut = const_cast(sub); - sub_mut->submsg = reinterpret_cast(ret); + return ret; } void BuildExtensions(const protobuf::FileDescriptor* fd) { @@ -480,6 +502,8 @@ class FilePlatformLayout { } } + + upb_MiniTable* MakeMiniTable(const protobuf::Descriptor* m) { if (m->options().message_set_wire_format()) { return upb_MiniTable_BuildMessageSet(platform_, arena_.ptr()); @@ -1088,29 +1112,6 @@ void WriteHeader(const protobuf::FileDescriptor* file, Output& output) { ToPreproc(file->name())); } -int TableDescriptorType(const protobuf::FieldDescriptor* field) { - if (field->file()->syntax() == protobuf::FileDescriptor::SYNTAX_PROTO2 && - field->type() == protobuf::FieldDescriptor::TYPE_STRING) { - // From the perspective of the binary encoder/decoder, proto2 string fields - // are identical to bytes fields. Only in proto3 do we check UTF-8 for - // string fields at parse time. - // - // If we ever use these tables for JSON encoding/decoding (for example by - // embedding field names on the side) we will have to revisit this, because - // string vs. bytes behavior is not affected by proto2 vs proto3. - return protobuf::FieldDescriptor::TYPE_BYTES; - } else if (field->enum_type() && - field->enum_type()->file()->syntax() == - protobuf::FileDescriptor::SYNTAX_PROTO3) { - // From the perspective of the binary decoder, proto3 enums are identical to - // int32 fields. Only in proto2 do we check enum values to make sure they - // are defined in the enum. - return protobuf::FieldDescriptor::TYPE_INT32; - } else { - return field->type(); - } -} - struct SubLayoutArray { public: SubLayoutArray(const protobuf::Descriptor* message); @@ -1419,8 +1420,9 @@ std::string GetModeInit(uint8_t mode) { void WriteField(const upb_MiniTable_Field* field64, const upb_MiniTable_Field* field32, Output& output) { output("{$0, UPB_SIZE($1, $2), $3, $4, $5, $6}", field64->number, - field64->offset, field32->offset, field64->submsg_index, - field64->descriptortype, field64->mode); + field64->offset, field32->offset, field32->presence, + field64->submsg_index, field64->descriptortype, + GetModeInit(field64->mode)); } // Writes a single field into a .upb.c source file. @@ -1584,23 +1586,14 @@ int WriteMessages(const FileLayout& layout, Output& output, return file_messages.size(); } -void WriteExtension(upb_MiniTable_Extension* ext64, - upb_MiniTable_Extension* ext32, Output& output) { - WriteField(&ext64->field, &ext32->field, output); +void WriteExtension(const upb_MiniTable_Extension* ext, Output& output) { + WriteField(&ext->field, &ext->field, output); output(",\n"); - output(" &$0,\n", MessageInit(ext->containing_type())); - if (ext->message_type()) { - output(" {.submsg = &$0},\n", MessageInit(ext->message_type())); - } else if (ext->enum_type() && ext->enum_type()->file()->syntax() == - protobuf::FileDescriptor::SYNTAX_PROTO2) { - output(" {.subenum = &$0},\n", EnumInit(ext->enum_type())); - } else { - output(" {.submsg = NULL},\n"); - } + WriteSub(ext->sub, output); } -int WriteExtensions(const protobuf::FileDescriptor* file, Output& output) { - auto exts = SortedExtensions(file); +int WriteExtensions(const FileLayout& layout, Output& output) { + auto exts = SortedExtensions(layout.descriptor()); absl::flat_hash_set forward_decls; if (exts.empty()) return 0; @@ -1622,7 +1615,7 @@ int WriteExtensions(const protobuf::FileDescriptor* file, Output& output) { for (auto ext : exts) { output("const upb_MiniTable_Extension $0 = {\n ", ExtensionLayout(ext)); - WriteExtension(ext, output); + WriteExtension(layout.GetExtension(ext), output); output("\n};\n"); } @@ -1663,7 +1656,7 @@ void WriteSource(const FileLayout& layout, Output& output, "\n"); int msg_count = WriteMessages(file, output, fasttable_enabled); - int ext_count = WriteExtensions(file, output); + int ext_count = WriteExtensions(layout, output); int enum_count = WriteEnums(file, output); output("const upb_MiniTable_File $0 = {\n", FileLayoutName(file));