diff --git a/BUILD b/BUILD index 1dd45e632f..f49f4c250c 100644 --- a/BUILD +++ b/BUILD @@ -511,6 +511,7 @@ cc_test( name = "msg_test", srcs = ["upb/msg_test.cc"], deps = [ + ":fuzz_test_util", ":json", ":msg_test_upb_proto", ":msg_test_upb_proto_reflection", @@ -704,6 +705,17 @@ sh_test( deps = ["@bazel_tools//tools/bash/runfiles"], ) +cc_library( + name = "fuzz_test_util", + testonly = 1, + srcs = ["upb/fuzz_test_util.cc"], + hdrs = ["upb/fuzz_test_util.h"], + deps = [ + ":mini_table", + ":upb", + ], +) + # Internal C/C++ libraries ##################################################### cc_library( diff --git a/cmake/make_cmakelists.py b/cmake/make_cmakelists.py index d64c14f6f3..496fa2f4fc 100755 --- a/cmake/make_cmakelists.py +++ b/cmake/make_cmakelists.py @@ -68,6 +68,8 @@ class BuildFileFunctions(object): return if kwargs["name"] == "lupb": return + if "testonly" in kwargs: + return files = kwargs.get("srcs", []) + kwargs.get("hdrs", []) found_files = [] pregenerated_files = [ diff --git a/upb/extension_registry.c b/upb/extension_registry.c index 921cf227dd..f811eddf8c 100644 --- a/upb/extension_registry.c +++ b/upb/extension_registry.c @@ -60,6 +60,8 @@ bool _upb_extreg_add(upb_ExtensionRegistry* r, const upb_MiniTable_Extension** start = e; const upb_MiniTable_Extension** end = UPB_PTRADD(e, count); for (; e < end; e++) { + // TODO: we should gracefully handle the case where this already exists. + // Right now we're only checking for out of memory. const upb_MiniTable_Extension* ext = *e; extreg_key(buf, ext->extendee, ext->field.number); if (!upb_strtable_insert(&r->exts, buf, EXTREG_KEY_SIZE, diff --git a/upb/fuzz_test_util.cc b/upb/fuzz_test_util.cc new file mode 100644 index 0000000000..12de4a9283 --- /dev/null +++ b/upb/fuzz_test_util.cc @@ -0,0 +1,190 @@ +/* + * Copyright (c) 2009-2022, Google LLC + * All rights reserved. 
+ * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * * Neither the name of Google LLC nor the + * names of its contributors may be used to endorse or promote products + * derived from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL Google LLC BE LIABLE FOR ANY DIRECT, + * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES + * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND + * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#include "upb/fuzz_test_util.h" + +#include "upb/msg.h" +#include "upb/upb.hpp" + +namespace upb { +namespace fuzz { + +namespace { + +class Builder { + public: + Builder(const MiniTableFuzzInput& input, upb_Arena* arena) + : input_(&input), arena_(arena) {} + + const upb_MiniTable* Build(upb_ExtensionRegistry** exts) { + BuildMessages(); + BuildEnums(); + BuildExtensions(exts); + LinkMessages(); + return mini_tables_.empty() ? 
nullptr : mini_tables_.front(); + } + + private: + void BuildMessages(); + void BuildEnums(); + void BuildExtensions(upb_ExtensionRegistry** exts); + bool LinkExtension(upb_MiniTable_Extension* ext); + void LinkMessages(); + + size_t NextLink() { + if (input_->links.empty()) return 0; + if (link_ == input_->links.size()) link_ = 0; + return input_->links[link_++]; + } + + const upb_MiniTable* NextMiniTable() { + return mini_tables_.empty() + ? nullptr + : mini_tables_[NextLink() % mini_tables_.size()]; + } + + const upb_MiniTable_Enum* NextEnumTable() { + return enum_tables_.empty() + ? nullptr + : enum_tables_[NextLink() % enum_tables_.size()]; + } + + const MiniTableFuzzInput* input_; + upb_Arena* arena_; + std::vector mini_tables_; + std::vector enum_tables_; + size_t link_ = 0; +}; + +void Builder::BuildMessages() { + upb::Status status; + mini_tables_.reserve(input_->mini_descriptors.size()); + for (const auto& d : input_->mini_descriptors) { + upb_MiniTable* table; + if (d == "\n") { + // We special-case this input string, which is not a valid + // mini-descriptor, to mean message set. 
+ table = + upb_MiniTable_BuildMessageSet(kUpb_MiniTablePlatform_Native, arena_); + } else { + table = + upb_MiniTable_Build(d.data(), d.size(), kUpb_MiniTablePlatform_Native, + arena_, status.ptr()); + } + if (table) mini_tables_.push_back(table); + } +} + +void Builder::BuildEnums() { + upb::Status status; + enum_tables_.reserve(input_->enum_mini_descriptors.size()); + for (const auto& d : input_->enum_mini_descriptors) { + upb_MiniTable_Enum* enum_table = + upb_MiniTable_BuildEnum(d.data(), d.size(), arena_, status.ptr()); + if (enum_table) enum_tables_.push_back(enum_table); + } +} + +bool Builder::LinkExtension(upb_MiniTable_Extension* ext) { + upb_MiniTable_Field* field = &ext->field; + ext->extendee = NextMiniTable(); + if (!ext->extendee) return false; + if (field->descriptortype == kUpb_FieldType_Message || + field->descriptortype == kUpb_FieldType_Group) { + auto mt = NextMiniTable(); + if (!mt) field->descriptortype = kUpb_FieldType_Int32; + ext->sub.submsg = mt; + } + if (field->descriptortype == kUpb_FieldType_Enum) { + auto et = NextEnumTable(); + if (!et) field->descriptortype = kUpb_FieldType_Int32; + ext->sub.subenum = et; + } + return true; +} + +void Builder::BuildExtensions(upb_ExtensionRegistry** exts) { + upb::Status status; + if (input_->extensions.empty()) { + *exts = nullptr; + } else { + *exts = upb_ExtensionRegistry_New(arena_); + const char* ptr = input_->extensions.data(); + const char* end = ptr + input_->extensions.size(); + // Iterate through the buffer, building extensions as long as we can. 
+ while (ptr < end) { + upb_MiniTable_Extension* ext = reinterpret_cast( + upb_Arena_Malloc(arena_, sizeof(*ext))); + upb_MiniTable_Sub sub; + ptr = + upb_MiniTable_BuildExtension(ptr, end - ptr, ext, sub, status.ptr()); + if (!ptr) break; + if (!LinkExtension(ext)) continue; + if (_upb_extreg_get(*exts, ext->extendee, ext->field.number)) continue; + _upb_extreg_add(*exts, const_cast(&ext), + 1); + } + } +} + +void Builder::LinkMessages() { + for (auto* t : mini_tables_) { + upb_MiniTable* table = const_cast(t); + // For each field that requires a sub-table, assign one as appropriate. + for (size_t i = 0; i < table->field_count; i++) { + upb_MiniTable_Field* field = + const_cast(&table->fields[i]); + if (link_ == input_->links.size()) link_ = 0; + if (field->descriptortype == kUpb_FieldType_Message || + field->descriptortype == kUpb_FieldType_Group) { + upb_MiniTable_SetSubMessage(table, field, NextMiniTable()); + } + if (field->descriptortype == kUpb_FieldType_Enum) { + auto* et = NextEnumTable(); + if (et) { + upb_MiniTable_SetSubEnum(table, field, et); + } else { + // We don't have any sub-enums. Override the field type so that it is + // not needed. + field->descriptortype = kUpb_FieldType_Int32; + } + } + } + } +} + +} // namespace + +const upb_MiniTable* BuildMiniTable(const MiniTableFuzzInput& input, + upb_ExtensionRegistry** exts, + upb_Arena* arena) { + Builder builder(input, arena); + return builder.Build(exts); +} + +} // namespace fuzz +} // namespace upb diff --git a/upb/fuzz_test_util.h b/upb/fuzz_test_util.h new file mode 100644 index 0000000000..9127b17106 --- /dev/null +++ b/upb/fuzz_test_util.h @@ -0,0 +1,80 @@ +/* + * Copyright (c) 2009-2022, Google LLC + * All rights reserved. 
+ * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * * Neither the name of Google LLC nor the + * names of its contributors may be used to endorse or promote products + * derived from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL Google LLC BE LIABLE FOR ANY DIRECT, + * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES + * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND + * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef UPB_FUZZ_TEST_UTIL_H_ +#define UPB_FUZZ_TEST_UTIL_H_ + +#include +#include + +#include "upb/mini_table.h" + +namespace upb { +namespace fuzz { + +struct MiniTableFuzzInput { + // MiniDescriptors for N messages, in the format accepted by + // upb_MiniTable_Build(). + std::vector mini_descriptors; + + // MiniDescriptors for N enums, in the format accepted by + // upb_MiniTable_BuildEnum(). 
+ std::vector enum_mini_descriptors; + + // A MiniDescriptor for N extensions, in the format accepted by + // upb_MiniTable_BuildExtension(). + std::string extensions; + + // Integer indexes into the message or enum mini table lists. These specify + // which message or enum to use for each sub-message or enum field. We mod + // by the total number of enums or messages so that any link value can be + // valid. + std::vector links; +}; + +// Builds an arbitrary mini table corresponding to the random data in `input`. +// This function should be capable of producing any mini table that can +// successfully build, and any topology of messages and enums (including +// cycles). +// +// As currently written, it effectively fuzzes the mini descriptor parser also, +// and can therefore trigger any bugs in that parser. To better isolate these +// two, we may want to change this implementation to use the mini descriptor +// builder API so we are producing mini descriptors in a known good format. That +// would mostly eliminate the chance of crashing the mini descriptor parser +// itself. +// +// TODO: maps. If we give maps some space in the regular encoding instead of +// using a separate function, we could get that for free. 
+const upb_MiniTable* BuildMiniTable(const MiniTableFuzzInput& input, + upb_ExtensionRegistry** exts, + upb_Arena* arena); + +} // namespace fuzz +} // namespace upb + +#endif // UPB_FUZZ_TEST_UTIL_H_ diff --git a/upb/mini_table.c b/upb/mini_table.c index cd53059738..3caebdaa4e 100644 --- a/upb/mini_table.c +++ b/upb/mini_table.c @@ -428,7 +428,6 @@ static const char* upb_MiniTable_DecodeBase92Varint(upb_MtDecoder* d, char ch = first_ch; while (1) { uint32_t bits = upb_FromBase92(ch) - upb_FromBase92(min); - UPB_ASSERT(shift < 32); val |= bits << shift; if (ptr == d->end || *ptr < min || max < *ptr) { *out_val = val; @@ -436,6 +435,7 @@ static const char* upb_MiniTable_DecodeBase92Varint(upb_MtDecoder* d, } ch = *ptr++; shift += bits_per_char; + if (shift >= 32) upb_MtDecoder_ErrorFormat(d, "Overlong varint"); } } @@ -530,11 +530,15 @@ static void upb_MiniTable_SetField(upb_MtDecoder* d, uint8_t ch, field->mode |= kUpb_FieldRep_Pointer << kUpb_FieldRep_Shift; field->offset = kNoPresence; } else { + if (type >= sizeof(kUpb_EncodedToFieldRep)) { + upb_MtDecoder_ErrorFormat(d, "Invalid field type: %d", (int)type); + UPB_UNREACHABLE(); + } field->mode = kUpb_FieldMode_Scalar; field->mode |= kUpb_EncodedToFieldRep[type] << kUpb_FieldRep_Shift; field->offset = kHasbitPresence; } - if (type >= 18) { + if (type >= sizeof(kUpb_EncodedToType)) { upb_MtDecoder_ErrorFormat(d, "Invalid field type: %d", (int)type); UPB_UNREACHABLE(); } @@ -728,9 +732,10 @@ static void upb_MtDecoder_AllocateSubs(upb_MtDecoder* d, uint32_t sub_count) { upb_MtDecoder_CheckOutOfMemory(d, d->table->subs); } -static void upb_MtDecoder_Parse(upb_MtDecoder* d, const char* ptr, size_t len, - void* fields, size_t field_size, - uint16_t* field_count, uint32_t* sub_count) { +static const char* upb_MtDecoder_Parse(upb_MtDecoder* d, const char* ptr, + size_t len, void* fields, + size_t field_size, uint16_t* field_count, + uint32_t* sub_count) { uint64_t msg_modifiers = 0; uint32_t 
last_field_number = 0; upb_MiniTable_Field* last_field = NULL; @@ -741,6 +746,7 @@ static void upb_MtDecoder_Parse(upb_MtDecoder* d, const char* ptr, size_t len, while (ptr < d->end) { char ch = *ptr++; if (ch <= kUpb_EncodedValue_MaxField) { + if (!d->table && last_field) return --ptr; upb_MiniTable_Field* field = fields; *field_count += 1; fields = (char*)fields + field_size; @@ -777,6 +783,8 @@ static void upb_MtDecoder_Parse(upb_MtDecoder* d, const char* ptr, size_t len, if (need_dense_below) { d->table->dense_below = d->table->field_count; } + + return ptr; } static void upb_MtDecoder_ParseMessage(upb_MtDecoder* d, const char* data, @@ -1092,9 +1100,10 @@ upb_MiniTable_Enum* upb_MiniTable_BuildEnum(const char* data, size_t len, return table; } -bool upb_MiniTable_BuildExtension(const char* data, size_t len, - upb_MiniTable_Extension* ext, - upb_MiniTable_Sub sub, upb_Status* status) { +const char* upb_MiniTable_BuildExtension(const char* data, size_t len, + upb_MiniTable_Extension* ext, + upb_MiniTable_Sub sub, + upb_Status* status) { upb_MtDecoder decoder = { .arena = NULL, .status = status, @@ -1102,14 +1111,15 @@ bool upb_MiniTable_BuildExtension(const char* data, size_t len, }; if (UPB_SETJMP(decoder.err)) { - return false; + return NULL; } uint16_t count = 0; - upb_MtDecoder_Parse(&decoder, data, len, ext, sizeof(*ext), &count, NULL); + const char* ret = + upb_MtDecoder_Parse(&decoder, data, len, ext, sizeof(*ext), &count, NULL); ext->field.mode |= kUpb_LabelFlags_IsExtension; ext->field.offset = 0; - return true; + return ret; } upb_MiniTable* upb_MiniTable_Build(const char* data, size_t len, diff --git a/upb/mini_table.h b/upb/mini_table.h index 4a87f147e4..20a7146979 100644 --- a/upb/mini_table.h +++ b/upb/mini_table.h @@ -148,9 +148,10 @@ void upb_MiniTable_SetSubMessage(upb_MiniTable* table, void upb_MiniTable_SetSubEnum(upb_MiniTable* table, upb_MiniTable_Field* field, const upb_MiniTable_Enum* sub); -bool upb_MiniTable_BuildExtension(const char* 
data, size_t len, - upb_MiniTable_Extension* ext, - upb_MiniTable_Sub sub, upb_Status* status); +const char* upb_MiniTable_BuildExtension(const char* data, size_t len, + upb_MiniTable_Extension* ext, + upb_MiniTable_Sub sub, + upb_Status* status); // Special-case functions for MessageSet layout and map entries. upb_MiniTable* upb_MiniTable_BuildMessageSet(upb_MiniTablePlatform platform, diff --git a/upb/mini_table_test.cc b/upb/mini_table_test.cc index bb0d6d4ef5..b86e35005b 100644 --- a/upb/mini_table_test.cc +++ b/upb/mini_table_test.cc @@ -27,13 +27,20 @@ #include "upb/mini_table.hpp" -#include "absl/container/flat_hash_set.h" -#include "gmock/gmock.h" #include "google/protobuf/descriptor.h" +#include "gmock/gmock.h" #include "gtest/gtest.h" +#include "absl/container/flat_hash_set.h" +#include "upb/decode.h" +#include "upb/mini_table.h" #include "upb/msg_internal.h" +#include "upb/upb.h" #include "upb/upb.hpp" +// begin:google_only +// #include "testing/fuzzing/fuzztest.h" +// end:google_only + namespace protobuf = ::google::protobuf; class MiniTableTest : public testing::TestWithParam {}; @@ -244,3 +251,21 @@ TEST_P(MiniTableTest, Extendible) { ASSERT_NE(nullptr, table); EXPECT_EQ(kUpb_ExtMode_Extendable, table->ext & kUpb_ExtMode_Extendable); } + +// begin:google_only +// +// static void BuildMiniTable(std::string_view s, bool is_32bit) { +// upb::Arena arena; +// upb::Status status; +// upb_MiniTable_Build( +// s.data(), s.size(), +// is_32bit ? 
kUpb_MiniTablePlatform_32Bit : kUpb_MiniTablePlatform_64Bit, +// arena.ptr(), status.ptr()); +// } +// FUZZ_TEST(FuzzTest, BuildMiniTable); +// +// TEST(FuzzTest, BuildMiniTableRegression) { +// BuildMiniTable("g}{v~fq{\271", false); +// } +// +// end:google_only diff --git a/upb/msg_test.cc b/upb/msg_test.cc index d46e29fd96..d49e8ef5b0 100644 --- a/upb/msg_test.cc +++ b/upb/msg_test.cc @@ -29,12 +29,17 @@ #include "gtest/gtest.h" #include "google/protobuf/test_messages_proto3.upb.h" #include "upb/def.hpp" +#include "upb/fuzz_test_util.h" #include "upb/json_decode.h" #include "upb/json_encode.h" #include "upb/msg_test.upb.h" #include "upb/msg_test.upbdefs.h" #include "upb/upb.hpp" +// begin:google_only +// #include "testing/fuzzing/fuzztest.h" +// end:google_only + void VerifyMessage(const upb_test_TestExtensions* ext_msg) { EXPECT_TRUE(upb_test_TestExtensions_has_optional_int32_ext(ext_msg)); // EXPECT_FALSE(upb_test_TestExtensions_Nested_has_optional_int32_ext(ext_msg)); @@ -489,3 +494,25 @@ TEST(MessageTest, MapField) { ASSERT_TRUE( upb_test_TestMapFieldExtra_map_field_get(test_msg_extra2, 0, nullptr)); } + +// begin:google_only +// +// static void DecodeEncodeArbitrarySchemaAndPayload( +// const upb::fuzz::MiniTableFuzzInput& input, std::string_view proto_payload, +// int decode_options, int encode_options) { +// upb::Arena arena; +// upb_ExtensionRegistry* exts; +// const upb_MiniTable* mini_table = +// upb::fuzz::BuildMiniTable(input, &exts, arena.ptr()); +// if (!mini_table) return; +// upb::Status status; +// upb_Message* msg = _upb_Message_New(mini_table, arena.ptr()); +// upb_Decode(proto_payload.data(), proto_payload.size(), msg, mini_table, exts, +// decode_options, arena.ptr()); +// char* ptr; +// size_t size; +// upb_Encode(msg, mini_table, encode_options, arena.ptr(), &ptr, &size); +// } +// FUZZ_TEST(FuzzTest, DecodeEncodeArbitrarySchemaAndPayload); +// +// end:google_only