Added fuzz tests for mini table building and binary format parsing/serialization.

PiperOrigin-RevId: 458240180
pull/13171/head
Joshua Haberman 3 years ago committed by Copybara-Service
parent d44834063a
commit 125db89ff5
  1. 12
      BUILD
  2. 2
      cmake/make_cmakelists.py
  3. 2
      upb/extension_registry.c
  4. 190
      upb/fuzz_test_util.cc
  5. 80
      upb/fuzz_test_util.h
  6. 32
      upb/mini_table.c
  7. 7
      upb/mini_table.h
  8. 29
      upb/mini_table_test.cc
  9. 27
      upb/msg_test.cc

12
BUILD

@ -511,6 +511,7 @@ cc_test(
name = "msg_test",
srcs = ["upb/msg_test.cc"],
deps = [
":fuzz_test_util",
":json",
":msg_test_upb_proto",
":msg_test_upb_proto_reflection",
@ -704,6 +705,17 @@ sh_test(
deps = ["@bazel_tools//tools/bash/runfiles"],
)
cc_library(
name = "fuzz_test_util",
testonly = 1,
srcs = ["upb/fuzz_test_util.cc"],
hdrs = ["upb/fuzz_test_util.h"],
deps = [
":mini_table",
":upb",
],
)
# Internal C/C++ libraries #####################################################
cc_library(

@ -68,6 +68,8 @@ class BuildFileFunctions(object):
return
if kwargs["name"] == "lupb":
return
if "testonly" in kwargs:
return
files = kwargs.get("srcs", []) + kwargs.get("hdrs", [])
found_files = []
pregenerated_files = [

@ -60,6 +60,8 @@ bool _upb_extreg_add(upb_ExtensionRegistry* r,
const upb_MiniTable_Extension** start = e;
const upb_MiniTable_Extension** end = UPB_PTRADD(e, count);
for (; e < end; e++) {
// TODO: we should gracefully handle the case where this already exists.
// Right now we're only checking for out of memory.
const upb_MiniTable_Extension* ext = *e;
extreg_key(buf, ext->extendee, ext->field.number);
if (!upb_strtable_insert(&r->exts, buf, EXTREG_KEY_SIZE,

@ -0,0 +1,190 @@
/*
* Copyright (c) 2009-2022, Google LLC
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
* * Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* * Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* * Neither the name of Google LLC nor the
* names of its contributors may be used to endorse or promote products
* derived from this software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL Google LLC BE LIABLE FOR ANY DIRECT,
* INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
* ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
* SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#include "upb/fuzz_test_util.h"
#include "upb/msg.h"
#include "upb/upb.hpp"
namespace upb {
namespace fuzz {
namespace {
// Turns a MiniTableFuzzInput into message mini tables, enum mini tables and
// an extension registry, then connects sub-message / sub-enum references
// using the values in the input's `links` list.
class Builder {
 public:
  // `input` is stored by pointer, so it must outlive this Builder. `arena` is
  // handed to every upb call that builds a table or an extension.
  Builder(const MiniTableFuzzInput& input, upb_Arena* arena)
      : input_(&input), arena_(arena) {}

  // Runs all build phases in order (messages, enums, extensions, linking) and
  // returns the first message mini table that was built, or nullptr when no
  // message mini table could be built. `*exts` is assigned by
  // BuildExtensions().
  const upb_MiniTable* Build(upb_ExtensionRegistry** exts) {
    BuildMessages();
    BuildEnums();
    BuildExtensions(exts);
    LinkMessages();
    if (mini_tables_.empty()) return nullptr;
    return mini_tables_.front();
  }

 private:
  void BuildMessages();
  void BuildEnums();
  void BuildExtensions(upb_ExtensionRegistry** exts);
  bool LinkExtension(upb_MiniTable_Extension* ext);
  void LinkMessages();

  // Returns the next value from `links`, wrapping back to the first entry
  // once the list is exhausted. Yields 0 when there are no links at all.
  size_t NextLink() {
    const auto& links = input_->links;
    if (links.empty()) return 0;
    if (link_ == links.size()) link_ = 0;
    const size_t value = links[link_];
    ++link_;
    return value;
  }

  // Picks a built message mini table using the next link value (modulo the
  // number of tables). Returns nullptr, without consuming a link, when no
  // message mini table exists.
  const upb_MiniTable* NextMiniTable() {
    if (mini_tables_.empty()) return nullptr;
    const size_t index = NextLink() % mini_tables_.size();
    return mini_tables_[index];
  }

  // Same as NextMiniTable(), but selects from the built enum mini tables.
  const upb_MiniTable_Enum* NextEnumTable() {
    if (enum_tables_.empty()) return nullptr;
    const size_t index = NextLink() % enum_tables_.size();
    return enum_tables_[index];
  }

  const MiniTableFuzzInput* input_;
  upb_Arena* arena_;
  std::vector<const upb_MiniTable*> mini_tables_;
  std::vector<const upb_MiniTable_Enum*> enum_tables_;
  size_t link_ = 0;  // Cursor into input_->links.
};
void Builder::BuildMessages() {
upb::Status status;
mini_tables_.reserve(input_->mini_descriptors.size());
for (const auto& d : input_->mini_descriptors) {
upb_MiniTable* table;
if (d == "\n") {
// We special-case this input string, which is not a valid
// mini-descriptor, to mean message set.
table =
upb_MiniTable_BuildMessageSet(kUpb_MiniTablePlatform_Native, arena_);
} else {
table =
upb_MiniTable_Build(d.data(), d.size(), kUpb_MiniTablePlatform_Native,
arena_, status.ptr());
}
if (table) mini_tables_.push_back(table);
}
}
void Builder::BuildEnums() {
upb::Status status;
enum_tables_.reserve(input_->enum_mini_descriptors.size());
for (const auto& d : input_->enum_mini_descriptors) {
upb_MiniTable_Enum* enum_table =
upb_MiniTable_BuildEnum(d.data(), d.size(), arena_, status.ptr());
if (enum_table) enum_tables_.push_back(enum_table);
}
}
// Assigns an extendee to `ext` and, when its field type requires one, a
// sub-message or sub-enum table. Returns false if no message mini table is
// available to act as the extendee; returns true otherwise.
bool Builder::LinkExtension(upb_MiniTable_Extension* ext) {
  ext->extendee = NextMiniTable();
  if (ext->extendee == nullptr) return false;

  upb_MiniTable_Field& field = ext->field;

  const bool needs_submsg = field.descriptortype == kUpb_FieldType_Message ||
                            field.descriptortype == kUpb_FieldType_Group;
  if (needs_submsg) {
    const upb_MiniTable* submsg = NextMiniTable();
    // Without a sub-message table, downgrade the field to a type that does
    // not need one.
    if (submsg == nullptr) field.descriptortype = kUpb_FieldType_Int32;
    ext->sub.submsg = submsg;
  }

  if (field.descriptortype == kUpb_FieldType_Enum) {
    const upb_MiniTable_Enum* subenum = NextEnumTable();
    // Same downgrade when no enum table is available.
    if (subenum == nullptr) field.descriptortype = kUpb_FieldType_Int32;
    ext->sub.subenum = subenum;
  }

  return true;
}
// Builds the extension registry described by `input_->extensions`.
//
// `*exts` is set to nullptr when the input contains no extension data or when
// the registry itself cannot be allocated; otherwise it is set to a new
// registry holding every extension that could be parsed, linked and added.
//
// Parsing stops at the first extension that fails to build. Extensions that
// cannot be linked, or whose (extendee, field number) pair is already
// registered, are skipped. Allocation failures end the loop instead of
// dereferencing a null pointer.
void Builder::BuildExtensions(upb_ExtensionRegistry** exts) {
  *exts = nullptr;
  if (input_->extensions.empty()) return;

  upb_ExtensionRegistry* registry = upb_ExtensionRegistry_New(arena_);
  if (registry == nullptr) return;  // Out of memory: report "no extensions".
  *exts = registry;

  upb::Status status;
  const char* ptr = input_->extensions.data();
  const char* end = ptr + input_->extensions.size();

  // Iterate through the buffer, building extensions as long as we can.
  while (ptr < end) {
    upb_MiniTable_Extension* ext = reinterpret_cast<upb_MiniTable_Extension*>(
        upb_Arena_Malloc(arena_, sizeof(*ext)));
    if (ext == nullptr) break;  // Out of memory.

    // Value-initialize so that no indeterminate bytes are passed by value.
    upb_MiniTable_Sub sub = {};
    ptr = upb_MiniTable_BuildExtension(ptr, end - ptr, ext, sub, status.ptr());
    if (ptr == nullptr) break;

    if (!LinkExtension(ext)) continue;
    if (_upb_extreg_get(registry, ext->extendee, ext->field.number)) continue;

    // _upb_extreg_add() takes an array of pointers-to-const; a local of the
    // right type avoids a const_cast. A false return signals out of memory.
    const upb_MiniTable_Extension* to_add = ext;
    if (!_upb_extreg_add(registry, &to_add, 1)) break;
  }
}
void Builder::LinkMessages() {
for (auto* t : mini_tables_) {
upb_MiniTable* table = const_cast<upb_MiniTable*>(t);
// For each field that requires a sub-table, assign one as appropriate.
for (size_t i = 0; i < table->field_count; i++) {
upb_MiniTable_Field* field =
const_cast<upb_MiniTable_Field*>(&table->fields[i]);
if (link_ == input_->links.size()) link_ = 0;
if (field->descriptortype == kUpb_FieldType_Message ||
field->descriptortype == kUpb_FieldType_Group) {
upb_MiniTable_SetSubMessage(table, field, NextMiniTable());
}
if (field->descriptortype == kUpb_FieldType_Enum) {
auto* et = NextEnumTable();
if (et) {
upb_MiniTable_SetSubEnum(table, field, et);
} else {
// We don't have any sub-enums. Override the field type so that it is
// not needed.
field->descriptortype = kUpb_FieldType_Int32;
}
}
}
}
}
} // namespace
// Public entry point: builds everything described by `input` in `arena`,
// stores the extension registry (or nullptr) in `*exts`, and returns the
// first message mini table that was built, or nullptr if none was.
const upb_MiniTable* BuildMiniTable(const MiniTableFuzzInput& input,
                                    upb_ExtensionRegistry** exts,
                                    upb_Arena* arena) {
  return Builder(input, arena).Build(exts);
}
} // namespace fuzz
} // namespace upb

@ -0,0 +1,80 @@
/*
* Copyright (c) 2009-2022, Google LLC
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
* * Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* * Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* * Neither the name of Google LLC nor the
* names of its contributors may be used to endorse or promote products
* derived from this software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL Google LLC BE LIABLE FOR ANY DIRECT,
* INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
* ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
* SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#ifndef UPB_FUZZ_TEST_UTIL_H_
#define UPB_FUZZ_TEST_UTIL_H_
#include <string>
#include <vector>
#include "upb/mini_table.h"
namespace upb {
namespace fuzz {
// Fuzzer-supplied raw material from which BuildMiniTable() constructs a set of
// message mini tables, enum mini tables and extensions, plus the values used
// to connect them to each other.
struct MiniTableFuzzInput {
// MiniDescriptors for N messages, in the format accepted by
// upb_MiniTable_Build(). The string "\n", which is not a valid
// mini-descriptor, is special-cased by the builder to mean "message set".
std::vector<std::string> mini_descriptors;
// MiniDescriptors for N enums, in the format accepted by
// upb_MiniTable_BuildEnum().
std::vector<std::string> enum_mini_descriptors;
// A MiniDescriptor for N extensions, in the format accepted by
// upb_MiniTable_BuildExtension().
std::string extensions;
// Integer indexes into the message or enum mini tables lists. These specify
// which message or enum to use for each sub-message or enum field. We mod
// by the total number of enums or messages so that any link value can be
// valid. Values are consumed in order and wrap around to the first entry once
// the list is exhausted; an empty list behaves as if every link were 0.
std::vector<uint32_t> links;
};
// Builds an arbitrary mini table corresponding to the random data in `input`.
// This function should be capable of producing any mini table that can
// successfully build, and any topology of messages and enums (including
// cycles).
//
// As currently written, it effectively fuzzes the mini descriptor parser also,
// and can therefore trigger any bugs in that parser. To better isolate these
// two, we may want to change this implementation to use the mini descriptor
// builder API so we are producing mini descriptors in a known good format. That
// would mostly eliminate the chance of crashing the mini descriptor parser
// itself.
//
// Parameters:
//   input: the descriptors and link values to build from.
//   exts:  output; receives an extension registry, or nullptr when
//          `input.extensions` is empty.
//   arena: arena passed to the upb build functions for all allocations.
//
// Returns the first message mini table that built successfully, or nullptr if
// no message mini table could be built.
//
// TODO: maps. If we give maps some space in the regular encoding instead of
// using a separate function, we could get that for free.
const upb_MiniTable* BuildMiniTable(const MiniTableFuzzInput& input,
upb_ExtensionRegistry** exts,
upb_Arena* arena);
} // namespace fuzz
} // namespace upb
#endif  // UPB_FUZZ_TEST_UTIL_H_

@ -428,7 +428,6 @@ static const char* upb_MiniTable_DecodeBase92Varint(upb_MtDecoder* d,
char ch = first_ch;
while (1) {
uint32_t bits = upb_FromBase92(ch) - upb_FromBase92(min);
UPB_ASSERT(shift < 32);
val |= bits << shift;
if (ptr == d->end || *ptr < min || max < *ptr) {
*out_val = val;
@ -436,6 +435,7 @@ static const char* upb_MiniTable_DecodeBase92Varint(upb_MtDecoder* d,
}
ch = *ptr++;
shift += bits_per_char;
if (shift >= 32) upb_MtDecoder_ErrorFormat(d, "Overlong varint");
}
}
@ -530,11 +530,15 @@ static void upb_MiniTable_SetField(upb_MtDecoder* d, uint8_t ch,
field->mode |= kUpb_FieldRep_Pointer << kUpb_FieldRep_Shift;
field->offset = kNoPresence;
} else {
if (type >= sizeof(kUpb_EncodedToFieldRep)) {
upb_MtDecoder_ErrorFormat(d, "Invalid field type: %d", (int)type);
UPB_UNREACHABLE();
}
field->mode = kUpb_FieldMode_Scalar;
field->mode |= kUpb_EncodedToFieldRep[type] << kUpb_FieldRep_Shift;
field->offset = kHasbitPresence;
}
if (type >= 18) {
if (type >= sizeof(kUpb_EncodedToType)) {
upb_MtDecoder_ErrorFormat(d, "Invalid field type: %d", (int)type);
UPB_UNREACHABLE();
}
@ -728,9 +732,10 @@ static void upb_MtDecoder_AllocateSubs(upb_MtDecoder* d, uint32_t sub_count) {
upb_MtDecoder_CheckOutOfMemory(d, d->table->subs);
}
static void upb_MtDecoder_Parse(upb_MtDecoder* d, const char* ptr, size_t len,
void* fields, size_t field_size,
uint16_t* field_count, uint32_t* sub_count) {
static const char* upb_MtDecoder_Parse(upb_MtDecoder* d, const char* ptr,
size_t len, void* fields,
size_t field_size, uint16_t* field_count,
uint32_t* sub_count) {
uint64_t msg_modifiers = 0;
uint32_t last_field_number = 0;
upb_MiniTable_Field* last_field = NULL;
@ -741,6 +746,7 @@ static void upb_MtDecoder_Parse(upb_MtDecoder* d, const char* ptr, size_t len,
while (ptr < d->end) {
char ch = *ptr++;
if (ch <= kUpb_EncodedValue_MaxField) {
if (!d->table && last_field) return --ptr;
upb_MiniTable_Field* field = fields;
*field_count += 1;
fields = (char*)fields + field_size;
@ -777,6 +783,8 @@ static void upb_MtDecoder_Parse(upb_MtDecoder* d, const char* ptr, size_t len,
if (need_dense_below) {
d->table->dense_below = d->table->field_count;
}
return ptr;
}
static void upb_MtDecoder_ParseMessage(upb_MtDecoder* d, const char* data,
@ -1092,9 +1100,10 @@ upb_MiniTable_Enum* upb_MiniTable_BuildEnum(const char* data, size_t len,
return table;
}
bool upb_MiniTable_BuildExtension(const char* data, size_t len,
upb_MiniTable_Extension* ext,
upb_MiniTable_Sub sub, upb_Status* status) {
const char* upb_MiniTable_BuildExtension(const char* data, size_t len,
upb_MiniTable_Extension* ext,
upb_MiniTable_Sub sub,
upb_Status* status) {
upb_MtDecoder decoder = {
.arena = NULL,
.status = status,
@ -1102,14 +1111,15 @@ bool upb_MiniTable_BuildExtension(const char* data, size_t len,
};
if (UPB_SETJMP(decoder.err)) {
return false;
return NULL;
}
uint16_t count = 0;
upb_MtDecoder_Parse(&decoder, data, len, ext, sizeof(*ext), &count, NULL);
const char* ret =
upb_MtDecoder_Parse(&decoder, data, len, ext, sizeof(*ext), &count, NULL);
ext->field.mode |= kUpb_LabelFlags_IsExtension;
ext->field.offset = 0;
return true;
return ret;
}
upb_MiniTable* upb_MiniTable_Build(const char* data, size_t len,

@ -148,9 +148,10 @@ void upb_MiniTable_SetSubMessage(upb_MiniTable* table,
void upb_MiniTable_SetSubEnum(upb_MiniTable* table, upb_MiniTable_Field* field,
const upb_MiniTable_Enum* sub);
bool upb_MiniTable_BuildExtension(const char* data, size_t len,
upb_MiniTable_Extension* ext,
upb_MiniTable_Sub sub, upb_Status* status);
const char* upb_MiniTable_BuildExtension(const char* data, size_t len,
upb_MiniTable_Extension* ext,
upb_MiniTable_Sub sub,
upb_Status* status);
// Special-case functions for MessageSet layout and map entries.
upb_MiniTable* upb_MiniTable_BuildMessageSet(upb_MiniTablePlatform platform,

@ -27,13 +27,20 @@
#include "upb/mini_table.hpp"
#include "absl/container/flat_hash_set.h"
#include "gmock/gmock.h"
#include "google/protobuf/descriptor.h"
#include "gmock/gmock.h"
#include "gtest/gtest.h"
#include "absl/container/flat_hash_set.h"
#include "upb/decode.h"
#include "upb/mini_table.h"
#include "upb/msg_internal.h"
#include "upb/upb.h"
#include "upb/upb.hpp"
// begin:google_only
// #include "testing/fuzzing/fuzztest.h"
// end:google_only
namespace protobuf = ::google::protobuf;
class MiniTableTest : public testing::TestWithParam<upb_MiniTablePlatform> {};
@ -244,3 +251,21 @@ TEST_P(MiniTableTest, Extendible) {
ASSERT_NE(nullptr, table);
EXPECT_EQ(kUpb_ExtMode_Extendable, table->ext & kUpb_ExtMode_Extendable);
}
// begin:google_only
//
// static void BuildMiniTable(std::string_view s, bool is_32bit) {
// upb::Arena arena;
// upb::Status status;
// upb_MiniTable_Build(
// s.data(), s.size(),
// is_32bit ? kUpb_MiniTablePlatform_32Bit : kUpb_MiniTablePlatform_64Bit,
// arena.ptr(), status.ptr());
// }
// FUZZ_TEST(FuzzTest, BuildMiniTable);
//
// TEST(FuzzTest, BuildMiniTableRegression) {
// BuildMiniTable("g}{v~fq{\271", false);
// }
//
// end:google_only

@ -29,12 +29,17 @@
#include "gtest/gtest.h"
#include "google/protobuf/test_messages_proto3.upb.h"
#include "upb/def.hpp"
#include "upb/fuzz_test_util.h"
#include "upb/json_decode.h"
#include "upb/json_encode.h"
#include "upb/msg_test.upb.h"
#include "upb/msg_test.upbdefs.h"
#include "upb/upb.hpp"
// begin:google_only
// #include "testing/fuzzing/fuzztest.h"
// end:google_only
void VerifyMessage(const upb_test_TestExtensions* ext_msg) {
EXPECT_TRUE(upb_test_TestExtensions_has_optional_int32_ext(ext_msg));
// EXPECT_FALSE(upb_test_TestExtensions_Nested_has_optional_int32_ext(ext_msg));
@ -489,3 +494,25 @@ TEST(MessageTest, MapField) {
ASSERT_TRUE(
upb_test_TestMapFieldExtra_map_field_get(test_msg_extra2, 0, nullptr));
}
// begin:google_only
//
// static void DecodeEncodeArbitrarySchemaAndPayload(
// const upb::fuzz::MiniTableFuzzInput& input, std::string_view proto_payload,
// int decode_options, int encode_options) {
// upb::Arena arena;
// upb_ExtensionRegistry* exts;
// const upb_MiniTable* mini_table =
// upb::fuzz::BuildMiniTable(input, &exts, arena.ptr());
// if (!mini_table) return;
// upb::Status status;
// upb_Message* msg = _upb_Message_New(mini_table, arena.ptr());
// upb_Decode(proto_payload.data(), proto_payload.size(), msg, mini_table, exts,
// decode_options, arena.ptr());
// char* ptr;
// size_t size;
// upb_Encode(msg, mini_table, encode_options, arena.ptr(), &ptr, &size);
// }
// FUZZ_TEST(FuzzTest, DecodeEncodeArbitrarySchemaAndPayload);
//
// end:google_only

Loading…
Cancel
Save