Unknown field comparison is passing tests.

pull/13171/head
Joshua Haberman 3 years ago
parent 732a21bb59
commit 9bff7b82d1
  1. 4
      .bazelrc
  2. 10
      upb/util/BUILD
  3. 148
      upb/util/compare.c
  4. 13
      upb/util/compare.h
  5. 255
      upb/util/compare_test.cc

@ -6,7 +6,7 @@ build --extra_toolchains=@system_python//:python_toolchain
# Use our custom-configured c++ toolchain.
build:m32 --copt=-m32 --linkopt=-m32
build:asan --copt=-fsanitize=address --linkopt=-fsanitize=address
build:asan --copt=-fsanitize=address --linkopt=-fsanitize=address --copt=-D__SANITIZE_ADDRESS__=1
# For Valgrind, we have to disable checks of "possible" leaks because the Python
# interpreter does the sorts of things that flag Valgrind "possible" leak checks.
@ -14,7 +14,7 @@ build:asan --copt=-fsanitize=address --linkopt=-fsanitize=address
# know of an easy way to do that.
#
# We also have to disable pymalloc to avoid triggering Valgrind.
build:valgrind --run_under='valgrind --leak-check=full --trace-children=yes --show-possibly-lost=no --errors-for-leak-kinds=definite --error-exitcode=1' --action_env=PYTHONMALLOC=malloc
build:valgrind --run_under='valgrind --leak-check=full --track-origins=yes --trace-children=yes --show-possibly-lost=no --errors-for-leak-kinds=definite --error-exitcode=1' --action_env=PYTHONMALLOC=malloc
build:ubsan --copt=-fsanitize=undefined --linkopt=-fsanitize=undefined --action_env=UBSAN_OPTIONS=halt_on_error=1:print_stacktrace=1
# Workaround for the fact that Bazel links with $CC, not $CXX

@ -95,3 +95,13 @@ cc_library(
hdrs = ["compare.h"],
deps = ["//:reflection"],
)
cc_test(
name = "compare_test",
srcs = ["compare_test.cc"],
deps = [
"@com_google_absl//absl/strings",
"@com_google_googletest//:gtest_main",
":compare",
],
)

@ -1,3 +1,29 @@
/*
* Copyright (c) 2009-2021, Google LLC
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
* * Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* * Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* * Neither the name of Google LLC nor the
* names of its contributors may be used to endorse or promote products
* derived from this software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL Google LLC BE LIABLE FOR ANY DIRECT,
* INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
* ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
* SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#include "upb/util/compare.h"
@ -29,6 +55,8 @@ struct upb_UnknownFields {
typedef struct {
const char *end;
upb_arena *arena;
upb_UnknownField *tmp;
size_t tmp_size;
int depth;
jmp_buf err;
} upb_UnknownField_Context;
@ -40,8 +68,8 @@ static void upb_UnknownFields_Grow(upb_UnknownField_Context *ctx,
size_t old = (*ptr - *base);
size_t new = UPB_MAX(4, old * 2);
*base = upb_arena_realloc(ctx->arena, *base, old * sizeof(*base),
new * sizeof(*base));
*base = upb_arena_realloc(ctx->arena, *base, old * sizeof(**base),
new * sizeof(**base));
if (!*base) UPB_LONGJMP(ctx->err, kUpb_UnknownCompareResult_OutOfMemory);
*ptr = *base + old;
@ -67,23 +95,80 @@ static const char *upb_UnknownFields_ParseVarint(const char *ptr,
return ptr;
}
// We have to implement our own sort here, since qsort() is not an in-order
// sort. Here we use merge sort, the simplest in-order sort.
static void upb_UnknownFields_Merge(upb_UnknownField *arr, size_t start,
size_t mid, size_t end,
upb_UnknownField *tmp) {
memcpy(tmp, &arr[start], (end - start) * sizeof(*tmp));
upb_UnknownField* ptr1 = tmp;
upb_UnknownField* end1 = &tmp[mid - start];
upb_UnknownField* ptr2 = &tmp[mid - start];
upb_UnknownField* end2 = &tmp[end - start];
upb_UnknownField* out = &arr[start];
while (ptr1 < end1 && ptr2 < end2) {
if (ptr1->tag <= ptr2->tag) {
*out++ = *ptr1++;
} else {
*out++ = *ptr2++;
}
}
if (ptr1 < end1) {
memcpy(out, ptr1, (end1 - ptr1) * sizeof(*out));
} else if (ptr2 < end2) {
memcpy(out, ptr1, (end2 - ptr2) * sizeof(*out));
}
}
static void upb_UnknownFields_SortRecursive(upb_UnknownField *arr,
size_t start, size_t end,
upb_UnknownField *tmp) {
if (end - start > 1) {
size_t mid = start + ((end - start) / 2);
upb_UnknownFields_SortRecursive(arr, start, mid, tmp);
upb_UnknownFields_SortRecursive(arr, mid, end, tmp);
upb_UnknownFields_Merge(arr, start, mid, end, tmp);
}
}
static void upb_UnknownFields_Sort(upb_UnknownField_Context *ctx,
upb_UnknownFields *fields) {
if (ctx->tmp_size < fields->size) {
ctx->tmp_size = UPB_MAX(8, ctx->tmp_size);
while (ctx->tmp_size < fields->size) ctx->tmp_size *= 2;
ctx->tmp = realloc(ctx->tmp, ctx->tmp_size * sizeof(*ctx->tmp));
}
upb_UnknownFields_SortRecursive(fields->fields, 0, fields->size, ctx->tmp);
}
static upb_UnknownFields *upb_UnknownFields_DoBuild(
upb_UnknownField_Context *ctx, const char **buf) {
upb_UnknownField *arr_base = NULL;
upb_UnknownField *arr_ptr = NULL;
upb_UnknownField *arr_end = NULL;
const char *ptr = *buf;
uint32_t last_tag = 0;
bool sorted = true;
while (ptr < ctx->end) {
uint64_t tag;
ptr = upb_UnknownFields_ParseVarint(ptr, ctx->end, &tag);
UPB_ASSERT(tag <= UINT32_MAX);
int wire_type = tag & 7;
if (wire_type == UPB_WIRE_TYPE_END_GROUP) break;
if (tag < last_tag) sorted = false;
last_tag = tag;
if (arr_ptr == arr_end) {
upb_UnknownFields_Grow(ctx, &arr_base, &arr_ptr, &arr_end);
}
upb_UnknownField *field = arr_ptr;
field->tag = tag;
arr_ptr++;
uint64_t val;
ptr = upb_UnknownFields_ParseVarint(ptr, ctx->end, &val);
UPB_ASSERT(val <= UINT32_MAX);
field->tag = val;
switch (field->tag & 7) {
switch (wire_type) {
case UPB_WIRE_TYPE_VARINT:
ptr = upb_UnknownFields_ParseVarint(ptr, ctx->end, &field->data.varint);
break;
@ -95,7 +180,7 @@ static upb_UnknownFields *upb_UnknownFields_DoBuild(
case UPB_WIRE_TYPE_32BIT:
UPB_ASSERT(ctx->end - ptr >= 4);
memcpy(&field->data.uint32, ptr, 4);
ptr += 8;
ptr += 4;
break;
case UPB_WIRE_TYPE_DELIMITED: {
uint64_t size;
@ -103,6 +188,7 @@ static upb_UnknownFields *upb_UnknownFields_DoBuild(
UPB_ASSERT(ctx->end - ptr >= size);
field->data.delimited.data = ptr;
field->data.delimited.size = size;
ptr += size;
break;
}
case UPB_WIRE_TYPE_START_GROUP:
@ -112,23 +198,24 @@ static upb_UnknownFields *upb_UnknownFields_DoBuild(
field->data.group = upb_UnknownFields_DoBuild(ctx, &ptr);
ctx->depth++;
break;
case UPB_WIRE_TYPE_END_GROUP:
goto done;
default:
UPB_UNREACHABLE();
}
}
done:
*buf = ptr;
upb_UnknownFields *ret = upb_arena_malloc(ctx->arena, sizeof(*ret));
if (!ret) UPB_LONGJMP(ctx->err, kUpb_UnknownCompareResult_OutOfMemory);
ret->fields = arr_base;
ret->size = arr_ptr - arr_base;
ret->capacity = arr_end - arr_base;
if (!sorted) {
upb_UnknownFields_Sort(ctx, ret);
}
return ret;
}
// Builds a upb_UnknownFields data structure from the binary data in buf.
static upb_UnknownFields *upb_UnknownFields_Build(upb_UnknownField_Context *ctx,
const char *buf,
size_t size) {
@ -138,6 +225,7 @@ static upb_UnknownFields *upb_UnknownFields_Build(upb_UnknownField_Context *ctx,
return fields;
}
// Compares two sorted upb_UnknwonFields structures for equality.
static bool upb_UnknownFields_IsEqual(const upb_UnknownFields *uf1,
const upb_UnknownFields *uf2) {
if (uf1->size != uf2->size) return false;
@ -145,7 +233,8 @@ static bool upb_UnknownFields_IsEqual(const upb_UnknownFields *uf1,
upb_UnknownField *f1 = &uf1->fields[i];
upb_UnknownField *f2 = &uf2->fields[i];
if (f1->tag != f2->tag) return false;
switch (f1->tag & 7) {
int wire_type = f1->tag & 7;
switch (wire_type) {
case UPB_WIRE_TYPE_VARINT:
if (f1->data.varint != f2->data.varint) return false;
break;
@ -172,12 +261,11 @@ static bool upb_UnknownFields_IsEqual(const upb_UnknownFields *uf1,
return true;
}
upb_UnknownCompareResult upb_Message_UnknownFieldsAreEqual(const upb_msg *msg1,
const upb_msg *msg2,
upb_UnknownCompareResult upb_Message_UnknownFieldsAreEqual(const char *buf1,
size_t size1,
const char *buf2,
size_t size2,
int max_depth) {
size_t size1, size2;
const char *buf1 = upb_msg_getunknown(msg1, &size1);
const char *buf2 = upb_msg_getunknown(msg2, &size2);
if (size1 == 0 && size2 == 0) return kUpb_UnknownCompareResult_Equal;
if (size1 == 0 || size2 == 0) return kUpb_UnknownCompareResult_NotEqual;
if (memcmp(buf1, buf2, size1) == 0) return kUpb_UnknownCompareResult_Equal;
@ -185,13 +273,29 @@ upb_UnknownCompareResult upb_Message_UnknownFieldsAreEqual(const upb_msg *msg1,
upb_UnknownField_Context ctx = {
.arena = upb_arena_new(),
.depth = max_depth,
.tmp = NULL,
.tmp_size = 0,
};
if (!ctx.arena) return kUpb_UnknownCompareResult_OutOfMemory;
upb_UnknownFields *uf1 = upb_UnknownFields_Build(&ctx, buf1, size1);
upb_UnknownFields *uf2 = upb_UnknownFields_Build(&ctx, buf2, size2);
bool ret = upb_UnknownFields_IsEqual(uf1, uf2);
int ret = UPB_SETJMP(ctx.err);
if (UPB_LIKELY(ret == 0)) {
// First build both unknown fields into a sorted data structure (similar
// to the UnknownFieldSet in C++).
upb_UnknownFields *uf1 = upb_UnknownFields_Build(&ctx, buf1, size1);
upb_UnknownFields *uf2 = upb_UnknownFields_Build(&ctx, buf2, size2);
// Now perform the equality check on the sorted structures.
if (upb_UnknownFields_IsEqual(uf1, uf2)) {
ret = kUpb_UnknownCompareResult_Equal;
} else {
ret = kUpb_UnknownCompareResult_NotEqual;
}
}
upb_arena_free(ctx.arena);
return ret ? kUpb_UnknownCompareResult_Equal
: kUpb_UnknownCompareResult_NotEqual;
free(ctx.tmp);
return ret;
}

@ -37,21 +37,26 @@ extern "C" {
// Returns true if unknown fields from the two messages are equal when sorted
// and varints are made canonical.
//
// These semantics are unfortunate, as the comparison is lossy without schema
// data:
// This operation should be considered best effort; the comparison is inherently
// lossy without schema data:
//
// 1. We don't know whether delimited fields are sub-messages. Unknown
// sub-messages will therefore not have their fields sorted and varints
// canonicalized.
// 2. We don't know about oneof/non-repeated fields, which should semantically
// discard every value except the last.
typedef enum {
kUpb_UnknownCompareResult_Equal = 0,
kUpb_UnknownCompareResult_NotEqual = 1,
kUpb_UnknownCompareResult_OutOfMemory = 2,
kUpb_UnknownCompareResult_MaxDepthExceeded = 3,
} upb_UnknownCompareResult;
upb_UnknownCompareResult upb_Message_UnknownFieldsAreEqual(const upb_msg *msg1,
const upb_msg *msg2,
upb_UnknownCompareResult upb_Message_UnknownFieldsAreEqual(const char *buf1,
size_t size1,
const char *buf2,
size_t size2,
int max_depth);
#ifdef __cplusplus

@ -0,0 +1,255 @@
/*
* Copyright (c) 2009-2021, Google LLC
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
* * Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* * Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* * Neither the name of Google LLC nor the
* names of its contributors may be used to endorse or promote products
* derived from this software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL Google LLC BE LIABLE FOR ANY DIRECT,
* INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
* ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
* SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#include "upb/util/compare.h"
#include "absl/strings/string_view.h"
#include "gmock/gmock.h"
#include "gtest/gtest.h"
#include <stdint.h>
#include <vector>
#include <string_view>
struct UnknownField;
using UnknownFields = std::vector<UnknownField>;
enum class UnknownFieldType {
kVarint,
kLongVarint, // Over-encoded to have distinct wire format.
kDelimited,
kFixed64,
kFixed32,
kGroup,
};
union UnknownFieldValue {
uint64_t varint;
uint64_t fixed64;
uint32_t fixed32;
// NULL-terminated (strings must not have embedded NULL).
const char* delimited;
UnknownFields* group;
};
struct TypeAndValue {
UnknownFieldType type;
UnknownFieldValue value;
};
struct UnknownField {
uint32_t field_number;
TypeAndValue value;
};
TypeAndValue Varint(uint64_t val) {
TypeAndValue ret{UnknownFieldType::kVarint};
ret.value.varint = val;
return ret;
}
TypeAndValue LongVarint(uint64_t val) {
TypeAndValue ret{UnknownFieldType::kLongVarint};
ret.value.varint = val;
return ret;
}
TypeAndValue Fixed64(uint64_t val) {
TypeAndValue ret{UnknownFieldType::kFixed64};
ret.value.fixed64 = val;
return ret;
}
TypeAndValue Fixed32(uint32_t val) {
TypeAndValue ret{UnknownFieldType::kFixed32};
ret.value.fixed32 = val;
return ret;
}
TypeAndValue Delimited(const char* val) {
TypeAndValue ret{UnknownFieldType::kDelimited};
ret.value.delimited = val;
return ret;
}
TypeAndValue Group(UnknownFields nested) {
TypeAndValue ret{UnknownFieldType::kGroup};
ret.value.group = &nested;
return ret;
}
void f() {
auto fields = UnknownFields{
{1, Varint(1)},
{2, Fixed64(2)},
};
auto fields2 = UnknownFields{
{1, Group({
{2, Group({
{3, Varint(1)},
})},
})},
};
}
void EncodeVarint(uint64_t val, std::string* str) {
do {
char byte = val & 0x7fU;
val >>= 7;
if (val) byte |= 0x80U;
str->push_back(byte);
} while (val);
}
std::string ToBinaryPayload(const UnknownFields& fields) {
static const upb_wiretype_t wire_types[] = {
UPB_WIRE_TYPE_VARINT,
UPB_WIRE_TYPE_VARINT,
UPB_WIRE_TYPE_DELIMITED,
UPB_WIRE_TYPE_64BIT,
UPB_WIRE_TYPE_32BIT,
UPB_WIRE_TYPE_START_GROUP,
};
std::string ret;
for (const auto& field : fields) {
uint32_t tag = field.field_number << 3 |
(wire_types[static_cast<int>(field.value.type)]);
EncodeVarint(tag, &ret);
switch (field.value.type) {
case UnknownFieldType::kVarint:
EncodeVarint(field.value.value.varint, &ret);
break;
case UnknownFieldType::kLongVarint:
EncodeVarint(field.value.value.varint, &ret);
ret.back() |= 0x80;
ret.push_back(0);
break;
case UnknownFieldType::kDelimited:
EncodeVarint(strlen(field.value.value.delimited), &ret);
ret.append(field.value.value.delimited);
break;
case UnknownFieldType::kFixed64: {
uint64_t val = _upb_be_swap64(field.value.value.fixed64);
ret.append(reinterpret_cast<const char*>(&val), sizeof(val));
break;
}
case UnknownFieldType::kFixed32: {
uint32_t val = _upb_be_swap32(field.value.value.fixed32);
ret.append(reinterpret_cast<const char*>(&val), sizeof(val));
break;
}
case UnknownFieldType::kGroup: {
uint32_t end_tag = field.field_number << 3 | UPB_WIRE_TYPE_END_GROUP;
ret.append(ToBinaryPayload(*field.value.value.group));
EncodeVarint(end_tag, &ret);
break;
}
}
}
return ret;
}
upb_UnknownCompareResult CompareUnknownWithMaxDepth(UnknownFields uf1,
UnknownFields uf2,
int max_depth) {
std::string buf1 = ToBinaryPayload(uf1);
std::string buf2 = ToBinaryPayload(uf2);
return upb_Message_UnknownFieldsAreEqual(buf1.data(), buf1.size(),
buf2.data(), buf2.size(), max_depth);
}
upb_UnknownCompareResult CompareUnknown(UnknownFields uf1, UnknownFields uf2) {
return CompareUnknownWithMaxDepth(uf1, uf2, 64);
}
TEST(CompareTest, UnknownFieldsReflexive) {
EXPECT_EQ(kUpb_UnknownCompareResult_Equal, CompareUnknown({}, {}));
EXPECT_EQ(kUpb_UnknownCompareResult_Equal,
CompareUnknown({{1, Varint(123)}, {2, Fixed32(456)}},
{{1, Varint(123)}, {2, Fixed32(456)}}));
EXPECT_EQ(
kUpb_UnknownCompareResult_Equal,
CompareUnknown(
{{1, Group({{2, Group({{3, Fixed32(456)}, {4, Fixed64(123)}})}})}},
{{1, Group({{2, Group({{3, Fixed32(456)}, {4, Fixed64(123)}})}})}}));
}
TEST(CompareTest, UnknownFieldsOrdering) {
EXPECT_EQ(kUpb_UnknownCompareResult_Equal,
CompareUnknown({{1, Varint(111)},
{2, Delimited("ABC")},
{3, Fixed32(456)},
{4, Fixed64(123)},
{5, Group({})}},
{{5, Group({})},
{4, Fixed64(123)},
{3, Fixed32(456)},
{2, Delimited("ABC")},
{1, Varint(111)}}));
EXPECT_EQ(kUpb_UnknownCompareResult_NotEqual,
CompareUnknown({{1, Varint(111)},
{2, Delimited("ABC")},
{3, Fixed32(456)},
{4, Fixed64(123)},
{5, Group({})}},
{{5, Group({})},
{4, Fixed64(123)},
{3, Fixed32(455)}, // Small difference.
{2, Delimited("ABC")},
{1, Varint(111)}}));
EXPECT_EQ(kUpb_UnknownCompareResult_Equal,
CompareUnknown({{3, Fixed32(456)}, {4, Fixed64(123)}},
{{4, Fixed64(123)}, {3, Fixed32(456)}}));
EXPECT_EQ(
kUpb_UnknownCompareResult_Equal,
CompareUnknown(
{{1, Group({{2, Group({{3, Fixed32(456)}, {4, Fixed64(123)}})}})}},
{{1, Group({{2, Group({{4, Fixed64(123)}, {3, Fixed32(456)}})}})}}));
}
TEST(CompareTest, LongVarint) {
EXPECT_EQ(kUpb_UnknownCompareResult_Equal,
CompareUnknown({{1, LongVarint(123)}, {2, LongVarint(456)}},
{{1, Varint(123)}, {2, Varint(456)}}));
EXPECT_EQ(kUpb_UnknownCompareResult_Equal,
CompareUnknown({{2, LongVarint(456)}, {1, LongVarint(123)}},
{{1, Varint(123)}, {2, Varint(456)}}));
}
TEST(CompareTest, MaxDepth) {
EXPECT_EQ(
kUpb_UnknownCompareResult_MaxDepthExceeded,
CompareUnknownWithMaxDepth(
{{1, Group({{2, Group({{3, Fixed32(456)}, {4, Fixed64(123)}})}})}},
{{1, Group({{2, Group({{4, Fixed64(123)}, {3, Fixed32(456)}})}})}},
2));
}
Loading…
Cancel
Save