diff --git a/upb/util/BUILD b/upb/util/BUILD
index 94eb9d86c0..48657a3167 100644
--- a/upb/util/BUILD
+++ b/upb/util/BUILD
@@ -5,6 +5,8 @@ load(
     "upb_proto_reflection_library",
 )
 
+# Def to Proto
+
 cc_library(
     name = "def_to_proto",
     srcs = ["def_to_proto.c"],
@@ -47,6 +49,8 @@ cc_test(
     ],
 )
 
+# Required fields
+
 cc_library(
     name = "required_fields",
     srcs = ["required_fields.c"],
@@ -82,3 +86,12 @@ cc_test(
         ":required_fields_test_upb_proto_reflection",
     ],
 )
+
+# Compare
+
+cc_library(
+    name = "compare",
+    srcs = ["compare.c"],
+    hdrs = ["compare.h"],
+    deps = ["//:reflection"],
+)
diff --git a/upb/util/compare.c b/upb/util/compare.c
new file mode 100644
index 0000000000..32588a42bc
--- /dev/null
+++ b/upb/util/compare.c
@@ -0,0 +1,242 @@
+
+#include "upb/util/compare.h"
+
+#include <setjmp.h>
+#include <stdbool.h>
+#include <stdint.h>
+#include <string.h>
+
+#include "upb/port_def.inc"
+
+struct upb_UnknownFields;
+typedef struct upb_UnknownFields upb_UnknownFields;
+
+// One parsed unknown field.  The wire type in the low three bits of `tag`
+// determines which member of `data` is valid.
+typedef struct {
+  uint32_t tag;
+  union {
+    uint64_t varint;
+    uint64_t uint64;
+    uint32_t uint32;
+    upb_strview delimited;
+    upb_UnknownFields* group;
+  } data;
+} upb_UnknownField;
+
+// An arena-allocated array of parsed unknown fields.
+struct upb_UnknownFields {
+  size_t size;
+  size_t capacity;
+  upb_UnknownField* fields;
+};
+
+// State shared by the recursive parse.  Failures are reported by jumping to
+// `err` with a nonzero upb_UnknownCompareResult value.
+typedef struct {
+  const char *end;
+  upb_arena *arena;
+  int depth;
+  jmp_buf err;
+} upb_UnknownField_Context;
+
+// Grows the array at *base to UPB_MAX(4, 2 * used) elements, where `used` is
+// *ptr - *base, and repoints *base, *ptr and *end into the new storage.  Jumps
+// to ctx->err with kUpb_UnknownCompareResult_OutOfMemory on failure.
+static void upb_UnknownFields_Grow(upb_UnknownField_Context *ctx,
+                                   upb_UnknownField **base,
+                                   upb_UnknownField **ptr,
+                                   upb_UnknownField **end) {
+  size_t old_count = (*ptr - *base);
+  size_t new_count = UPB_MAX(4, old_count * 2);
+
+  // Byte sizes must use the element type (**base); sizeof(*base) is only the
+  // size of a pointer and would under-allocate.
+  *base = upb_arena_realloc(ctx->arena, *base, old_count * sizeof(**base),
+                            new_count * sizeof(**base));
+  if (!*base) UPB_LONGJMP(ctx->err, kUpb_UnknownCompareResult_OutOfMemory);
+
+  *ptr = *base + old_count;
+  *end = *base + new_count;
+}
+
+// Decodes the varint at `ptr` into *val and returns the position just past it.
+static const char *upb_UnknownFields_ParseVarint(const char *ptr,
+                                                 const char *limit,
+                                                 uint64_t *val) {
+  uint8_t byte;
+  int bitpos = 0;
+  *val = 0;
+
+  do {
+    // Unknown field data must be valid.
+    UPB_ASSERT(bitpos < 70 && ptr < limit);
+    byte = *ptr;
+    *val |= (uint64_t)(byte & 0x7F) << bitpos;
+    ptr++;
+    bitpos += 7;
+  } while (byte & 0x80);
+
+  return ptr;
+}
+
+// Parses fields from *buf until ctx->end or an END_GROUP tag and returns them
+// as an arena-allocated array.  On return *buf points just past the last byte
+// consumed (including the END_GROUP tag, if one was seen).  Jumps to ctx->err
+// on allocation failure or when group nesting exhausts ctx->depth.
+static upb_UnknownFields *upb_UnknownFields_DoBuild(
+    upb_UnknownField_Context *ctx, const char **buf) {
+  upb_UnknownField *arr_base = NULL;
+  upb_UnknownField *arr_ptr = NULL;
+  upb_UnknownField *arr_end = NULL;
+  const char *ptr = *buf;
+  while (ptr < ctx->end) {
+    uint64_t val;
+    ptr = upb_UnknownFields_ParseVarint(ptr, ctx->end, &val);
+    UPB_ASSERT(val <= UINT32_MAX);
+    uint32_t tag = (uint32_t)val;
+    // END_GROUP only terminates the enclosing group, so it is not stored:
+    // upb_UnknownFields_IsEqual() has no case for it.
+    if ((tag & 7) == UPB_WIRE_TYPE_END_GROUP) break;
+    if (arr_ptr == arr_end) {
+      upb_UnknownFields_Grow(ctx, &arr_base, &arr_ptr, &arr_end);
+    }
+    upb_UnknownField *field = arr_ptr;
+    arr_ptr++;
+    field->tag = tag;
+    switch (tag & 7) {
+      case UPB_WIRE_TYPE_VARINT:
+        ptr = upb_UnknownFields_ParseVarint(ptr, ctx->end, &field->data.varint);
+        break;
+      case UPB_WIRE_TYPE_64BIT:
+        UPB_ASSERT(ctx->end - ptr >= 8);
+        memcpy(&field->data.uint64, ptr, 8);
+        ptr += 8;
+        break;
+      case UPB_WIRE_TYPE_32BIT:
+        UPB_ASSERT(ctx->end - ptr >= 4);
+        memcpy(&field->data.uint32, ptr, 4);
+        ptr += 4;  // A 32-bit value occupies four bytes on the wire.
+        break;
+      case UPB_WIRE_TYPE_DELIMITED: {
+        uint64_t size;
+        ptr = upb_UnknownFields_ParseVarint(ptr, ctx->end, &size);
+        UPB_ASSERT((uint64_t)(ctx->end - ptr) >= size);
+        field->data.delimited.data = ptr;
+        field->data.delimited.size = size;
+        ptr += size;  // Skip the payload; it is compared as raw bytes.
+        break;
+      }
+      case UPB_WIRE_TYPE_START_GROUP:
+        if (--ctx->depth == 0) {
+          UPB_LONGJMP(ctx->err, kUpb_UnknownCompareResult_MaxDepthExceeded);
+        }
+        field->data.group = upb_UnknownFields_DoBuild(ctx, &ptr);
+        ctx->depth++;
+        break;
+      default:
+        UPB_UNREACHABLE();
+    }
+  }
+
+  *buf = ptr;
+  upb_UnknownFields *ret = upb_arena_malloc(ctx->arena, sizeof(*ret));
+  if (!ret) UPB_LONGJMP(ctx->err, kUpb_UnknownCompareResult_OutOfMemory);
+  ret->fields = arr_base;
+  ret->size = arr_ptr - arr_base;
+  ret->capacity = arr_end - arr_base;
+  return ret;
+}
+
+// Parses the `size` bytes at `buf` as a top-level sequence of unknown fields.
+static upb_UnknownFields *upb_UnknownFields_Build(upb_UnknownField_Context *ctx,
+                                                  const char *buf,
+                                                  size_t size) {
+  ctx->end = buf + size;
+  upb_UnknownFields *fields = upb_UnknownFields_DoBuild(ctx, &buf);
+  UPB_ASSERT(buf == ctx->end);
+  return fields;
+}
+
+// Returns true if both arrays hold the same fields, in the same order, with
+// equal values.  Groups are compared recursively.
+static bool upb_UnknownFields_IsEqual(const upb_UnknownFields *uf1,
+                                      const upb_UnknownFields *uf2) {
+  if (uf1->size != uf2->size) return false;
+  for (size_t i = 0, n = uf1->size; i < n; i++) {
+    upb_UnknownField *f1 = &uf1->fields[i];
+    upb_UnknownField *f2 = &uf2->fields[i];
+    if (f1->tag != f2->tag) return false;
+    switch (f1->tag & 7) {
+      case UPB_WIRE_TYPE_VARINT:
+        if (f1->data.varint != f2->data.varint) return false;
+        break;
+      case UPB_WIRE_TYPE_64BIT:
+        if (f1->data.uint64 != f2->data.uint64) return false;
+        break;
+      case UPB_WIRE_TYPE_32BIT:
+        if (f1->data.uint32 != f2->data.uint32) return false;
+        break;
+      case UPB_WIRE_TYPE_DELIMITED:
+        if (!upb_strview_eql(f1->data.delimited, f2->data.delimited)) {
+          return false;
+        }
+        break;
+      case UPB_WIRE_TYPE_START_GROUP:
+        if (!upb_UnknownFields_IsEqual(f1->data.group, f2->data.group)) {
+          return false;
+        }
+        break;
+      default:
+        UPB_UNREACHABLE();
+    }
+  }
+  return true;
+}
+
+// Public entry point; the contract is documented in compare.h.
+upb_UnknownCompareResult upb_Message_UnknownFieldsAreEqual(const upb_msg *msg1,
+                                                           const upb_msg *msg2,
+                                                           int max_depth) {
+  size_t size1, size2;
+  const char *buf1 = upb_msg_getunknown(msg1, &size1);
+  const char *buf2 = upb_msg_getunknown(msg2, &size2);
+  if (size1 == 0 && size2 == 0) return kUpb_UnknownCompareResult_Equal;
+  if (size1 == 0 || size2 == 0) return kUpb_UnknownCompareResult_NotEqual;
+  // Fast path for byte-identical buffers.  The lengths must match first;
+  // otherwise memcmp() could read past buf2 or accept a mere prefix as equal.
+  if (size1 == size2 && memcmp(buf1, buf2, size1) == 0) {
+    return kUpb_UnknownCompareResult_Equal;
+  }
+
+  upb_UnknownField_Context ctx = {
+    .arena = upb_arena_new(),
+    .depth = max_depth,
+  };
+  if (!ctx.arena) return kUpb_UnknownCompareResult_OutOfMemory;
+
+  // The Build helpers report failure via UPB_LONGJMP(ctx.err, code), so the
+  // jump target must be armed before they run.  The setjmp call is the whole
+  // controlling expression of the switch, a context the C standard permits.
+  upb_UnknownCompareResult result;
+  switch (UPB_SETJMP(ctx.err)) {
+    case 0: {
+      upb_UnknownFields *uf1 = upb_UnknownFields_Build(&ctx, buf1, size1);
+      upb_UnknownFields *uf2 = upb_UnknownFields_Build(&ctx, buf2, size2);
+      result = upb_UnknownFields_IsEqual(uf1, uf2)
+                   ? kUpb_UnknownCompareResult_Equal
+                   : kUpb_UnknownCompareResult_NotEqual;
+      break;
+    }
+    case kUpb_UnknownCompareResult_MaxDepthExceeded:
+      result = kUpb_UnknownCompareResult_MaxDepthExceeded;
+      break;
+    default:
+      result = kUpb_UnknownCompareResult_OutOfMemory;
+      break;
+  }
+
+  // Free the arena on every path, including after a longjmp.
+  upb_arena_free(ctx.arena);
+  return result;
+}
diff --git a/upb/util/compare.h b/upb/util/compare.h
new file mode 100644
index 0000000000..944e71683a
--- /dev/null
+++ b/upb/util/compare.h
@@ -0,0 +1,69 @@
+/*
+ * Copyright (c) 2009-2021, Google LLC
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *     * Redistributions of source code must retain the above copyright
+ *       notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above copyright
+ *       notice, this list of conditions and the following disclaimer in the
+ *       documentation and/or other materials provided with the distribution.
+ *     * Neither the name of Google LLC nor the
+ *       names of its contributors may be used to endorse or promote products
+ *       derived from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED. IN NO EVENT SHALL Google LLC BE LIABLE FOR ANY
+ * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+ * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef UPB_UTIL_COMPARE_H_
+#define UPB_UTIL_COMPARE_H_
+
+#include "upb/def.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+// Result of upb_Message_UnknownFieldsAreEqual().
+typedef enum {
+  kUpb_UnknownCompareResult_Equal = 0,
+  kUpb_UnknownCompareResult_NotEqual = 1,
+  kUpb_UnknownCompareResult_OutOfMemory = 2,
+  kUpb_UnknownCompareResult_MaxDepthExceeded = 3,
+} upb_UnknownCompareResult;
+
+// Returns kUpb_UnknownCompareResult_Equal if unknown fields from the two
+// messages are equal when sorted and varints are made canonical, and
+// kUpb_UnknownCompareResult_NotEqual if they are not.  The other result values
+// report that the comparison could not be completed.  `max_depth` bounds how
+// deeply unknown groups may nest.
+//
+// These semantics are unfortunate, as the comparison is lossy without schema
+// data:
+//  1. We don't know whether delimited fields are sub-messages. Unknown
+//     sub-messages will therefore not have their fields sorted and varints
+//     canonicalized.
+//  2. We don't know about oneof/non-repeated fields, which should semantically
+//     discard every value except the last.
+//
+// NOTE(review): compare.c currently compares fields in wire order and does not
+// sort them first -- confirm whether sorting is still to be added.
+upb_UnknownCompareResult upb_Message_UnknownFieldsAreEqual(const upb_msg *msg1,
+                                                           const upb_msg *msg2,
+                                                           int max_depth);
+
+#ifdef __cplusplus
+} /* extern "C" */
+#endif
+
+#endif /* UPB_UTIL_COMPARE_H_ */