Merge pull request #451 from haberman/python-convert

Conversion functions between Python and upb data types
pull/13171/head
Joshua Haberman 3 years ago committed by GitHub
commit 272614dfd7
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
  1. 4
      .bazelrc
  2. 3
      python/BUILD
  3. 356
      python/convert.c
  4. 62
      python/convert.h
  5. 6
      upb/def.c
  6. 1
      upb/def.h
  7. 24
      upb/util/BUILD
  8. 301
      upb/util/compare.c
  9. 66
      upb/util/compare.h
  10. 239
      upb/util/compare_test.cc

@ -6,7 +6,7 @@ build --extra_toolchains=@system_python//:python_toolchain
# Use our custom-configured c++ toolchain.
build:m32 --copt=-m32 --linkopt=-m32
build:asan --copt=-fsanitize=address --linkopt=-fsanitize=address
build:asan --copt=-fsanitize=address --linkopt=-fsanitize=address --copt=-D__SANITIZE_ADDRESS__=1
# For Valgrind, we have to disable checks of "possible" leaks because the Python
# interpreter does the sorts of things that flag Valgrind "possible" leak checks.
@ -14,7 +14,7 @@ build:asan --copt=-fsanitize=address --linkopt=-fsanitize=address
# know of an easy way to do that.
#
# We also have to disable pymalloc to avoid triggering Valgrind.
build:valgrind --run_under='valgrind --leak-check=full --trace-children=yes --show-possibly-lost=no --errors-for-leak-kinds=definite --error-exitcode=1' --action_env=PYTHONMALLOC=malloc
build:valgrind --run_under='valgrind --leak-check=full --track-origins=yes --trace-children=yes --show-possibly-lost=no --errors-for-leak-kinds=definite --error-exitcode=1' --action_env=PYTHONMALLOC=malloc
build:ubsan --copt=-fsanitize=undefined --linkopt=-fsanitize=undefined --action_env=UBSAN_OPTIONS=halt_on_error=1:print_stacktrace=1
# Workaround for the fact that Bazel links with $CC, not $CXX

@ -35,6 +35,8 @@ load(
cc_binary(
name = "message",
srcs = [
"convert.c",
"convert.h",
"descriptor.c",
"descriptor.h",
"descriptor_containers.c",
@ -61,6 +63,7 @@ cc_binary(
":version_script.lds",
"//:reflection",
"//:upb",
"//upb/util:compare",
"@system_python//:python_headers",
],
)

@ -0,0 +1,356 @@
/*
* Copyright (c) 2009-2021, Google LLC
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
* * Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* * Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* * Neither the name of Google LLC nor the
* names of its contributors may be used to endorse or promote products
* derived from this software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
* WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
* DISCLAIMED. IN NO EVENT SHALL Google LLC BE LIABLE FOR ANY
* DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
* ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
* SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#include "python/convert.h"
#include "python/protobuf.h"
#include "upb/reflection.h"
#include "upb/util/compare.h"
// Converts the upb value `val`, interpreted according to the type of field
// `f`, into a Python object.
//
// Returns the object produced by the matching Python constructor, or NULL
// with a Python error set when the field is a message (not implemented yet)
// or has an unrecognized type.  `arena` is currently unused.
PyObject* PyUpb_UpbToPy(upb_msgval val, const upb_fielddef *f, PyObject *arena) {
  PyObject *py = NULL;
  switch (upb_fielddef_type(f)) {
    case UPB_TYPE_INT32:
    case UPB_TYPE_ENUM:
      // Enums are surfaced as their integer number.
      py = PyLong_FromLong(val.int32_val);
      break;
    case UPB_TYPE_UINT32:
      py = PyLong_FromSize_t(val.uint32_val);
      break;
    case UPB_TYPE_INT64:
      py = PyLong_FromLongLong(val.int64_val);
      break;
    case UPB_TYPE_UINT64:
      py = PyLong_FromUnsignedLongLong(val.uint64_val);
      break;
    case UPB_TYPE_BOOL:
      py = PyBool_FromLong(val.bool_val);
      break;
    case UPB_TYPE_FLOAT:
      py = PyFloat_FromDouble(val.float_val);
      break;
    case UPB_TYPE_DOUBLE:
      py = PyFloat_FromDouble(val.double_val);
      break;
    case UPB_TYPE_STRING:
      py = PyUnicode_DecodeUTF8(val.str_val.data, val.str_val.size, NULL);
      break;
    case UPB_TYPE_BYTES:
      py = PyBytes_FromStringAndSize(val.str_val.data, val.str_val.size);
      break;
    case UPB_TYPE_MESSAGE:
      PyErr_Format(PyExc_NotImplementedError,
                   "Conversion of message types not yet implemented");
      break;
    default:
      PyErr_Format(PyExc_SystemError,
                   "Getting a value from a field of unknown type %d",
                   upb_fielddef_type(f));
      break;
  }
  return py;
}
// Reads `obj` as a signed 64-bit integer into `*val`.
//
// PyLong_AsLongLong() handles Python ints directly and otherwise applies the
// usual automatic conversions (__index__() / __int__()).  Returns false with
// a Python error set on failure.
static bool PyUpb_GetInt64(PyObject *obj, int64_t *val) {
  *val = PyLong_AsLongLong(obj);
  if (PyErr_Occurred()) {
    // Only overflow is reported as ValueError; other errors (e.g. TypeError)
    // propagate unchanged.
    if (PyErr_ExceptionMatches(PyExc_OverflowError)) {
      PyErr_Clear();
      PyErr_Format(PyExc_ValueError, "Value out of range: %S", obj);
    }
    return false;
  }
  return true;
}
// Reads `obj` as an unsigned 64-bit integer into `*val`.
//
// There is no unsigned counterpart of PyLong_AsLongLong() that performs the
// automatic __index__() / __int__() conversion, so non-int objects are first
// converted with PyNumber_Long().  Returns false with a Python error set on
// failure; a failed range conversion is reported as ValueError.
static bool PyUpb_GetUint64(PyObject *obj, uint64_t *val) {
  if (!PyLong_Check(obj)) {
    PyObject *as_long = PyNumber_Long(obj);
    if (as_long == NULL) return false;  // Conversion error propagates as-is.
    *val = PyLong_AsUnsignedLongLong(as_long);
    Py_DECREF(as_long);
  } else {
    *val = PyLong_AsUnsignedLongLong(obj);
  }
  if (PyErr_Occurred()) {
    PyErr_Clear();
    PyErr_Format(PyExc_ValueError, "Value out of range: %S", obj);
    return false;
  }
  return true;
}
// Reads `obj` as a signed 32-bit integer into `*val`.
//
// The value is first read as 64-bit and then range-checked.  Returns false
// with a Python error set on failure; `*val` is only written on success.
static bool PyUpb_GetInt32(PyObject *obj, int32_t *val) {
  int64_t wide;
  if (!PyUpb_GetInt64(obj, &wide)) return false;
  const bool fits = wide >= INT32_MIN && wide <= INT32_MAX;
  if (!fits) {
    PyErr_Format(PyExc_ValueError, "Value out of range: %S", obj);
    return false;
  }
  *val = wide;
  return true;
}
// Reads `obj` as an unsigned 32-bit integer into `*val`.
//
// The value is first read as unsigned 64-bit and then range-checked.  Returns
// false with a Python error set on failure; `*val` is only written on success.
static bool PyUpb_GetUint32(PyObject *obj, uint32_t *val) {
  uint64_t wide;
  if (!PyUpb_GetUint64(obj, &wide)) return false;
  const bool fits = wide <= UINT32_MAX;
  if (!fits) {
    PyErr_Format(PyExc_ValueError, "Value out of range: %S", obj);
    return false;
  }
  *val = wide;
  return true;
}
// Wraps the `size` bytes at `ptr` in a upb_msgval string view.
//
// With a non-NULL `arena` the bytes are copied into arena-owned memory;
// with a NULL `arena` the returned view points directly at `ptr`, so the
// caller must keep that memory alive for as long as the value is used.
//
// NOTE(review): the result of upb_arena_malloc() is not checked before the
// copy; this function has no way to report an allocation failure.
static upb_msgval PyUpb_MaybeCopyString(const char *ptr, size_t size,
                                        upb_arena *arena) {
  upb_msgval ret;
  const char *data = ptr;
  if (arena != NULL) {
    char *copy = upb_arena_malloc(arena, size);
    memcpy(copy, ptr, size);
    data = copy;
  }
  ret.str_val.data = data;
  ret.str_val.size = size;
  return ret;
}
// Converts `obj` to an enum number for enum type `e`, storing it in
// `val->int32_val`.
//
// `obj` may be either:
//   * a Python str naming one of the enum's values, or
//   * anything convertible to an int32.  For enums defined in a proto2 file
//     the number must be a defined value of `e`; for other syntaxes any
//     int32 is accepted.
//
// Returns false with a Python error set on failure.
static bool PyUpb_PyToUpbEnum(PyObject *obj, const upb_enumdef *e,
                              upb_msgval *val) {
  if (PyUnicode_Check(obj)) {
    Py_ssize_t size;
    const char *name = PyUnicode_AsUTF8AndSize(obj, &size);
    // PyUnicode_AsUTF8AndSize() returns NULL (with an exception set) when the
    // string cannot be encoded as UTF-8, e.g. if it contains lone surrogates.
    // Bail out before handing a NULL pointer to the lookup below.
    if (!name) return false;
    const upb_enumvaldef *ev = upb_enumdef_lookupname(e, name, size);
    if (!ev) {
      PyErr_Format(PyExc_ValueError, "unknown enum label \"%s\"", name);
      return false;
    }
    val->int32_val = upb_enumvaldef_number(ev);
    return true;
  } else {
    int32_t i32;
    if (!PyUpb_GetInt32(obj, &i32)) return false;
    if (upb_filedef_syntax(upb_enumdef_file(e)) == UPB_SYNTAX_PROTO2 &&
        !upb_enumdef_checknum(e, i32)) {
      PyErr_Format(PyExc_ValueError, "invalid enumerator %d", (int)i32);
      return false;
    }
    val->int32_val = i32;
    return true;
  }
}
// Converts the Python object `obj` into a upb value `*val`, according to the
// type of field `f`.
//
// For string and bytes fields the data is copied into `arena` when `arena` is
// non-NULL; otherwise `*val` aliases memory owned by `obj` itself, so it is
// only valid while `obj` is alive and unmodified.
//
// Returns false with a Python error set if the conversion fails.  Message
// fields are always rejected.
bool PyUpb_PyToUpb(PyObject *obj, const upb_fielddef *f, upb_msgval *val,
                   upb_arena *arena) {
  switch (upb_fielddef_type(f)) {
    case UPB_TYPE_ENUM:
      return PyUpb_PyToUpbEnum(obj, upb_fielddef_enumsubdef(f), val);
    case UPB_TYPE_INT32:
      return PyUpb_GetInt32(obj, &val->int32_val);
    case UPB_TYPE_INT64:
      return PyUpb_GetInt64(obj, &val->int64_val);
    case UPB_TYPE_UINT32:
      return PyUpb_GetUint32(obj, &val->uint32_val);
    case UPB_TYPE_UINT64:
      return PyUpb_GetUint64(obj, &val->uint64_val);
    case UPB_TYPE_FLOAT:
      val->float_val = PyFloat_AsDouble(obj);
      return !PyErr_Occurred();
    case UPB_TYPE_DOUBLE:
      val->double_val = PyFloat_AsDouble(obj);
      return !PyErr_Occurred();
    case UPB_TYPE_BOOL:
      val->bool_val = PyLong_AsLong(obj);
      return !PyErr_Occurred();
    case UPB_TYPE_BYTES: {
      char *ptr;
      Py_ssize_t size;
      if (PyBytes_AsStringAndSize(obj, &ptr, &size) < 0) return false;
      *val = PyUpb_MaybeCopyString(ptr, size, arena);
      return true;
    }
    case UPB_TYPE_STRING: {
      Py_ssize_t size;
      if (PyBytes_Check(obj)) {
        // Bytes are accepted for string fields if they are valid UTF-8.
        // Decode once purely to validate (and to get Python's idiomatic
        // error on failure), then drop the temporary.
        PyObject *decoded = PyUnicode_FromEncodedObject(obj, "utf-8", NULL);
        if (!decoded) return false;
        Py_DECREF(decoded);
        // Valid UTF-8 round-trips byte-for-byte, so use the bytes object's
        // own buffer.  This matters when `arena` is NULL: the result must
        // alias memory owned by `obj`, not by the temporary released above.
        char *bytes_ptr;
        if (PyBytes_AsStringAndSize(obj, &bytes_ptr, &size) < 0) return false;
        *val = PyUpb_MaybeCopyString(bytes_ptr, size, arena);
        return true;
      }
      // Returns NULL with an exception set if `obj` is not a str or cannot
      // be encoded as UTF-8.
      const char *ptr = PyUnicode_AsUTF8AndSize(obj, &size);
      if (!ptr) return false;
      *val = PyUpb_MaybeCopyString(ptr, size, arena);
      return true;
    }
    case UPB_TYPE_MESSAGE:
      PyErr_Format(PyExc_ValueError, "Message objects may not be assigned");
      return false;
    default:
      PyErr_Format(
          PyExc_SystemError, "Getting a value from a field of unknown type %d",
          upb_fielddef_type(f));
      return false;
  }
}
// Forward declaration: PyUpb_ValueEq() below calls this for message-typed
// values, and the definition appears later in this file.
bool PyUpb_Message_IsEqual(const upb_msg *msg1, const upb_msg *msg2,
const upb_msgdef *m);
// -----------------------------------------------------------------------------
// Equal
// -----------------------------------------------------------------------------
// Returns true if `val1` and `val2`, both interpreted according to the type
// of field `f`, hold equal values.
//
// Scalars are compared with ==, strings/bytes by length and content, and
// messages recursively via PyUpb_Message_IsEqual().  An unrecognized field
// type compares unequal.
bool PyUpb_ValueEq(upb_msgval val1, upb_msgval val2, const upb_fielddef *f) {
  bool equal = false;
  switch (upb_fielddef_type(f)) {
    case UPB_TYPE_BOOL:
      equal = val1.bool_val == val2.bool_val;
      break;
    case UPB_TYPE_ENUM:
    case UPB_TYPE_INT32:
    case UPB_TYPE_UINT32:
      // All 32-bit types are compared through the int32 view of the union.
      equal = val1.int32_val == val2.int32_val;
      break;
    case UPB_TYPE_INT64:
    case UPB_TYPE_UINT64:
      // Likewise for 64-bit types.
      equal = val1.int64_val == val2.int64_val;
      break;
    case UPB_TYPE_FLOAT:
      equal = val1.float_val == val2.float_val;
      break;
    case UPB_TYPE_DOUBLE:
      equal = val1.double_val == val2.double_val;
      break;
    case UPB_TYPE_BYTES:
    case UPB_TYPE_STRING: {
      const size_t len = val1.str_val.size;
      equal = len == val2.str_val.size &&
              memcmp(val1.str_val.data, val2.str_val.data, len) == 0;
      break;
    }
    case UPB_TYPE_MESSAGE:
      equal = PyUpb_Message_IsEqual(val1.msg_val, val2.msg_val,
                                    upb_fielddef_msgsubdef(f));
      break;
    default:
      break;
  }
  return equal;
}
// Returns true if the two maps of map field `f` contain the same set of keys
// with equal values.  A NULL map is treated as an empty map.
bool PyUpb_Map_IsEqual(const upb_map *map1, const upb_map *map2,
                       const upb_fielddef *f) {
  assert(upb_fielddef_ismap(f));
  if (map1 == map2) return true;

  const size_t count1 = map1 != NULL ? upb_map_size(map1) : 0;
  const size_t count2 = map2 != NULL ? upb_map_size(map2) : 0;
  if (count1 != count2) return false;
  if (count1 == 0) return true;

  // Values are compared using the field at index 1 of the map entry message.
  const upb_fielddef *val_f = upb_msgdef_field(upb_fielddef_msgsubdef(f), 1);

  // With equal sizes, it suffices to find every entry of map1 in map2.
  for (size_t iter = UPB_MAP_BEGIN; upb_mapiter_next(map1, &iter);) {
    upb_msgval key = upb_mapiter_key(map1, iter);
    upb_msgval mine = upb_mapiter_value(map1, iter);
    upb_msgval theirs;
    if (!upb_map_get(map2, key, &theirs)) return false;
    if (!PyUpb_ValueEq(mine, theirs, val_f)) return false;
  }
  return true;
}
// Returns true if element `i` of `arr1` equals element `i` of `arr2`, where
// both arrays belong to field `f`.  `i` must be in range for both arrays.
static bool PyUpb_ArrayElem_IsEqual(const upb_array *arr1,
                                    const upb_array *arr2, size_t i,
                                    const upb_fielddef *f) {
  assert(i < upb_array_size(arr1));
  assert(i < upb_array_size(arr2));
  upb_msgval lhs = upb_array_get(arr1, i);
  upb_msgval rhs = upb_array_get(arr2, i);
  const bool equal = PyUpb_ValueEq(lhs, rhs, f);
  return equal;
}
// Returns true if the two arrays of repeated (non-map) field `f` have the
// same length and equal elements at every index.  A NULL array is treated as
// an empty array.
bool PyUpb_Array_IsEqual(const upb_array *arr1, const upb_array *arr2,
                         const upb_fielddef *f) {
  assert(upb_fielddef_isseq(f) && !upb_fielddef_ismap(f));
  if (arr1 == arr2) return true;

  const size_t n1 = arr1 != NULL ? upb_array_size(arr1) : 0;
  const size_t n2 = arr2 != NULL ? upb_array_size(arr2) : 0;
  if (n1 != n2) return false;

  // Compare from both ends towards the middle: differences are expected to
  // show up at the ends sooner than in the middle.  [lo, hi) is the range of
  // indices not yet compared.
  size_t lo = 0;
  size_t hi = n1;
  while (hi - lo >= 2) {
    if (!PyUpb_ArrayElem_IsEqual(arr1, arr2, lo, f)) return false;
    if (!PyUpb_ArrayElem_IsEqual(arr1, arr2, hi - 1, f)) return false;
    lo++;
    hi--;
  }

  // An odd-length array leaves exactly one middle element unchecked.
  if (hi - lo == 1) {
    if (!PyUpb_ArrayElem_IsEqual(arr1, arr2, lo, f)) return false;
  }
  return true;
}
bool PyUpb_Message_IsEqual(const upb_msg *msg1, const upb_msg *msg2,
const upb_msgdef *m) {
size_t iter1 = UPB_MSG_BEGIN;
size_t iter2 = UPB_MSG_BEGIN;
if (msg1 == msg2) return true;
while (true) {
const upb_fielddef *f1, *f2;
upb_msgval val1, val2;
bool ok1 = msg1 && upb_msg_next(msg1, m, NULL, &f1, &val1, &iter1);
bool ok2 = msg2 && upb_msg_next(msg2, m, NULL, &f2, &val2, &iter2);
if (ok1 != ok2) return false;
if (!ok1) break; // Both messages are at end.
// If the two messages yielded different "next" fields, then the set of
// present fields is different.
if (f1 != f2) return false;
if (upb_fielddef_ismap(f1)) {
if (!PyUpb_Map_IsEqual(val1.map_val, val2.map_val, f1)) return false;
} else if (upb_fielddef_isseq(f1)) {
if (!PyUpb_Array_IsEqual(val1.array_val, val2.array_val, f1)) {
return false;
}
} else {
if (!PyUpb_ValueEq(val1, val2, f1)) return false;
}
}
size_t usize1, usize2;
const char *uf1 = upb_msg_getunknown(msg1, &usize1);
const char *uf2 = upb_msg_getunknown(msg2, &usize2);
// 100 is arbitrary, we're trying to prevent stack overflow but it's not
// obvious how deep we should allow here.
return upb_Message_UnknownFieldsAreEqual(uf1, usize1, uf2, usize2, 100);
}

@ -0,0 +1,62 @@
/*
* Copyright (c) 2009-2021, Google LLC
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
* * Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* * Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* * Neither the name of Google LLC nor the
* names of its contributors may be used to endorse or promote products
* derived from this software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
* WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
* DISCLAIMED. IN NO EVENT SHALL Google LLC BE LIABLE FOR ANY
* DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
* ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
* SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#ifndef PYUPB_CONVERT_H__
#define PYUPB_CONVERT_H__
#include "upb/def.h"
#include "upb/reflection.h"
#include "protobuf.h"
// Converts `val` to a Python object according to the type information in `f`.
// Any newly-created Python objects that reference non-primitive data from `val`
// will take a reference on `arena`; the caller must ensure that `val` belongs
// to `arena`. If the conversion cannot be performed, returns NULL and sets a
// Python error.
PyObject *PyUpb_UpbToPy(upb_msgval val, const upb_fielddef *f, PyObject *arena);
// Converts `obj` to a upb_msgval `*val` according to the type information in
// `f`. If `arena` is provided, any string data will be copied into `arena`,
// otherwise the returned value will alias the Python-owned data (this can be
// useful for an ephemeral upb_msgval). If the conversion cannot be performed,
// returns false and sets a Python error.
bool PyUpb_PyToUpb(PyObject *obj, const upb_fielddef *f, upb_msgval *val,
upb_arena *arena);
// Returns true if the given values (of type `f`) are equal. Message values
// are compared recursively.
bool PyUpb_ValueEq(upb_msgval val1, upb_msgval val2, const upb_fielddef *f);
// Returns true if the given messages (of type `m`) are equal. A NULL message
// is accepted and compares equal to another NULL message.
bool PyUpb_Message_IsEqual(const upb_msg *msg1, const upb_msg *msg2,
const upb_msgdef *m);
// Returns true if the two arrays (with element type `f`) are equal. A NULL
// array is treated as an empty array.
bool PyUpb_Array_IsEqual(const upb_array *arr1, const upb_array *arr2,
const upb_fielddef *f);
#endif // PYUPB_CONVERT_H__

@ -394,6 +394,12 @@ const upb_enumvaldef *upb_enumdef_lookupnum(const upb_enumdef *def, int32_t num)
: NULL;
}
// Reports whether `num` is accepted by the layout of enum `e`.
bool upb_enumdef_checknum(const upb_enumdef *e, int32_t num) {
  // Checking against the enum layout is expected to be faster than
  // upb_enumdef_lookupnum(e, num) != NULL, especially for small numbers.
  const bool known = _upb_enumlayout_checkval(e->layout, num);
  return known;
}
const upb_enumvaldef *upb_enumdef_value(const upb_enumdef *e, int i) {
UPB_ASSERT(0 <= i && i < e->value_count);
return &e->values[i];

@ -318,6 +318,7 @@ const upb_enumvaldef *upb_enumdef_value(const upb_enumdef *e, int i);
const upb_enumvaldef *upb_enumdef_lookupname(const upb_enumdef *e,
const char *name, size_t len);
const upb_enumvaldef *upb_enumdef_lookupnum(const upb_enumdef *e, int32_t num);
bool upb_enumdef_checknum(const upb_enumdef *e, int32_t num);
/* DEPRECATED, slated for removal */
int upb_enumdef_numvals(const upb_enumdef *e);

@ -5,6 +5,8 @@ load(
"upb_proto_reflection_library",
)
# Def to Proto
cc_library(
name = "def_to_proto",
srcs = ["def_to_proto.c"],
@ -47,6 +49,8 @@ cc_test(
],
)
# Required fields
cc_library(
name = "required_fields",
srcs = ["required_fields.c"],
@ -82,3 +86,23 @@ cc_test(
":required_fields_test_upb_proto_reflection",
],
)
# Compare
# Library built from compare.c / compare.h. It depends on //:reflection and
# is publicly visible so targets in other packages can depend on it.
cc_library(
name = "compare",
srcs = ["compare.c"],
hdrs = ["compare.h"],
deps = ["//:reflection"],
visibility = ["//visibility:public"],
)
# Unit tests (compare_test.cc) for the ":compare" library.
cc_test(
name = "compare_test",
srcs = ["compare_test.cc"],
deps = [
"@com_google_absl//absl/strings",
"@com_google_googletest//:gtest_main",
":compare",
],
)

@ -0,0 +1,301 @@
/*
* Copyright (c) 2009-2021, Google LLC
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
* * Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* * Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* * Neither the name of Google LLC nor the
* names of its contributors may be used to endorse or promote products
* derived from this software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL Google LLC BE LIABLE FOR ANY DIRECT,
* INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
* ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
* SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#include "upb/util/compare.h"
#include <stdbool.h>
#include <setjmp.h>
#include "upb/port_def.inc"
// Forward declaration: a group field holds a pointer to a nested
// upb_UnknownFields, so the name must exist before upb_UnknownField is defined.
struct upb_UnknownFields;
typedef struct upb_UnknownFields upb_UnknownFields;
// One parsed unknown field.
typedef struct {
// Tag as read from the wire; the low 3 bits are the wire type.
uint32_t tag;
// Payload. The member in use is determined by the wire type in `tag`.
union {
uint64_t varint;
uint64_t uint64;
uint32_t uint32;
// Points into the buffer being parsed; the bytes are not copied.
upb_strview delimited;
// Fields nested inside a group.
upb_UnknownFields* group;
} data;
} upb_UnknownField;
// A list of parsed unknown fields.
struct upb_UnknownFields {
// Number of entries of `fields` in use.
size_t size;
// Number of entries allocated for `fields`.
size_t capacity;
upb_UnknownField* fields;
};
// State shared by all the helpers while building and sorting unknown fields.
typedef struct {
// One past the last byte of the buffer currently being parsed.
const char *end;
// Arena that owns all upb_UnknownFields / upb_UnknownField allocations.
upb_arena *arena;
// Scratch buffer for merge sort, allocated with realloc() and grown on
// demand; `tmp_size` is its capacity in elements.
upb_UnknownField *tmp;
size_t tmp_size;
// Remaining group nesting budget; decremented on entering a group.
int depth;
// Jump target used to report errors (out of memory, depth exceeded).
jmp_buf err;
} upb_UnknownField_Context;
// Grows the arena-allocated field array described by (*base, *ptr, *end).
//
// The new capacity is twice the number of elements in use, with a minimum of
// 4.  On return all three pointers refer to the resized array; `*ptr` keeps
// its offset from `*base`.  Jumps to `ctx->err` with OutOfMemory if the arena
// cannot satisfy the request.
static void upb_UnknownFields_Grow(upb_UnknownField_Context *ctx,
                                   upb_UnknownField **base,
                                   upb_UnknownField **ptr,
                                   upb_UnknownField **end) {
  const size_t used = (*ptr - *base);
  const size_t grown = UPB_MAX(4, used * 2);
  const size_t old_bytes = used * sizeof(**base);
  const size_t new_bytes = grown * sizeof(**base);

  *base = upb_arena_realloc(ctx->arena, *base, old_bytes, new_bytes);
  if (*base == NULL) {
    UPB_LONGJMP(ctx->err, kUpb_UnknownCompareResult_OutOfMemory);
  }
  *ptr = *base + used;
  *end = *base + grown;
}
// Decodes one varint starting at `ptr` into `*val` and returns the position
// just past it.
//
// Each byte contributes its low 7 bits, least-significant group first; a set
// high bit means another byte follows.  The input is required to be valid:
// reading past `limit` or exceeding 10 bytes trips an assertion.
static const char *upb_UnknownFields_ParseVarint(const char *ptr,
                                                 const char *limit,
                                                 uint64_t *val) {
  *val = 0;
  for (int shift = 0;; shift += 7) {
    // Unknown field data must be valid.
    UPB_ASSERT(shift < 70 && ptr < limit);
    const uint8_t byte = *ptr;
    ptr++;
    *val |= (uint64_t)(byte & 0x7F) << shift;
    if (!(byte & 0x80)) break;  // No continuation bit: this was the last byte.
  }
  return ptr;
}
// We have to implement our own sort here, since qsort() is not guaranteed to
// be a stable sort: fields with equal tags must keep their relative order.
// Here we use merge sort, the simplest stable sort.

// Merges the two adjacent sorted runs arr[start, mid) and arr[mid, end) into
// a single run sorted by tag, in place.
//
// `tmp` is scratch space with room for at least (end - start) elements.  On
// equal tags the element from the first run is taken first, which keeps the
// sort stable.
static void upb_UnknownFields_Merge(upb_UnknownField *arr, size_t start,
                                    size_t mid, size_t end,
                                    upb_UnknownField *tmp) {
  // Work from a copy so the output can be written straight back into `arr`.
  memcpy(tmp, &arr[start], (end - start) * sizeof(*tmp));

  upb_UnknownField* ptr1 = tmp;
  upb_UnknownField* end1 = &tmp[mid - start];
  upb_UnknownField* ptr2 = &tmp[mid - start];
  upb_UnknownField* end2 = &tmp[end - start];
  upb_UnknownField* out = &arr[start];

  while (ptr1 < end1 && ptr2 < end2) {
    if (ptr1->tag <= ptr2->tag) {
      *out++ = *ptr1++;
    } else {
      *out++ = *ptr2++;
    }
  }

  // Exactly one run may have elements left; append them.  The leftover of the
  // second run must be read from `ptr2` (its current position), not `ptr1`.
  if (ptr1 < end1) {
    memcpy(out, ptr1, (end1 - ptr1) * sizeof(*out));
  } else if (ptr2 < end2) {
    memcpy(out, ptr2, (end2 - ptr2) * sizeof(*out));
  }
}
// Merge-sorts arr[start, end) by tag, using `tmp` as scratch space for the
// merge step.  Ranges of zero or one element are already sorted.
static void upb_UnknownFields_SortRecursive(upb_UnknownField *arr,
                                            size_t start, size_t end,
                                            upb_UnknownField *tmp) {
  const size_t count = end - start;
  if (count <= 1) return;

  const size_t mid = start + (count / 2);
  upb_UnknownFields_SortRecursive(arr, start, mid, tmp);
  upb_UnknownFields_SortRecursive(arr, mid, end, tmp);
  upb_UnknownFields_Merge(arr, start, mid, end, tmp);
}
// Sorts `fields` by tag using merge sort.
//
// The scratch buffer in `ctx` is reused between calls and grown (by doubling,
// starting at 8 elements) when it is too small.  Jumps to `ctx->err` with
// OutOfMemory if the scratch buffer cannot be grown.
static void upb_UnknownFields_Sort(upb_UnknownField_Context *ctx,
                                   upb_UnknownFields *fields) {
  if (ctx->tmp_size < fields->size) {
    size_t new_size = UPB_MAX(8, ctx->tmp_size);
    while (new_size < fields->size) new_size *= 2;

    // Keep the old buffer reachable until realloc() is known to have
    // succeeded: on failure the old block is still allocated, and the caller
    // frees ctx->tmp after the longjmp.
    upb_UnknownField *new_tmp =
        realloc(ctx->tmp, new_size * sizeof(*ctx->tmp));
    if (!new_tmp) {
      UPB_LONGJMP(ctx->err, kUpb_UnknownCompareResult_OutOfMemory);
    }
    ctx->tmp = new_tmp;
    ctx->tmp_size = new_size;
  }
  upb_UnknownFields_SortRecursive(fields->fields, 0, fields->size, ctx->tmp);
}
// Parses unknown fields starting at `*buf` into a newly arena-allocated
// upb_UnknownFields, stopping at `ctx->end` or at the first END_GROUP tag.
//
// On return `*buf` points just past the last byte consumed (including the
// END_GROUP tag, if one terminated the parse). Nested groups are parsed
// recursively. If the tags were not already in non-decreasing order, the
// result is sorted by tag before being returned.
//
// Errors are reported by jumping to `ctx->err`: OutOfMemory if an allocation
// fails, MaxDepthExceeded if groups nest too deeply. Malformed input is only
// checked with assertions.
static upb_UnknownFields *upb_UnknownFields_DoBuild(
upb_UnknownField_Context *ctx, const char **buf) {
// Growable array of fields: [arr_base, arr_ptr) is in use, arr_end is the
// end of the allocation.
upb_UnknownField *arr_base = NULL;
upb_UnknownField *arr_ptr = NULL;
upb_UnknownField *arr_end = NULL;
const char *ptr = *buf;
uint32_t last_tag = 0;
bool sorted = true;
while (ptr < ctx->end) {
uint64_t tag;
ptr = upb_UnknownFields_ParseVarint(ptr, ctx->end, &tag);
UPB_ASSERT(tag <= UINT32_MAX);
int wire_type = tag & 7;
// An END_GROUP tag closes the group being parsed by this call; it is
// consumed but not stored. Its field number is not checked here.
if (wire_type == UPB_WIRE_TYPE_END_GROUP) break;
// Track whether tags arrive in non-decreasing order so that sorting can be
// skipped in the common case.
if (tag < last_tag) sorted = false;
last_tag = tag;
if (arr_ptr == arr_end) {
upb_UnknownFields_Grow(ctx, &arr_base, &arr_ptr, &arr_end);
}
upb_UnknownField *field = arr_ptr;
field->tag = tag;
arr_ptr++;
switch (wire_type) {
case UPB_WIRE_TYPE_VARINT:
ptr = upb_UnknownFields_ParseVarint(ptr, ctx->end, &field->data.varint);
break;
case UPB_WIRE_TYPE_64BIT:
UPB_ASSERT(ctx->end - ptr >= 8);
memcpy(&field->data.uint64, ptr, 8);
ptr += 8;
break;
case UPB_WIRE_TYPE_32BIT:
UPB_ASSERT(ctx->end - ptr >= 4);
memcpy(&field->data.uint32, ptr, 4);
ptr += 4;
break;
case UPB_WIRE_TYPE_DELIMITED: {
uint64_t size;
ptr = upb_UnknownFields_ParseVarint(ptr, ctx->end, &size);
UPB_ASSERT(ctx->end - ptr >= size);
// The payload is referenced in place, not copied.
field->data.delimited.data = ptr;
field->data.delimited.size = size;
ptr += size;
break;
}
case UPB_WIRE_TYPE_START_GROUP:
// Spend one level of the nesting budget for the duration of the nested
// parse; give it back afterwards.
if (--ctx->depth == 0) {
UPB_LONGJMP(ctx->err, kUpb_UnknownCompareResult_MaxDepthExceeded);
}
field->data.group = upb_UnknownFields_DoBuild(ctx, &ptr);
ctx->depth++;
break;
default:
UPB_UNREACHABLE();
}
}
*buf = ptr;
upb_UnknownFields *ret = upb_arena_malloc(ctx->arena, sizeof(*ret));
if (!ret) UPB_LONGJMP(ctx->err, kUpb_UnknownCompareResult_OutOfMemory);
ret->fields = arr_base;
ret->size = arr_ptr - arr_base;
ret->capacity = arr_end - arr_base;
if (!sorted) {
upb_UnknownFields_Sort(ctx, ret);
}
return ret;
}
// Parses the `size` bytes of unknown-field data at `buf` into a
// upb_UnknownFields structure allocated from `ctx->arena`.  The whole buffer
// must be consumed by the parse (checked with an assertion).
static upb_UnknownFields *upb_UnknownFields_Build(upb_UnknownField_Context *ctx,
                                                  const char *buf,
                                                  size_t size) {
  const char *cursor = buf;
  ctx->end = cursor + size;
  upb_UnknownFields *built = upb_UnknownFields_DoBuild(ctx, &cursor);
  UPB_ASSERT(cursor == ctx->end);
  return built;
}
// Compares two sorted upb_UnknownFields structures for equality.
//
// The lists are equal when they have the same length and, position by
// position, the same tag and the same payload.  Groups are compared
// recursively.
static bool upb_UnknownFields_IsEqual(const upb_UnknownFields *uf1,
                                      const upb_UnknownFields *uf2) {
  const size_t count = uf1->size;
  if (count != uf2->size) return false;

  for (size_t i = 0; i < count; i++) {
    upb_UnknownField *lhs = &uf1->fields[i];
    upb_UnknownField *rhs = &uf2->fields[i];
    if (lhs->tag != rhs->tag) return false;

    // Tags match, so both sides have the same wire type.
    bool same = true;
    switch (lhs->tag & 7) {
      case UPB_WIRE_TYPE_VARINT:
        same = lhs->data.varint == rhs->data.varint;
        break;
      case UPB_WIRE_TYPE_64BIT:
        same = lhs->data.uint64 == rhs->data.uint64;
        break;
      case UPB_WIRE_TYPE_32BIT:
        same = lhs->data.uint32 == rhs->data.uint32;
        break;
      case UPB_WIRE_TYPE_DELIMITED:
        same = upb_strview_eql(lhs->data.delimited, rhs->data.delimited);
        break;
      case UPB_WIRE_TYPE_START_GROUP:
        same = upb_UnknownFields_IsEqual(lhs->data.group, rhs->data.group);
        break;
      default:
        UPB_UNREACHABLE();
    }
    if (!same) return false;
  }
  return true;
}
// Compares two buffers of serialized unknown fields.
//
// Returns Equal if both buffers describe the same unknown fields once the
// fields are sorted by tag and varints are decoded, NotEqual otherwise.
// Returns OutOfMemory if an allocation fails, or MaxDepthExceeded if groups
// nest too deeply for `max_depth`.
upb_UnknownCompareResult upb_Message_UnknownFieldsAreEqual(const char *buf1,
                                                           size_t size1,
                                                           const char *buf2,
                                                           size_t size2,
                                                           int max_depth) {
  if (size1 == 0 && size2 == 0) return kUpb_UnknownCompareResult_Equal;
  if (size1 == 0 || size2 == 0) return kUpb_UnknownCompareResult_NotEqual;

  // Fast path: byte-identical buffers are trivially equal.  The sizes must
  // match first; otherwise memcmp() could read past the end of `buf2`, or a
  // buffer that is merely a prefix of the other would be reported as equal.
  if (size1 == size2 && memcmp(buf1, buf2, size1) == 0) {
    return kUpb_UnknownCompareResult_Equal;
  }

  upb_UnknownField_Context ctx = {
    .arena = upb_arena_new(),
    .depth = max_depth,
    .tmp = NULL,
    .tmp_size = 0,
  };

  if (!ctx.arena) return kUpb_UnknownCompareResult_OutOfMemory;

  // The helpers report errors by jumping back here with a non-zero
  // upb_UnknownCompareResult value.
  int ret = UPB_SETJMP(ctx.err);

  if (UPB_LIKELY(ret == 0)) {
    // First build both unknown fields into a sorted data structure (similar
    // to the UnknownFieldSet in C++).
    upb_UnknownFields *uf1 = upb_UnknownFields_Build(&ctx, buf1, size1);
    upb_UnknownFields *uf2 = upb_UnknownFields_Build(&ctx, buf2, size2);

    // Now perform the equality check on the sorted structures.
    if (upb_UnknownFields_IsEqual(uf1, uf2)) {
      ret = kUpb_UnknownCompareResult_Equal;
    } else {
      ret = kUpb_UnknownCompareResult_NotEqual;
    }
  }

  // Release everything on both the normal and the error path.
  upb_arena_free(ctx.arena);
  free(ctx.tmp);
  return ret;
}

@ -0,0 +1,66 @@
/*
* Copyright (c) 2009-2021, Google LLC
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
* * Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* * Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* * Neither the name of Google LLC nor the
* names of its contributors may be used to endorse or promote products
* derived from this software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
* WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
* DISCLAIMED. IN NO EVENT SHALL Google LLC BE LIABLE FOR ANY
* DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
* ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
* SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#ifndef UPB_UTIL_COMPARE_H_
#define UPB_UTIL_COMPARE_H_
#include "upb/def.h"
#ifdef __cplusplus
extern "C" {
#endif
// Result of comparing the unknown fields of two messages.
typedef enum {
// The unknown fields are equal.
kUpb_UnknownCompareResult_Equal = 0,
// The unknown fields differ.
kUpb_UnknownCompareResult_NotEqual = 1,
// The comparison could not be completed because an allocation failed.
kUpb_UnknownCompareResult_OutOfMemory = 2,
// The comparison could not be completed because groups were nested more
// deeply than `max_depth` allows.
kUpb_UnknownCompareResult_MaxDepthExceeded = 3,
} upb_UnknownCompareResult;
// Returns kUpb_UnknownCompareResult_Equal if unknown fields from the two
// messages are equal when sorted and varints are made canonical. Note that
// Equal is 0, so the result must not be used directly as a boolean.
//
// This function is discouraged, as the comparison is inherently lossy without
// schema data:
//
// 1. We don't know whether delimited fields are sub-messages. Unknown
// sub-messages will therefore not have their fields sorted and varints
// canonicalized.
// 2. We don't know about oneof/non-repeated fields, which should semantically
// discard every value except the last.
upb_UnknownCompareResult upb_Message_UnknownFieldsAreEqual(const char *buf1,
size_t size1,
const char *buf2,
size_t size2,
int max_depth);
#ifdef __cplusplus
} /* extern "C" */
#endif
#endif /* UPB_UTIL_COMPARE_H_ */

@ -0,0 +1,239 @@
/*
* Copyright (c) 2009-2021, Google LLC
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
* * Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* * Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* * Neither the name of Google LLC nor the
* names of its contributors may be used to endorse or promote products
* derived from this software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL Google LLC BE LIABLE FOR ANY DIRECT,
* INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
* ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
* SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#include "upb/util/compare.h"

#include <stdint.h>

#include <deque>
#include <string_view>
#include <utility>
#include <vector>

#include "absl/strings/string_view.h"
#include "gmock/gmock.h"
#include "gtest/gtest.h"
// UnknownField is forward-declared so the list alias can be named by the
// union below (group values point to a nested list) before the struct itself
// is defined.
struct UnknownField;
using UnknownFields = std::vector<UnknownField>;
// Kind of value held by a test field. The enumerator order matters: the
// values are used as indices into the wire_types table in ToBinaryPayload().
enum class UnknownFieldType {
kVarint,
kLongVarint, // Over-encoded to have distinct wire format.
kDelimited,
kFixed64,
kFixed32,
kGroup,
};
// Payload of a test field; the member in use is selected by the accompanying
// UnknownFieldType.
union UnknownFieldValue {
uint64_t varint;
uint64_t fixed64;
uint32_t fixed32;
// NULL-terminated (strings must not have embedded NULL).
const char* delimited;
// Not owned by the union.
UnknownFields* group;
};
// A field value tagged with its kind. Created by the Varint(), LongVarint(),
// Fixed64(), Fixed32(), Delimited() and Group() helpers.
struct TypeAndValue {
UnknownFieldType type;
UnknownFieldValue value;
};
// One unknown field in a test payload: a field number plus its typed value.
struct UnknownField {
uint32_t field_number;
TypeAndValue value;
};
// Makes a varint value that is serialized with its minimal encoding.
TypeAndValue Varint(uint64_t val) {
  TypeAndValue result{};
  result.type = UnknownFieldType::kVarint;
  result.value.varint = val;
  return result;
}
// Makes a varint value that is serialized over-long (one extra byte), so its
// wire bytes differ from those of Varint(val).
TypeAndValue LongVarint(uint64_t val) {
  TypeAndValue result{};
  result.type = UnknownFieldType::kLongVarint;
  result.value.varint = val;
  return result;
}
// Makes a 64-bit fixed-width value.
TypeAndValue Fixed64(uint64_t val) {
  TypeAndValue result{};
  result.type = UnknownFieldType::kFixed64;
  result.value.fixed64 = val;
  return result;
}
// Makes a 32-bit fixed-width value.
TypeAndValue Fixed32(uint32_t val) {
  TypeAndValue result{};
  result.type = UnknownFieldType::kFixed32;
  result.value.fixed32 = val;
  return result;
}
// Makes a length-delimited value from the NUL-terminated string `val`.  The
// pointer is stored as-is, so `val` must outlive the returned value.
TypeAndValue Delimited(const char* val) {
  TypeAndValue result{};
  result.type = UnknownFieldType::kDelimited;
  result.value.delimited = val;
  return result;
}
// Makes a group value containing the fields in `nested`.
//
// The returned value holds a raw pointer to the nested list, so the list must
// outlive it.  Pointing at the by-value parameter itself is not safe: whether
// a parameter survives past the function's return is implementation-defined.
// Instead the list is moved into storage that lives until program exit.
TypeAndValue Group(UnknownFields nested) {
  // std::deque does not relocate existing elements on emplace_back(), so
  // addresses handed out by earlier calls stay valid.
  static std::deque<UnknownFields> storage;
  storage.emplace_back(std::move(nested));

  TypeAndValue ret{UnknownFieldType::kGroup};
  ret.value.group = &storage.back();
  return ret;
}
// Appends the minimal varint encoding of `val` to `*str`: 7 bits per byte,
// least-significant group first, with the high bit set on every byte except
// the last.  Zero is encoded as a single zero byte.
void EncodeVarint(uint64_t val, std::string* str) {
  for (;;) {
    char byte = val & 0x7fU;
    val >>= 7;
    const bool more = val != 0;
    if (more) byte |= 0x80U;
    str->push_back(byte);
    if (!more) return;
  }
}
std::string ToBinaryPayload(const UnknownFields& fields) {
static const upb_wiretype_t wire_types[] = {
UPB_WIRE_TYPE_VARINT,
UPB_WIRE_TYPE_VARINT,
UPB_WIRE_TYPE_DELIMITED,
UPB_WIRE_TYPE_64BIT,
UPB_WIRE_TYPE_32BIT,
UPB_WIRE_TYPE_START_GROUP,
};
std::string ret;
for (const auto& field : fields) {
uint32_t tag = field.field_number << 3 |
(wire_types[static_cast<int>(field.value.type)]);
EncodeVarint(tag, &ret);
switch (field.value.type) {
case UnknownFieldType::kVarint:
EncodeVarint(field.value.value.varint, &ret);
break;
case UnknownFieldType::kLongVarint:
EncodeVarint(field.value.value.varint, &ret);
ret.back() |= 0x80;
ret.push_back(0);
break;
case UnknownFieldType::kDelimited:
EncodeVarint(strlen(field.value.value.delimited), &ret);
ret.append(field.value.value.delimited);
break;
case UnknownFieldType::kFixed64: {
uint64_t val = _upb_be_swap64(field.value.value.fixed64);
ret.append(reinterpret_cast<const char*>(&val), sizeof(val));
break;
}
case UnknownFieldType::kFixed32: {
uint32_t val = _upb_be_swap32(field.value.value.fixed32);
ret.append(reinterpret_cast<const char*>(&val), sizeof(val));
break;
}
case UnknownFieldType::kGroup: {
uint32_t end_tag = field.field_number << 3 | UPB_WIRE_TYPE_END_GROUP;
ret.append(ToBinaryPayload(*field.value.value.group));
EncodeVarint(end_tag, &ret);
break;
}
}
}
return ret;
}
// Serializes both field lists and compares the resulting payloads with
// upb_Message_UnknownFieldsAreEqual(), using the given nesting limit.
upb_UnknownCompareResult CompareUnknownWithMaxDepth(UnknownFields uf1,
                                                    UnknownFields uf2,
                                                    int max_depth) {
  const std::string payload1 = ToBinaryPayload(uf1);
  const std::string payload2 = ToBinaryPayload(uf2);
  const char* data1 = payload1.data();
  const char* data2 = payload2.data();
  return upb_Message_UnknownFieldsAreEqual(data1, payload1.size(), data2,
                                           payload2.size(), max_depth);
}
// Compares two field lists with a nesting limit of 64.
upb_UnknownCompareResult CompareUnknown(UnknownFields uf1, UnknownFields uf2) {
  const int kMaxDepth = 64;
  return CompareUnknownWithMaxDepth(uf1, uf2, kMaxDepth);
}
// Identical field lists must compare equal: empty lists, flat lists, and
// lists containing nested groups.
TEST(CompareTest, UnknownFieldsReflexive) {
EXPECT_EQ(kUpb_UnknownCompareResult_Equal, CompareUnknown({}, {}));
EXPECT_EQ(kUpb_UnknownCompareResult_Equal,
CompareUnknown({{1, Varint(123)}, {2, Fixed32(456)}},
{{1, Varint(123)}, {2, Fixed32(456)}}));
EXPECT_EQ(
kUpb_UnknownCompareResult_Equal,
CompareUnknown(
{{1, Group({{2, Group({{3, Fixed32(456)}, {4, Fixed64(123)}})}})}},
{{1, Group({{2, Group({{3, Fixed32(456)}, {4, Fixed64(123)}})}})}}));
}
// The order in which fields appear must not affect the result, while a
// differing value must still be detected.
TEST(CompareTest, UnknownFieldsOrdering) {
// Same fields in reverse order: equal.
EXPECT_EQ(kUpb_UnknownCompareResult_Equal,
CompareUnknown({{1, Varint(111)},
{2, Delimited("ABC")},
{3, Fixed32(456)},
{4, Fixed64(123)},
{5, Group({})}},
{{5, Group({})},
{4, Fixed64(123)},
{3, Fixed32(456)},
{2, Delimited("ABC")},
{1, Varint(111)}}));
// Reverse order with one value changed: not equal.
EXPECT_EQ(kUpb_UnknownCompareResult_NotEqual,
CompareUnknown({{1, Varint(111)},
{2, Delimited("ABC")},
{3, Fixed32(456)},
{4, Fixed64(123)},
{5, Group({})}},
{{5, Group({})},
{4, Fixed64(123)},
{3, Fixed32(455)}, // Small difference.
{2, Delimited("ABC")},
{1, Varint(111)}}));
EXPECT_EQ(kUpb_UnknownCompareResult_Equal,
CompareUnknown({{3, Fixed32(456)}, {4, Fixed64(123)}},
{{4, Fixed64(123)}, {3, Fixed32(456)}}));
// Reordering inside a nested group must also be tolerated.
EXPECT_EQ(
kUpb_UnknownCompareResult_Equal,
CompareUnknown(
{{1, Group({{2, Group({{3, Fixed32(456)}, {4, Fixed64(123)}})}})}},
{{1, Group({{2, Group({{4, Fixed64(123)}, {3, Fixed32(456)}})}})}}));
}
// A varint that is over-encoded on the wire must compare equal to the same
// value in its minimal encoding, both in matching and in differing field
// order.
TEST(CompareTest, LongVarint) {
EXPECT_EQ(kUpb_UnknownCompareResult_Equal,
CompareUnknown({{1, LongVarint(123)}, {2, LongVarint(456)}},
{{1, Varint(123)}, {2, Varint(456)}}));
EXPECT_EQ(kUpb_UnknownCompareResult_Equal,
CompareUnknown({{2, LongVarint(456)}, {1, LongVarint(123)}},
{{1, Varint(123)}, {2, Varint(456)}}));
}
// Two levels of nested groups with a max depth of 2 must be reported as
// MaxDepthExceeded rather than compared.
TEST(CompareTest, MaxDepth) {
EXPECT_EQ(
kUpb_UnknownCompareResult_MaxDepthExceeded,
CompareUnknownWithMaxDepth(
{{1, Group({{2, Group({{3, Fixed32(456)}, {4, Fixed64(123)}})}})}},
{{1, Group({{2, Group({{4, Fixed64(123)}, {3, Fixed32(456)}})}})}},
2));
}
Loading…
Cancel
Save