Protocol Buffers - Google's data interchange format (grpc依赖)
https://developers.google.com/protocol-buffers/
You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
431 lines
15 KiB
431 lines
15 KiB
// Protocol Buffers - Google's data interchange format |
|
// Copyright 2023 Google LLC. All rights reserved. |
|
// |
|
// Use of this source code is governed by a BSD-style |
|
// license that can be found in the LICENSE file or at |
|
// https://developers.google.com/open-source/licenses/bsd |
|
|
|
#include "python/convert.h" |
|
|
|
#include "python/message.h" |
|
#include "python/protobuf.h" |
|
#include "upb/message/map.h" |
|
#include "upb/reflection/message.h" |
|
#include "upb/util/compare.h" |
|
#include "utf8_range.h" |
|
|
|
// Must be last. |
|
#include "upb/port/def.inc" |
|
|
|
// Builds a Python object from the upb value `val`, interpreting the union
// according to the C type of field `f`.
//
// `arena` is only used for message-typed fields, where it is forwarded to
// PyUpb_Message_Get().  For an unrecognized C type this sets SystemError and
// returns NULL.
PyObject* PyUpb_UpbToPy(upb_MessageValue val, const upb_FieldDef* f,
                        PyObject* arena) {
  PyObject* py = NULL;

  switch (upb_FieldDef_CType(f)) {
    case kUpb_CType_Bool:
      py = PyBool_FromLong(val.bool_val);
      break;
    case kUpb_CType_Float:
      py = PyFloat_FromDouble(val.float_val);
      break;
    case kUpb_CType_Double:
      py = PyFloat_FromDouble(val.double_val);
      break;
    case kUpb_CType_Enum:  // Enums are read through the int32 member.
    case kUpb_CType_Int32:
      py = PyLong_FromLong(val.int32_val);
      break;
    case kUpb_CType_UInt32:
      py = PyLong_FromSize_t(val.uint32_val);
      break;
    case kUpb_CType_Int64:
      py = PyLong_FromLongLong(val.int64_val);
      break;
    case kUpb_CType_UInt64:
      py = PyLong_FromUnsignedLongLong(val.uint64_val);
      break;
    case kUpb_CType_Bytes:
      py = PyBytes_FromStringAndSize(val.str_val.data, val.str_val.size);
      break;
    case kUpb_CType_String:
      py = PyUnicode_DecodeUTF8(val.str_val.data, val.str_val.size, NULL);
      if (py == NULL) {
        // The payload is not valid UTF-8.  Values assigned through the Python
        // message object are always valid, but values parsed from the binary
        // wire format may not be; hand back the raw bytes in that case.
        PyErr_Clear();
        py = PyBytes_FromStringAndSize(val.str_val.data, val.str_val.size);
      }
      break;
    case kUpb_CType_Message:
      py = PyUpb_Message_Get((upb_Message*)val.msg_val,
                             upb_FieldDef_MessageSubDef(f), arena);
      break;
    default:
      PyErr_Format(PyExc_SystemError,
                   "Getting a value from a field of unknown type %d",
                   upb_FieldDef_CType(f));
      break;
  }

  return py;
}
|
|
|
// Reads `obj` as a signed 64-bit integer into `*val`.
//
// `obj` must be an integer or provide an __index__ conversion; otherwise the
// error raised by PyNumber_Index() is left set and false is returned.  A
// value that does not fit is reported as ValueError.  Returns true on success.
static bool PyUpb_GetInt64(PyObject* obj, int64_t* val) {
  PyObject* as_index = PyNumber_Index(obj);
  if (as_index == NULL) return false;

  *val = PyLong_AsLongLong(as_index);
  if (!PyErr_Occurred()) {
    Py_DECREF(as_index);
    return true;
  }

  // The only failure expected here is an overflow; re-raise it as ValueError.
  assert(PyErr_ExceptionMatches(PyExc_OverflowError));
  PyErr_Clear();
  PyErr_Format(PyExc_ValueError, "Value out of range: %S", as_index);
  Py_DECREF(as_index);
  return false;
}
|
|
|
// Reads `obj` as an unsigned 64-bit integer into `*val`.
//
// `obj` must be an integer or provide an __index__ conversion; otherwise the
// error raised by PyNumber_Index() is left set and false is returned.  A
// value that does not fit is reported as ValueError.  Returns true on success.
static bool PyUpb_GetUint64(PyObject* obj, uint64_t* val) {
  PyObject* as_index = PyNumber_Index(obj);
  if (as_index == NULL) return false;

  *val = PyLong_AsUnsignedLongLong(as_index);
  if (!PyErr_Occurred()) {
    Py_DECREF(as_index);
    return true;
  }

  // The only failure expected here is an overflow; re-raise it as ValueError.
  assert(PyErr_ExceptionMatches(PyExc_OverflowError));
  PyErr_Clear();
  PyErr_Format(PyExc_ValueError, "Value out of range: %S", as_index);
  Py_DECREF(as_index);
  return false;
}
|
|
|
// Reads `obj` as a signed 32-bit integer into `*val`.
//
// The value is first read as 64-bit via PyUpb_GetInt64() and then
// range-checked; anything outside [INT32_MIN, INT32_MAX] raises ValueError.
// Returns true on success, false with a Python error set otherwise.
static bool PyUpb_GetInt32(PyObject* obj, int32_t* val) {
  int64_t wide;
  if (!PyUpb_GetInt64(obj, &wide)) return false;

  const bool fits = wide >= INT32_MIN && wide <= INT32_MAX;
  if (!fits) {
    PyErr_Format(PyExc_ValueError, "Value out of range: %S", obj);
    return false;
  }

  *val = (int32_t)wide;
  return true;
}
|
|
|
// Reads `obj` as an unsigned 32-bit integer into `*val`.
//
// The value is first read as 64-bit via PyUpb_GetUint64() and then
// range-checked; anything above UINT32_MAX raises ValueError.
// Returns true on success, false with a Python error set otherwise.
static bool PyUpb_GetUint32(PyObject* obj, uint32_t* val) {
  uint64_t wide;
  if (!PyUpb_GetUint64(obj, &wide)) return false;

  const bool fits = wide <= UINT32_MAX;
  if (!fits) {
    PyErr_Format(PyExc_ValueError, "Value out of range: %S", obj);
    return false;
  }

  *val = (uint32_t)wide;
  return true;
}
|
|
|
// If `arena` is specified, copies the string data into the given arena. |
|
// Otherwise aliases the given data. |
|
static upb_MessageValue PyUpb_MaybeCopyString(const char* ptr, size_t size, |
|
upb_Arena* arena) { |
|
upb_MessageValue ret; |
|
ret.str_val.size = size; |
|
if (arena) { |
|
char* buf = upb_Arena_Malloc(arena, size); |
|
memcpy(buf, ptr, size); |
|
ret.str_val.data = buf; |
|
} else { |
|
ret.str_val.data = ptr; |
|
} |
|
return ret; |
|
} |
|
|
|
const char* upb_FieldDef_TypeString(const upb_FieldDef* f) { |
|
switch (upb_FieldDef_CType(f)) { |
|
case kUpb_CType_Double: |
|
return "double"; |
|
case kUpb_CType_Float: |
|
return "float"; |
|
case kUpb_CType_Int64: |
|
return "int64"; |
|
case kUpb_CType_Int32: |
|
return "int32"; |
|
case kUpb_CType_UInt64: |
|
return "uint64"; |
|
case kUpb_CType_UInt32: |
|
return "uint32"; |
|
case kUpb_CType_Enum: |
|
return "enum"; |
|
case kUpb_CType_Bool: |
|
return "bool"; |
|
case kUpb_CType_String: |
|
return "string"; |
|
case kUpb_CType_Bytes: |
|
return "bytes"; |
|
case kUpb_CType_Message: |
|
return "message"; |
|
} |
|
UPB_UNREACHABLE(); |
|
} |
|
|
|
static bool PyUpb_PyToUpbEnum(PyObject* obj, const upb_EnumDef* e, |
|
upb_MessageValue* val) { |
|
if (PyUnicode_Check(obj)) { |
|
Py_ssize_t size; |
|
const char* name = PyUnicode_AsUTF8AndSize(obj, &size); |
|
const upb_EnumValueDef* ev = |
|
upb_EnumDef_FindValueByNameWithSize(e, name, size); |
|
if (!ev) { |
|
PyErr_Format(PyExc_ValueError, "unknown enum label \"%s\"", name); |
|
return false; |
|
} |
|
val->int32_val = upb_EnumValueDef_Number(ev); |
|
return true; |
|
} else { |
|
int32_t i32; |
|
if (!PyUpb_GetInt32(obj, &i32)) return false; |
|
if (upb_FileDef_Syntax(upb_EnumDef_File(e)) == kUpb_Syntax_Proto2 && |
|
!upb_EnumDef_CheckNumber(e, i32)) { |
|
PyErr_Format(PyExc_ValueError, "invalid enumerator %d", (int)i32); |
|
return false; |
|
} |
|
val->int32_val = i32; |
|
return true; |
|
} |
|
} |
|
|
|
// Reports whether the type of `obj` is named "ndarray".
//
// When it is, a TypeError naming the expected type of field `f` is set and
// true is returned, so callers can abort the conversion immediately.
// Otherwise returns false with no exception set.
//
// If the type's __name__ cannot be fetched, or cannot be read as C string
// data, the object is treated as "not an ndarray": the incidental error is
// cleared and false is returned, leaving the caller's regular conversion to
// report any real type problem.
bool PyUpb_IsNumpyNdarray(PyObject* obj, const upb_FieldDef* f) {
  PyObject* type_name_obj =
      PyObject_GetAttrString((PyObject*)Py_TYPE(obj), "__name__");
  if (type_name_obj == NULL) {
    // Attribute lookup failed; nothing to compare (and nothing to DECREF).
    PyErr_Clear();
    return false;
  }

  bool is_ndarray = false;
  const char* type_name = PyUpb_GetStrData(type_name_obj);
  if (type_name == NULL) {
    // NOTE(review): assumes PyUpb_GetStrData() signals failure with NULL and
    // may leave an exception set -- confirm against its definition.
    PyErr_Clear();
  } else if (strcmp(type_name, "ndarray") == 0) {
    PyErr_Format(PyExc_TypeError,
                 "%S has type ndarray, but expected one of: %s", obj,
                 upb_FieldDef_TypeString(f));
    is_ndarray = true;
  }

  Py_DECREF(type_name_obj);
  return is_ndarray;
}
|
|
|
bool PyUpb_PyToUpb(PyObject* obj, const upb_FieldDef* f, upb_MessageValue* val, |
|
upb_Arena* arena) { |
|
switch (upb_FieldDef_CType(f)) { |
|
case kUpb_CType_Enum: |
|
return PyUpb_PyToUpbEnum(obj, upb_FieldDef_EnumSubDef(f), val); |
|
case kUpb_CType_Int32: |
|
return PyUpb_GetInt32(obj, &val->int32_val); |
|
case kUpb_CType_Int64: |
|
return PyUpb_GetInt64(obj, &val->int64_val); |
|
case kUpb_CType_UInt32: |
|
return PyUpb_GetUint32(obj, &val->uint32_val); |
|
case kUpb_CType_UInt64: |
|
return PyUpb_GetUint64(obj, &val->uint64_val); |
|
case kUpb_CType_Float: |
|
if (PyUpb_IsNumpyNdarray(obj, f)) return false; |
|
val->float_val = PyFloat_AsDouble(obj); |
|
return !PyErr_Occurred(); |
|
case kUpb_CType_Double: |
|
if (PyUpb_IsNumpyNdarray(obj, f)) return false; |
|
val->double_val = PyFloat_AsDouble(obj); |
|
return !PyErr_Occurred(); |
|
case kUpb_CType_Bool: |
|
if (PyUpb_IsNumpyNdarray(obj, f)) return false; |
|
val->bool_val = PyLong_AsLong(obj); |
|
return !PyErr_Occurred(); |
|
case kUpb_CType_Bytes: { |
|
char* ptr; |
|
Py_ssize_t size; |
|
if (PyBytes_AsStringAndSize(obj, &ptr, &size) < 0) return false; |
|
*val = PyUpb_MaybeCopyString(ptr, size, arena); |
|
return true; |
|
} |
|
case kUpb_CType_String: { |
|
Py_ssize_t size; |
|
if (PyBytes_Check(obj)) { |
|
// Use the object's bytes if they are valid UTF-8. |
|
char* ptr; |
|
if (PyBytes_AsStringAndSize(obj, &ptr, &size) < 0) return false; |
|
if (!utf8_range_IsValid(ptr, size)) { |
|
// Invalid UTF-8. Try to convert the message to a Python Unicode |
|
// object, even though we know this will fail, just to get the |
|
// idiomatic Python error message. |
|
obj = PyUnicode_FromEncodedObject(obj, "utf-8", NULL); |
|
assert(!obj); |
|
return false; |
|
} |
|
*val = PyUpb_MaybeCopyString(ptr, size, arena); |
|
return true; |
|
} else { |
|
const char* ptr; |
|
ptr = PyUnicode_AsUTF8AndSize(obj, &size); |
|
if (PyErr_Occurred()) return false; |
|
*val = PyUpb_MaybeCopyString(ptr, size, arena); |
|
return true; |
|
} |
|
} |
|
case kUpb_CType_Message: |
|
PyErr_Format(PyExc_ValueError, "Message objects may not be assigned"); |
|
return false; |
|
default: |
|
PyErr_Format(PyExc_SystemError, |
|
"Getting a value from a field of unknown type %d", |
|
upb_FieldDef_CType(f)); |
|
return false; |
|
} |
|
} |
|
|
|
bool upb_Message_IsEqual(const upb_Message* msg1, const upb_Message* msg2, |
|
const upb_MessageDef* m); |
|
|
|
// ----------------------------------------------------------------------------- |
|
// Equal |
|
// ----------------------------------------------------------------------------- |
|
|
|
bool PyUpb_ValueEq(upb_MessageValue val1, upb_MessageValue val2, |
|
const upb_FieldDef* f) { |
|
switch (upb_FieldDef_CType(f)) { |
|
case kUpb_CType_Bool: |
|
return val1.bool_val == val2.bool_val; |
|
case kUpb_CType_Int32: |
|
case kUpb_CType_UInt32: |
|
case kUpb_CType_Enum: |
|
return val1.int32_val == val2.int32_val; |
|
case kUpb_CType_Int64: |
|
case kUpb_CType_UInt64: |
|
return val1.int64_val == val2.int64_val; |
|
case kUpb_CType_Float: |
|
return val1.float_val == val2.float_val; |
|
case kUpb_CType_Double: |
|
return val1.double_val == val2.double_val; |
|
case kUpb_CType_String: |
|
case kUpb_CType_Bytes: |
|
return val1.str_val.size == val2.str_val.size && |
|
memcmp(val1.str_val.data, val2.str_val.data, val1.str_val.size) == |
|
0; |
|
case kUpb_CType_Message: |
|
return upb_Message_IsEqual(val1.msg_val, val2.msg_val, |
|
upb_FieldDef_MessageSubDef(f)); |
|
default: |
|
return false; |
|
} |
|
} |
|
|
|
bool PyUpb_Map_IsEqual(const upb_Map* map1, const upb_Map* map2, |
|
const upb_FieldDef* f) { |
|
assert(upb_FieldDef_IsMap(f)); |
|
if (map1 == map2) return true; |
|
|
|
size_t size1 = map1 ? upb_Map_Size(map1) : 0; |
|
size_t size2 = map2 ? upb_Map_Size(map2) : 0; |
|
if (size1 != size2) return false; |
|
if (size1 == 0) return true; |
|
|
|
const upb_MessageDef* entry_m = upb_FieldDef_MessageSubDef(f); |
|
const upb_FieldDef* val_f = upb_MessageDef_Field(entry_m, 1); |
|
size_t iter = kUpb_Map_Begin; |
|
|
|
upb_MessageValue key, val1; |
|
while (upb_Map_Next(map1, &key, &val1, &iter)) { |
|
upb_MessageValue val2; |
|
if (!upb_Map_Get(map2, key, &val2)) return false; |
|
if (!PyUpb_ValueEq(val1, val2, val_f)) return false; |
|
} |
|
|
|
return true; |
|
} |
|
|
|
static bool PyUpb_ArrayElem_IsEqual(const upb_Array* arr1, |
|
const upb_Array* arr2, size_t i, |
|
const upb_FieldDef* f) { |
|
assert(i < upb_Array_Size(arr1)); |
|
assert(i < upb_Array_Size(arr2)); |
|
upb_MessageValue val1 = upb_Array_Get(arr1, i); |
|
upb_MessageValue val2 = upb_Array_Get(arr2, i); |
|
return PyUpb_ValueEq(val1, val2, f); |
|
} |
|
|
|
bool PyUpb_Array_IsEqual(const upb_Array* arr1, const upb_Array* arr2, |
|
const upb_FieldDef* f) { |
|
assert(upb_FieldDef_IsRepeated(f) && !upb_FieldDef_IsMap(f)); |
|
if (arr1 == arr2) return true; |
|
|
|
size_t n1 = arr1 ? upb_Array_Size(arr1) : 0; |
|
size_t n2 = arr2 ? upb_Array_Size(arr2) : 0; |
|
if (n1 != n2) return false; |
|
|
|
// Half the length rounded down. Important: the empty list rounds to 0. |
|
size_t half = n1 / 2; |
|
|
|
// Search from the ends-in. We expect differences to more quickly manifest |
|
// at the ends than in the middle. If the length is odd we will miss the |
|
// middle element. |
|
for (size_t i = 0; i < half; i++) { |
|
if (!PyUpb_ArrayElem_IsEqual(arr1, arr2, i, f)) return false; |
|
if (!PyUpb_ArrayElem_IsEqual(arr1, arr2, n1 - 1 - i, f)) return false; |
|
} |
|
|
|
// For an odd-lengthed list, pick up the middle element. |
|
if (n1 & 1) { |
|
if (!PyUpb_ArrayElem_IsEqual(arr1, arr2, half, f)) return false; |
|
} |
|
|
|
return true; |
|
} |
|
|
|
bool upb_Message_IsEqual(const upb_Message* msg1, const upb_Message* msg2, |
|
const upb_MessageDef* m) { |
|
if (msg1 == msg2) return true; |
|
if (upb_Message_ExtensionCount(msg1) != upb_Message_ExtensionCount(msg2)) |
|
return false; |
|
|
|
// Compare messages field-by-field. This is slightly tricky, because while |
|
// we can iterate over normal fields in a predictable order, the extension |
|
// order is unpredictable and may be different between msg1 and msg2. |
|
// So we use the following strategy: |
|
// 1. Iterate over all msg1 fields (including extensions). |
|
// 2. For non-extension fields, we find the corresponding field by simply |
|
// using upb_Message_Next(msg2). If the two messages have the same set |
|
// of fields, this will yield the same field. |
|
// 3. For extension fields, we have to actually search for the corresponding |
|
// field, which we do with upb_Message_GetFieldByDef(msg2, ext_f1). |
|
// 4. Once iteration over msg1 is complete, we call upb_Message_Next(msg2) |
|
// one |
|
// final time to verify that we have visited all of msg2's regular fields |
|
// (we pass NULL for ext_dict so that iteration will *not* return |
|
// extensions). |
|
// |
|
// We don't need to visit all of msg2's extensions, because we verified up |
|
// front that both messages have the same number of extensions. |
|
const upb_DefPool* symtab = upb_FileDef_Pool(upb_MessageDef_File(m)); |
|
const upb_FieldDef *f1, *f2; |
|
upb_MessageValue val1, val2; |
|
size_t iter1 = kUpb_Message_Begin; |
|
size_t iter2 = kUpb_Message_Begin; |
|
while (upb_Message_Next(msg1, m, symtab, &f1, &val1, &iter1)) { |
|
if (upb_FieldDef_IsExtension(f1)) { |
|
val2 = upb_Message_GetFieldByDef(msg2, f1); |
|
} else { |
|
if (!upb_Message_Next(msg2, m, NULL, &f2, &val2, &iter2) || f1 != f2) { |
|
return false; |
|
} |
|
} |
|
|
|
if (upb_FieldDef_IsMap(f1)) { |
|
if (!PyUpb_Map_IsEqual(val1.map_val, val2.map_val, f1)) return false; |
|
} else if (upb_FieldDef_IsRepeated(f1)) { |
|
if (!PyUpb_Array_IsEqual(val1.array_val, val2.array_val, f1)) { |
|
return false; |
|
} |
|
} else { |
|
if (!PyUpb_ValueEq(val1, val2, f1)) return false; |
|
} |
|
} |
|
|
|
if (upb_Message_Next(msg2, m, NULL, &f2, &val2, &iter2)) return false; |
|
|
|
size_t usize1, usize2; |
|
const char* uf1 = upb_Message_GetUnknown(msg1, &usize1); |
|
const char* uf2 = upb_Message_GetUnknown(msg2, &usize2); |
|
// 100 is arbitrary, we're trying to prevent stack overflow but it's not |
|
// obvious how deep we should allow here. |
|
return upb_Message_UnknownFieldsAreEqual(uf1, usize1, uf2, usize2, 100) == |
|
kUpb_UnknownCompareResult_Equal; |
|
} |
|
|
|
#include "upb/port/undef.inc"
|
|
|