From bca2688f3a205b6f20da26d0f2762e7e4585bfbf Mon Sep 17 00:00:00 2001 From: Joshua Haberman Date: Sun, 9 Jan 2022 19:37:41 -0800 Subject: [PATCH] Implemented weird behavior when a field contains invalid UTF-8. --- python/convert.c | 15 +++++++++++++-- python/pb_unit_tests/reflection_test_wrapper.py | 1 - 2 files changed, 13 insertions(+), 3 deletions(-) diff --git a/python/convert.c b/python/convert.c index 0171af876d..b6a335effe 100644 --- a/python/convert.c +++ b/python/convert.c @@ -51,8 +51,19 @@ PyObject* PyUpb_UpbToPy(upb_msgval val, const upb_fielddef *f, PyObject *arena) return PyBool_FromLong(val.bool_val); case UPB_TYPE_BYTES: return PyBytes_FromStringAndSize(val.str_val.data, val.str_val.size); - case UPB_TYPE_STRING: - return PyUnicode_DecodeUTF8(val.str_val.data, val.str_val.size, NULL); + case UPB_TYPE_STRING: { + PyObject* ret = + PyUnicode_DecodeUTF8(val.str_val.data, val.str_val.size, NULL); + // If the string can't be decoded in UTF-8, just return a bytes object + // that contains the raw bytes. This can't happen if the value was + // assigned using the members of the Python message object, but can happen + // if the values were parsed from the wire (binary). + if (ret == NULL) { + PyErr_Clear(); + ret = PyBytes_FromStringAndSize(val.str_val.data, val.str_val.size); + } + return ret; + } case UPB_TYPE_MESSAGE: return PyUpb_CMessage_Get((upb_msg*)val.msg_val, upb_fielddef_msgsubdef(f), arena); diff --git a/python/pb_unit_tests/reflection_test_wrapper.py b/python/pb_unit_tests/reflection_test_wrapper.py index 866d344535..76e3091d7e 100644 --- a/python/pb_unit_tests/reflection_test_wrapper.py +++ b/python/pb_unit_tests/reflection_test_wrapper.py @@ -30,7 +30,6 @@ import unittest # reasonable to guarantee. reflection_test.Proto2ReflectionTest.testExtensionIter.__unittest_expecting_failure__ = True -reflection_test.Proto2ReflectionTest.testStringUTF8Serialization.__unittest_expecting_failure__ = True reflection_test.Proto2ReflectionTest.testTopLevelExtensionsForOptionalMessage.__unittest_expecting_failure__ = True reflection_test.Proto2ReflectionTest.testTopLevelExtensionsForRepeatedMessage.__unittest_expecting_failure__ = True reflection_test.Proto2ReflectionTest.testTopLevelExtensionsForRepeatedScalar.__unittest_expecting_failure__ = True