Implemented weird behavior when a field contains invalid UTF-8.

pull/13171/head
Joshua Haberman 3 years ago
parent 3b25e6fde3
commit bca2688f3a
  1. 15
      python/convert.c
  2. 1
      python/pb_unit_tests/reflection_test_wrapper.py

@@ -51,8 +51,19 @@ PyObject* PyUpb_UpbToPy(upb_msgval val, const upb_fielddef *f, PyObject *arena)
return PyBool_FromLong(val.bool_val);
case UPB_TYPE_BYTES:
return PyBytes_FromStringAndSize(val.str_val.data, val.str_val.size);
-case UPB_TYPE_STRING:
-return PyUnicode_DecodeUTF8(val.str_val.data, val.str_val.size, NULL);
+case UPB_TYPE_STRING: {
+PyObject* ret =
+PyUnicode_DecodeUTF8(val.str_val.data, val.str_val.size, NULL);
+// If the string can't be decoded in UTF-8, just return a bytes object
+// that contains the raw bytes. This can't happen if the value was
+// assigned using the members of the Python message object, but can happen
+// if the values were parsed from the wire (binary).
+if (ret == NULL) {
+PyErr_Clear();
+ret = PyBytes_FromStringAndSize(val.str_val.data, val.str_val.size);
+}
+return ret;
+}
case UPB_TYPE_MESSAGE:
return PyUpb_CMessage_Get((upb_msg*)val.msg_val,
upb_fielddef_msgsubdef(f), arena);

@@ -30,7 +30,6 @@ import unittest
# reasonable to guarantee.
reflection_test.Proto2ReflectionTest.testExtensionIter.__unittest_expecting_failure__ = True
-reflection_test.Proto2ReflectionTest.testStringUTF8Serialization.__unittest_expecting_failure__ = True
reflection_test.Proto2ReflectionTest.testTopLevelExtensionsForOptionalMessage.__unittest_expecting_failure__ = True
reflection_test.Proto2ReflectionTest.testTopLevelExtensionsForRepeatedMessage.__unittest_expecting_failure__ = True
reflection_test.Proto2ReflectionTest.testTopLevelExtensionsForRepeatedScalar.__unittest_expecting_failure__ = True

Loading…
Cancel
Save