diff --git a/python/message.c b/python/message.c index b0df69857e..d6c55deedf 100644 --- a/python/message.c +++ b/python/message.c @@ -847,9 +847,12 @@ __attribute__((flatten)) static PyObject* PyUpb_CMessage_GetAttr( PyObject* ret = PyObject_GenericGetAttr(_self, attr); if (ret) return ret; - // If the attribute wasn't found, look for attributes on the class. But if a - // different kind of error (other than AttributeError) was found, return that. - if (PyErr_ExceptionMatches(PyExc_AttributeError)) { + // Swallow AttributeError if it occurred and try again on the metaclass + // to pick up class attributes. But we have to special-case "Extensions" + // which affirmatively returns AttributeError when a message is not + // extendable. + if (PyErr_ExceptionMatches(PyExc_AttributeError) && + strcmp(PyUpb_GetStrData(attr), "Extensions") != 0) { PyErr_Clear(); return PyUpb_MessageMeta_GetAttr((PyObject*)Py_TYPE(_self), attr); } @@ -986,11 +989,13 @@ PyObject* PyUpb_CMessage_MergeFromString(PyObject* _self, PyObject* arg) { const upb_extreg* extreg = upb_symtab_extreg(upb_filedef_symtab(file)); const upb_msglayout* layout = upb_msgdef_layout(msgdef); upb_arena* arena = PyUpb_Arena_Get(self->arena); + PyUpb_ModuleState* state = PyUpb_ModuleState_Get(); + int options = + UPB_DECODE_MAXDEPTH(state->allow_oversize_protos ? UINT32_MAX : 100); upb_DecodeStatus status = - _upb_decode(buf, size, self->ptr.msg, layout, extreg, 0, arena); + _upb_decode(buf, size, self->ptr.msg, layout, extreg, options, arena); Py_XDECREF(bytes); if (status != kUpb_DecodeStatus_Ok) { - PyUpb_ModuleState* state = PyUpb_ModuleState_Get(); PyErr_Format(state->decode_error_class, "Error parsing message"); return NULL; } @@ -1180,9 +1185,10 @@ PyObject* PyUpb_CMessage_SerializeInternal(PyObject* _self, PyObject* args, const upb_msgdef* msgdef = _PyUpb_CMessage_GetMsgdef(self); const upb_msglayout* layout = upb_msgdef_layout(msgdef); size_t size = 0; - int options = 0; + int options = UPB_ENCODE_MAXDEPTH(UINT32_MAX); if (check_required) options |= UPB_ENCODE_CHECKREQUIRED; if (deterministic) options |= UPB_ENCODE_DETERMINISTIC; + // Python does not currently have any effective limit on serialization depth. char* pb = upb_encode_ex(self->ptr.msg, layout, options, arena, &size); PyObject* ret = NULL; @@ -1248,10 +1254,6 @@ static PyObject* PyUpb_CMessage_GetExtensionDict(PyObject* _self, static PyGetSetDef PyUpb_CMessage_Getters[] = { {"Extensions", PyUpb_CMessage_GetExtensionDict, NULL, "Extension dict"}, - /* - {"_extensions_by_name", (getter)GetExtensionsByName, NULL}, - {"_extensions_by_number", (getter)GetExtensionsByNumber, NULL}, - */ {NULL}}; static PyMethodDef PyUpb_CMessage_Methods[] = { diff --git a/python/minimal_test.py b/python/minimal_test.py index 26d6553c84..01eef6edc1 100644 --- a/python/minimal_test.py +++ b/python/minimal_test.py @@ -32,6 +32,9 @@ from google.protobuf.pyext import _message from google.protobuf.internal import api_implementation from google.protobuf import unittest_pb2 from google.protobuf import descriptor_pool +from google.protobuf import text_format +from google.protobuf import message_factory +from google.protobuf import message from google.protobuf.internal import factory_test1_pb2 from google.protobuf.internal import factory_test2_pb2 from google.protobuf import descriptor_pb2 @@ -72,9 +75,35 @@ class TestMessageExtension(unittest.TestCase): test_slice(11, 3, -2) test_slice(11, 3, -3) test_slice(10, 25, 4) + + def testExtensionsErrors(self): + msg = unittest_pb2.TestAllTypes() + self.assertRaises(AttributeError, getattr, msg, 'Extensions') #TestMessageExtension.test_descriptor_pool.__unittest_expecting_failure__ = True +class OversizeProtosTest(unittest.TestCase): + def setUp(self): + msg = unittest_pb2.NestedTestAllTypes() + m = msg + for i in range(101): + m = m.child + m.Clear() + self.p_serialized = msg.SerializeToString() + + def testAssertOversizeProto(self): + from google.protobuf.pyext._message import SetAllowOversizeProtos + SetAllowOversizeProtos(False) + q = unittest_pb2.NestedTestAllTypes() + with self.assertRaises(message.DecodeError): + q.ParseFromString(self.p_serialized) + print(q) + + def testSucceedOversizeProto(self): + from google.protobuf.pyext._message import SetAllowOversizeProtos + SetAllowOversizeProtos(True) + q = unittest_pb2.NestedTestAllTypes() + q.ParseFromString(self.p_serialized) if __name__ == '__main__': unittest.main(verbosity=2) diff --git a/python/pb_unit_tests/message_test_wrapper.py b/python/pb_unit_tests/message_test_wrapper.py index 993f8e0e3d..d73997b81e 100644 --- a/python/pb_unit_tests/message_test_wrapper.py +++ b/python/pb_unit_tests/message_test_wrapper.py @@ -26,28 +26,29 @@ from google.protobuf.internal import message_test import unittest -message_test.MessageTest.testBadUtf8String_proto2.__unittest_expecting_failure__ = True -message_test.MessageTest.testBadUtf8String_proto3.__unittest_expecting_failure__ = True +# We don't want to support extending repeated fields with nothing; this behavior +# is marked for deprecation in the existing library. message_test.MessageTest.testExtendFloatWithNothing_proto2.__unittest_expecting_failure__ = True message_test.MessageTest.testExtendFloatWithNothing_proto3.__unittest_expecting_failure__ = True message_test.MessageTest.testExtendInt32WithNothing_proto2.__unittest_expecting_failure__ = True message_test.MessageTest.testExtendInt32WithNothing_proto3.__unittest_expecting_failure__ = True message_test.MessageTest.testExtendStringWithNothing_proto2.__unittest_expecting_failure__ = True message_test.MessageTest.testExtendStringWithNothing_proto3.__unittest_expecting_failure__ = True + +# Our float printing suffers from not having dtoa(). message_test.MessageTest.testFloatPrinting_proto2.__unittest_expecting_failure__ = True message_test.MessageTest.testFloatPrinting_proto3.__unittest_expecting_failure__ = True message_test.MessageTest.testHighPrecisionDoublePrinting_proto2.__unittest_expecting_failure__ = True message_test.MessageTest.testHighPrecisionDoublePrinting_proto3.__unittest_expecting_failure__ = True -message_test.MessageTest.testInsertRepeatedCompositeField_proto2.__unittest_expecting_failure__ = True -message_test.MessageTest.testInsertRepeatedCompositeField_proto3.__unittest_expecting_failure__ = True -message_test.Proto2Test.testExtensionsErrors.__unittest_expecting_failure__ = True -message_test.Proto2Test.testPythonicInit.__unittest_expecting_failure__ = True -message_test.Proto2Test.test_documentation.__unittest_expecting_failure__ = True + +# For these tests we are throwing the correct error, only the text of the error +# message is a mismatch. For technical reasons around the limited API, matching +# the existing error message exactly is not feasible. message_test.Proto3Test.testCopyFromBadType.__unittest_expecting_failure__ = True -message_test.Proto3Test.testMapDeterministicSerialization.__unittest_expecting_failure__ = True message_test.Proto3Test.testMergeFromBadType.__unittest_expecting_failure__ = True -message_test.OversizeProtosTest.testAssertOversizeProto.__unittest_expecting_failure__ = True -message_test.OversizeProtosTest.testSucceedOversizeProto.__unittest_expecting_failure__ = True + +message_test.Proto2Test.testPythonicInit.__unittest_expecting_failure__ = True +message_test.Proto2Test.test_documentation.__unittest_expecting_failure__ = True if __name__ == '__main__': unittest.main(module=message_test, verbosity=2) diff --git a/python/pb_unit_tests/well_known_types_test_wrapper.py b/python/pb_unit_tests/well_known_types_test_wrapper.py index 22a35d5534..6ded8d4525 100644 --- a/python/pb_unit_tests/well_known_types_test_wrapper.py +++ b/python/pb_unit_tests/well_known_types_test_wrapper.py @@ -26,7 +26,5 @@ from google.protobuf.internal import well_known_types_test import unittest -well_known_types_test.AnyTest.testPackDeterministic.__unittest_expecting_failure__ = True - if __name__ == '__main__': unittest.main(module=well_known_types_test, verbosity=2) diff --git a/python/protobuf.c b/python/protobuf.c index c57728114c..8eb49a46e4 100644 --- a/python/protobuf.c +++ b/python/protobuf.c @@ -40,11 +40,28 @@ static void PyUpb_ModuleDealloc(void *module) { PyUpb_WeakMap_Free(s->obj_cache); } +PyObject* PyUpb_SetAllowOversizeProtos(PyObject* m, PyObject* arg) { + if (!arg || !PyBool_Check(arg)) { + PyErr_SetString(PyExc_TypeError, + "Argument to SetAllowOversizeProtos must be boolean"); + return NULL; + } + PyUpb_ModuleState* state = PyUpb_ModuleState_Get(); + state->allow_oversize_protos = PyObject_IsTrue(arg); + Py_INCREF(arg); + return arg; +} + +static PyMethodDef PyUpb_ModuleMethods[] = { + {"SetAllowOversizeProtos", PyUpb_SetAllowOversizeProtos, METH_O, + "Enable/disable oversize proto parsing."}, + {NULL, NULL}}; + static struct PyModuleDef module_def = {PyModuleDef_HEAD_INIT, PYUPB_MODULE_NAME, "Protobuf Module", sizeof(PyUpb_ModuleState), - NULL, // m_methods + PyUpb_ModuleMethods, // m_methods NULL, // m_slots NULL, // m_traverse NULL, // m_clear @@ -302,6 +319,7 @@ PyMODINIT_FUNC PyInit__message(void) { PyUpb_ModuleState *state = PyUpb_ModuleState_GetFromModule(m); + state->allow_oversize_protos = false; state->wkt_bases = NULL; state->obj_cache = PyUpb_WeakMap_New(); diff --git a/python/protobuf.h b/python/protobuf.h index fa9785a15c..75c8e019a1 100644 --- a/python/protobuf.h +++ b/python/protobuf.h @@ -83,6 +83,7 @@ typedef struct { PyTypeObject *message_meta_type; // From protobuf.c + bool allow_oversize_protos; PyObject *wkt_bases; PyTypeObject *arena_type; PyUpb_WeakMap *obj_cache; diff --git a/upb/msg.c b/upb/msg.c index 3555cd26a3..ee7fe4d2fa 100644 --- a/upb/msg.c +++ b/upb/msg.c @@ -252,34 +252,36 @@ static void _upb_mapsorter_getkeys(const void *_a, const void *_b, void *a_key, _upb_map_fromkey(b_tabkey, b_key, size); } +#define UPB_COMPARE_INTEGERS(a, b) ((a) < (b) ? -1 : ((a) == (b) ? 0 : 1)) + static int _upb_mapsorter_cmpi64(const void *_a, const void *_b) { int64_t a, b; _upb_mapsorter_getkeys(_a, _b, &a, &b, 8); - return a - b; + return UPB_COMPARE_INTEGERS(a, b); } static int _upb_mapsorter_cmpu64(const void *_a, const void *_b) { uint64_t a, b; _upb_mapsorter_getkeys(_a, _b, &a, &b, 8); - return a - b; + return UPB_COMPARE_INTEGERS(a, b); } static int _upb_mapsorter_cmpi32(const void *_a, const void *_b) { int32_t a, b; _upb_mapsorter_getkeys(_a, _b, &a, &b, 4); - return a - b; + return UPB_COMPARE_INTEGERS(a, b); } static int _upb_mapsorter_cmpu32(const void *_a, const void *_b) { uint32_t a, b; _upb_mapsorter_getkeys(_a, _b, &a, &b, 4); - return a - b; + return UPB_COMPARE_INTEGERS(a, b); } static int _upb_mapsorter_cmpbool(const void *_a, const void *_b) { bool a, b; _upb_mapsorter_getkeys(_a, _b, &a, &b, 1); - return a - b; + return UPB_COMPARE_INTEGERS(a, b); } static int _upb_mapsorter_cmpstr(const void *_a, const void *_b) { @@ -287,10 +289,12 @@ static int _upb_mapsorter_cmpstr(const void *_a, const void *_b) { _upb_mapsorter_getkeys(_a, _b, &a, &b, UPB_MAPTYPE_STRING); size_t common_size = UPB_MIN(a.size, b.size); int cmp = memcmp(a.data, b.data, common_size); - if (cmp) return cmp; - return a.size - b.size; + if (cmp) return -cmp; + return UPB_COMPARE_INTEGERS(a.size, b.size); } +#undef UPB_COMPARE_INTEGERS + bool _upb_mapsorter_pushmap(_upb_mapsorter *s, upb_descriptortype_t key_type, const upb_map *map, _upb_sortedmap *sorted) { int map_size = _upb_map_size(map); diff --git a/upb/text_encode.c b/upb/text_encode.c index 12840c0fae..09d0f21f86 100644 --- a/upb/text_encode.c +++ b/upb/text_encode.c @@ -152,9 +152,24 @@ static void txtenc_string(txtenc *e, upb_strview str, bool bytes) { static void txtenc_field(txtenc *e, upb_msgval val, const upb_fielddef *f) { txtenc_indent(e); - txtenc_printf(e, "%s: ", upb_fielddef_name(f)); + upb_fieldtype_t type = upb_fielddef_type(f); + const char* name = upb_fielddef_name(f); - switch (upb_fielddef_type(f)) { + if (type == UPB_TYPE_MESSAGE) { + txtenc_printf(e, "%s {", name); + txtenc_endfield(e); + e->indent_depth++; + txtenc_msg(e, val.msg_val, upb_fielddef_msgsubdef(f)); + e->indent_depth--; + txtenc_indent(e); + txtenc_putstr(e, "}"); + txtenc_endfield(e); + return; + } + + txtenc_printf(e, "%s: ", name); + + switch (type) { case UPB_TYPE_BOOL: txtenc_putstr(e, val.bool_val ? "true" : "false"); break; @@ -185,15 +200,8 @@ static void txtenc_field(txtenc *e, upb_msgval val, const upb_fielddef *f) { case UPB_TYPE_ENUM: txtenc_enum(val.int32_val, f, e); break; - case UPB_TYPE_MESSAGE: - txtenc_putstr(e, "{"); - txtenc_endfield(e); - e->indent_depth++; - txtenc_msg(e, val.msg_val, upb_fielddef_msgsubdef(f)); - e->indent_depth--; - txtenc_indent(e); - txtenc_putstr(e, "}"); - break; + default: + UPB_UNREACHABLE(); } txtenc_endfield(e);