diff --git a/.bazelrc b/.bazelrc index b890fcd138..a9a31d2421 100644 --- a/.bazelrc +++ b/.bazelrc @@ -14,7 +14,7 @@ build:asan --copt=-fsanitize=address --linkopt=-fsanitize=address # know of an easy way to do that. # # We also have to disable pymalloc to avoid triggering Valgrind. -build:valgrind --run_under='valgrind --leak-check=full --track-origins=yes --trace-children=yes --show-possibly-lost=no --errors-for-leak-kinds=definite --error-exitcode=1' --action_env=PYTHONMALLOC=malloc +build:valgrind --run_under='valgrind --leak-check=full --track-origins=yes --trace-children=yes --show-leak-kinds=all --error-exitcode=1 --num-callers=500 ' --action_env=PYTHONMALLOC=malloc build:ubsan --copt=-fsanitize=undefined --linkopt=-fsanitize=undefined --action_env=UBSAN_OPTIONS=halt_on_error=1:print_stacktrace=1 # Workaround for the fact that Bazel links with $CC, not $CXX diff --git a/bazel/protobuf.patch b/bazel/protobuf.patch index 04ed074886..4b3e93813c 100644 --- a/bazel/protobuf.patch +++ b/bazel/protobuf.patch @@ -71,3 +71,19 @@ raise RuntimeError( 'Could not find golden files. This test must be run from within the ' 'protobuf source package so that it can read test data files from the ' + +--- python/google/protobuf/internal/testing_refleaks.py ++++ python/google/protobuf/internal/testing_refleaks.py +@@ -67,6 +67,12 @@ class ReferenceLeakCheckerMixin(object): + NB_RUNS = 3 + + def run(self, result=None): ++ testMethod = getattr(self, self._testMethodName) ++ expecting_failure_method = getattr(testMethod, "__unittest_expecting_failure__", False) ++ expecting_failure_class = getattr(self, "__unittest_expecting_failure__", False) ++ if expecting_failure_class or expecting_failure_method: ++ return ++ + # python_message.py registers all Message classes to some pickle global + # registry, which makes the classes immortal. + # We save a copy of this registry, and reset it before we could references. diff --git a/python/descriptor.c b/python/descriptor.c index 34fdddba87..704e47429c 100644 --- a/python/descriptor.c +++ b/python/descriptor.c @@ -169,7 +169,9 @@ static PyObject* PyUpb_DescriptorBase_CopyToProto(PyObject* _self, PyObject* serialized = PyUpb_DescriptorBase_GetSerializedProto(_self, func, layout); if (!serialized) return NULL; - return PyUpb_CMessage_MergeFromString(py_proto, serialized); + PyObject* ret = PyUpb_CMessage_MergeFromString(py_proto, serialized); + Py_DECREF(serialized); + return ret; } static void PyUpb_DescriptorBase_Dealloc(PyUpb_DescriptorBase* base) { diff --git a/python/descriptor_pool.c b/python/descriptor_pool.c index 95e14feaaa..1b0d3259fa 100644 --- a/python/descriptor_pool.c +++ b/python/descriptor_pool.c @@ -135,7 +135,10 @@ static bool PyUpb_DescriptorPool_TryLoadFileProto(PyUpb_DescriptorPool* self, return false; } if (proto == Py_None) return true; - return PyUpb_DescriptorPool_DoAdd((PyObject*)self, proto) != NULL; + PyObject* ret = PyUpb_DescriptorPool_DoAdd((PyObject*)self, proto); + bool ok = ret != NULL; + Py_XDECREF(ret); + return ok; } static bool PyUpb_DescriptorPool_TryLoadSymbol(PyUpb_DescriptorPool* self, @@ -246,13 +249,14 @@ done: static PyObject* PyUpb_DescriptorPool_DoAdd(PyObject* _self, PyObject* file_desc) { - PyObject* subargs = PyTuple_New(0); if (!PyUpb_CMessage_Check(file_desc)) return NULL; const upb_MessageDef* m = PyUpb_CMessage_GetMsgdef(file_desc); const char* file_proto_name = "google.protobuf.FileDescriptorProto"; if (strcmp(upb_MessageDef_FullName(m), file_proto_name) != 0) { return PyErr_Format(PyExc_TypeError, "Can only add FileDescriptorProto"); } + PyObject* subargs = PyTuple_New(0); + if (!subargs) return NULL; PyObject* serialized = PyUpb_CMessage_SerializeToString(file_desc, subargs, NULL); Py_DECREF(subargs); diff --git a/python/extension_dict.c b/python/extension_dict.c index 277da0e161..6139519858 100644 --- a/python/extension_dict.c +++ b/python/extension_dict.c @@ -184,7 +184,7 @@ static PyObject* PyUpb_ExtensionIterator_New(PyObject* _ext_dict) { static void PyUpb_ExtensionIterator_Dealloc(void* _self) { PyUpb_ExtensionIterator* self = (PyUpb_ExtensionIterator*)_self; - Py_DECREF(&self->msg); + Py_DECREF(self->msg); PyUpb_Dealloc(_self); } diff --git a/python/message.c b/python/message.c index 7ef2f9219b..e2ac314ab9 100644 --- a/python/message.c +++ b/python/message.c @@ -55,6 +55,7 @@ typedef struct { // For each member, we note the equivalent expression that we could use in the // full (non-limited) API. newfunc type_new; // PyTypeObject.tp_new + destructor type_dealloc; // PyTypeObject.tp_dealloc getattrofunc type_getattro; // PyTypeObject.tp_getattro setattrofunc type_setattro; // PyTypeObject.tp_setattro size_t type_basicsize; // sizeof(PyHeapTypeObject) @@ -69,6 +70,36 @@ typedef struct { // A global containing the values for this process. PyUpb_CPythonBits cpython_bits; +destructor upb_Pre310_PyType_GetDeallocSlot(PyTypeObject* type_subclass) { + // This is a bit desperate. We need type_dealloc(), but PyType_GetSlot(type, + // Py_tp_dealloc) will return subtype_dealloc(). There appears to be no way + // whatsoever to fetch type_dealloc() through the limited API until Python + // 3.10. + // + // To work around this so we attempt to find it by looking for the offset of + // tp_dealloc in PyTypeObject, then memcpy() it directly. This should always + // work in practice. + // + // Starting with Python 3.10 on you can call PyType_GetSlot() on non-heap + // types. We will be able to replace all this hack with just: + // + // PyType_GetSlot(&PyType_Type, Py_tp_dealloc) + // + destructor subtype_dealloc = PyType_GetSlot(type_subclass, Py_tp_dealloc); + for (size_t i = 0; i < 2000; i += sizeof(uintptr_t)) { + destructor maybe_subtype_dealloc; + memcpy(&maybe_subtype_dealloc, (char*)type_subclass + i, + sizeof(destructor)); + if (maybe_subtype_dealloc == subtype_dealloc) { + destructor type_dealloc; + memcpy(&type_dealloc, (char*)&PyType_Type + i, sizeof(destructor)); + return type_dealloc; + } + } + assert(false); + return NULL; +} + static bool PyUpb_CPythonBits_Init(PyUpb_CPythonBits* bits) { PyObject* bases = NULL; PyTypeObject* type = NULL; @@ -97,6 +128,7 @@ static bool PyUpb_CPythonBits_Init(PyUpb_CPythonBits* bits) { if (!type) goto err; bits->type_new = PyType_GetSlot(type, Py_tp_new); + bits->type_dealloc = upb_Pre310_PyType_GetDeallocSlot(type); bits->type_getattro = PyType_GetSlot(type, Py_tp_getattro); bits->type_setattro = PyType_GetSlot(type, Py_tp_setattro); @@ -105,10 +137,14 @@ static bool PyUpb_CPythonBits_Init(PyUpb_CPythonBits* bits) { bits->type_basicsize = PyLong_AsLong(size); if (bits->type_basicsize == -1) goto err; - assert(bits->type_new && bits->type_getattro && bits->type_setattro); + assert(bits->type_new); + assert(bits->type_dealloc); + assert(bits->type_getattro); + assert(bits->type_setattro); #ifndef Py_LIMITED_API assert(bits->type_new == PyType_Type.tp_new); + assert(bits->type_dealloc == PyType_Type.tp_dealloc); assert(bits->type_getattro == PyType_Type.tp_getattro); assert(bits->type_setattro == PyType_Type.tp_setattro); assert(bits->type_basicsize == sizeof(PyHeapTypeObject)); @@ -249,7 +285,12 @@ static bool PyUpb_CMessage_LookupName(PyUpb_CMessage* self, PyObject* py_name, } else if (PyBytes_Check(py_name)) { PyBytes_AsStringAndSize(py_name, (char**)&name, &size); } - if (!name) return NULL; + if (!name) { + PyErr_Format(exc_type, + "Expected a field name, but got non-string argument %S.", + py_name); + return false; + } const upb_MessageDef* msgdef = _PyUpb_CMessage_GetMsgdef(self); if (!upb_MessageDef_FindByNameWithSize(msgdef, name, size, f, o)) { @@ -344,11 +385,18 @@ static bool PyUpb_CMessage_InitMapAttribute(PyObject* _self, PyObject* name, static bool PyUpb_CMessage_InitRepeatedAttribute(PyObject* _self, PyObject* name, PyObject* value) { + bool ok = false; + PyObject* tmp = NULL; PyObject* repeated = PyUpb_CMessage_GetAttr(_self, name); - PyObject* tmp = PyUpb_RepeatedContainer_Extend(repeated, value); - if (!tmp) return false; - Py_DECREF(tmp); - return true; + if (!repeated) goto err; + tmp = PyUpb_RepeatedContainer_Extend(repeated, value); + if (!tmp) goto err; + ok = true; + +err: + Py_XDECREF(repeated); + Py_XDECREF(tmp); + return ok; } static bool PyUpb_CMessage_InitMessageAttribute(PyObject* _self, PyObject* name, @@ -956,7 +1004,6 @@ static PyObject* PyUpb_CMessage_IsInitialized(PyObject* _self, PyObject* args) { if (!PyArg_ParseTuple(args, "|O", &errors)) { return NULL; } - upb_msg* msg = PyUpb_CMessage_GetIfReified(_self); if (errors) { // We need to collect a list of unset required fields and append it to // `errors`. @@ -964,6 +1011,7 @@ static PyObject* PyUpb_CMessage_IsInitialized(PyObject* _self, PyObject* args) { } else { // We just need to return a boolean "true" or "false" for whether all // required fields are set. + upb_msg* msg = PyUpb_CMessage_GetIfReified(_self); const upb_MessageDef* m = PyUpb_CMessage_GetMsgdef(_self); const upb_DefPool* symtab = upb_FileDef_Pool(upb_MessageDef_File(m)); bool initialized = !upb_util_HasUnsetRequired(msg, m, symtab, NULL); @@ -982,7 +1030,7 @@ static PyObject* PyUpb_CMessage_ListFieldsItemKey(PyObject* self, static bool PyUpb_CMessage_SortFieldList(PyObject* list) { PyUpb_ModuleState* state = PyUpb_ModuleState_Get(); bool ok = false; - PyObject* args = PyList_New(0); + PyObject* args = PyTuple_New(0); PyObject* kwargs = PyDict_New(); PyObject* method = PyObject_GetAttrString(list, "sort"); PyObject* call_result = NULL; @@ -1262,7 +1310,9 @@ static PyObject* PyUpb_CMessage_FindInitializationErrors(PyObject* _self, need = upb_FieldPath_ToText(&fields, buf, size); assert(size > need); } - PyList_Append(ret, PyUnicode_FromString(buf)); + PyObject* str = PyUnicode_FromString(buf); + PyList_Append(ret, str); + Py_DECREF(str); } free(buf); } @@ -1579,9 +1629,10 @@ PyObject* PyUpb_MessageMeta_DoCreateClass(PyObject* py_descriptor, assert(!PyUpb_ObjCache_Get(upb_MessageDef_MiniTable(msgdef))); PyObject* slots = PyTuple_New(0); - if (PyDict_SetItemString(dict, "__slots__", slots) < 0) { - return NULL; - } + if (!slots) return NULL; + int status = PyDict_SetItemString(dict, "__slots__", slots); + Py_DECREF(slots); + if (status < 0) return NULL; // Bases are either: // (CMessage, Message) # for regular messages @@ -1607,7 +1658,7 @@ PyObject* PyUpb_MessageMeta_DoCreateClass(PyObject* py_descriptor, meta->layout = upb_MessageDef_MiniTable(msgdef); Py_INCREF(meta->py_message_descriptor); - PyUpb_ObjCache_Add(upb_MessageDef_MiniTable(msgdef), ret); + PyUpb_ObjCache_Add(meta->layout, ret); return ret; } @@ -1653,7 +1704,9 @@ static void PyUpb_MessageMeta_Dealloc(PyObject* self) { PyUpb_MessageMeta* meta = PyUpb_GetMessageMeta(self); PyUpb_ObjCache_Delete(meta->layout); Py_DECREF(meta->py_message_descriptor); - PyUpb_Dealloc(self); + PyTypeObject* tp = Py_TYPE(self); + cpython_bits.type_dealloc(self); + Py_DECREF(tp); } void PyUpb_MessageMeta_AddFieldNumber(PyObject* self, const upb_FieldDef* f) { diff --git a/python/pb_unit_tests/descriptor_pool_test_wrapper.py b/python/pb_unit_tests/descriptor_pool_test_wrapper.py index 3fd5d4e78c..f97001cffa 100644 --- a/python/pb_unit_tests/descriptor_pool_test_wrapper.py +++ b/python/pb_unit_tests/descriptor_pool_test_wrapper.py @@ -27,8 +27,6 @@ from google.protobuf.internal import descriptor_pool_test import unittest import copy -print(unittest) - descriptor_pool_test.AddDescriptorTest.testAddTypeError.__unittest_expecting_failure__ = True descriptor_pool_test.AddDescriptorTest.testEnum.__unittest_expecting_failure__ = True descriptor_pool_test.AddDescriptorTest.testFile.__unittest_expecting_failure__ = True diff --git a/python/protobuf.c b/python/protobuf.c index af0777e0a3..4debeb1342 100644 --- a/python/protobuf.c +++ b/python/protobuf.c @@ -335,7 +335,7 @@ PyMODINIT_FUNC PyInit__message(void) { // Temporary: an cookie we can use in the tests to ensure we are testing upb // and not another protobuf library on the system. - PyModule_AddObject(m, "_IS_UPB", Py_True); + PyModule_AddIntConstant(m, "_IS_UPB", 1); return m; } diff --git a/python/protobuf.h b/python/protobuf.h index 312a8773fa..da8ace7baa 100644 --- a/python/protobuf.h +++ b/python/protobuf.h @@ -192,11 +192,10 @@ PyObject* PyUpb_Forbidden_New(PyObject* cls, PyObject* args, PyObject* kwds); // function can work for any type. static inline void PyUpb_Dealloc(void* self) { PyTypeObject* tp = Py_TYPE(self); + assert(PyType_GetFlags(tp) & Py_TPFLAGS_HEAPTYPE); freefunc tp_free = PyType_GetSlot(tp, Py_tp_free); tp_free(self); - if (PyType_GetFlags(tp) & Py_TPFLAGS_HEAPTYPE) { - Py_DECREF(tp); - } + Py_DECREF(tp); } // Equivalent to the Py_NewRef() function introduced in Python 3.10. If/when we diff --git a/python/repeated.c b/python/repeated.c index 7faa888afb..c40bb1fc95 100644 --- a/python/repeated.c +++ b/python/repeated.c @@ -209,10 +209,12 @@ PyObject* PyUpb_RepeatedContainer_DeepCopy(PyObject* _self, PyObject* value) { clone->ptr.arr = upb_Array_New(PyUpb_Arena_Get(clone->arena), upb_FieldDef_CType(f)); PyUpb_ObjCache_Add(clone->ptr.arr, (PyObject*)clone); - if (!PyUpb_RepeatedContainer_MergeFrom((PyObject*)clone, _self)) { + PyObject* result = PyUpb_RepeatedContainer_MergeFrom((PyObject*)clone, _self); + if (!result) { Py_DECREF(clone); return NULL; - } + } + Py_DECREF(result); return (PyObject*)clone; }