From e32d0948e7b2799037be0e4364816249119acc3f Mon Sep 17 00:00:00 2001 From: Protobuf Team Bot Date: Mon, 6 Nov 2023 06:43:06 -0800 Subject: [PATCH] Properly untrack Python GC objects during deallocation. Add PyObject_GC_UnTrack() in deallocation functions for Python types that have PyTPFLAGS_HAVE_GC set, either explicitly or by inheriting from a type with GC set. Not untracking before clearing instance data introduces potential race conditions (if GC happens to run between the partial clearing and the actual deallocation) and produces a warning under Python 3.11. (The warning then triggered an assertion failure, which only showed up when building in Py_DEBUG mode; this therefor also fixes that assertion failure.) PiperOrigin-RevId: 579827001 --- python/descriptor.c | 29 ++++++++++++++++++++++++----- python/descriptor_pool.c | 1 + python/message.c | 12 +++++++++++- 3 files changed, 36 insertions(+), 6 deletions(-) diff --git a/python/descriptor.c b/python/descriptor.c index 023e943605..42e4bfea1e 100644 --- a/python/descriptor.c +++ b/python/descriptor.c @@ -197,10 +197,25 @@ static PyObject* PyUpb_DescriptorBase_CopyToProto(PyObject* _self, } static void PyUpb_DescriptorBase_Dealloc(PyUpb_DescriptorBase* base) { + // This deallocator can be called on different types (which, despite + // 'Base' in the name of one of them, do not inherit from each other). + // Some of these types are GC types (they have Py_TPFLAGS_HAVE_GC set), + // which means Python's GC can visit them (via tp_visit and/or tp_clear + // methods) at any time. This also means we *must* stop GC from tracking + // instances of them before we start destructing the object. In Python + // 3.11, failing to do so would raise a runtime warning. + if (PyType_HasFeature(Py_TYPE(base), Py_TPFLAGS_HAVE_GC)) { + PyObject_GC_UnTrack(base); + } PyUpb_ObjCache_Delete(base->def); - Py_XDECREF(base->message_meta); - Py_DECREF(base->pool); - Py_XDECREF(base->options); + // In addition to being visited by GC, instances can also (potentially) be + // accessed whenever arbitrary code is executed. Destructors can execute + // arbitrary code, so any struct members we DECREF should be set to NULL + // or a new value *before* calling Py_DECREF on them. The Py_CLEAR macro + // (and Py_SETREF in Python 3.8+) takes care to do this safely. + Py_CLEAR(base->message_meta); + Py_CLEAR(base->pool); + Py_CLEAR(base->options); PyUpb_Dealloc(base); } @@ -256,9 +271,13 @@ err: void PyUpb_Descriptor_SetClass(PyObject* py_descriptor, PyObject* meta) { PyUpb_DescriptorBase* base = (PyUpb_DescriptorBase*)py_descriptor; - Py_XDECREF(base->message_meta); - base->message_meta = meta; Py_INCREF(meta); + // Py_SETREF replaces strong references without an intermediate invalid + // object state, which code executed by base->message_meta's destructor + // might see, but it's Python 3.8+. + PyObject* tmp = base->message_meta; + base->message_meta = meta; + Py_XDECREF(tmp); } // The LookupNested*() functions provide name lookup for entities nested inside diff --git a/python/descriptor_pool.c b/python/descriptor_pool.c index ee4167754c..00612f8bdc 100644 --- a/python/descriptor_pool.c +++ b/python/descriptor_pool.c @@ -99,6 +99,7 @@ PyObject* PyUpb_DescriptorPool_Get(const upb_DefPool* symtab) { } static void PyUpb_DescriptorPool_Dealloc(PyUpb_DescriptorPool* self) { + PyObject_GC_UnTrack(self); PyUpb_DescriptorPool_Clear(self); upb_DefPool_Free(self->symtab); PyUpb_ObjCache_Delete(self->symtab); diff --git a/python/message.c b/python/message.c index 0777e7ed5f..3ed6514fe6 100644 --- a/python/message.c +++ b/python/message.c @@ -1850,7 +1850,15 @@ static PyObject* PyUpb_MessageMeta_New(PyTypeObject* type, PyObject* args, static void PyUpb_MessageMeta_Dealloc(PyObject* self) { PyUpb_MessageMeta* meta = PyUpb_GetMessageMeta(self); PyUpb_ObjCache_Delete(meta->layout); - Py_DECREF(meta->py_message_descriptor); + // The MessageMeta type is a GC type, which means we should untrack the + // object before invalidating internal state (so that code executed by the + // GC doesn't see the invalid state). Unfortunately since we're calling + // cpython_bits.type_dealloc, which also untracks the object, we can't. + // Instead just make sure the internal state remains reasonable by using + // Py_CLEAR(), which sets the struct member to NULL. The tp_traverse and + // tp_clear methods, which are called by Python's GC, already allow for it + // to be NULL. + Py_CLEAR(meta->py_message_descriptor); PyTypeObject* tp = Py_TYPE(self); cpython_bits.type_dealloc(self); Py_DECREF(tp); @@ -1948,6 +1956,8 @@ static int PyUpb_MessageMeta_Traverse(PyObject* self, visitproc visit, } static int PyUpb_MessageMeta_Clear(PyObject* self, visitproc visit, void* arg) { + PyUpb_MessageMeta* meta = PyUpb_GetMessageMeta(self); + Py_CLEAR(meta->py_message_descriptor); return cpython_bits.type_clear(self); }