From f96b569eb3792de1a4743144c528034f44ceff4e Mon Sep 17 00:00:00 2001 From: Joshua Haberman Date: Thu, 7 Jul 2022 06:02:14 -0700 Subject: [PATCH] Fixes a few bugs in the descriptor mapping containers. 1. `for x in mapping` now yields keys rather than values, to match Python conventions and the behavior of the old library. 2. Lookup operations now correctly reject unhashable types as map keys. 3. We implement `repr()` to use the same format as `dict` PiperOrigin-RevId: 459491734 --- python/descriptor_containers.c | 166 ++++++++++++++---- .../pb_unit_tests/descriptor_test_wrapper.py | 4 + python/protobuf.h | 3 +- 3 files changed, 133 insertions(+), 40 deletions(-) diff --git a/python/descriptor_containers.c b/python/descriptor_containers.c index ac7158520a..0f2d4ff023 100644 --- a/python/descriptor_containers.c +++ b/python/descriptor_containers.c @@ -25,41 +25,54 @@ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ -#include "descriptor_containers.h" +#include "python/descriptor_containers.h" -#include "descriptor.h" -#include "protobuf.h" +#include "python/descriptor.h" +#include "python/protobuf.h" #include "upb/def.h" +// Implements __repr__ as str(dict(self)). +static PyObject* PyUpb_DescriptorMap_Repr(PyObject* _self) { + PyObject* dict = PyDict_New(); + PyObject* ret = NULL; + if (!dict) goto err; + if (PyDict_Merge(dict, _self, 1) != 0) goto err; + ret = PyObject_Str(dict); + +err: + Py_XDECREF(dict); + return ret; +} + // ----------------------------------------------------------------------------- -// DescriptorIterator +// ByNameIterator // ----------------------------------------------------------------------------- typedef struct { PyObject_HEAD; - const PyUpb_GenericSequence_Funcs* funcs; + const PyUpb_ByNameMap_Funcs* funcs; const void* parent; // upb_MessageDef*, upb_DefPool*, etc. PyObject* parent_obj; // Python object that keeps parent alive, we own a ref. int index; // Current iterator index. -} PyUpb_DescriptorIterator; +} PyUpb_ByNameIterator; -PyUpb_DescriptorIterator* PyUpb_DescriptorIterator_Self(PyObject* obj) { - assert(Py_TYPE(obj) == PyUpb_ModuleState_Get()->descriptor_iterator_type); - return (PyUpb_DescriptorIterator*)obj; +static PyUpb_ByNameIterator* PyUpb_ByNameIterator_Self(PyObject* obj) { + assert(Py_TYPE(obj) == PyUpb_ModuleState_Get()->by_name_iterator_type); + return (PyUpb_ByNameIterator*)obj; } -static void PyUpb_DescriptorIterator_Dealloc(PyObject* _self) { - PyUpb_DescriptorIterator* self = PyUpb_DescriptorIterator_Self(_self); +static void PyUpb_ByNameIterator_Dealloc(PyObject* _self) { + PyUpb_ByNameIterator* self = PyUpb_ByNameIterator_Self(_self); Py_DECREF(self->parent_obj); PyUpb_Dealloc(self); } -PyObject* PyUpb_DescriptorIterator_New(const PyUpb_GenericSequence_Funcs* funcs, - const void* parent, - PyObject* parent_obj) { +static PyObject* PyUpb_ByNameIterator_New(const PyUpb_ByNameMap_Funcs* funcs, + const void* parent, + PyObject* parent_obj) { PyUpb_ModuleState* s = PyUpb_ModuleState_Get(); - PyUpb_DescriptorIterator* iter = - (void*)PyType_GenericAlloc(s->descriptor_iterator_type, 0); + PyUpb_ByNameIterator* iter = + (void*)PyType_GenericAlloc(s->by_name_iterator_type, 0); iter->funcs = funcs; iter->parent = parent; iter->parent_obj = parent_obj; @@ -68,27 +81,87 @@ PyObject* PyUpb_DescriptorIterator_New(const PyUpb_GenericSequence_Funcs* funcs, return &iter->ob_base; } -PyObject* PyUpb_DescriptorIterator_IterNext(PyObject* _self) { - PyUpb_DescriptorIterator* self = PyUpb_DescriptorIterator_Self(_self); - int size = self->funcs->get_elem_count(self->parent); +static PyObject* PyUpb_ByNameIterator_IterNext(PyObject* _self) { + PyUpb_ByNameIterator* self = PyUpb_ByNameIterator_Self(_self); + int size = self->funcs->base.get_elem_count(self->parent); if (self->index >= size) return NULL; - const void* elem = self->funcs->index(self->parent, self->index); + const void* elem = self->funcs->base.index(self->parent, self->index); self->index++; - return self->funcs->get_elem_wrapper(elem); + return PyUnicode_FromString(self->funcs->get_elem_name(elem)); } -static PyType_Slot PyUpb_DescriptorIterator_Slots[] = { - {Py_tp_dealloc, PyUpb_DescriptorIterator_Dealloc}, +static PyType_Slot PyUpb_ByNameIterator_Slots[] = { + {Py_tp_dealloc, PyUpb_ByNameIterator_Dealloc}, {Py_tp_iter, PyObject_SelfIter}, - {Py_tp_iternext, PyUpb_DescriptorIterator_IterNext}, + {Py_tp_iternext, PyUpb_ByNameIterator_IterNext}, {0, NULL}}; -static PyType_Spec PyUpb_DescriptorIterator_Spec = { - PYUPB_MODULE_NAME ".DescriptorIterator", // tp_name - sizeof(PyUpb_DescriptorIterator), // tp_basicsize - 0, // tp_itemsize - Py_TPFLAGS_DEFAULT, // tp_flags - PyUpb_DescriptorIterator_Slots, +static PyType_Spec PyUpb_ByNameIterator_Spec = { + PYUPB_MODULE_NAME "._ByNameIterator", // tp_name + sizeof(PyUpb_ByNameIterator), // tp_basicsize + 0, // tp_itemsize + Py_TPFLAGS_DEFAULT, // tp_flags + PyUpb_ByNameIterator_Slots, +}; + +// ----------------------------------------------------------------------------- +// ByNumberIterator +// ----------------------------------------------------------------------------- + +typedef struct { + PyObject_HEAD; + const PyUpb_ByNumberMap_Funcs* funcs; + const void* parent; // upb_MessageDef*, upb_DefPool*, etc. + PyObject* parent_obj; // Python object that keeps parent alive, we own a ref. + int index; // Current iterator index. +} PyUpb_ByNumberIterator; + +static PyUpb_ByNumberIterator* PyUpb_ByNumberIterator_Self(PyObject* obj) { + assert(Py_TYPE(obj) == PyUpb_ModuleState_Get()->by_number_iterator_type); + return (PyUpb_ByNumberIterator*)obj; +} + +static void PyUpb_ByNumberIterator_Dealloc(PyObject* _self) { + PyUpb_ByNumberIterator* self = PyUpb_ByNumberIterator_Self(_self); + Py_DECREF(self->parent_obj); + PyUpb_Dealloc(self); +} + +static PyObject* PyUpb_ByNumberIterator_New( + const PyUpb_ByNumberMap_Funcs* funcs, const void* parent, + PyObject* parent_obj) { + PyUpb_ModuleState* s = PyUpb_ModuleState_Get(); + PyUpb_ByNumberIterator* iter = + (void*)PyType_GenericAlloc(s->by_number_iterator_type, 0); + iter->funcs = funcs; + iter->parent = parent; + iter->parent_obj = parent_obj; + iter->index = 0; + Py_INCREF(iter->parent_obj); + return &iter->ob_base; +} + +static PyObject* PyUpb_ByNumberIterator_IterNext(PyObject* _self) { + PyUpb_ByNumberIterator* self = PyUpb_ByNumberIterator_Self(_self); + int size = self->funcs->base.get_elem_count(self->parent); + if (self->index >= size) return NULL; + const void* elem = self->funcs->base.index(self->parent, self->index); + self->index++; + return PyLong_FromLong(self->funcs->get_elem_num(elem)); +} + +static PyType_Slot PyUpb_ByNumberIterator_Slots[] = { + {Py_tp_dealloc, PyUpb_ByNumberIterator_Dealloc}, + {Py_tp_iter, PyObject_SelfIter}, + {Py_tp_iternext, PyUpb_ByNumberIterator_IterNext}, + {0, NULL}}; + +static PyType_Spec PyUpb_ByNumberIterator_Spec = { + PYUPB_MODULE_NAME "._ByNumberIterator", // tp_name + sizeof(PyUpb_ByNumberIterator), // tp_basicsize + 0, // tp_itemsize + Py_TPFLAGS_DEFAULT, // tp_flags + PyUpb_ByNumberIterator_Slots, }; // ----------------------------------------------------------------------------- @@ -306,6 +379,8 @@ static PyObject* PyUpb_ByNameMap_Subscript(PyObject* _self, PyObject* key) { const char* name = PyUpb_GetStrData(key); const void* elem = name ? self->funcs->lookup(self->parent, name) : NULL; + if (!name && PyObject_Hash(key) == -1) return NULL; + if (elem) { return self->funcs->base.get_elem_wrapper(elem); } else { @@ -325,6 +400,7 @@ static int PyUpb_ByNameMap_Contains(PyObject* _self, PyObject* key) { PyUpb_ByNameMap* self = PyUpb_ByNameMap_Self(_self); const char* name = PyUpb_GetStrData(key); const void* elem = name ? self->funcs->lookup(self->parent, name) : NULL; + if (!name && PyObject_Hash(key) == -1) return -1; return elem ? 1 : 0; } @@ -339,6 +415,8 @@ static PyObject* PyUpb_ByNameMap_Get(PyObject* _self, PyObject* args) { const char* name = PyUpb_GetStrData(key); const void* elem = name ? self->funcs->lookup(self->parent, name) : NULL; + if (!name && PyObject_Hash(key) == -1) return NULL; + if (elem) { return self->funcs->base.get_elem_wrapper(elem); } else { @@ -349,8 +427,7 @@ static PyObject* PyUpb_ByNameMap_Get(PyObject* _self, PyObject* args) { static PyObject* PyUpb_ByNameMap_GetIter(PyObject* _self) { PyUpb_ByNameMap* self = PyUpb_ByNameMap_Self(_self); - return PyUpb_DescriptorIterator_New(&self->funcs->base, self->parent, - self->parent_obj); + return PyUpb_ByNameIterator_New(self->funcs, self->parent, self->parent_obj); } static PyObject* PyUpb_ByNameMap_Keys(PyObject* _self, PyObject* args) { @@ -460,6 +537,7 @@ static PyType_Slot PyUpb_ByNameMap_Slots[] = { {Py_tp_dealloc, &PyUpb_ByNameMap_Dealloc}, {Py_tp_iter, PyUpb_ByNameMap_GetIter}, {Py_tp_methods, &PyUpb_ByNameMap_Methods}, + {Py_tp_repr, &PyUpb_DescriptorMap_Repr}, {Py_tp_richcompare, &PyUpb_ByNameMap_RichCompare}, {0, NULL}, }; @@ -515,6 +593,8 @@ static const void* PyUpb_ByNumberMap_LookupHelper(PyUpb_ByNumberMap* self, long num = PyLong_AsLong(key); if (num == -1 && PyErr_Occurred()) { PyErr_Clear(); + // Ensure that the key is hashable (this will raise an error if not). + PyObject_Hash(key); return NULL; } else { return self->funcs->lookup(self->parent, num); @@ -527,7 +607,9 @@ static PyObject* PyUpb_ByNumberMap_Subscript(PyObject* _self, PyObject* key) { if (elem) { return self->funcs->base.get_elem_wrapper(elem); } else { - PyErr_SetObject(PyExc_KeyError, key); + if (!PyErr_Occurred()) { + PyErr_SetObject(PyExc_KeyError, key); + } return NULL; } } @@ -550,6 +632,8 @@ static PyObject* PyUpb_ByNumberMap_Get(PyObject* _self, PyObject* args) { const void* elem = PyUpb_ByNumberMap_LookupHelper(self, key); if (elem) { return self->funcs->base.get_elem_wrapper(elem); + } else if (PyErr_Occurred()) { + return NULL; } else { return PyUpb_NewRef(default_value); } @@ -557,8 +641,8 @@ static PyObject* PyUpb_ByNumberMap_Get(PyObject* _self, PyObject* args) { static PyObject* PyUpb_ByNumberMap_GetIter(PyObject* _self) { PyUpb_ByNumberMap* self = PyUpb_ByNumberMap_Self(_self); - return PyUpb_DescriptorIterator_New(&self->funcs->base, self->parent, - self->parent_obj); + return PyUpb_ByNumberIterator_New(self->funcs, self->parent, + self->parent_obj); } static PyObject* PyUpb_ByNumberMap_Keys(PyObject* _self, PyObject* args) { @@ -626,7 +710,9 @@ error: static int PyUpb_ByNumberMap_Contains(PyObject* _self, PyObject* key) { PyUpb_ByNumberMap* self = PyUpb_ByNumberMap_Self(_self); const void* elem = PyUpb_ByNumberMap_LookupHelper(self, key); - return elem ? 1 : 0; + if (elem) return 1; + if (PyErr_Occurred()) return -1; + return 0; } // A mapping container can only be equal to another mapping container, or (for @@ -674,6 +760,7 @@ static PyType_Slot PyUpb_ByNumberMap_Slots[] = { {Py_tp_dealloc, &PyUpb_ByNumberMap_Dealloc}, {Py_tp_iter, PyUpb_ByNumberMap_GetIter}, {Py_tp_methods, &PyUpb_ByNumberMap_Methods}, + {Py_tp_repr, &PyUpb_DescriptorMap_Repr}, {Py_tp_richcompare, &PyUpb_ByNumberMap_RichCompare}, {0, NULL}, }; @@ -695,10 +782,11 @@ bool PyUpb_InitDescriptorContainers(PyObject* m) { s->by_name_map_type = PyUpb_AddClass(m, &PyUpb_ByNameMap_Spec); s->by_number_map_type = PyUpb_AddClass(m, &PyUpb_ByNumberMap_Spec); - s->descriptor_iterator_type = - PyUpb_AddClass(m, &PyUpb_DescriptorIterator_Spec); + s->by_name_iterator_type = PyUpb_AddClass(m, &PyUpb_ByNameIterator_Spec); + s->by_number_iterator_type = PyUpb_AddClass(m, &PyUpb_ByNumberIterator_Spec); s->generic_sequence_type = PyUpb_AddClass(m, &PyUpb_GenericSequence_Spec); return s->by_name_map_type && s->by_number_map_type && - s->descriptor_iterator_type && s->generic_sequence_type; + s->by_name_iterator_type && s->by_number_iterator_type && + s->generic_sequence_type; } diff --git a/python/pb_unit_tests/descriptor_test_wrapper.py b/python/pb_unit_tests/descriptor_test_wrapper.py index 06f01a69ca..f4cd3e75b4 100644 --- a/python/pb_unit_tests/descriptor_test_wrapper.py +++ b/python/pb_unit_tests/descriptor_test_wrapper.py @@ -26,10 +26,14 @@ from google.protobuf.internal.descriptor_test import * import unittest +# begin:github_only # Our behavior here matches pure-Python, which does not allow # foo.enum_values_by_name.get([]). We reject it because we return a true # dict (like pure Python), which does not allow hashing by a list. GeneratedDescriptorTest.testDescriptor.__unittest_expecting_failure__ = True +GeneratedDescriptorTest.testCppDescriptorContainer.__unittest_expecting_failure__ = True +GeneratedDescriptorTest.testServiceDescriptor.__unittest_expecting_failure__ = True +# end:github_only # These fail because they attempt to add fields with conflicting JSON names. # We don't want to support this going forward. diff --git a/python/protobuf.h b/python/protobuf.h index 2169a60e47..b96691a459 100644 --- a/python/protobuf.h +++ b/python/protobuf.h @@ -67,8 +67,9 @@ typedef struct { // From descriptor_containers.c PyTypeObject* by_name_map_type; + PyTypeObject* by_name_iterator_type; PyTypeObject* by_number_map_type; - PyTypeObject* descriptor_iterator_type; + PyTypeObject* by_number_iterator_type; PyTypeObject* generic_sequence_type; // From descriptor_pool.c