From 4111d131727e2a4a194d92bb3958761b41f8c6c1 Mon Sep 17 00:00:00 2001 From: Joshua Haberman Date: Sat, 4 Dec 2021 18:42:53 -0800 Subject: [PATCH] WIP. --- python/BUILD | 4 + python/message.c | 1465 +++++++++++++++++++++++++++++++++++++++++++++ python/message.h | 60 ++ python/protobuf.c | 19 + python/protobuf.h | 60 +- 5 files changed, 1603 insertions(+), 5 deletions(-) create mode 100644 python/message.c create mode 100644 python/message.h diff --git a/python/BUILD b/python/BUILD index 7da6bf3670..102e9f0bee 100644 --- a/python/BUILD +++ b/python/BUILD @@ -43,6 +43,8 @@ cc_binary( "descriptor_containers.h", "descriptor_pool.c", "descriptor_pool.h", + "message.c", + "message.h", "protobuf.c", "protobuf.h", "python.h", @@ -63,9 +65,11 @@ cc_binary( deps = [ ":version_script.lds", "//:reflection", + "//:textformat", "//:upb", "//upb/util:compare", "//upb/util:def_to_proto", + "//upb/util:required_fields", "@system_python//:python_headers", ], ) diff --git a/python/message.c b/python/message.c new file mode 100644 index 0000000000..2db3f6a6f1 --- /dev/null +++ b/python/message.c @@ -0,0 +1,1465 @@ +/* + * Copyright (c) 2009-2021, Google LLC + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * * Neither the name of Google LLC nor the + * names of its contributors may be used to endorse or promote products + * derived from this software without specific prior written permission. 
+ * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL Google LLC BE LIABLE FOR ANY DIRECT, + * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES + * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND + * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#include "python/message.h" + +#include "python/convert.h" +#include "python/descriptor.h" +#include "upb/def.h" +#include "upb/reflection.h" +#include "upb/text_encode.h" +#include "upb/util/required_fields.h" + +const upb_msgdef* PyUpb_MessageMeta_GetMsgdef(PyObject* cls); + +// ----------------------------------------------------------------------------- +// CPythonBits +// ----------------------------------------------------------------------------- + +// This struct contains a few things that are not exposed directly through the +// limited API, but that we can get at in somewhat more roundabout ways. The +// roundabout ways are slower, so we cache the values here. +// +// These values are valid to cache in a global, even across sub-interpreters, +// because they are not pointers to interpreter state. They are process +// globals that will be the same for any interpreter in this process. +typedef struct { + // For each member, we note the equivalent expression that we could use in the + // full (non-limited) API. 
+ newfunc type_new; // PyTypeObject.tp_new + getattrofunc type_getattro; // PyTypeObject.tp_getattro + setattrofunc type_setattro; // PyTypeObject.tp_setattro + size_t type_basicsize; // sizeof(PyHeapTypeObject) + + // While we can refer to PY_VERSION_HEX in the limited API, this will give us + // the version of Python we were compiled against, which may be different + // than the version we are dynamically linked against. Here we want the + // version that is actually running in this process. + long python_version_hex; // PY_VERSION_HEX + + PyObject* descriptor; +} PyUpb_CPythonBits; + +// A global containing the values for this process. +PyUpb_CPythonBits cpython_bits; + +static bool PyUpb_CPythonBits_Init(PyUpb_CPythonBits* bits) { + // PyType_GetSlot() only works on heap types, so we cannot use it on + // &PyType_Type directly. Instead we create our own (temporary) type derived + // from PyType_Type: this will inherit all of the slots from PyType_Type, but + // as a heap type it can be queried with PyType_GetSlot(). 
+ static PyType_Slot dummy_slots[] = {{0, NULL}}; + + static PyType_Spec dummy_spec = { + "module.DummyClass", // tp_name + 0, // To be filled in by size of base // tp_basicsize + 0, // tp_itemsize + Py_TPFLAGS_DEFAULT, // tp_flags + dummy_slots, + }; + + PyObject* bases = Py_BuildValue("(O)", &PyType_Type); + if (!bases) return false; + PyObject* type = PyType_FromSpecWithBases(&dummy_spec, bases); + if (!type) return false; + Py_DECREF(bases); + + bits->type_new = PyType_GetSlot((PyTypeObject*)type, Py_tp_new); + bits->type_getattro = PyType_GetSlot((PyTypeObject*)type, Py_tp_getattro); + bits->type_setattro = PyType_GetSlot((PyTypeObject*)type, Py_tp_setattro); + Py_DECREF(type); + + PyObject* size = + PyObject_GetAttrString((PyObject*)&PyType_Type, "__basicsize__"); + bits->type_basicsize = PyLong_AsLong(size); + Py_DECREF(size); + + assert(bits->type_new && bits->type_getattro && bits->type_setattro); + +#ifndef Py_LIMITED_API + assert(bits->type_new == PyType_Type.tp_new); + assert(bits->type_getattro == PyType_Type.tp_getattro); + assert(bits->type_setattro == PyType_Type.tp_setattro); + assert(bits->type_basicsize == sizeof(PyHeapTypeObject)); +#endif + + PyObject* sys = PyImport_ImportModule("sys"); + PyObject* hex_version = PyObject_GetAttrString(sys, "hexversion"); + bits->python_version_hex = PyLong_AsLong(hex_version); + Py_DECREF(hex_version); + Py_DECREF(sys); + + return true; +} + +// ----------------------------------------------------------------------------- +// CMessage +// ----------------------------------------------------------------------------- + +// The main message object. The type of the object (PyUpb_CMessage.ob_type) +// will be an instance of the PyUpb_MessageMeta type (defined below). So the +// chain is: +// FooMessage = MessageMeta(...) 
+//   foo = FooMessage()
+//
+// Which becomes:
+//   Object             C Struct Type        Python type (ob_type)
+//   -----------------  -----------------    ---------------------
+//   foo                PyUpb_CMessage       FooMessage
+//   FooMessage         PyUpb_MessageMeta    message_meta_type
+//   message_meta_type  PyTypeObject         'type' in Python
+//
+// A message object can be in one of two states: present or non-present. When
+// a message is non-present, it stores a reference to its parent, and a write
+// to any attribute will trigger the message to become present in its parent.
+// The parent may also be non-present, in which case a mutation will trigger a
+// chain reaction.
+typedef struct PyUpb_CMessage {
+  PyObject_HEAD
+  PyObject* arena;
+  uintptr_t def;  // Tagged, low bit 1 == upb_fielddef*, else upb_msgdef*
+  union {
+    // when def is msgdef, the data for this msg.
+    upb_msg* msg;
+    // when def is fielddef, owning pointer to parent
+    struct PyUpb_CMessage* parent;
+  };
+  PyObject* ext_dict;  // Weak pointer to extension dict, if any.
+  // name->obj dict for non-present msg/map/repeated, NULL if none.
+  PyUpb_WeakMap* unset_subobj_map;
+  int version;
+} PyUpb_CMessage;
+
+static PyObject* PyUpb_CMessage_GetAttr(PyObject* _self, PyObject* attr);
+
+// Returns true if `msg` is non-present, i.e. the low tag bit of `def` is set
+// and the union holds `parent` rather than `msg`.
+bool PyUpb_CMessage_IsUnset(PyUpb_CMessage* msg) { return msg->def & 1; }
+
+// Returns the fielddef stored in the tagged `def` of a non-present message.
+// Must only be called when PyUpb_CMessage_IsUnset(msg) is true.
+const upb_fielddef* PyUpb_CMessage_GetFieldDef(PyUpb_CMessage* msg) {
+  assert(PyUpb_CMessage_IsUnset(msg));
+  return (void*)(msg->def & ~(uintptr_t)1);
+}
+
+// Returns the msgdef for `msg` in either state: directly from `def` when
+// present, or via the sub-message type of the stored fielddef when not.
+static const upb_msgdef* _PyUpb_CMessage_GetMsgdef(PyUpb_CMessage* msg) {
+  return PyUpb_CMessage_IsUnset(msg)
+             ? upb_fielddef_msgsubdef(PyUpb_CMessage_GetFieldDef(msg))
+             : (void*)msg->def;
+}
+
+// PyObject* flavored wrapper around _PyUpb_CMessage_GetMsgdef().
+const upb_msgdef* PyUpb_CMessage_GetMsgdef(PyObject* self) {
+  return _PyUpb_CMessage_GetMsgdef((PyUpb_CMessage*)self);
+}
+
+// Returns the underlying upb_msg. Must only be called on a present message.
+static upb_msg* PyUpb_CMessage_GetMsg(PyUpb_CMessage* self) {
+  assert(!PyUpb_CMessage_IsUnset(self));
+  return self->msg;
+}
+
+// Returns true if the type of `self` is an instance of the module's
+// message_meta_type. Never sets a Python exception.
+bool PyUpb_CMessage_TryCheck(PyObject* self) {
+  PyUpb_ModuleState* state = PyUpb_ModuleState_Get();
+  PyObject* type = (PyObject*)Py_TYPE(self);
+  return Py_TYPE(type) == state->message_meta_type;
+}
+
+// Like PyUpb_CMessage_TryCheck(), but raises TypeError when the check fails.
+bool PyUpb_CMessage_Check(PyObject* self) {
+  if (!PyUpb_CMessage_TryCheck(self)) {
+    PyErr_Format(PyExc_TypeError, "Expected a message object, but got %R.",
+                 self);
+    return false;
+  }
+  return true;
+}
+
+// Returns the underlying upb_msg, or NULL if the message is non-present.
+upb_msg* PyUpb_CMessage_GetIfWritable(PyObject* _self) {
+  PyUpb_CMessage* self = (PyUpb_CMessage*)_self;
+  return PyUpb_CMessage_IsUnset(self) ? NULL : self->msg;
+}
+
+// tp_new-style constructor: allocates a present message of class `cls` with
+// its own arena and registers it in the object cache. Returns NULL with an
+// exception set if the Python object cannot be allocated.
+static PyObject* PyUpb_CMessage_New(PyObject* cls, PyObject* unused_args,
+                                    PyObject* unused_kwargs) {
+  const upb_msgdef* msgdef = PyUpb_MessageMeta_GetMsgdef(cls);
+  PyUpb_CMessage* msg = (void*)PyType_GenericAlloc((PyTypeObject*)cls, 0);
+  if (!msg) return NULL;
+  msg->def = (uintptr_t)msgdef;
+  msg->arena = PyUpb_Arena_New();
+  msg->msg = upb_msg_new(msgdef, PyUpb_Arena_Get(msg->arena));
+  msg->unset_subobj_map = NULL;
+  msg->ext_dict = NULL;
+  msg->version = 0;
+
+  PyObject* ret = &msg->ob_base;
+  PyUpb_ObjCache_Add(msg->msg, ret);
+  return ret;
+}
+
+// Looks up `py_name` in the msgdef of `self`.
+//
+// `f` and `o` receive the field / oneof that was found. Either may be passed
+// as NULL to indicate that the caller does not accept that kind of name.
+//
+// Returns true on success. On failure returns false and, when `exc_type` is
+// non-NULL, leaves an exception set. When `exc_type` is NULL no exception is
+// left set, so callers may fall back to other lookups.
+static bool PyUpb_CMessage_LookupName(PyUpb_CMessage* self, PyObject* py_name,
+                                      const upb_fielddef** f,
+                                      const upb_oneofdef** o,
+                                      PyObject* exc_type) {
+  Py_ssize_t size;
+  const char* name = PyUnicode_AsUTF8AndSize(py_name, &size);
+  if (!name) {
+    // Honor the "no exception requested" contract for non-string names too.
+    if (!exc_type) PyErr_Clear();
+    return false;
+  }
+  const upb_msgdef* msgdef = _PyUpb_CMessage_GetMsgdef(self);
+
+  if (!upb_msgdef_lookupname(msgdef, name, size, f, o)) {
+    if (exc_type) {
+      PyErr_Format(exc_type,
+                   "Protocol message %s has no field or oneof named %s.",
+                   upb_msgdef_fullname(msgdef), name);
+    }
+    return false;
+  } else if (!o && !*f) {
+    if (exc_type) {
+      PyErr_Format(exc_type, "Expected a field name, but got oneof name %s.",
+                   name);
+    }
+    return false;
+  } else if (!f && !*o) {
+    if (exc_type) {
+      PyErr_Format(exc_type, "Expected a oneof name, but got field name %s.",
+                   name);
+    }
+    return false;
+  }
+
+  return true;
+}
+
+// Initializes the map container `map` (for map field `f`) from `value`.
+//
+// When the map's value type is a message, each key of `value` is looked up in
+// both `value` and `map`, and the destination is Clear()ed and then
+// MergeFrom()ed from the source. Otherwise this is map.update(value).
+//
+// Returns 0 on success, -1 with an exception set on failure.
+int PyUpb_CMessage_InitMapAttributes(PyObject* map, PyObject* value,
+                                     const upb_fielddef* f) {
+  const upb_msgdef* entry_m = upb_fielddef_msgsubdef(f);
+  const upb_fielddef* val_f = upb_msgdef_field(entry_m, 1);
+
+  if (!upb_fielddef_issubmsg(val_f)) {
+    PyObject* tmp = PyObject_CallMethod(map, "update", "O", value);
+    if (!tmp) return -1;
+    Py_DECREF(tmp);
+    return 0;
+  }
+
+  PyObject* iter = PyObject_GetIter(value);
+  if (iter == NULL) {
+    PyErr_Format(PyExc_TypeError, "Argument for field %s is not iterable",
+                 upb_fielddef_fullname(f));
+    return -1;
+  }
+
+  int ret = 0;
+  PyObject* item;
+  while ((item = PyIter_Next(iter)) != NULL) {
+    PyObject* src = PyObject_GetItem(value, item);
+    PyObject* dst = PyObject_GetItem(map, item);
+    Py_DECREF(item);
+
+    // `ok` stays NULL if any step fails; the exception is already set.
+    PyObject* ok = NULL;
+    if (src && dst) {
+      ok = PyObject_CallMethod(dst, "Clear", NULL);
+      if (ok) {
+        Py_DECREF(ok);
+        ok = PyObject_CallMethod(dst, "MergeFrom", "O", src);
+      }
+    }
+    Py_XDECREF(src);
+    Py_XDECREF(dst);
+    if (!ok) {
+      ret = -1;
+      break;
+    }
+    Py_DECREF(ok);
+  }
+  Py_DECREF(iter);
+
+  // PyIter_Next() returns NULL both at exhaustion and on error.
+  if (ret == 0 && PyErr_Occurred()) ret = -1;
+  return ret;
+}
+
+void PyUpb_CMessage_AssureWritable(PyUpb_CMessage* self);
+
+// Applies constructor-style keyword arguments to the message `_self`.
+//
+// `args` must be NULL or empty. `kwargs` must be NULL or a dict mapping field
+// names to values; None values are ignored. Map fields are initialized via
+// PyUpb_CMessage_InitMapAttributes(), message fields from either another
+// message (MergeFrom) or a nested dict, and scalar fields via PyUpb_PyToUpb().
+// Repeated fields are not implemented yet.
+//
+// Returns 0 on success, -1 with an exception set on failure.
+int PyUpb_CMessage_InitAttributes(PyObject* _self, PyObject* args,
+                                  PyObject* kwargs) {
+  assert(!PyErr_Occurred());
+
+  if (args != NULL && PyTuple_Size(args) != 0) {
+    PyErr_SetString(PyExc_TypeError, "No positional arguments allowed");
+    return -1;
+  }
+
+  if (kwargs == NULL) return 0;
+
+  PyUpb_CMessage* self = (PyUpb_CMessage*)_self;
+  Py_ssize_t pos = 0;
+  PyObject* name;
+  PyObject* value;
+  PyUpb_CMessage_AssureWritable(self);
+  upb_msg* msg = PyUpb_CMessage_GetMsg(self);
+  upb_arena* arena = PyUpb_Arena_Get(self->arena);
+
+  while (PyDict_Next(kwargs, &pos, &name, &value)) {
+    assert(!PyErr_Occurred());
+    const upb_fielddef* f;
+    if (!PyUpb_CMessage_LookupName(self, name, &f, NULL, PyExc_ValueError)) {
+      return -1;
+    }
+
+    if (value == Py_None) continue;  // Ignored.
+
+    if (upb_fielddef_ismap(f)) {
+      PyObject* map = PyUpb_CMessage_GetAttr(_self, name);
+      if (!map) return -1;
+      int ok = PyUpb_CMessage_InitMapAttributes(map, value, f);
+      Py_DECREF(map);
+      if (ok < 0) return -1;
+    } else if (upb_fielddef_isseq(f)) {
+      // TODO(haberman): disabled until repeated container is in.
+      // PyObject* repeated = PyUpb_CMessage_GetAttr(_self, name);
+      // PyObject* tmp = PyUpb_RepeatedContainer_Extend(repeated, value);
+      // if (!tmp) return -1;
+      // Py_DECREF(tmp);
+      PyErr_SetString(PyExc_NotImplementedError, "repeated init");
+      return -1;
+    } else if (upb_fielddef_issubmsg(f)) {
+      PyObject* submsg = PyUpb_CMessage_GetAttr(_self, name);
+      if (!submsg) return -1;
+      assert(!PyErr_Occurred());
+      bool ok;
+      if (PyUpb_CMessage_TryCheck(value)) {
+        PyObject* tmp = PyUpb_CMessage_MergeFrom(submsg, value);
+        ok = tmp != NULL;
+        Py_XDECREF(tmp);
+      } else if (PyDict_Check(value)) {
+        ok = PyUpb_CMessage_InitAttributes(submsg, NULL, value) >= 0;
+      } else {
+        // PyDict_Next() must only ever be handed a real dict.
+        PyErr_Format(PyExc_TypeError,
+                     "Message must be initialized with a dict: %s",
+                     upb_fielddef_fullname(f));
+        ok = false;
+      }
+      Py_DECREF(submsg);
+      if (!ok) return -1;
+    } else {
+      upb_msgval msgval;
+      assert(!PyErr_Occurred());
+      if (!PyUpb_PyToUpb(value, f, &msgval, arena)) {
+        PyErr_Clear();
+        PyErr_Format(PyExc_ValueError, "Error initializing field %s",
+                     upb_fielddef_fullname(f));
+        return -1;
+      }
+      upb_msg_set(msg, f, msgval, arena);
+    }
+    if (PyErr_Occurred()) return -1;
+  }
+
+  if (PyErr_Occurred()) return -1;
+  return 0;
+}
+
+// tp_init-style initializer. PyUpb_CMessage_InitAttributes() already rejects
+// positional arguments, so this simply delegates.
+static int PyUpb_CMessage_Init(PyObject* _self, PyObject* args,
+                               PyObject* kwargs) {
+  return PyUpb_CMessage_InitAttributes(_self, args, kwargs);
+}
+
+// Creates a non-present message for message field `f` of `parent`. The new
+// object takes its own references to `parent` and `arena`. Returns NULL with
+// an exception set if the Python object cannot be allocated.
+static PyObject* PyUpb_CMessage_NewUnset(PyObject* parent,
+                                         const upb_fielddef* f,
+                                         PyObject* arena) {
+  const upb_msgdef* sub_m = upb_fielddef_msgsubdef(f);
+  PyObject* cls = PyUpb_Descriptor_GetClass(sub_m);
+
+  PyUpb_CMessage* msg = (void*)PyType_GenericAlloc((PyTypeObject*)cls, 0);
+  Py_DECREF(cls);
+  if (!msg) return NULL;
+
+  msg->def = (uintptr_t)f | 1;
+  msg->arena = arena;
+  msg->parent = (PyUpb_CMessage*)parent;
+  msg->unset_subobj_map = NULL;
+  msg->ext_dict = NULL;
+  msg->version = 0;
+
+  Py_INCREF(parent);
+  Py_INCREF(arena);
+  PyObject* ret = &msg->ob_base;
+  return ret;
+}
+
+// Returns true if `_m2` is a message of (a subtype of) m1's type whose
+// contents compare equal to m1 according to PyUpb_Message_IsEqual().
+static bool PyUpb_CMessage_IsEqual(PyUpb_CMessage* m1, PyObject* _m2) {
+  PyUpb_CMessage* m2 = (PyUpb_CMessage*)_m2;
+  if (m1 == m2) return true;
+  if (!PyObject_TypeCheck(_m2, m1->ob_base.ob_type)) {
+    return false;
+  }
+  const upb_msgdef* m1_msgdef = _PyUpb_CMessage_GetMsgdef(m1);
+  const upb_msgdef* m2_msgdef = _PyUpb_CMessage_GetMsgdef(m2);
+  // Non-present messages yield NULL here.
+  const upb_msg* m1_msg = PyUpb_CMessage_GetIfWritable((PyObject*)m1);
+  const upb_msg* m2_msg = PyUpb_CMessage_GetIfWritable(_m2);
+  (void)m2_msgdef;
+  assert(m1_msgdef == m2_msgdef);
+  return PyUpb_Message_IsEqual(m1_msg, m2_msg, m1_msgdef);
+}
+
+/*
+ * PyUpb_CMessage_AssureWritable()
+ *
+ * This implements the "expando" behavior of Python protos:
+ *   foo = FooProto()
+ *
+ *   # The intermediate messages don't really exist, and won't be serialized.
+ *   x = foo.bar.bar.bar.bar.bar.baz
+ *
+ *   # Now all the intermediate objects are created.
+ *   foo.bar.bar.bar.bar.bar.baz = 5
+ *
+ * This function should be called before performing any mutation of a protobuf
+ * object.
+ *
+ * Post-condition:
+ *   PyUpb_CMessage_IsUnset(self) is false
+ */
+void PyUpb_CMessage_AssureWritable(PyUpb_CMessage* self) {
+  if (!PyUpb_CMessage_IsUnset(self)) return;
+
+  // This is a non-present message. We need to create a real upb_msg for this
+  // object and every parent until we reach a present message.
+  upb_arena* arena = PyUpb_Arena_Get(self->arena);
+  PyUpb_CMessage* child = self;
+  PyUpb_CMessage* parent = self->parent;
+  const upb_fielddef* child_f = PyUpb_CMessage_GetFieldDef(child);
+  // This overwrites child->parent.
+  child->msg = upb_msg_new(upb_fielddef_msgsubdef(child_f), arena);
+
+  // Each iteration links `child` into `parent` and then moves one level up.
+  // `child->def` keeps its tag bit until the link is made, so the loop stops
+  // once `child` refers to an ancestor that was already present.
+  while (PyUpb_CMessage_IsUnset(child)) {
+    // Must be read before parent->msg is written, since they share a union.
+    PyUpb_CMessage* next_parent = parent->parent;
+    const upb_fielddef* parent_f = NULL;
+    if (PyUpb_CMessage_IsUnset(parent)) {
+      parent_f = PyUpb_CMessage_GetFieldDef(parent);
+      // This overwrites parent->parent.
+      parent->msg = upb_msg_new(upb_fielddef_msgsubdef(parent_f), arena);
+    }
+    upb_msgval msgval;
+    msgval.msg_val = child->msg;
+    upb_msg_set(parent->msg, child_f, msgval, arena);
+    child->def = (uintptr_t)upb_fielddef_msgsubdef(child_f);
+    PyUpb_WeakMap_Delete(parent->unset_subobj_map, child_f);
+    PyUpb_ObjCache_Add(child->msg, &child->ob_base);
+    if (child != self) {
+      Py_DECREF(child);  // Was previously a parent.
+    }
+    child = parent;
+    child_f = parent_f;
+    parent = next_parent;
+  }
+
+  // Drops the reference that the topmost materialized message held on its
+  // (already present) parent.
+  Py_DECREF(child);
+  self->version++;
+}
+
+// Walks the non-present sub-objects cached on `self` and switches every one
+// whose field is now set in the underlying upb_msg over to the present state,
+// recursing into sub-messages. Called after parsing into `self`.
+static void PyUpb_CMessage_SyncSubobjs(PyUpb_CMessage* self) {
+  PyUpb_WeakMap* subobj_map = self->unset_subobj_map;
+  upb_msg* msg = PyUpb_CMessage_GetMsg(self);
+  intptr_t iter = PYUPB_WEAKMAP_BEGIN;
+  const void* key;
+  PyObject* obj;
+
+  if (!subobj_map) return;
+
+  // The last ref to this message could disappear during iteration.
+  Py_INCREF(&self->ob_base);
+
+  while (PyUpb_WeakMap_Next(subobj_map, &key, &obj, &iter)) {
+    const upb_fielddef* f = key;
+    if (upb_fielddef_haspresence(f) && !upb_msg_has(msg, f)) continue;
+    upb_msgval msgval = upb_msg_get(msg, f);
+    PyUpb_WeakMap_DeleteIter(subobj_map, &iter);
+    if (upb_fielddef_ismap(f)) {
+      if (!msgval.map_val) continue;
+      // TODO(haberman): re-enable when maps are checked in.
+      // PyUpb_MapContainer_SwitchToSet(obj, (upb_map*)msgval.map_val);
+    } else if (upb_fielddef_isseq(f)) {
+      if (!msgval.array_val) continue;
+      // TODO(haberman): re-enable when repeated fields are checked in.
+      // PyUpb_RepeatedContainer_SwitchToSet(obj, (upb_array*)msgval.array_val);
+    } else {
+      PyUpb_CMessage* sub = (PyUpb_CMessage*)obj;
+      PyUpb_ObjCache_Add(msgval.msg_val, obj);
+      assert(self == sub->parent);
+      assert(f == PyUpb_CMessage_GetFieldDef(sub));
+      // Release sub's owning reference to us before its union is overwritten.
+      Py_DECREF((PyObject*)self);
+      sub->msg = (upb_msg*)msgval.msg_val;
+      sub->def = (uintptr_t)upb_fielddef_msgsubdef(f);
+      PyUpb_CMessage_SyncSubobjs(sub);
+    }
+  }
+
+  Py_DECREF(&self->ob_base);
+}
+
+// Returns the text-format encoding of `self` as a Python str (unknown fields
+// are skipped). A non-present message encodes as the empty string. Returns
+// NULL with an exception set on failure.
+static PyObject* PyUpb_CMessage_ToString(PyUpb_CMessage* self) {
+  if (PyUpb_CMessage_IsUnset(self)) {
+    return PyUnicode_FromStringAndSize(NULL, 0);
+  }
+  upb_msg* msg = PyUpb_CMessage_GetMsg(self);
+  const upb_msgdef* msgdef = _PyUpb_CMessage_GetMsgdef(self);
+  const upb_symtab* symtab = upb_filedef_symtab(upb_msgdef_file(msgdef));
+  char buf[1024];
+  int options = UPB_TXTENC_SKIPUNKNOWN;
+  size_t size = upb_text_encode(msg, msgdef, symtab, options, buf, sizeof(buf));
+  if (size < sizeof(buf)) {
+    return PyUnicode_FromStringAndSize(buf, size);
+  }
+
+  // The stack buffer was too small; retry with a heap buffer of the size the
+  // first pass reported.
+  char* buf2 = malloc(size + 1);
+  if (!buf2) return PyErr_NoMemory();
+  size_t size2 = upb_text_encode(msg, msgdef, symtab, options, buf2, size + 1);
+  assert(size == size2);
+  PyObject* ret = PyUnicode_FromStringAndSize(buf2, size2);
+  free(buf2);
+  return ret;
+}
+
+// tp_richcompare: supports == and != only; every other operator yields
+// NotImplemented.
+static PyObject* PyUpb_CMessage_RichCompare(PyObject* _self, PyObject* other,
+                                            int opid) {
+  PyUpb_CMessage* self = (PyUpb_CMessage*)_self;
+  if (opid != Py_EQ && opid != Py_NE) {
+    Py_INCREF(Py_NotImplemented);
+    return Py_NotImplemented;
+  }
+  bool ret = (opid == Py_EQ) == PyUpb_CMessage_IsEqual(self, other);
+  return PyBool_FromLong(ret);
+}
+
+// Removes the cached non-present sub-object for field `f` of `_self`, if a
+// cache exists.
+void PyUpb_CMessage_CacheDelete(PyObject* _self, const upb_fielddef* f) {
+  PyUpb_CMessage* self = (PyUpb_CMessage*)_self;
+  // The map is created lazily, so it may not exist yet.
+  if (!self->unset_subobj_map) return;
+  PyUpb_WeakMap_Delete(self->unset_subobj_map, f);
+}
+
+void PyUpb_CMessage_SetConcreteSubobj(PyObject* _self, const upb_fielddef* f, + upb_msgval subobj) { + PyUpb_CMessage* self = (PyUpb_CMessage*)_self; + PyUpb_CMessage_AssureWritable(self); + PyUpb_CMessage_CacheDelete(_self, f); + upb_msg_set(self->msg, f, subobj, PyUpb_Arena_Get(self->arena)); +} + +static void PyUpb_CMessage_Dealloc(PyObject* _self) { + PyUpb_CMessage* self = (PyUpb_CMessage*)_self; + + if (PyUpb_CMessage_IsUnset(self)) { + PyUpb_CMessage_CacheDelete((PyObject*)self->parent, + PyUpb_CMessage_GetFieldDef(self)); + Py_DECREF(self->parent); + } else { + PyUpb_ObjCache_Delete(self->msg); + } + + if (self->unset_subobj_map) { + PyUpb_WeakMap_Free(self->unset_subobj_map); + } + + Py_DECREF(self->arena); + + // We do not use PyUpb_Dealloc() here because CMessage is a base type and for + // base types there is a bug we have to work around in this case (see below). + PyTypeObject* tp = Py_TYPE(self); + freefunc tp_free = PyType_GetSlot(tp, Py_tp_free); + tp_free(self); + + if (cpython_bits.python_version_hex >= 0x03080000) { + // Prior to Python 3.8 there is a bug where deallocating the type here would + // lead to a double-decref: https://bugs.python.org/issue37879 + Py_DECREF(tp); + } +} + +PyObject* PyUpb_CMessage_Get(upb_msg* u_msg, const upb_msgdef* m, + PyObject* arena) { + PyObject* ret = PyUpb_ObjCache_Get(u_msg); + + if (!ret) { + PyObject* cls = PyUpb_Descriptor_GetClass(m); + // https://bugs.python.org/issue35810 + PyUpb_CMessage* py_msg = (void*)PyType_GenericAlloc((PyTypeObject*)cls, 0); + py_msg->arena = arena; + py_msg->def = (uintptr_t)m; + py_msg->msg = u_msg; + py_msg->unset_subobj_map = NULL; + py_msg->ext_dict = NULL; + py_msg->version = 0; + ret = &py_msg->ob_base; + Py_DECREF(cls); + Py_INCREF(arena); + PyUpb_ObjCache_Add(u_msg, ret); + } + + return ret; +} + +PyObject* PyUpb_CMessage_GetFieldValue(PyObject* _self, + const upb_fielddef* field) { + PyUpb_CMessage* self = (PyUpb_CMessage*)_self; + 
assert(upb_fielddef_containingtype(field) == PyUpb_CMessage_GetMsgdef(_self)); + bool submsg = upb_fielddef_issubmsg(field); + bool seq = upb_fielddef_isseq(field); + + if ((PyUpb_CMessage_IsUnset(self) && (submsg || seq)) || + (submsg && !upb_msg_has(self->msg, field))) { + // Non-present messages return magical "empty" messages that point to their + // parent, but will materialize into real messages if any fields are + // assigned. + if (!self->unset_subobj_map) { + self->unset_subobj_map = PyUpb_WeakMap_New(); + } + PyObject* subobj = PyUpb_WeakMap_Get(self->unset_subobj_map, field); + + if (!subobj) { + if (upb_fielddef_ismap(field)) { + // TODO(haberman): re-enable when maps are checked in. + // subobj = PyUpb_MapContainer_NewUnset(_self, field, self->arena); + PyErr_SetString(PyExc_NotImplementedError, "unset map"); + return NULL; + } else if (seq) { + // TODO(haberman): re-enable when repeated fields are checked in. + // subobj = PyUpb_RepeatedContainer_NewUnset(_self, field, self->arena); + PyErr_SetString(PyExc_NotImplementedError, "unset repeated"); + return NULL; + } else { + subobj = PyUpb_CMessage_NewUnset(_self, field, self->arena); + } + PyUpb_WeakMap_Add(self->unset_subobj_map, field, subobj); + } + + assert(!PyErr_Occurred()); + return subobj; + } + + if (seq) { + assert(!PyUpb_CMessage_IsUnset(self)); + upb_mutmsgval mutval = + upb_msg_mutable(self->msg, field, PyUpb_Arena_Get(self->arena)); + if (upb_fielddef_ismap(field)) { + // TODO(haberman): re-enable when maps are checked in. + // return PyUpb_MapContainer_GetOrCreateWrapper(mutval.map, field, + // self->arena); + (void)mutval; + PyErr_SetString(PyExc_NotImplementedError, "access map"); + return NULL; + } else { + // TODO(haberman): re-enable when repeated fields are checked in. 
+ // return PyUpb_RepeatedContainer_GetOrCreateWrapper(mutval.array, _self, + // field, self->arena); + PyErr_SetString(PyExc_NotImplementedError, "access repeated"); + return NULL; + } + } else { + upb_msgval val; + if (PyUpb_CMessage_IsUnset(self)) { + // Unset message always returns default values. + val = upb_fielddef_default(field); + } else { + val = upb_msg_get(self->msg, field); + } + return PyUpb_UpbToPy(val, field, self->arena); + } +} + +int PyUpb_CMessage_SetFieldValue(PyObject* _self, const upb_fielddef* field, + PyObject* value) { + PyUpb_CMessage* self = (PyUpb_CMessage*)_self; + if (upb_fielddef_issubmsg(field) || upb_fielddef_isseq(field)) { + PyErr_Format(PyExc_AttributeError, + "Assignment not allowed to message, map, or repeated " + "field \"%s\" in protocol message object.", + upb_fielddef_name(field)); + return -1; + } + + PyUpb_CMessage_AssureWritable(self); + + upb_msgval val; + upb_arena* arena = PyUpb_Arena_Get(self->arena); + if (!PyUpb_PyToUpb(value, field, &val, arena)) { + return -1; + } + + upb_msg_set(self->msg, field, val, arena); + return 0; +} + +int PyUpb_CMessage_GetVersion(PyObject* _self) { + PyUpb_CMessage* self = (PyUpb_CMessage*)_self; + return self->version; +} + +static PyObject* PyUpb_MessageMeta_GetAttr(PyObject* self, PyObject* name); + +/* + * PyUpb_CMessage_GetAttr() + * + * Implements: + * foo = msg.foo + * + * Attribute lookup must find both message fields and base class methods like + * msg.SerializeToString(). + */ +__attribute__((flatten)) static PyObject* PyUpb_CMessage_GetAttr( + PyObject* _self, PyObject* attr) { + PyUpb_CMessage* self = (PyUpb_CMessage*)_self; + + // Lookup field by name. + const upb_fielddef* field; + if (PyUpb_CMessage_LookupName(self, attr, &field, NULL, NULL)) { + return PyUpb_CMessage_GetFieldValue(_self, field); + } + + // Check base class attributes. 
+ assert(!PyErr_Occurred()); + PyObject* ret = PyObject_GenericGetAttr(_self, attr); + + // Return value if found, swallow AttributeError if raised. + if (ret) { + return ret; + } + if (!PyErr_ExceptionMatches(PyExc_AttributeError)) { + return NULL; + } + PyErr_Clear(); + + return PyUpb_MessageMeta_GetAttr((PyObject*)Py_TYPE(_self), attr); +} + +/* + * PyUpb_CMessage_SetAttr() + * + * Implements: + * msg.foo = foo + */ +static int PyUpb_CMessage_SetAttr(PyObject* _self, PyObject* attr, + PyObject* value) { + PyUpb_CMessage* self = (PyUpb_CMessage*)_self; + const upb_fielddef* field; + if (!PyUpb_CMessage_LookupName(self, attr, &field, NULL, + PyExc_AttributeError)) { + return -1; + } + + return PyUpb_CMessage_SetFieldValue(_self, field, value); +} + +static PyObject* PyUpb_CMessage_HasField(PyObject* _self, PyObject* arg) { + PyUpb_CMessage* self = (PyUpb_CMessage*)_self; + const upb_fielddef* field; + const upb_oneofdef* oneof; + + if (!PyUpb_CMessage_LookupName(self, arg, &field, &oneof, PyExc_ValueError)) { + return NULL; + } + + if (field && !upb_fielddef_haspresence(field)) { + PyErr_Format(PyExc_ValueError, "Field %s does not have presence.", + upb_fielddef_fullname(field)); + return NULL; + } + + if (PyUpb_CMessage_IsUnset(self)) Py_RETURN_FALSE; + + return PyBool_FromLong(field ? 
upb_msg_has(self->msg, field) + : upb_msg_whichoneof(self->msg, oneof) != NULL); +} + +static PyObject* PyUpb_CMessage_ListFields(PyObject* _self, PyObject* arg) { + PyObject* list = PyList_New(0); + upb_msg* msg = PyUpb_CMessage_GetIfWritable(_self); + + if (msg) { + size_t iter1 = UPB_MSG_BEGIN; + const upb_msgdef* m = PyUpb_CMessage_GetMsgdef(_self); + const upb_symtab* symtab = upb_filedef_symtab(upb_msgdef_file(m)); + const upb_fielddef* f; + upb_msgval val; + while (upb_msg_next(msg, m, symtab, &f, &val, &iter1)) { + PyObject* field_desc = PyUpb_FieldDescriptor_Get(f); + PyObject* py_val = PyUpb_CMessage_GetFieldValue(_self, f); + PyObject* tuple = Py_BuildValue("(NN)", field_desc, py_val); + PyList_Append(list, tuple); + Py_DECREF(tuple); + } + } + + return list; +} + +PyObject* PyUpb_CMessage_MergeFrom(PyObject* self, PyObject* arg) { + if (self->ob_type != arg->ob_type) { + PyErr_Format(PyExc_TypeError, + "Parameter to MergeFrom() must be instance of same class: " + "expected %S got %S.", + Py_TYPE(self), Py_TYPE(arg)); + return NULL; + } + // OPT: exit if src is empty. + PyObject* subargs = PyTuple_New(0); + PyObject* serialized = PyUpb_CMessage_SerializeToString(arg, subargs, NULL); + Py_DECREF(subargs); + if (!serialized) return NULL; + PyObject* ret = PyUpb_CMessage_MergeFromString(self, serialized); + Py_DECREF(serialized); + Py_DECREF(ret); + Py_RETURN_NONE; +} + +static PyObject* PyUpb_CMessage_SetInParent(PyObject* _self, PyObject* arg) { + PyUpb_CMessage* self = (PyUpb_CMessage*)_self; + PyUpb_CMessage_AssureWritable(self); + Py_RETURN_NONE; +} + +static PyObject* PyUpb_CMessage_UnknownFields(PyObject* _self, PyObject* arg) { + // TODO(haberman): re-enable when unknown fields are added. 
+ // return PyUpb_UnknownFields_New(_self); + PyErr_SetString(PyExc_NotImplementedError, "unknown field accessor"); + return NULL; +} + +PyObject* PyUpb_CMessage_MergeFromString(PyObject* _self, PyObject* arg) { + PyUpb_CMessage* self = (PyUpb_CMessage*)_self; + char* buf; + Py_ssize_t size; + PyObject* bytes = NULL; + + if (PyMemoryView_Check(arg)) { + bytes = PyBytes_FromObject(arg); + PyBytes_AsStringAndSize(bytes, &buf, &size); + } else if (PyBytes_AsStringAndSize(arg, &buf, &size) < 0) { + return NULL; + } + + PyUpb_CMessage_AssureWritable(self); + const upb_msgdef* msgdef = _PyUpb_CMessage_GetMsgdef(self); + const upb_filedef* file = upb_msgdef_file(msgdef); + const upb_extreg* extreg = upb_symtab_extreg(upb_filedef_symtab(file)); + const upb_msglayout* layout = upb_msgdef_layout(msgdef); + upb_arena* arena = PyUpb_Arena_Get(self->arena); + upb_DecodeStatus status = + _upb_decode(buf, size, self->msg, layout, extreg, 0, arena); + Py_XDECREF(bytes); + if (status != kUpb_DecodeStatus_Ok) { + PyUpb_ModuleState* state = PyUpb_ModuleState_Get(); + PyErr_Format(state->decode_error_class, "Error parsing message"); + return NULL; + } + PyUpb_CMessage_SyncSubobjs(self); + return PyLong_FromSsize_t(size); +} + +static PyObject* PyUpb_CMessage_Clear(PyUpb_CMessage* self, PyObject* args); + +static PyObject* PyUpb_CMessage_ParseFromString(PyObject* self, PyObject* arg) { + PyObject* tmp = PyUpb_CMessage_Clear((PyUpb_CMessage*)self, NULL); + Py_DECREF(tmp); + return PyUpb_CMessage_MergeFromString(self, arg); +} + +static PyObject* PyUpb_CMessage_ByteSize(PyObject* self, PyObject* args) { + // At the moment upb does not have a "byte size" function, so we just + // serialize to string and get the size of the string. 
+  PyObject* subargs = PyTuple_New(0);
+  if (!subargs) return NULL;
+  PyObject* serialized = PyUpb_CMessage_SerializeToString(self, subargs, NULL);
+  Py_DECREF(subargs);
+  if (!serialized) return NULL;
+  size_t size = PyBytes_Size(serialized);
+  Py_DECREF(serialized);
+  return PyLong_FromSize_t(size);
+}
+
+// Clears every field of the message.  Always returns None.
+static PyObject* PyUpb_CMessage_Clear(PyUpb_CMessage* self, PyObject* args) {
+  PyUpb_CMessage_AssureWritable(self);
+  const upb_msgdef* msgdef = _PyUpb_CMessage_GetMsgdef(self);
+  upb_msg_clear(self->msg, msgdef);
+  Py_RETURN_NONE;
+}
+
+// Called after field `f` of `self` has been cleared.  If a wrapper object for
+// the sub-message is still registered in `unset_subobj_map`, it is passed to
+// PyUpb_CMessage_AssureWritable().
+// NOTE(review): presumably this detaches the wrapper from `self` -- confirm
+// against PyUpb_CMessage_AssureWritable().
+static void PyUpb_CMessage_AbandonField(PyUpb_CMessage* self,
+                                        const upb_fielddef* f) {
+  if (self->unset_subobj_map && upb_fielddef_issubmsg(f)) {
+    PyObject* sub = PyUpb_WeakMap_Get(self->unset_subobj_map, f);
+    if (sub) {
+      PyUpb_CMessage_AssureWritable((PyUpb_CMessage*)sub);
+      // PyUpb_WeakMap_Get() returns a new reference; release it.
+      Py_DECREF(sub);
+    }
+  }
+}
+
+// Clears the extension described by the FieldDescriptor `arg`.
+// Returns None, or NULL with an exception set if `arg` is not a usable field
+// descriptor or does not extend this message type.
+static PyObject* PyUpb_CMessage_ClearExtension(PyUpb_CMessage* self,
+                                               PyObject* arg) {
+  PyUpb_CMessage_AssureWritable(self);
+  const upb_msgdef* msgdef = _PyUpb_CMessage_GetMsgdef(self);
+  const upb_fielddef* f = PyUpb_FieldDescriptor_GetDef(arg);
+  if (!f) return NULL;
+  if (upb_fielddef_containingtype(f) != msgdef) {
+    PyErr_Format(PyExc_ValueError, "Extension doesn't match (%s vs %s)",
+                 upb_msgdef_fullname(msgdef), upb_fielddef_fullname(f));
+    // Without this return the mismatched field was cleared anyway and None
+    // was returned with the exception still pending.
+    return NULL;
+  }
+  PyUpb_CMessage_AbandonField(self, f);
+  upb_msg_clearfield(self->msg, f);
+  Py_RETURN_NONE;
+}
+
+// Clears the field (or the currently-set member of the oneof) named by `arg`.
+// Returns None, or NULL with ValueError set if the name lookup fails.
+static PyObject* PyUpb_CMessage_ClearField(PyUpb_CMessage* self,
+                                           PyObject* arg) {
+  PyUpb_CMessage_AssureWritable(self);
+  const upb_fielddef* f;
+  const upb_oneofdef* o;
+  if (!PyUpb_CMessage_LookupName(self, arg, &f, &o, PyExc_ValueError)) {
+    return NULL;
+  }
+
+  if (o) f = upb_msg_whichoneof(self->msg, o);
+
+  // A oneof with no member set leaves `f` NULL.  There is nothing to clear in
+  // that case, and the calls below must not see a NULL field.
+  if (!f) Py_RETURN_NONE;
+
+  upb_msg_clearfield(self->msg, f);
+
+  if (upb_fielddef_ismap(f)) {
+    // We have to invalidate any existing iterator over this map.
+    PyObject* obj = NULL;
+    if (self->unset_subobj_map) {
+      obj = PyUpb_WeakMap_Get(self->unset_subobj_map, f);
+    }
+    if (!obj) {
+      // NOTE(review): this reads the map value after the field was cleared
+      // above -- confirm the lookup can still find the live container.
+      upb_msg* msg = PyUpb_CMessage_GetMsg(self);
+      upb_msgval msgval = upb_msg_get(msg, f);
+      obj = PyUpb_ObjCache_Get(msgval.map_val);
+    }
+    if (obj) {
+      PyUpb_MapContainer_Invalidate(obj);
+      Py_DECREF(obj);
+    }
+  }
+
+  PyUpb_CMessage_AbandonField(self, f);
+
+  Py_RETURN_NONE;
+}
+
+// Drops unknown fields from this message.  Always returns None.
+static PyObject* PyUpb_CMessage_DiscardUnknownFields(PyUpb_CMessage* self,
+                                                     PyObject* arg) {
+  PyUpb_CMessage_AssureWritable(self);
+  const upb_msgdef* msgdef = _PyUpb_CMessage_GetMsgdef(self);
+  // NOTE(review): 64 is presumably the maximum recursion depth -- confirm
+  // against upb_msg_discardunknown().
+  upb_msg_discardunknown(self->msg, msgdef, 64);
+  Py_RETURN_NONE;
+}
+
+// Returns a new list with the text path of every unset required field, one
+// str per field.  The list is empty when the message is not writable or has
+// no unset required fields.  Returns NULL with an exception set on failure.
+static PyObject* PyUpb_CMessage_FindInitializationErrors(PyObject* _self,
+                                                         PyObject* arg) {
+  PyUpb_CMessage* self = (PyUpb_CMessage*)_self;
+  upb_msg* msg = PyUpb_CMessage_GetIfWritable(_self);
+  if (!msg) return PyList_New(0);
+  const upb_msgdef* msgdef = _PyUpb_CMessage_GetMsgdef(self);
+  const upb_symtab* ext_pool = NULL;  // TODO
+  upb_FieldPathEntry* fields;
+  PyObject* ret = PyList_New(0);
+  if (!ret) return NULL;
+  if (upb_util_HasUnsetRequired(msg, msgdef, ext_pool, &fields)) {
+    char* buf = NULL;
+    size_t size = 0;
+    // NOTE(review): assumes upb_FieldPath_ToText() leaves `fields` NULL after
+    // the last path -- confirm against upb/util/required_fields.h.
+    while (fields) {
+      upb_FieldPathEntry* field = fields;
+      size_t need = upb_FieldPath_ToText(&fields, buf, size);
+      if (need >= size) {
+        // The buffer was too small: rewind, grow to the next power of two
+        // that fits, and format the same path again.
+        fields = field;
+        size = 16;
+        while (size <= need) size *= 2;
+        char* grown = realloc(buf, size);
+        if (!grown) {
+          free(buf);
+          Py_DECREF(ret);
+          return PyErr_NoMemory();
+        }
+        buf = grown;
+        need = upb_FieldPath_ToText(&fields, buf, size);
+        assert(size > need);
+      }
+      // The list starts empty, so entries must be appended; PyList_SetItem()
+      // on index 0 of an empty list fails with IndexError.
+      PyObject* str = PyUnicode_FromString(buf);
+      if (!str || PyList_Append(ret, str) < 0) {
+        Py_XDECREF(str);
+        free(buf);
+        Py_DECREF(ret);
+        return NULL;
+      }
+      Py_DECREF(str);  // PyList_Append() took its own reference.
+    }
+    free(buf);
+  }
+  return ret;
+}
+
+// Class method: creates a new instance of `cls` and merges `serialized` into
+// it.  Returns the new message, or NULL with an exception set.
+static PyObject* PyUpb_CMessage_FromString(PyObject* cls,
+                                           PyObject* serialized) {
+  PyObject* ret = PyObject_CallObject(cls, NULL);
+  if (ret == NULL) return NULL;
+
+  PyObject* length = PyUpb_CMessage_MergeFromString(ret, serialized);
+  if (length == NULL) {
+    Py_DECREF(ret);
+    return NULL;
+  }
+
+  Py_DECREF(length);
+  return ret;
+}
+
+static PyObject* PyUpb_CMessage_HasExtension(PyObject* 
_self,
+                                             PyObject* ext_desc) {
+  upb_msg* msg = PyUpb_CMessage_GetIfWritable(_self);
+  const upb_fielddef* f = PyUpb_FieldDescriptor_GetDef(ext_desc);
+  if (!f) return NULL;
+  if (!msg) Py_RETURN_FALSE;
+  return PyBool_FromLong(upb_msg_has(msg, f));
+}
+
+// Shared implementation of SerializeToString / SerializePartialToString.
+//
+// Accepts one optional argument, `deterministic` (positional or keyword).
+// When `check_required` is true the encoder is asked to fail if required
+// fields are missing.
+//
+// Returns a new bytes object, or NULL with an exception set (the module's
+// encode error class if encoding failed).
+PyObject* PyUpb_CMessage_SerializeInternal(PyObject* _self, PyObject* args,
+                                           PyObject* kwargs,
+                                           bool check_required) {
+  PyUpb_CMessage* self = (PyUpb_CMessage*)_self;
+  if (!PyUpb_CMessage_Check((PyObject*)self)) return NULL;
+  static const char* kwlist[] = {"deterministic", NULL};
+  int deterministic = 0;
+  if (!PyArg_ParseTupleAndKeywords(args, kwargs, "|p", (char**)(kwlist),
+                                   &deterministic)) {
+    return NULL;
+  }
+
+  // A message that has never been written to serializes to the empty string.
+  if (PyUpb_CMessage_IsUnset(self)) {
+    return PyBytes_FromStringAndSize(NULL, 0);
+  }
+
+  // The encoder writes into a scratch arena that only lives for this call.
+  upb_arena* arena = upb_arena_new();
+  if (!arena) return PyErr_NoMemory();
+  const upb_msgdef* msgdef = _PyUpb_CMessage_GetMsgdef(self);
+  const upb_msglayout* layout = upb_msgdef_layout(msgdef);
+  size_t size = 0;
+  int options = 0;
+  if (check_required) options |= UPB_ENCODE_CHECKREQUIRED;
+  if (deterministic) options |= UPB_ENCODE_DETERMINISTIC;
+  char* pb = upb_encode_ex(self->msg, layout, options, arena, &size);
+  PyObject* ret = NULL;
+
+  if (!pb) {
+    PyUpb_ModuleState* state = PyUpb_ModuleState_Get();
+    PyErr_Format(state->encode_error_class, "Failed to serialize proto");
+    goto done;
+  }
+
+  // Copies the encoded bytes out of the arena before it is freed.
+  ret = PyBytes_FromStringAndSize(pb, size);
+
+done:
+  upb_arena_free(arena);
+  return ret;
+}
+
+// Serializes the message, asking the encoder to check required fields.
+PyObject* PyUpb_CMessage_SerializeToString(PyObject* _self, PyObject* args,
+                                           PyObject* kwargs) {
+  return PyUpb_CMessage_SerializeInternal(_self, args, kwargs, true);
+}
+
+// Serializes the message without checking required fields.
+PyObject* PyUpb_CMessage_SerializePartialToString(PyObject* _self,
+                                                  PyObject* args,
+                                                  PyObject* kwargs) {
+  return PyUpb_CMessage_SerializeInternal(_self, args, kwargs, false);
+}
+
+static PyObject* PyUpb_CMessage_WhichOneof(PyObject* _self, PyObject* name) {
+  PyUpb_CMessage* self = (PyUpb_CMessage*)_self;
+  const upb_oneofdef* o;
+  if (!PyUpb_CMessage_LookupName(self, name, NULL, 
&o, PyExc_ValueError)) {
+    return NULL;
+  }
+  upb_msg* msg = PyUpb_CMessage_GetIfWritable(_self);
+  if (!msg) Py_RETURN_NONE;
+  const upb_fielddef* f = upb_msg_whichoneof(msg, o);
+  if (!f) Py_RETURN_NONE;
+  return PyUnicode_FromString(upb_fielddef_name(f));
+}
+
+// Forgets the cached extension dict of this message.  The pointer is only
+// reset; no reference is released here.
+void PyUpb_CMessage_ClearExtensionDict(PyObject* _self) {
+  PyUpb_CMessage* self = (PyUpb_CMessage*)_self;
+  assert(self->ext_dict);
+  self->ext_dict = NULL;
+}
+
+// Getter for the `Extensions` attribute.
+//
+// Returns a new reference to the cached extension dict if there is one.
+// Raises AttributeError if the message type declares no extension ranges, and
+// NotImplementedError otherwise (ExtensionDict is not checked in yet).
+static PyObject* PyUpb_CMessage_GetExtensionDict(PyObject* _self,
+                                                 void* closure) {
+  PyUpb_CMessage* self = (PyUpb_CMessage*)_self;
+  if (self->ext_dict) {
+    // A getter must hand back a new reference; returning the cached pointer
+    // as-is would let the caller's DECREF free an object we still point to.
+    Py_INCREF(self->ext_dict);
+    return self->ext_dict;
+  }
+
+  const upb_msgdef* m = _PyUpb_CMessage_GetMsgdef(self);
+  if (upb_msgdef_extrangecount(m) == 0) {
+    PyErr_SetNone(PyExc_AttributeError);
+    return NULL;
+  }
+
+  // TODO(haberman): re-enable when ExtensionDict is checked in.
+  // self->ext_dict = PyUpb_ExtensionDict_New(_self);
+  // return self->ext_dict;
+  PyErr_SetString(PyExc_NotImplementedError, "get extension dict");
+  return NULL;
+}
+
+static PyGetSetDef PyUpb_CMessage_Getters[] = {
+    {"Extensions", PyUpb_CMessage_GetExtensionDict, NULL, "Extension dict"},
+    /*
+    {"_extensions_by_name", (getter)GetExtensionsByName, NULL},
+    {"_extensions_by_number", (getter)GetExtensionsByNumber, NULL},
+    */
+    {NULL}};
+
+static PyMethodDef PyUpb_CMessage_Methods[] = {
+    //{ "__deepcopy__", (PyCFunction)DeepCopy, METH_VARARGS,
+    //  "Makes a deep copy of the class." },
+    //{ "__unicode__", (PyCFunction)ToUnicode, METH_NOARGS,
+    //  "Outputs a unicode representation of the message." 
},
+    // Live entries of the CMessage method table.  Entries that are commented
+    // out name methods that are not wired up in this table yet.
+    {"ByteSize", (PyCFunction)PyUpb_CMessage_ByteSize, METH_NOARGS,
+     "Returns the size of the message in bytes."},
+    {"Clear", (PyCFunction)PyUpb_CMessage_Clear, METH_NOARGS,
+     "Clears the message."},
+    {"ClearExtension", (PyCFunction)PyUpb_CMessage_ClearExtension, METH_O,
+     "Clears a message field."},
+    {"ClearField", (PyCFunction)PyUpb_CMessage_ClearField, METH_O,
+     "Clears a message field."},
+    //{ "CopyFrom", (PyCFunction)CopyFrom, METH_O,
+    //  "Copies a protocol message into the current message." },
+    {"DiscardUnknownFields", (PyCFunction)PyUpb_CMessage_DiscardUnknownFields,
+     METH_NOARGS, "Discards the unknown fields."},
+    {"FindInitializationErrors",
+     (PyCFunction)PyUpb_CMessage_FindInitializationErrors, METH_NOARGS,
+     "Finds unset required fields."},
+    {"FromString", PyUpb_CMessage_FromString, METH_O | METH_CLASS,
+     "Creates new method instance from given serialized data."},
+    {"HasExtension", PyUpb_CMessage_HasExtension, METH_O,
+     "Checks if a message field is set."},
+    {"HasField", PyUpb_CMessage_HasField, METH_O,
+     "Checks if a message field is set."},
+    //{ "IsInitialized", (PyCFunction)IsInitialized, METH_VARARGS,
+    //  "Checks if all required fields of a protocol message are set." },
+    {"ListFields", PyUpb_CMessage_ListFields, METH_NOARGS,
+     "Lists all set fields of a message."},
+    {"MergeFrom", PyUpb_CMessage_MergeFrom, METH_O,
+     "Merges a protocol message into the current message."},
+    {"MergeFromString", PyUpb_CMessage_MergeFromString, METH_O,
+     "Merges a serialized message into the current message."},
+    {"ParseFromString", PyUpb_CMessage_ParseFromString, METH_O,
+     "Parses a serialized message into the current message."},
+    //{ "RegisterExtension", (PyCFunction)RegisterExtension, METH_O |
+    //  METH_CLASS,
+    //  "Registers an extension with the current message." },
+    {"SerializePartialToString",
+     (PyCFunction)PyUpb_CMessage_SerializePartialToString,
+     METH_VARARGS | METH_KEYWORDS,
+     "Serializes the message to a string, even if it isn't initialized."},
+    {"SerializeToString", (PyCFunction)PyUpb_CMessage_SerializeToString,
+     METH_VARARGS | METH_KEYWORDS,
+     "Serializes the message to a string, only for initialized messages."},
+    {"SetInParent", (PyCFunction)PyUpb_CMessage_SetInParent, METH_NOARGS,
+     "Sets the has bit of the given field in its parent message."},
+    {"UnknownFields", (PyCFunction)PyUpb_CMessage_UnknownFields, METH_NOARGS,
+     "Parse unknown field set"},
+    {"WhichOneof", PyUpb_CMessage_WhichOneof, METH_O,
+     "Returns the name of the field set inside a oneof, "
+     "or None if no field is set."},
+    //{ "_CheckCalledFromGeneratedFile",
+    //(PyCFunction)_CheckCalledFromGeneratedFile,
+    //  METH_NOARGS | METH_STATIC,
+    //  "Raises TypeError if the caller is not in a _pb2.py file."},
+    {NULL, NULL}};
+
+// Type slots for CMessage.  str() and repr() share one implementation, and
+// instances are explicitly unhashable (PyObject_HashNotImplemented).
+static PyType_Slot PyUpb_CMessage_Slots[] = {
+    {Py_tp_dealloc, PyUpb_CMessage_Dealloc},
+    {Py_tp_doc, "A ProtocolMessage"},
+    {Py_tp_getattro, PyUpb_CMessage_GetAttr},
+    {Py_tp_getset, PyUpb_CMessage_Getters},
+    {Py_tp_hash, PyObject_HashNotImplemented},
+    {Py_tp_methods, PyUpb_CMessage_Methods},
+    {Py_tp_new, PyUpb_CMessage_New},
+    {Py_tp_str, PyUpb_CMessage_ToString},
+    {Py_tp_repr, PyUpb_CMessage_ToString},
+    {Py_tp_richcompare, PyUpb_CMessage_RichCompare},
+    {Py_tp_setattro, PyUpb_CMessage_SetAttr},
+    {Py_tp_init, PyUpb_CMessage_Init},
+    {0, NULL}};
+
+// Type spec for CMessage.  Py_TPFLAGS_BASETYPE allows it to be subclassed.
+PyType_Spec PyUpb_CMessage_Spec = {
+    PYUPB_MODULE_NAME ".CMessage",             // tp_name
+    sizeof(PyUpb_CMessage),                    // tp_basicsize
+    0,                                         // tp_itemsize
+    Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE,  // tp_flags
+    PyUpb_CMessage_Slots,
+};
+
+// -----------------------------------------------------------------------------
+// MessageMeta
+// -----------------------------------------------------------------------------
+
+// Per-class data attached to each message class.
+typedef struct {
+  const upb_msglayout* layout;
+  PyObject* 
py_message_descriptor;
+} PyUpb_MessageMeta;
+
+// Returns the PyUpb_MessageMeta stored in the class object `cls`.  The data
+// sits directly after the `cpython_bits.type_basicsize` bytes of the base
+// type object.
+PyUpb_MessageMeta* PyUpb_GetMessageMeta(PyObject* cls) {
+#ifndef NDEBUG
+  PyUpb_ModuleState* state = PyUpb_ModuleState_MaybeGet();
+  assert(!state || cls->ob_type == state->message_meta_type);
+#endif
+  return (PyUpb_MessageMeta*)((char*)cls + cpython_bits.type_basicsize);
+}
+
+// Builds the str "<module>_pb2.<MessageName>" for `m`, where <module> is the
+// defining file's name with its final extension removed and every '/'
+// replaced by '.'.
+//
+// Returns a new reference, or NULL with an exception set.
+PyObject* PyUpb_MessageMeta_ModuleQualifiedName(const upb_msgdef* m) {
+  const upb_filedef* file = upb_msgdef_file(m);
+  const char* filename = upb_filedef_name(file);
+  const char* msgname = upb_msgdef_name(m);
+  const char* final_dot = strrchr(filename, '.');
+  size_t len = final_dot ? final_dot - filename : strlen(filename);
+  char* modname = malloc(len + 1);
+  // Returning NULL without an exception set would raise SystemError.
+  if (!modname) return PyErr_NoMemory();
+  for (size_t i = 0; i < len; i++) {
+    if (filename[i] == '/') {
+      modname[i] = '.';
+    } else {
+      modname[i] = filename[i];
+    }
+  }
+  modname[len] = '\0';
+  PyObject* ret = PyUnicode_FromFormat("%s_pb2.%s", modname, msgname);
+  free(modname);
+  return ret;
+}
+
+// Creates the Python class for the message described by `py_descriptor`.
+//
+// `dict` becomes the class namespace; "__slots__" = () is added to it.  The
+// new class keeps a reference to `py_descriptor` and is registered in the
+// object cache under the message's layout.
+//
+// Returns a new reference, or NULL with an exception set.
+PyObject* PyUpb_MessageMeta_DoCreateClass(PyObject* py_descriptor,
+                                          const char* name, PyObject* dict) {
+  PyUpb_ModuleState* state = PyUpb_ModuleState_Get();
+  PyTypeObject* descriptor_type = state->descriptor_types[kPyUpb_Descriptor];
+  if (!PyObject_TypeCheck(py_descriptor, descriptor_type)) {
+    return PyErr_Format(PyExc_TypeError, "Expected a message Descriptor");
+  }
+
+  const upb_msgdef* msgdef = PyUpb_Descriptor_GetDef(py_descriptor);
+  assert(msgdef);
+  assert(!PyUpb_ObjCache_Get(upb_msgdef_layout(msgdef)));
+
+  // PyDict_SetItemString() takes its own reference, so ours is released
+  // whether or not the insertion succeeds.
+  PyObject* slots = PyTuple_New(0);
+  if (!slots) return NULL;
+  int status = PyDict_SetItemString(dict, "__slots__", slots);
+  Py_DECREF(slots);
+  if (status < 0) return NULL;
+
+  // Bases are either:
+  //    (CMessage, Message)            # for regular messages
+  //    (CMessage, Message, WktBase)   # For well-known types
+  PyObject* wkt_bases = PyUpb_GetWktBases(state);
+  if (wkt_bases == NULL) return NULL;
+  PyObject* wkt_base =
+      PyDict_GetItemString(wkt_bases, upb_msgdef_fullname(msgdef));
+  PyObject* args;
+  if (wkt_base == NULL) {
+    args = Py_BuildValue("s(OO)O", name, state->cmessage_type,
+                         state->message_class, dict);
+  } else {
+    args = Py_BuildValue("s(OOO)O", name, state->cmessage_type,
+                         state->message_class, wkt_base, dict);
+  }
+  if (args == NULL) return NULL;
+
+  PyObject* ret = cpython_bits.type_new(state->message_meta_type, args, NULL);
+  assert(Py_REFCNT(args) == 1);
+  Py_DECREF(args);
+  if (!ret) return NULL;
+
+  PyUpb_MessageMeta* meta = PyUpb_GetMessageMeta(ret);
+  meta->py_message_descriptor = py_descriptor;
+  meta->layout = upb_msgdef_layout(msgdef);
+  Py_INCREF(meta->py_message_descriptor);
+
+  PyUpb_ObjCache_Add(upb_msgdef_layout(msgdef), ret);
+
+  return ret;
+}
+
+// tp_new for MessageMeta: handles `class Foo(Message): DESCRIPTOR = ...`.
+//
+// Accepts (name, bases, dict).  `bases` must be () or (Message,), and `dict`
+// must contain a message Descriptor under "DESCRIPTOR".  If a class already
+// exists for that descriptor it is returned instead of creating a new one.
+static PyObject* PyUpb_MessageMeta_New(PyTypeObject* type, PyObject* args,
+                                       PyObject* kwargs) {
+  PyUpb_ModuleState* state = PyUpb_ModuleState_Get();
+  static const char* kwlist[] = {"name", "bases", "dict", 0};
+  PyObject *bases, *dict;
+  const char* name;
+
+  // Check arguments: (name, bases, dict)
+  if (!PyArg_ParseTupleAndKeywords(args, kwargs, "sO!O!:type", (char**)kwlist,
+                                   &name, &PyTuple_Type, &bases, &PyDict_Type,
+                                   &dict)) {
+    return NULL;
+  }
+
+  // Check bases: only (), or (message.Message,) are allowed
+  Py_ssize_t size = PyTuple_Size(bases);
+  if (!(size == 0 ||
+        (size == 1 && PyTuple_GetItem(bases, 0) == state->message_class))) {
+    PyErr_SetString(PyExc_TypeError,
+                    "A Message class can only inherit from Message");
+    return NULL;
+  }
+
+  // Check dict['DESCRIPTOR']
+  PyObject* py_descriptor = PyDict_GetItemString(dict, "DESCRIPTOR");
+  if (py_descriptor == NULL) {
+    PyErr_SetString(PyExc_TypeError, "Message class has no DESCRIPTOR");
+    return NULL;
+  }
+
+  // DESCRIPTOR comes from user code, so verify its type before unwrapping it.
+  PyTypeObject* descriptor_type = state->descriptor_types[kPyUpb_Descriptor];
+  if (!PyObject_TypeCheck(py_descriptor, descriptor_type)) {
+    return PyErr_Format(PyExc_TypeError, "Expected a message Descriptor");
+  }
+
+  const upb_msgdef* m = PyUpb_Descriptor_GetDef(py_descriptor);
+  PyObject* ret = PyUpb_ObjCache_Get(upb_msgdef_layout(m));
+  if (ret) return ret;
+  return PyUpb_MessageMeta_DoCreateClass(py_descriptor, name, dict);
+}
+
+// tp_dealloc for MessageMeta: removes the class from the object cache and
+// releases its descriptor reference.
+static void PyUpb_MessageMeta_Dealloc(PyObject* self) {
+  PyUpb_MessageMeta* meta = PyUpb_GetMessageMeta(self);
+  PyUpb_ObjCache_Delete(meta->layout);
+  Py_DECREF(meta->py_message_descriptor);
+  PyUpb_Dealloc(self);
+}
+
+// Creates the message class for `py_descriptor` with an empty namespace,
+// using the message's full name as the class name.
+// Returns a new reference, or NULL with an exception set.
+PyObject* PyUpb_MessageMeta_CreateClass(PyObject* py_descriptor) {
+  const upb_msgdef* m = PyUpb_Descriptor_GetDef(py_descriptor);
+  PyObject* dict = PyDict_New();
+  if (!dict) return NULL;
+  PyObject* ret = PyUpb_MessageMeta_DoCreateClass(py_descriptor,
+                                                  upb_msgdef_fullname(m), dict);
+  Py_DECREF(dict);
+  return ret;
+}
+
+/*
+// Compute some class attributes on the fly:
+// - All the _FIELD_NUMBER attributes, for all fields and nested extensions.
+// Returns a new reference, or NULL with an exception set.
+static PyObject* GetClassAttribute(CMessageClass *self, PyObject* name) {
+  char* attr;
+  Py_ssize_t attr_size;
+  static const char kSuffix[] = "_FIELD_NUMBER";
+  if (PyString_AsStringAndSize(name, &attr, &attr_size) >= 0 &&
+      HasSuffixString(StringPiece(attr, attr_size), kSuffix)) {
+    std::string field_name(attr, attr_size - sizeof(kSuffix) + 1);
+    LowerString(&field_name);
+
+    // Try to find a field with the given name, without the suffix.
+    const FieldDescriptor* field =
+        self->message_descriptor->FindFieldByLowercaseName(field_name);
+    if (!field) {
+      // Search nested extensions as well.
+      field =
+          self->message_descriptor->FindExtensionByLowercaseName(field_name);
+    }
+    if (field) {
+      return PyInt_FromLong(field->number());
+    }
+  }
+  PyErr_SetObject(PyExc_AttributeError, name);
+  return NULL;
+}
+
+*/
+
+// Resolves `name` as something nested inside this message type by looking up
+// "<message full name>.<name>" in the symbol table.  In order, it tries a
+// nested message (returns its class), a nested enum (returns an enum type
+// wrapper), an enum value (returns its number), and an extension (returns
+// its field descriptor).
+//
+// Returns a new reference, or NULL if nothing matched or an error occurred.
+// NULL may be returned without an exception set.
+static PyObject* PyUpb_MessageMeta_GetDynamicAttr(PyObject* self,
+                                                  PyObject* name) {
+  const char* name_buf = PyUpb_GetStrData(name);
+  // NOTE(review): presumably NULL means `name` was not a usable string --
+  // confirm against PyUpb_GetStrData().
+  if (!name_buf) return NULL;
+  const upb_msgdef* msgdef = PyUpb_MessageMeta_GetMsgdef(self);
+  const upb_filedef* filedef = upb_msgdef_file(msgdef);
+  const upb_symtab* symtab = upb_filedef_symtab(filedef);
+
+  PyObject* py_key =
+      PyBytes_FromFormat("%s.%s", upb_msgdef_fullname(msgdef), name_buf);
+  if (!py_key) return NULL;
+  const char* key = PyUpb_GetStrData(py_key);
+  if (!key) {
+    Py_DECREF(py_key);
+    return NULL;
+  }
+  PyObject* ret = NULL;
+  const upb_msgdef* nested = upb_symtab_lookupmsg(symtab, key);
+  const upb_enumdef* enumdef;
+  const upb_enumvaldef* enumval;
+  const upb_fielddef* ext;
+
+  if (nested) {
+    ret = PyUpb_Descriptor_GetClass(nested);
+  } else if ((enumdef = upb_symtab_lookupenum(symtab, key))) {
+    PyUpb_ModuleState* state = PyUpb_ModuleState_Get();
+    PyObject* klass = state->enum_type_wrapper_class;
+    // NOTE(review): assumes PyUpb_EnumDescriptor_Get() returns a new
+    // reference, like PyUpb_FieldDescriptor_Get() whose result is returned
+    // to the caller below -- confirm.  The wrapper call does not consume it,
+    // so it is released here.
+    PyObject* enum_desc = PyUpb_EnumDescriptor_Get(enumdef);
+    if (enum_desc) {
+      ret = PyObject_CallFunctionObjArgs(klass, enum_desc, NULL);
+      Py_DECREF(enum_desc);
+    }
+  } else if ((enumval = upb_symtab_lookupenumval(symtab, key))) {
+    ret = PyLong_FromLong(upb_enumvaldef_number(enumval));
+  } else if ((ext = upb_symtab_lookupext(symtab, key))) {
+    ret = PyUpb_FieldDescriptor_Get(ext);
+  }
+
+  Py_DECREF(py_key);
+
+  return ret;
+}
+
+static PyObject* PyUpb_MessageMeta_GetAttr(PyObject* self, PyObject* name) {
+  // We want to first delegate to the type's tp_dict to retrieve any attributes
+  // that were previously calculated and cached in the type's dict.
+  PyObject* ret = cpython_bits.type_getattro(self, name);
+  if (ret) return ret;
+
+  // We did not find a cached attribute.  Try to calculate the attribute
+  // dynamically, using the descriptor as an argument.
+  PyErr_Clear();
+  ret = PyUpb_MessageMeta_GetDynamicAttr(self, name);
+
+  if (ret) {
+    PyObject_SetAttr(self, name, ret);
+    PyErr_Clear();
+    return ret;
+  }
+
+  PyErr_SetObject(PyExc_AttributeError, name);
+  return NULL;
+}
+
+static PyType_Slot PyUpb_MessageMeta_Slots[] = {
+    {Py_tp_new, PyUpb_MessageMeta_New},
+    {Py_tp_dealloc, PyUpb_MessageMeta_Dealloc},
+    {Py_tp_getattro, PyUpb_MessageMeta_GetAttr},
+    {0, NULL}};
+
+static PyType_Spec PyUpb_MessageMeta_Spec = {
+    PYUPB_MODULE_NAME ".MessageMeta",  // tp_name
+    0,  // To be filled in by size of base     // tp_basicsize
+    0,  // tp_itemsize
+    // TODO(haberman): remove BASETYPE, Python should just use MessageMeta
+    // directly instead of subclassing it.
+    Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE,  // tp_flags
+    PyUpb_MessageMeta_Slots,
+};
+
+// Placeholder for the initialization of `cpython_bits`.
+// TODO: fill in `cpython_bits` here.  Until then this reports success so that
+// PyUpb_InitMessage() can proceed; the previous empty body fell off the end
+// of a non-void function whose result is used, which is undefined behavior.
+bool PyUpb_GetTypeFuncs(void) {
+  return true;
+}
+
+// Registers the CMessage type and the MessageMeta metatype on module `m` and
+// records both in the module state.  Returns false on failure.
+bool PyUpb_InitMessage(PyObject* m) {
+  if (!PyUpb_GetTypeFuncs()) return false;
+
+  PyUpb_ModuleState* state = PyUpb_ModuleState_GetFromModule(m);
+  state->cmessage_type = PyUpb_AddClass(m, &PyUpb_CMessage_Spec);
+
+  if (!state->cmessage_type) return false;
+
+  PyObject* bases = Py_BuildValue("(O)", &PyType_Type);
+  if (!bases) return false;
+  // MessageMeta instances are type objects with a PyUpb_MessageMeta appended,
+  // so the size is only known once the size of `type` itself is known.
+  PyUpb_MessageMeta_Spec.basicsize =
+      cpython_bits.type_basicsize + sizeof(PyUpb_MessageMeta);
+  PyObject* type = PyType_FromSpecWithBases(&PyUpb_MessageMeta_Spec, bases);
+  Py_DECREF(bases);
+
+  if (!type) return false;
+
+  // PyModule_AddObject() steals the reference only on success.
+  if (PyModule_AddObject(m, "MessageMeta", type) < 0) {
+    Py_DECREF(type);
+    return false;
+  }
+
+  state->message_meta_type = (PyTypeObject*)type;
+  return true;
+}
diff --git a/python/message.h b/python/message.h
new file mode 100644
index 0000000000..e28f24f4f0
--- /dev/null
+++ b/python/message.h
@@ -0,0 +1,60 @@
+/*
+ * Copyright (c) 2009-2021, Google LLC
+ * All rights reserved.
+ * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * * Neither the name of Google LLC nor the + * names of its contributors may be used to endorse or promote products + * derived from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL Google LLC BE LIABLE FOR ANY DIRECT, + * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES + * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND + * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ */
+
+#ifndef PYPB_MESSAGE_H__
+#define PYPB_MESSAGE_H__
+
+#include <stdbool.h>
+
+#include "python/protobuf.h"
+#include "upb/reflection.h"
+
+PyObject* PyUpb_MessageMeta_CreateClass(PyObject* py_descriptor);
+void PyUpb_CMessage_CacheDelete(PyObject* _self, const upb_fielddef* f);
+void PyUpb_CMessage_SetConcreteSubobj(PyObject* _self, const upb_fielddef* f,
+                                      upb_msgval subobj);
+PyObject* PyUpb_CMessage_Get(upb_msg* u_msg, const upb_msgdef* m,
+                             PyObject* arena);
+bool PyUpb_CMessage_Check(PyObject* self);
+upb_msg* PyUpb_CMessage_GetIfWritable(PyObject* _self);
+const upb_msgdef* PyUpb_CMessage_GetMsgdef(PyObject* self);
+PyObject* PyUpb_CMessage_MergeFrom(PyObject* self, PyObject* arg);
+PyObject* PyUpb_CMessage_MergeFromString(PyObject* self, PyObject* arg);
+PyObject* PyUpb_CMessage_SerializeToString(PyObject* self, PyObject* args,
+                                           PyObject* kwargs);
+int PyUpb_CMessage_InitAttributes(PyObject* _self, PyObject* args,
+                                  PyObject* kwargs);
+void PyUpb_CMessage_ClearExtensionDict(PyObject* _self);
+PyObject* PyUpb_CMessage_GetFieldValue(PyObject* _self,
+                                       const upb_fielddef* field);
+int PyUpb_CMessage_SetFieldValue(PyObject* _self, const upb_fielddef* field,
+                                 PyObject* value);
+int PyUpb_CMessage_GetVersion(PyObject* _self);
+
+bool PyUpb_InitMessage(PyObject* m);
+
+#endif  // PYPB_MESSAGE_H__
diff --git a/python/protobuf.c b/python/protobuf.c
index 7e010010e8..367f70d796 100644
--- a/python/protobuf.c
+++ b/python/protobuf.c
@@ -68,6 +68,38 @@ PyUpb_ModuleState *PyUpb_ModuleState_Get(void) {
   return PyUpb_ModuleState_GetFromModule(module);
 }
 
+// Returns a borrowed reference to WKTBASES from
+// google.protobuf.internal.well_known_types, importing it on first use.
+// Returns NULL with a Python exception set if the lookup fails.
+PyObject *PyUpb_GetWktBases(PyUpb_ModuleState *state) {
+  if (!state->wkt_bases) {
+    PyObject *wkt_module =
+        PyImport_ImportModule("google.protobuf.internal.well_known_types");
+
+    if (wkt_module == NULL) {
+      return NULL;
+    }
+
+    PyObject *wkt_bases = PyObject_GetAttrString(wkt_module, "WKTBASES");
+    Py_DECREF(wkt_module);
+    if (wkt_bases == NULL) {
+      return NULL;
+    }
+
+    PyObject *m = PyState_FindModule(&module_def);
+    // Make sure it is GC'd.  PyModule_AddObject() steals the reference only on
+    // success, so it has to be released here if registration fails.
+    if (PyModule_AddObject(m, "__internal_wktbases", wkt_bases) < 0) {
+      Py_DECREF(wkt_bases);
+      return NULL;
+    }
+
+    state->wkt_bases = wkt_bases;
+  }
+
+  return state->wkt_bases;
+}
+
 // -----------------------------------------------------------------------------
 // ObjectCache
 // -----------------------------------------------------------------------------
diff --git a/python/protobuf.h b/python/protobuf.h
index d24f3ab672..161b26b887 100644
--- a/python/protobuf.h
+++ b/python/protobuf.h
@@ -61,10 +61,20 @@ typedef struct {
   // From descriptor_pool.c
   PyTypeObject *descriptor_pool_type;
 
+  // From message.c
+  PyObject *decode_error_class;
+  PyObject* descriptor_string;
+  PyObject *encode_error_class;
+  PyObject *enum_type_wrapper_class;
+  PyObject *message_class;
+  PyTypeObject *cmessage_type;
+  PyTypeObject *message_meta_type;
+
   // From protobuf.c
+  PyObject *wkt_bases;
+  PyTypeObject *arena_type;
   upb_arena *obj_cache_arena;
   upb_inttable obj_cache;
-  PyTypeObject *arena_type;
 } PyUpb_ModuleState;
 
 // Returns the global state object from the current interpreter.  The current
@@ -72,18 +82,58 @@ typedef struct {
 PyUpb_ModuleState *PyUpb_ModuleState_Get(void);
 PyUpb_ModuleState *PyUpb_ModuleState_GetFromModule(PyObject *module);
 
+// Returns NULL if module state is not yet available (during startup).
+// Any use of the module state during startup needs to be passed explicitly.
+PyUpb_ModuleState* PyUpb_ModuleState_MaybeGet(void);
+
+// Returns:
+//   from google.protobuf.internal.well_known_types import WKTBASES
+//
+// This has to be imported lazily rather than at module load time, because
+// otherwise it would cause a circular import.
+PyObject *PyUpb_GetWktBases(PyUpb_ModuleState *state);
+
 // -----------------------------------------------------------------------------
-// ObjectCache
+// WeakMap
 // -----------------------------------------------------------------------------
 
-// The ObjectCache is a weak map that maps C pointers to the corresponding
-// Python wrapper object. 
We want a consistent Python wrapper object for each -// C object, both to save memory and to provide object stability (ie. x is x). +// A WeakMap maps C pointers to the corresponding Python wrapper object. We +// want a consistent Python wrapper object for each C object, both to save +// memory and to provide object stability (ie. x is x). // // Each wrapped object should add itself to the map when it is constructed and // remove itself from the map when it is destroyed. The map is weak so it does // not take references to the cached objects. +struct PyUpb_WeakMap; +typedef struct PyUpb_WeakMap PyUpb_WeakMap; + +PyUpb_WeakMap *PyUpb_WeakMap_New(void); +void PyUpb_WeakMap_Free(PyUpb_WeakMap *map); + +// Adds the given object to the map, indexed by the given key. +void PyUpb_WeakMap_Add(PyUpb_WeakMap *map, const void *key, PyObject *py_obj); + +// Removes the given key from the cache. It must exist in the cache currently. +void PyUpb_WeakMap_Delete(PyUpb_WeakMap *map, const void *key); +void PyUpb_WeakMap_TryDelete(PyUpb_WeakMap *map, const void *key); + +// Returns a new reference to an object if it exists, otherwise returns NULL. +PyObject *PyUpb_WeakMap_Get(PyUpb_WeakMap *map, const void *key); + +#define PYUPB_WEAKMAP_BEGIN UPB_INTTABLE_BEGIN + +bool PyUpb_WeakMap_Next(PyUpb_WeakMap *map, const void **key, PyObject **obj, + intptr_t *iter); +void PyUpb_WeakMap_DeleteIter(PyUpb_WeakMap *map, intptr_t *iter); + +// ----------------------------------------------------------------------------- +// ObjectCache +// ----------------------------------------------------------------------------- + +// The object cache is a global WeakMap for mapping upb objects to the +// corresponding wrapper. + // Adds the given object to the cache, indexed by the given key. void PyUpb_ObjCache_Add(const void *key, PyObject *py_obj);