Merge pull request #458 from haberman/python-descriptor

Implemented descriptor wrappers in Python
pull/13171/head
Joshua Haberman 3 years ago committed by GitHub
commit 0d87ddc7d7
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
  1. 1
      .github/workflows/bazel_tests.yml
  2. 1
      bazel/workspace_deps.bzl
  3. 2
      python/BUILD
  4. 1526
      python/descriptor.c
  5. 49
      python/descriptor.h
  6. 26
      python/descriptor_pool.c
  7. 6
      python/descriptor_pool.h
  8. 73
      python/protobuf.c
  9. 31
      python/protobuf.h
  10. 42
      python/python.h

@ -15,6 +15,7 @@ jobs:
runs-on: ${{ matrix.os }} runs-on: ${{ matrix.os }}
strategy: strategy:
fail-fast: false # Don't cancel all jobs if one fails.
matrix: matrix:
include: include:
- { CC: clang, os: ubuntu-20.04, flags: "" } - { CC: clang, os: ubuntu-20.04, flags: "" }

@ -16,7 +16,6 @@ def upb_deps():
sha256 = "b10bf4e2d1a7586f54e64a5d9e7837e5188fc75ae69e36f215eb01def4f9721b", sha256 = "b10bf4e2d1a7586f54e64a5d9e7837e5188fc75ae69e36f215eb01def4f9721b",
strip_prefix = "protobuf-3.15.3", strip_prefix = "protobuf-3.15.3",
urls = [ urls = [
"https://mirror.bazel.build/github.com/protocolbuffers/protobuf/archive/v3.15.3.tar.gz",
"https://github.com/protocolbuffers/protobuf/archive/v3.15.3.tar.gz", "https://github.com/protocolbuffers/protobuf/archive/v3.15.3.tar.gz",
], ],
) )

@ -45,6 +45,7 @@ cc_binary(
"descriptor_pool.h", "descriptor_pool.h",
"protobuf.c", "protobuf.c",
"protobuf.h", "protobuf.h",
"python.h",
], ],
copts = UPB_DEFAULT_COPTS + [ copts = UPB_DEFAULT_COPTS + [
# The Python API requires patterns that are ISO C incompatible, like # The Python API requires patterns that are ISO C incompatible, like
@ -64,6 +65,7 @@ cc_binary(
"//:reflection", "//:reflection",
"//:upb", "//:upb",
"//upb/util:compare", "//upb/util:compare",
"//upb/util:def_to_proto",
"@system_python//:python_headers", "@system_python//:python_headers",
], ],
) )

File diff suppressed because it is too large Load Diff

@ -30,21 +30,50 @@
#include <stdbool.h> #include <stdbool.h>
#include "protobuf.h" #include "python/python.h"
#include "upb/def.h" #include "upb/def.h"
PyObject *PyUpb_FieldDescriptor_GetOrCreateWrapper(const upb_fielddef *field, typedef enum {
PyObject *pool); kPyUpb_Descriptor = 0,
PyObject *PyUpb_FileDescriptor_GetOrCreateWrapper(const upb_filedef *file, kPyUpb_EnumDescriptor = 1,
PyObject *pool); kPyUpb_EnumValueDescriptor = 2,
kPyUpb_FieldDescriptor = 3,
kPyUpb_FileDescriptor = 4,
kPyUpb_MethodDescriptor = 5,
kPyUpb_OneofDescriptor = 6,
kPyUpb_ServiceDescriptor = 7,
kPyUpb_Descriptor_Count = 8,
} PyUpb_DescriptorType;
const upb_filedef *PyUpb_FileDescriptor_GetDef(PyObject *file); // Given a descriptor object |desc|, returns a Python message class object for
// the msgdef |m|, which must be from the same pool.
PyObject* PyUpb_Descriptor_GetClass(const upb_msgdef* m);
// Returns a Python wrapper object for the given def. This will return an
// existing object if one already exists, otherwise a new object will be
// created. The caller always owns a ref on the returned object.
PyObject* PyUpb_Descriptor_Get(const upb_msgdef* msgdef);
PyObject* PyUpb_EnumDescriptor_Get(const upb_enumdef* enumdef);
PyObject* PyUpb_FieldDescriptor_Get(const upb_fielddef* field);
PyObject* PyUpb_FileDescriptor_Get(const upb_filedef* file);
PyObject* PyUpb_OneofDescriptor_Get(const upb_oneofdef* oneof);
PyObject* PyUpb_EnumValueDescriptor_Get(const upb_enumvaldef* enumval);
PyObject* PyUpb_Descriptor_GetOrCreateWrapper(const upb_msgdef* msg);
PyObject* PyUpb_ServiceDescriptor_Get(const upb_servicedef* s);
// Returns the underlying |def| for a given wrapper object. The caller must
// have already verified that the given Python object is of the expected type.
const upb_filedef* PyUpb_FileDescriptor_GetDef(PyObject* file);
const upb_fielddef* PyUpb_FieldDescriptor_GetDef(PyObject* file);
const upb_msgdef* PyUpb_Descriptor_GetDef(PyObject* _self);
const void* PyUpb_AnyDescriptor_GetDef(PyObject* _self);
// Returns the underlying |def| for a given wrapper object. The caller must // Returns the underlying |def| for a given wrapper object. The caller must
// have already verified that the given Python object is of the expected type. // have already verified that the given Python object is of the expected type.
const upb_filedef *PyUpb_FileDescriptor_GetDef(PyObject *file); const upb_filedef* PyUpb_FileDescriptor_GetDef(PyObject* file);
const void *PyUpb_AnyDescriptor_GetDef(PyObject *_self); const void* PyUpb_AnyDescriptor_GetDef(PyObject* _self);
bool PyUpb_InitDescriptor(PyObject *m); // Module-level init.
bool PyUpb_InitDescriptor(PyObject* m);
#endif // PYUPB_DESCRIPTOR_H__ #endif // PYUPB_DESCRIPTOR_H__

@ -41,6 +41,11 @@ typedef struct {
PyObject* db; PyObject* db;
} PyUpb_DescriptorPool; } PyUpb_DescriptorPool;
// Returns the default descriptor pool stored in the module state of the
// current interpreter. The stored pointer is returned as-is; this function
// does not adjust any reference count.
PyObject* PyUpb_DescriptorPool_GetDefaultPool(void) {
  PyUpb_ModuleState* state = PyUpb_ModuleState_Get();
  return state->default_pool;
}
static PyObject* PyUpb_DescriptorPool_DoCreate(PyTypeObject* type, static PyObject* PyUpb_DescriptorPool_DoCreate(PyTypeObject* type,
PyObject* db) { PyObject* db) {
PyUpb_DescriptorPool* pool = PyObject_GC_New(PyUpb_DescriptorPool, type); PyUpb_DescriptorPool* pool = PyObject_GC_New(PyUpb_DescriptorPool, type);
@ -48,9 +53,14 @@ static PyObject* PyUpb_DescriptorPool_DoCreate(PyTypeObject* type,
pool->db = db; pool->db = db;
Py_XINCREF(pool->db); Py_XINCREF(pool->db);
PyObject_GC_Track(&pool->ob_base); PyObject_GC_Track(&pool->ob_base);
PyUpb_ObjCache_Add(pool->symtab, &pool->ob_base);
return &pool->ob_base; return &pool->ob_base;
} }
// Returns the upb symtab held by the DescriptorPool wrapper |pool|. The
// argument is cast without any type check, so it must already be a
// PyUpb_DescriptorPool.
upb_symtab* PyUpb_DescriptorPool_GetSymtab(PyObject* pool) {
  PyUpb_DescriptorPool* self = (PyUpb_DescriptorPool*)pool;
  return self->symtab;
}
static int PyUpb_DescriptorPool_Traverse(PyUpb_DescriptorPool* self, static int PyUpb_DescriptorPool_Traverse(PyUpb_DescriptorPool* self,
visitproc visit, void* arg) { visitproc visit, void* arg) {
Py_VISIT(self->db); Py_VISIT(self->db);
@ -62,10 +72,17 @@ static int PyUpb_DescriptorPool_Clear(PyUpb_DescriptorPool* self) {
return 0; return 0;
} }
// Looks up the Python DescriptorPool wrapper registered in the object cache
// under |symtab| and returns it. Pools add themselves to the cache when they
// are created, so a miss is treated as a programming error (checked only in
// assert-enabled builds).
PyObject* PyUpb_DescriptorPool_Get(const upb_symtab* symtab) {
  PyObject* cached = PyUpb_ObjCache_Get(symtab);
  assert(cached);
  return cached;
}
static void PyUpb_DescriptorPool_Dealloc(PyUpb_DescriptorPool* self) { static void PyUpb_DescriptorPool_Dealloc(PyUpb_DescriptorPool* self) {
upb_symtab_free(self->symtab); upb_symtab_free(self->symtab);
PyUpb_DescriptorPool_Clear(self); PyUpb_DescriptorPool_Clear(self);
PyObject_GC_Del(self); PyUpb_ObjCache_Delete(self->symtab);
PyUpb_Dealloc(self);
} }
/* /*
@ -100,6 +117,7 @@ static PyObject* PyUpb_DescriptorPool_AddSerializedFile(
char* buf; char* buf;
Py_ssize_t size; Py_ssize_t size;
upb_arena* arena = upb_arena_new(); upb_arena* arena = upb_arena_new();
if (!arena) PYUPB_RETURN_OOM;
PyObject* result = NULL; PyObject* result = NULL;
if (self->db) { if (self->db) {
@ -107,7 +125,7 @@ static PyObject* PyUpb_DescriptorPool_AddSerializedFile(
PyExc_ValueError, PyExc_ValueError,
"Cannot call Add on a DescriptorPool that uses a DescriptorDatabase. " "Cannot call Add on a DescriptorPool that uses a DescriptorDatabase. "
"Add your file to the underlying database."); "Add your file to the underlying database.");
return NULL; goto done;
} }
if (PyBytes_AsStringAndSize(serialized_pb, &buf, &size) < 0) { if (PyBytes_AsStringAndSize(serialized_pb, &buf, &size) < 0) {
@ -132,7 +150,7 @@ static PyObject* PyUpb_DescriptorPool_AddSerializedFile(
goto done; goto done;
} }
result = PyUpb_FileDescriptor_GetOrCreateWrapper(filedef, _self); result = PyUpb_FileDescriptor_Get(filedef);
done: done:
upb_arena_free(arena); upb_arena_free(arena);
@ -157,7 +175,7 @@ static PyObject* PyUpb_DescriptorPool_FindExtensionByName(PyObject* _self,
return PyErr_Format(PyExc_KeyError, "Couldn't find extension %.200s", name); return PyErr_Format(PyExc_KeyError, "Couldn't find extension %.200s", name);
} }
return PyUpb_FieldDescriptor_GetOrCreateWrapper(field, _self); return PyUpb_FieldDescriptor_Get(field);
} }
static PyMethodDef PyUpb_DescriptorPool_Methods[] = { static PyMethodDef PyUpb_DescriptorPool_Methods[] = {

@ -32,6 +32,12 @@
#include "protobuf.h" #include "protobuf.h"
PyObject* PyUpb_DescriptorPool_GetSerializedPb(PyObject* _self,
const char* filename);
PyObject* PyUpb_DescriptorPool_Get(const upb_symtab* symtab);
upb_symtab* PyUpb_DescriptorPool_GetSymtab(PyObject* pool);
PyObject* PyUpb_DescriptorPool_GetDefaultPool(void);
bool PyUpb_InitDescriptorPool(PyObject* m); bool PyUpb_InitDescriptorPool(PyObject* m);
#endif // PYUPB_DESCRIPTOR_POOL_H__ #endif // PYUPB_DESCRIPTOR_POOL_H__

@ -28,6 +28,7 @@
#include "protobuf.h" #include "protobuf.h"
#include "descriptor.h" #include "descriptor.h"
#include "descriptor_containers.h"
#include "descriptor_pool.h" #include "descriptor_pool.h"
static void PyUpb_ModuleDealloc(void *module) { static void PyUpb_ModuleDealloc(void *module) {
@ -49,6 +50,11 @@ static struct PyModuleDef module_def = {PyModuleDef_HEAD_INIT,
// ModuleState // ModuleState
// ----------------------------------------------------------------------------- // -----------------------------------------------------------------------------
// Returns the module state for the current interpreter, or NULL when
// PyState_FindModule() cannot find the module (for example during the
// shutdown sequence, after the module itself has been deallocated).
PyUpb_ModuleState* PyUpb_ModuleState_MaybeGet(void) {
  PyObject* module = PyState_FindModule(&module_def);
  if (!module) return NULL;
  return PyModule_GetState(module);
}
PyUpb_ModuleState *PyUpb_ModuleState_GetFromModule(PyObject *module) { PyUpb_ModuleState *PyUpb_ModuleState_GetFromModule(PyObject *module) {
PyUpb_ModuleState *state = PyModule_GetState(module); PyUpb_ModuleState *state = PyModule_GetState(module);
assert(state); assert(state);
@ -56,8 +62,9 @@ PyUpb_ModuleState *PyUpb_ModuleState_GetFromModule(PyObject *module) {
return state; return state;
} }
PyUpb_ModuleState *PyUpb_ModuleState_Get() { PyUpb_ModuleState *PyUpb_ModuleState_Get(void) {
PyObject *module = PyState_FindModule(&module_def); PyObject *module = PyState_FindModule(&module_def);
assert(module);
return PyUpb_ModuleState_GetFromModule(module); return PyUpb_ModuleState_GetFromModule(module);
} }
@ -72,7 +79,14 @@ void PyUpb_ObjCache_Add(const void *key, PyObject *py_obj) {
} }
void PyUpb_ObjCache_Delete(const void *key) { void PyUpb_ObjCache_Delete(const void *key) {
PyUpb_ModuleState *s = PyUpb_ModuleState_Get(); PyUpb_ModuleState *s = PyUpb_ModuleState_MaybeGet();
if (!s) {
// During the shutdown sequence, our object's Dealloc() methods can be
// called *after* our module Dealloc() method has been called. At that
// point our state will be NULL and there is nothing to delete out of the
// map.
return;
}
upb_value val; upb_value val;
upb_inttable_remove(&s->obj_cache, (uintptr_t)key, &val); upb_inttable_remove(&s->obj_cache, (uintptr_t)key, &val);
assert(upb_value_getptr(val)); assert(upb_value_getptr(val));
@ -90,6 +104,50 @@ PyObject *PyUpb_ObjCache_Get(const void *key) {
} }
} }
// -----------------------------------------------------------------------------
// Arena
// -----------------------------------------------------------------------------
// Python object wrapping a upb_arena. The arena is created in
// PyUpb_Arena_New() and freed in PyUpb_Arena_Dealloc().
typedef struct {
PyObject_HEAD
// The wrapped arena; read back through PyUpb_Arena_Get().
upb_arena* arena;
} PyUpb_Arena;
PyObject* PyUpb_Arena_New(void) {
PyUpb_ModuleState* state = PyUpb_ModuleState_Get();
PyUpb_Arena* arena = (void*)PyType_GenericAlloc(state->arena_type, 0);
arena->arena = upb_arena_new();
return &arena->ob_base;
}
// tp_dealloc for the Arena type: frees the wrapped upb_arena first, then
// hands the Python object to the shared PyUpb_Dealloc() routine.
static void PyUpb_Arena_Dealloc(PyObject* self) {
  upb_arena* wrapped = PyUpb_Arena_Get(self);
  upb_arena_free(wrapped);
  PyUpb_Dealloc(self);
}
// Returns the upb_arena wrapped by the Python Arena object |arena|. The
// argument is cast without any type check, so it must already be a
// PyUpb_Arena.
upb_arena* PyUpb_Arena_Get(PyObject* arena) {
  PyUpb_Arena* self = (PyUpb_Arena*)arena;
  return self->arena;
}
// Slot table for the Arena type: the only slot installed is the custom
// deallocator.
static PyType_Slot PyUpb_Arena_Slots[] = {
{Py_tp_dealloc, PyUpb_Arena_Dealloc},
{0, NULL},  // Sentinel terminating the slot list.
};
// Type spec for the Arena type; PyUpb_InitArena() passes it to
// PyUpb_AddClass().
static PyType_Spec PyUpb_Arena_Spec = {
PYUPB_MODULE_NAME ".Arena",  // name
sizeof(PyUpb_Arena),  // basicsize
0, // itemsize
Py_TPFLAGS_DEFAULT,  // flags
PyUpb_Arena_Slots,  // slots
};
// Creates the Arena type from its spec via PyUpb_AddClass() and records it in
// the module state of |m|. Returns false when PyUpb_AddClass() yields NULL.
static bool PyUpb_InitArena(PyObject* m) {
  PyUpb_ModuleState* s = PyUpb_ModuleState_GetFromModule(m);
  PyTypeObject* type = PyUpb_AddClass(m, &PyUpb_Arena_Spec);
  s->arena_type = type;
  return type != NULL;
}
// ----------------------------------------------------------------------------- // -----------------------------------------------------------------------------
// Utilities // Utilities
// ----------------------------------------------------------------------------- // -----------------------------------------------------------------------------
@ -127,6 +185,14 @@ const char *PyUpb_GetStrData(PyObject *obj) {
} }
} }
// tp_new implementation for types that may not be instantiated directly.
// Always returns NULL with a Python exception set: normally a RuntimeError
// naming the type, or whatever error the "__name__" lookup raised if that
// lookup fails. |args| and |kwds| are ignored.
PyObject *PyUpb_Forbidden_New(PyObject *cls, PyObject *args, PyObject *kwds) {
  PyObject *name = PyObject_GetAttrString(cls, "__name__");
  // The %U conversion requires a valid unicode object, so never format with a
  // NULL name; the failed lookup has already set an exception.
  if (!name) return NULL;
  PyErr_Format(PyExc_RuntimeError,
               "Objects of type %U may not be created directly.", name);
  Py_XDECREF(name);
  return NULL;
}
// ----------------------------------------------------------------------------- // -----------------------------------------------------------------------------
// Module Entry Point // Module Entry Point
// ----------------------------------------------------------------------------- // -----------------------------------------------------------------------------
@ -139,7 +205,8 @@ PyMODINIT_FUNC PyInit__message(void) {
state->obj_cache_arena = upb_arena_new(); state->obj_cache_arena = upb_arena_new();
upb_inttable_init(&state->obj_cache, state->obj_cache_arena); upb_inttable_init(&state->obj_cache, state->obj_cache_arena);
if (!PyUpb_InitDescriptorPool(m) || !PyUpb_InitDescriptor(m)) { if (!PyUpb_InitDescriptorContainers(m) || !PyUpb_InitDescriptorPool(m) ||
!PyUpb_InitDescriptor(m) || !PyUpb_InitArena(m)) {
Py_DECREF(m); Py_DECREF(m);
return NULL; return NULL;
} }

@ -30,19 +30,14 @@
#include <stdbool.h> #include <stdbool.h>
#define Py_LIMITED_API 0x03060000 #include "python/descriptor.h"
#include <Python.h> #include "python/python.h"
// This function was not officially added to the limited API until Python 3.10.
// But in practice it has been stable since Python 3.1. See:
// https://bugs.python.org/issue41784
PyAPI_FUNC(const char *)
PyUnicode_AsUTF8AndSize(PyObject *unicode, Py_ssize_t *size);
#include "upb/table_internal.h" #include "upb/table_internal.h"
#define PYUPB_MODULE_NAME "google.protobuf.pyext._message" #define PYUPB_MODULE_NAME "google.protobuf.pyext._message"
#define PYUPB_RETURN_OOM return PyErr_SetNone(PyExc_MemoryError), NULL
// ----------------------------------------------------------------------------- // -----------------------------------------------------------------------------
// ModuleState // ModuleState
// ----------------------------------------------------------------------------- // -----------------------------------------------------------------------------
@ -52,8 +47,7 @@ PyAPI_FUNC(const char *)
typedef struct { typedef struct {
// From descriptor.c // From descriptor.c
PyTypeObject *field_descriptor_type; PyTypeObject *descriptor_types[kPyUpb_Descriptor_Count];
PyTypeObject *file_descriptor_type;
// From descriptor_containers.c // From descriptor_containers.c
PyTypeObject *by_name_map_type; PyTypeObject *by_name_map_type;
@ -61,12 +55,16 @@ typedef struct {
PyTypeObject *descriptor_iterator_type; PyTypeObject *descriptor_iterator_type;
PyTypeObject *generic_sequence_type; PyTypeObject *generic_sequence_type;
// From descriptor_pool.c
PyObject *default_pool;
// From descriptor_pool.c // From descriptor_pool.c
PyTypeObject *descriptor_pool_type; PyTypeObject *descriptor_pool_type;
// From protobuf.c // From protobuf.c
upb_arena *obj_cache_arena; upb_arena *obj_cache_arena;
upb_inttable obj_cache; upb_inttable obj_cache;
PyTypeObject *arena_type;
} PyUpb_ModuleState; } PyUpb_ModuleState;
// Returns the global state object from the current interpreter. The current // Returns the global state object from the current interpreter. The current
@ -95,6 +93,13 @@ void PyUpb_ObjCache_Delete(const void *key);
// Returns a new reference to an object if it exists, otherwise returns NULL. // Returns a new reference to an object if it exists, otherwise returns NULL.
PyObject *PyUpb_ObjCache_Get(const void *key); PyObject *PyUpb_ObjCache_Get(const void *key);
// -----------------------------------------------------------------------------
// Arena
// -----------------------------------------------------------------------------
PyObject *PyUpb_Arena_New(void);
upb_arena *PyUpb_Arena_Get(PyObject *arena);
// ----------------------------------------------------------------------------- // -----------------------------------------------------------------------------
// Utilities // Utilities
// ----------------------------------------------------------------------------- // -----------------------------------------------------------------------------
@ -104,6 +109,10 @@ PyTypeObject *AddObject(PyObject *m, const char *name, PyType_Spec *spec);
// Creates a Python type from `spec` and adds it to the given module `m`. // Creates a Python type from `spec` and adds it to the given module `m`.
PyTypeObject *PyUpb_AddClass(PyObject *m, PyType_Spec *spec); PyTypeObject *PyUpb_AddClass(PyObject *m, PyType_Spec *spec);
// A function that implements the tp_new slot for types that we do not allow
// users to create directly. This will immediately fail with an error message.
PyObject *PyUpb_Forbidden_New(PyObject *cls, PyObject *args, PyObject *kwds);
// Our standard dealloc func. It follows the guidance defined in: // Our standard dealloc func. It follows the guidance defined in:
// https://docs.python.org/3/c-api/typeobj.html#c.PyTypeObject.tp_dealloc // https://docs.python.org/3/c-api/typeobj.html#c.PyTypeObject.tp_dealloc
// However it tests Py_TPFLAGS_HEAPTYPE dynamically so that a single dealloc // However it tests Py_TPFLAGS_HEAPTYPE dynamically so that a single dealloc

@ -0,0 +1,42 @@
/*
* Copyright (c) 2009-2021, Google LLC
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
* * Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* * Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* * Neither the name of Google LLC nor the
* names of its contributors may be used to endorse or promote products
* derived from this software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL Google LLC BE LIABLE FOR ANY DIRECT,
* INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
* ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
* SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#ifndef PYUPB_PYTHON_H__
#define PYUPB_PYTHON_H__
// We restrict ourselves to the limited API, so that we will be ABI-compatible
// with any version of Python >= 3.6.1 (3.6.1 introduced PySlice_Unpack()).
#define Py_LIMITED_API 0x03060100
#include <Python.h>
// This function was not officially added to the limited API until Python 3.10.
// But in practice it has been stable since Python 3.1. See:
// https://bugs.python.org/issue41784
PyAPI_FUNC(const char *)
PyUnicode_AsUTF8AndSize(PyObject *unicode, Py_ssize_t *size);
#endif // PYUPB_PYTHON_H__
Loading…
Cancel
Save