Fixed Python memory leak in map lookup.

Previously we were allocating memory on the message's arena every time we performed a `map[key]` or `map.get(key)` operation. Because memory allocated on an arena is not released until the arena itself is freed, repeated lookups made the message's memory usage grow steadily. The allocation is unnecessary: the key's data is only needed ephemerally, for the duration of the lookup, so we can alias the Python object's string data instead of copying it.

This required fixing a bug in the string conversion performed by `PyUpb_PyToUpb()` in convert.c. Previously, in the `arena == NULL` case, if the user passed a bytes object instead of a unicode string, the code would return a pointer into a temporary Python object that had already been freed, leading to a use-after-free. I fixed this by referencing the bytes object's data directly and using utf8_range to verify that the data is valid UTF-8.

Fixes: https://github.com/protocolbuffers/protobuf/issues/14571
PiperOrigin-RevId: 578563555
pull/14583/head
Joshua Haberman 1 year ago committed by Joshua Haberman
parent 8defef58f4
commit 1711ebd455
  1. 1
      python/BUILD
  2. 32
      python/convert.c
  3. 1
      python/google/protobuf/internal/message_test.py
  4. 8
      python/map.c

@ -203,5 +203,6 @@ py_extension(
"//upb/util:compare",
"//upb/util:def_to_proto",
"//upb/util:required_fields",
"@utf8_range",
],
)

@ -35,6 +35,7 @@
#include "upb/message/map.h"
#include "upb/reflection/message.h"
#include "upb/util/compare.h"
#include "utf8_range.h"
// Must be last.
#include "upb/port/def.inc"
@ -259,20 +260,27 @@ bool PyUpb_PyToUpb(PyObject* obj, const upb_FieldDef* f, upb_MessageValue* val,
}
case kUpb_CType_String: {
Py_ssize_t size;
const char* ptr;
PyObject* unicode = NULL;
if (PyBytes_Check(obj)) {
unicode = obj = PyUnicode_FromEncodedObject(obj, "utf-8", NULL);
if (!obj) return false;
// Use the object's bytes if they are valid UTF-8.
char* ptr;
if (PyBytes_AsStringAndSize(obj, &ptr, &size) < 0) return false;
if (utf8_range2((const unsigned char*)ptr, size) != 0) {
// Invalid UTF-8. Try to convert the message to a Python Unicode
// object, even though we know this will fail, just to get the
// idiomatic Python error message.
obj = PyUnicode_FromEncodedObject(obj, "utf-8", NULL);
assert(!obj);
return false;
}
*val = PyUpb_MaybeCopyString(ptr, size, arena);
return true;
} else {
const char* ptr;
ptr = PyUnicode_AsUTF8AndSize(obj, &size);
if (PyErr_Occurred()) return false;
*val = PyUpb_MaybeCopyString(ptr, size, arena);
return true;
}
ptr = PyUnicode_AsUTF8AndSize(obj, &size);
if (PyErr_Occurred()) {
Py_XDECREF(unicode);
return false;
}
*val = PyUpb_MaybeCopyString(ptr, size, arena);
Py_XDECREF(unicode);
return true;
}
case kUpb_CType_Message:
PyErr_Format(PyExc_ValueError, "Message objects may not be assigned");

@ -48,7 +48,6 @@ UCS2_MAXUNICODE = 65535
warnings.simplefilter('error', DeprecationWarning)
@_parameterized.named_parameters(('_proto2', unittest_pb2),
('_proto3', unittest_proto3_arena_pb2))
@testing_refleaks.TestCase

@ -179,7 +179,7 @@ int PyUpb_MapContainer_AssignSubscript(PyObject* _self, PyObject* key,
const upb_FieldDef* val_f = upb_MessageDef_Field(entry_m, 1);
upb_Arena* arena = PyUpb_Arena_Get(self->arena);
upb_MessageValue u_key, u_val;
if (!PyUpb_PyToUpb(key, key_f, &u_key, arena)) return -1;
if (!PyUpb_PyToUpb(key, key_f, &u_key, NULL)) return -1;
if (val) {
if (!PyUpb_PyToUpb(val, val_f, &u_val, arena)) return -1;
@ -200,9 +200,8 @@ PyObject* PyUpb_MapContainer_Subscript(PyObject* _self, PyObject* key) {
const upb_MessageDef* entry_m = upb_FieldDef_MessageSubDef(f);
const upb_FieldDef* key_f = upb_MessageDef_Field(entry_m, 0);
const upb_FieldDef* val_f = upb_MessageDef_Field(entry_m, 1);
upb_Arena* arena = PyUpb_Arena_Get(self->arena);
upb_MessageValue u_key, u_val;
if (!PyUpb_PyToUpb(key, key_f, &u_key, arena)) return NULL;
if (!PyUpb_PyToUpb(key, key_f, &u_key, NULL)) return NULL;
if (!map || !upb_Map_Get(map, u_key, &u_val)) {
map = PyUpb_MapContainer_EnsureReified(_self);
upb_Arena* arena = PyUpb_Arena_Get(self->arena);
@ -256,9 +255,8 @@ static PyObject* PyUpb_MapContainer_Get(PyObject* _self, PyObject* args,
const upb_MessageDef* entry_m = upb_FieldDef_MessageSubDef(f);
const upb_FieldDef* key_f = upb_MessageDef_Field(entry_m, 0);
const upb_FieldDef* val_f = upb_MessageDef_Field(entry_m, 1);
upb_Arena* arena = PyUpb_Arena_Get(self->arena);
upb_MessageValue u_key, u_val;
if (!PyUpb_PyToUpb(key, key_f, &u_key, arena)) return NULL;
if (!PyUpb_PyToUpb(key, key_f, &u_key, NULL)) return NULL;
if (map && upb_Map_Get(map, u_key, &u_val)) {
return PyUpb_UpbToPy(u_val, val_f, self->arena);
}

Loading…
Cancel
Save