Added support for UnknownFieldSet.

PiperOrigin-RevId: 443143448
pull/13171/head
Joshua Haberman 3 years ago committed by Copybara-Service
parent 4978040db4
commit 3e0890c055
  1. 2
      bazel/workspace_deps.bzl
  2. 2
      python/BUILD
  3. 10
      python/pb_unit_tests/text_format_test_wrapper.py
  4. 16
      python/pb_unit_tests/unknown_fields_test_wrapper.py
  5. 4
      python/protobuf.c
  6. 4
      python/protobuf.h
  7. 404
      python/unknown_fields.c
  8. 39
      python/unknown_fields.h
  9. 5
      upb/def.h
  10. 2
      upb/msg_internal.h

@ -15,7 +15,7 @@ def upb_deps():
maybe(
git_repository,
name = "com_google_protobuf",
commit = "a69354f31b253856689ae765a9ea3217ec001873",
commit = "654c25352e831b991f668a2edd97ce6ea4e1f4c3",
remote = "https://github.com/protocolbuffers/protobuf.git",
patches = [
"//bazel:protobuf.patch",

@ -194,6 +194,8 @@ py_extension(
"python_api.h",
"repeated.c",
"repeated.h",
"unknown_fields.c",
"unknown_fields.h",
],
copts = UPB_DEFAULT_COPTS + select(LIMITED_API_FLAG_SELECT) + [
# The Python API requires patterns that are ISO C incompatible, like

@ -26,15 +26,5 @@
from google.protobuf.internal import text_format_test
import unittest
# These rely on the UnknownFields accessor, which we are trying to deprecate.
text_format_test.OnlyWorksWithProto2RightNowTests.testPrintUnknownFields.__unittest_expecting_failure__ = True
# begin:github_only
from google.protobuf.internal import _parameterized # copybara:strip_for_google3
sep = _parameterized._SEPARATOR
getattr(text_format_test.TextFormatMessageToStringTests, "testPrintUnknownFieldsEmbeddedMessageInBytes" + sep + "0").__unittest_expecting_failure__ = True
getattr(text_format_test.TextFormatMessageToStringTests, "testPrintUnknownFieldsEmbeddedMessageInBytes" + sep + "1").__unittest_expecting_failure__ = True
# end:github_only
if __name__ == '__main__':
unittest.main(module=text_format_test, verbosity=2)

@ -26,15 +26,15 @@
from google.protobuf.internal import unknown_fields_test
import unittest
unknown_fields_test.UnknownEnumValuesTest.testCheckUnknownFieldValueForEnum.__unittest_expecting_failure__ = True
unknown_fields_test.UnknownFieldsAccessorsTest.testCheckUnknownFieldValue.__unittest_expecting_failure__ = True
unknown_fields_test.UnknownFieldsAccessorsTest.testClear.__unittest_expecting_failure__ = True
unknown_fields_test.UnknownFieldsAccessorsTest.testMergeFrom.__unittest_expecting_failure__ = True
unknown_fields_test.UnknownFieldsAccessorsTest.testSubUnknownFields.__unittest_expecting_failure__ = True
# begin:google_only
# # Accesses deprecated internal accessor.
# unknown_fields_test.UnknownFieldsAccessorsTest.testCheckUnknownFieldValue.__unittest_expecting_failure__ = True
# end:google_only
# begin:github_only
# Until internal test changes propagate to unknown_fields_test.py
unknown_fields_test.UnknownFieldsAccessorsTest.testUnknownExtensions.__unittest_expecting_failure__ = True
unknown_fields_test.UnknownFieldsAccessorsTest.testUnknownField.__unittest_expecting_failure__ = True
unknown_fields_test.UnknownFieldsAccessorsTest.testUnknownFieldsNoMemoryLeak.__unittest_expecting_failure__ = True
unknown_fields_test.UnknownFieldsTest.testSerializeMessageSetWireFormatUnknownExtension.__unittest_expecting_failure__ = True
# end:github_only
if __name__ == '__main__':
unittest.main(module=unknown_fields_test, verbosity=2)

@ -34,6 +34,7 @@
#include "python/map.h"
#include "python/message.h"
#include "python/repeated.h"
#include "python/unknown_fields.h"
static void PyUpb_ModuleDealloc(void* module) {
PyUpb_ModuleState* s = PyModule_GetState(module);
@ -335,7 +336,8 @@ PyMODINIT_FUNC PyInit__message(void) {
if (!PyUpb_InitDescriptorContainers(m) || !PyUpb_InitDescriptorPool(m) ||
!PyUpb_InitDescriptor(m) || !PyUpb_InitArena(m) ||
!PyUpb_InitExtensionDict(m) || !PyUpb_Map_Init(m) ||
!PyUpb_InitMessage(m) || !PyUpb_Repeated_Init(m)) {
!PyUpb_InitMessage(m) || !PyUpb_Repeated_Init(m) ||
!PyUpb_UnknownFields_Init(m)) {
Py_DECREF(m);
return NULL;
}

@ -108,6 +108,10 @@ typedef struct {
// From repeated.c
PyTypeObject* repeated_composite_container_type;
PyTypeObject* repeated_scalar_container_type;
// From unknown_fields.c
PyTypeObject* unknown_fields_type;
PyObject* unknown_field_type;
} PyUpb_ModuleState;
// Returns the global state object from the current interpreter. The current

@ -0,0 +1,404 @@
/*
* Copyright (c) 2009-2021, Google LLC
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
* * Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* * Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* * Neither the name of Google LLC nor the
* names of its contributors may be used to endorse or promote products
* derived from this software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL Google LLC BE LIABLE FOR ANY DIRECT,
* INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
* ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
* SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#include "python/unknown_fields.h"
#include "python/message.h"
#include "python/protobuf.h"
// Decodes one base-128 varint from the byte range [ptr, end).
//
// Each byte contributes its low seven bits, least-significant group first; a
// clear high bit marks the last byte.  At most ten bytes are consumed.
//
// On success, stores the decoded value in `*val` and returns a pointer to the
// byte following the varint.  Returns NULL when the input ends before a
// terminating byte is seen or when ten bytes pass without one; `*val` then
// holds whatever was accumulated so far.
static const char* PyUpb_DecodeVarint(const char* ptr, const char* end,
                                      uint64_t* val) {
  uint64_t acc = 0;
  int consumed = 0;
  while (consumed < 10 && ptr < end) {
    const uint8_t b = (uint8_t)*ptr++;
    acc |= (uint64_t)(b & 0x7f) << (7 * consumed);
    consumed++;
    if (!(b & 0x80)) {
      *val = acc;
      return ptr;
    }
  }
  *val = acc;
  return NULL;
}
// -----------------------------------------------------------------------------
// UnknownFieldSet
// -----------------------------------------------------------------------------
typedef struct {
PyObject_HEAD;
PyObject* fields;
} PyUpb_UnknownFieldSet;
// tp_dealloc slot: drops the reference to the field list, if one was ever
// built, and then hands the object itself to PyUpb_Dealloc().
static void PyUpb_UnknownFieldSet_Dealloc(PyObject* _self) {
  PyUpb_UnknownFieldSet* set = (PyUpb_UnknownFieldSet*)_self;
  if (set->fields != NULL) {
    Py_DECREF(set->fields);
  }
  PyUpb_Dealloc(set);
}
PyUpb_UnknownFieldSet* PyUpb_UnknownFieldSet_NewBare() {
PyUpb_ModuleState* s = PyUpb_ModuleState_Get();
PyUpb_UnknownFieldSet* self =
(void*)PyType_GenericAlloc(s->unknown_fields_type, 0);
return self;
}
// Generic functions to skip a value or group.

// Defined further down; skipping a field and skipping a group call each other.
static const char* PyUpb_UnknownFieldSet_SkipGroup(const char* ptr,
                                                   const char* end,
                                                   int group_number);

// Skips the payload of one field whose tag has already been read.
//
// `ptr` points just past the tag, `end` is the end of the buffer and `tag` is
// the decoded tag (field number in the upper bits, wire type in the low three).
// Returns a pointer just past the field's payload, or NULL if the payload is
// truncated, an end-group tag shows up outside of group skipping, or the wire
// type is not one of the known ones.
static const char* PyUpb_UnknownFieldSet_SkipField(const char* ptr,
                                                   const char* end,
                                                   uint32_t tag) {
  switch (tag & 7) {
    case kUpb_WireType_Varint: {
      uint64_t ignored;
      return PyUpb_DecodeVarint(ptr, end, &ignored);
    }
    case kUpb_WireType_64Bit:
      return (end - ptr < 8) ? NULL : ptr + 8;
    case kUpb_WireType_32Bit:
      return (end - ptr < 4) ? NULL : ptr + 4;
    case kUpb_WireType_Delimited: {
      // Length prefix followed by that many payload bytes.
      uint64_t len;
      const char* payload = PyUpb_DecodeVarint(ptr, end, &len);
      if (payload == NULL) return NULL;
      if (end - payload < len) return NULL;
      return payload + len;
    }
    case kUpb_WireType_StartGroup: {
      int field_number = tag >> 3;
      return PyUpb_UnknownFieldSet_SkipGroup(ptr, end, field_number);
    }
    case kUpb_WireType_EndGroup:
      // A matching end tag is consumed by SkipGroup itself, so seeing one here
      // means it does not belong to any open group.
      return NULL;
    default:
      assert(0);
      return NULL;
  }
}
// Skips the body of a group whose start tag has already been read.
//
// `ptr` points just past the start-group tag, `end` is the end of the buffer
// and `group_number` is the field number of the group being skipped.  Fields
// are skipped one by one until the end-group tag carrying the same field
// number is found.
//
// Returns a pointer just past that end-group tag, or NULL if the buffer runs
// out first or any contained field fails to skip.
static const char* PyUpb_UnknownFieldSet_SkipGroup(const char* ptr,
                                                   const char* end,
                                                   int group_number) {
  // Build the tag in unsigned arithmetic: field numbers go up to 2^29 - 1, and
  // shifting such a value left by three as a signed int overflows, which is
  // undefined behavior.
  const uint32_t end_tag =
      ((uint32_t)group_number << 3) | (uint32_t)kUpb_WireType_EndGroup;
  while (ptr != end) {
    uint64_t tag;
    ptr = PyUpb_DecodeVarint(ptr, end, &tag);
    if (!ptr) return NULL;
    if (tag == end_tag) return ptr;
    ptr = PyUpb_UnknownFieldSet_SkipField(ptr, end, (uint32_t)tag);
    if (!ptr) return NULL;
  }
  // Ran out of input before the group was closed.
  return NULL;
}
// For MessageSet the established behavior is for UnknownFieldSet to interpret
// the MessageSet wire format:
// message MessageSet {
// repeated group Item = 1 {
// required int32 type_id = 2;
// required bytes message = 3;
// }
// }
//
// And create unknown fields like:
// UnknownField(type_id, WIRE_TYPE_DELIMITED, message)
//
// For any unknown fields that are unexpected per the wire format defined above,
// we drop them on the floor.
// Complete wire tags, (field_number << 3) | wire_type, for the four elements of
// the MessageSet layout that the MessageSet builder recognizes.
enum {
// Start of a `group Item = 1`.
kUpb_MessageSet_StartItemTag = (1 << 3) | kUpb_WireType_StartGroup,
// End of a `group Item = 1`.
kUpb_MessageSet_EndItemTag = (1 << 3) | kUpb_WireType_EndGroup,
// `type_id = 2`, encoded as a varint.
kUpb_MessageSet_TypeIdTag = (2 << 3) | kUpb_WireType_Varint,
// `message = 3`, encoded as length-delimited bytes.
kUpb_MessageSet_MessageTag = (3 << 3) | kUpb_WireType_Delimited,
};
// Parses the body of one MessageSet item group (the start tag has already been
// consumed) and, if it carried both a type id and a message payload, appends
// an unknown-field tuple (type_id, kUpb_WireType_Delimited, payload) to
// `self->fields`.
//
// Only the first non-zero type id and the first message payload are used; any
// other field inside the item is skipped.  `self->fields` must already be a
// list.
//
// Returns a pointer just past the item's end-group tag, or NULL if the data is
// truncated or malformed, or if creating or appending the Python objects
// fails (in which case a Python exception is set by the failing call).
static const char* PyUpb_UnknownFieldSet_BuildMessageSetItem(
    PyUpb_UnknownFieldSet* self, const char* ptr, const char* end) {
  PyUpb_ModuleState* s = PyUpb_ModuleState_Get();
  int type_id = 0;
  PyObject* msg = NULL;
  while (true) {
    if (ptr == end) goto err;
    uint64_t tag;
    ptr = PyUpb_DecodeVarint(ptr, end, &tag);
    if (!ptr) goto err;
    switch (tag) {
      case kUpb_MessageSet_EndItemTag:
        goto done;
      case kUpb_MessageSet_TypeIdTag: {
        uint64_t tmp;
        ptr = PyUpb_DecodeVarint(ptr, end, &tmp);
        if (!ptr) goto err;
        if (!type_id) type_id = (int)tmp;
        break;
      }
      case kUpb_MessageSet_MessageTag: {
        uint64_t size;
        ptr = PyUpb_DecodeVarint(ptr, end, &size);
        if (!ptr || end - ptr < size) goto err;
        // Keep only the first payload; later duplicates are skipped on purpose.
        if (!msg) {
          msg = PyBytes_FromStringAndSize(ptr, size);
          if (!msg) goto err;
        }
        ptr += size;
        break;
      }
      default:
        ptr = PyUpb_UnknownFieldSet_SkipField(ptr, end, tag);
        if (!ptr) goto err;
        break;
    }
  }
done:
  if (type_id && msg) {
    // "O" takes its own reference to msg, so msg is still ours to release.
    PyObject* field = PyObject_CallFunction(
        s->unknown_field_type, "iiO", type_id, kUpb_WireType_Delimited, msg);
    if (!field) goto err;
    // PyList_Append() adds its own reference; drop ours so the tuple is owned
    // by the list alone instead of being leaked.
    int status = PyList_Append(self->fields, field);
    Py_DECREF(field);
    if (status < 0) goto err;
  }
  Py_XDECREF(msg);
  return ptr;
err:
  Py_XDECREF(msg);
  return NULL;
}
// Builds `self->fields` from unknown-field bytes [ptr, end) that belong to a
// MessageSet.
//
// Every top-level MessageSet item group is handed to
// PyUpb_UnknownFieldSet_BuildMessageSetItem(); any other top-level field is
// skipped.
//
// Returns a pointer to where parsing stopped on success.  On failure returns
// NULL and leaves `self->fields` set to NULL.
static const char* PyUpb_UnknownFieldSet_BuildMessageSet(
    PyUpb_UnknownFieldSet* self, const char* ptr, const char* end) {
  self->fields = PyList_New(0);
  // Without this check a failed allocation would be dereferenced below and
  // passed to Py_DECREF() in the error path.
  if (!self->fields) return NULL;
  while (ptr < end) {
    uint64_t tag;
    ptr = PyUpb_DecodeVarint(ptr, end, &tag);
    if (!ptr) goto err;
    if (tag == kUpb_MessageSet_StartItemTag) {
      ptr = PyUpb_UnknownFieldSet_BuildMessageSetItem(self, ptr, end);
    } else {
      ptr = PyUpb_UnknownFieldSet_SkipField(ptr, end, tag);
    }
    if (!ptr) goto err;
  }
  return ptr;
err:
  Py_CLEAR(self->fields);
  return NULL;
}
// Defined below; building a group value recurses into the generic builder.
static const char* PyUpb_UnknownFieldSet_Build(PyUpb_UnknownFieldSet* self,
                                               const char* ptr, const char* end,
                                               int group_number);

// Converts the payload of one field (its tag has already been read) into a
// Python object stored in `*data`:
//   - varint, fixed64, fixed32: a Python int holding the unsigned value
//   - length-delimited:         a bytes object holding the payload
//   - start-group:              a nested UnknownFieldSet built from the group
//
// Returns a pointer just past the payload.  Returns NULL if the payload is
// truncated or malformed, or if a Python object could not be created; `*data`
// is then either NULL or a reference the caller must release.
//
// `self` and `group_number` are not used here.
static const char* PyUpb_UnknownFieldSet_BuildValue(
    PyUpb_UnknownFieldSet* self, const char* ptr, const char* end,
    int field_number, int wire_type, int group_number, PyObject** data) {
  (void)self;
  (void)group_number;
  *data = NULL;
  switch (wire_type) {
    case kUpb_WireType_Varint: {
      uint64_t val;
      ptr = PyUpb_DecodeVarint(ptr, end, &val);
      if (!ptr) return NULL;
      *data = PyLong_FromUnsignedLongLong(val);
      // Report an allocation failure instead of returning a valid cursor
      // together with a NULL object.
      return *data ? ptr : NULL;
    }
    case kUpb_WireType_64Bit: {
      if (end - ptr < 8) return NULL;
      // Fixed-width values are little-endian on the wire; assemble them byte
      // by byte so the result does not depend on the host byte order.
      uint64_t val = 0;
      for (int i = 7; i >= 0; i--) {
        val = (val << 8) | (uint8_t)ptr[i];
      }
      *data = PyLong_FromUnsignedLongLong(val);
      return *data ? ptr + 8 : NULL;
    }
    case kUpb_WireType_32Bit: {
      if (end - ptr < 4) return NULL;
      uint32_t val = 0;
      for (int i = 3; i >= 0; i--) {
        val = (val << 8) | (uint8_t)ptr[i];
      }
      *data = PyLong_FromUnsignedLongLong(val);
      return *data ? ptr + 4 : NULL;
    }
    case kUpb_WireType_Delimited: {
      uint64_t size;
      ptr = PyUpb_DecodeVarint(ptr, end, &size);
      if (!ptr || end - ptr < size) return NULL;
      *data = PyBytes_FromStringAndSize(ptr, size);
      return *data ? ptr + size : NULL;
    }
    case kUpb_WireType_StartGroup: {
      PyUpb_UnknownFieldSet* sub = PyUpb_UnknownFieldSet_NewBare();
      if (!sub) return NULL;
      // Publish the nested set before building it so that the caller releases
      // it if the build fails.
      *data = &sub->ob_base;
      return PyUpb_UnknownFieldSet_Build(sub, ptr, end, field_number);
    }
    default:
      assert(0);
      return NULL;
  }
}
// For non-MessageSet we just build the unknown fields exactly as they exist on
// the wire.
//
// Fills `self->fields` with one (field_number, wire_type, data) tuple per field
// found in [ptr, end).  `group_number` is the field number of the enclosing
// group; parsing stops successfully at an end-group tag carrying that number
// (top-level callers pass -1, which no end-group tag can match).
//
// Returns a pointer to where parsing stopped.  On failure returns NULL and
// leaves `self->fields` set to NULL; if the failure came from a Python call,
// that call has set the exception.
static const char* PyUpb_UnknownFieldSet_Build(PyUpb_UnknownFieldSet* self,
                                               const char* ptr, const char* end,
                                               int group_number) {
  PyUpb_ModuleState* s = PyUpb_ModuleState_Get();
  self->fields = PyList_New(0);
  if (!self->fields) return NULL;
  while (ptr < end) {
    uint64_t tag;
    ptr = PyUpb_DecodeVarint(ptr, end, &tag);
    if (!ptr) goto err;
    PyObject* data = NULL;
    int field_number = tag >> 3;
    int wire_type = tag & 7;
    if (wire_type == kUpb_WireType_EndGroup) {
      // Only the end tag of the group being parsed may appear here.
      if (field_number != group_number) goto err;
      return ptr;
    }
    ptr = PyUpb_UnknownFieldSet_BuildValue(self, ptr, end, field_number,
                                           wire_type, group_number, &data);
    if (!ptr || !data) {
      Py_XDECREF(data);
      goto err;
    }
    // The "N" format hands our reference to `data` over to the call, so it
    // must not be released again here, whether or not the call succeeds.
    PyObject* field = PyObject_CallFunction(s->unknown_field_type, "iiN",
                                            field_number, wire_type, data);
    if (!field) goto err;
    // PyList_Append() adds its own reference; drop ours so the tuple is owned
    // by the list alone instead of being leaked.
    int status = PyList_Append(self->fields, field);
    Py_DECREF(field);
    if (status < 0) goto err;
  }
  return ptr;
err:
  Py_CLEAR(self->fields);
  return NULL;
}
// tp_new slot: UnknownFieldSet(message).
//
// Takes one argument, `message` (positional or keyword), which must pass
// PyUpb_CMessage_Verify().  The result is empty when the message is not
// reified or has no unknown bytes; otherwise the unknown bytes are parsed with
// the MessageSet builder or the generic builder, depending on the message
// definition.
//
// Returns a new reference, or NULL with a Python exception set.
static PyObject* PyUpb_UnknownFieldSet_New(PyTypeObject* type, PyObject* args,
                                           PyObject* kwargs) {
  (void)type;
  char* kwlist[] = {"message", NULL};
  PyObject* py_msg = NULL;
  if (!PyArg_ParseTupleAndKeywords(args, kwargs, "O", kwlist, &py_msg)) {
    return NULL;
  }
  if (!PyUpb_CMessage_Verify(py_msg)) return NULL;
  PyUpb_UnknownFieldSet* self = PyUpb_UnknownFieldSet_NewBare();
  // Allocation can fail; do not dereference a NULL object below.
  if (!self) return NULL;
  upb_Message* msg = PyUpb_CMessage_GetIfReified(py_msg);
  if (!msg) return &self->ob_base;
  size_t size;
  const char* ptr = upb_Message_GetUnknown(msg, &size);
  if (size == 0) return &self->ob_base;
  const char* end = ptr + size;
  const upb_MessageDef* msgdef = PyUpb_CMessage_GetMsgdef(py_msg);
  bool ok;
  if (upb_MessageDef_IsMessageSet(msgdef)) {
    ok = PyUpb_UnknownFieldSet_BuildMessageSet(self, ptr, end) == end;
  } else {
    ok = PyUpb_UnknownFieldSet_Build(self, ptr, end, -1) == end;
  }
  if (!ok) {
    // The builders fail silently on malformed bytes.  Returning NULL with no
    // exception set would surface as an opaque SystemError, so raise one here
    // unless a Python call already did.
    if (!PyErr_Occurred()) {
      PyErr_SetString(PyExc_RuntimeError,
                      "Failed to parse unknown field data");
    }
    Py_DECREF(&self->ob_base);
    return NULL;
  }
  return &self->ob_base;
}
// sq_length slot: number of unknown fields, or 0 when no field list was built.
static Py_ssize_t PyUpb_UnknownFieldSet_Length(PyObject* _self) {
  PyObject* fields = ((PyUpb_UnknownFieldSet*)_self)->fields;
  if (fields == NULL) {
    return 0;
  }
  return PyObject_Length(fields);
}
// sq_item slot: returns a new reference to the unknown field at `index`.
//
// Raises IndexError itself when no field list was built; otherwise the lookup
// (and any error for a bad index) is delegated to PyList_GetItem().
static PyObject* PyUpb_UnknownFieldSet_GetItem(PyObject* _self,
                                               Py_ssize_t index) {
  PyObject* fields = ((PyUpb_UnknownFieldSet*)_self)->fields;
  if (fields != NULL) {
    // PyList_GetItem() hands back a borrowed reference; callers of this slot
    // expect an owned one.
    PyObject* item = PyList_GetItem(fields, index);
    if (item != NULL) {
      Py_INCREF(item);
    }
    return item;
  }
  PyErr_Format(PyExc_IndexError, "list index (%zd) out of range", index);
  return NULL;
}
// Slot table for the UnknownFieldSet type: construction, destruction, the two
// sequence accessors, and an explicit "unhashable" marker.
static PyType_Slot PyUpb_UnknownFieldSet_Slots[] = {
    {Py_tp_new, PyUpb_UnknownFieldSet_New},
    {Py_tp_dealloc, PyUpb_UnknownFieldSet_Dealloc},
    {Py_sq_length, PyUpb_UnknownFieldSet_Length},
    {Py_sq_item, PyUpb_UnknownFieldSet_GetItem},
    {Py_tp_hash, PyObject_HashNotImplemented},
    {0, NULL},  // sentinel
};
static PyType_Spec PyUpb_UnknownFieldSet_Spec = {
PYUPB_MODULE_NAME ".UnknownFieldSet", // tp_name
sizeof(PyUpb_UnknownFieldSet), // tp_basicsize
0, // tp_itemsize
Py_TPFLAGS_DEFAULT, // tp_flags
PyUpb_UnknownFieldSet_Slots,
};
// -----------------------------------------------------------------------------
// Top Level
// -----------------------------------------------------------------------------
// Creates the named-tuple class used to represent a single unknown field, by
// calling collections.namedtuple("PyUnknownField",
// ["field_number", "wire_type", "data"]).
//
// Returns a new reference to the class, or NULL with a Python exception set if
// the import, the attribute lookup or the call fails.
PyObject* PyUpb_UnknownFieldSet_CreateNamedTuple(void) {
  PyObject* collections = PyImport_ImportModule("collections");
  if (collections == NULL) {
    return NULL;
  }
  PyObject* result = NULL;
  PyObject* factory = PyObject_GetAttrString(collections, "namedtuple");
  if (factory != NULL) {
    result = PyObject_CallFunction(factory, "s[sss]", "PyUnknownField",
                                   "field_number", "wire_type", "data");
  }
  Py_DECREF(collections);
  Py_XDECREF(factory);
  return result;
}
// Module initialization hook for unknown-field support.
//
// Creates the UnknownFieldSet class and the PyUnknownField named tuple and
// stores both in the module state of `m`.  Both creation steps always run;
// returns true only if both produced a non-NULL result.
bool PyUpb_UnknownFields_Init(PyObject* m) {
  PyUpb_ModuleState* state = PyUpb_ModuleState_GetFromModule(m);
  state->unknown_fields_type = PyUpb_AddClass(m, &PyUpb_UnknownFieldSet_Spec);
  state->unknown_field_type = PyUpb_UnknownFieldSet_CreateNamedTuple();
  if (state->unknown_fields_type == NULL) {
    return false;
  }
  return state->unknown_field_type != NULL;
}

@ -0,0 +1,39 @@
/*
* Copyright (c) 2009-2021, Google LLC
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
* * Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* * Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* * Neither the name of Google LLC nor the
* names of its contributors may be used to endorse or promote products
* derived from this software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL Google LLC BE LIABLE FOR ANY DIRECT,
* INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
* ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
* SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
// Public interface of the Python UnknownFieldSet support.
#ifndef PYUPB_UNKNOWN_FIELDS_H__
#define PYUPB_UNKNOWN_FIELDS_H__
#include <stdbool.h>
#include "python/python_api.h"
// NOTE(review): no definition of this function is visible in
// python/unknown_fields.c as of this change (construction goes through the
// type's tp_new slot instead) -- confirm whether it is defined elsewhere or
// whether this declaration should be dropped.
PyObject* PyUpb_UnknownFields_New(PyObject* msg);
// Registers the UnknownFieldSet class on module `m` and creates the
// PyUnknownField named tuple, storing both in the module state.  Returns true
// on success and false if either could not be created.
bool PyUpb_UnknownFields_Init(PyObject* m);
#endif // PYUPB_UNKNOWN_FIELDS_H__

@ -210,6 +210,11 @@ UPB_INLINE bool upb_MessageDef_IsMapEntry(const upb_MessageDef* m) {
return google_protobuf_MessageOptions_map_entry(upb_MessageDef_Options(m));
}
/* Returns the value of the `message_set_wire_format` option from the options
 * of message definition `m`. */
UPB_INLINE bool upb_MessageDef_IsMessageSet(const upb_MessageDef* m) {
return google_protobuf_MessageOptions_message_set_wire_format(
upb_MessageDef_Options(m));
}
/* Nested entities. */
int upb_MessageDef_NestedMessageCount(const upb_MessageDef* m);
int upb_MessageDef_NestedEnumCount(const upb_MessageDef* m);

@ -160,7 +160,7 @@ typedef enum {
* message MessageSet {
* repeated group Item = 1 {
* required int32 type_id = 2;
* required string message = 3;
* required bytes message = 3;
* }
* }
*/

Loading…
Cancel
Save