Python JSON parser: Ignore invalid enum string values if ignore_unknown_fields is set (#15887)

# Motivation

This PR fixes failing conformance tests for python with name `IgnoreUnknownEnumStringValue`.

The JSON parsing spec was discussed in https://github.com/protocolbuffers/protobuf/issues/7392.

Recent equivalent changes for other languages:
* Swift: https://github.com/apple/swift-protobuf/pull/1345
* C#: https://github.com/protocolbuffers/protobuf/pull/15758

# Changes

- 1st commit is a noop  refactoring to make relevant _ConvertScalarFieldValue invocations localized
- 2nd commit introduces the child exception of `ParseError` named `EnumStringValueParseError` which is suppressed if `ignore_unknown_fields` is set
- 3rd commit updates the conformance test failure lists

Closes #15887

COPYBARA_INTEGRATE_REVIEW=https://github.com/protocolbuffers/protobuf/pull/15887 from noom:anton/7392/fix-python-test fbcc93a232
PiperOrigin-RevId: 619288323
pull/16317/head
Anton Grbin 8 months ago committed by Copybara-Service
parent 7e033c0be1
commit 86abf35ef5
  1. 20
      conformance/failure_list_python.txt
  2. 20
      conformance/failure_list_python_cpp.txt
  3. 20
      conformance/failure_list_python_upb.txt
  4. 76
      python/google/protobuf/internal/json_format_test.py
  5. 86
      python/google/protobuf/json_format.py
  6. 9
      src/google/protobuf/util/json_format.proto
  7. 4
      src/google/protobuf/util/json_format_proto3.proto

@ -1,20 +0,0 @@
Recommended.Proto3.JsonInput.IgnoreUnknownEnumStringValueInMapValue.ProtobufOutput
Recommended.Proto3.JsonInput.IgnoreUnknownEnumStringValueInOptionalField.ProtobufOutput
Recommended.Proto3.JsonInput.IgnoreUnknownEnumStringValueInRepeatedField.ProtobufOutput
Recommended.Editions_Proto3.JsonInput.IgnoreUnknownEnumStringValueInMapValue.ProtobufOutput
Recommended.Editions_Proto3.JsonInput.IgnoreUnknownEnumStringValueInOptionalField.ProtobufOutput
Recommended.Editions_Proto3.JsonInput.IgnoreUnknownEnumStringValueInRepeatedField.ProtobufOutput
Recommended.Editions_Proto2.JsonInput.IgnoreUnknownEnumStringValueInMapValue.ProtobufOutput
Recommended.Editions_Proto2.JsonInput.IgnoreUnknownEnumStringValueInOptionalField.ProtobufOutput
Recommended.Editions_Proto2.JsonInput.IgnoreUnknownEnumStringValueInRepeatedField.ProtobufOutput
Recommended.Proto2.JsonInput.IgnoreUnknownEnumStringValueInMapValue.ProtobufOutput
Recommended.Proto2.JsonInput.IgnoreUnknownEnumStringValueInOptionalField.ProtobufOutput
Recommended.Proto2.JsonInput.IgnoreUnknownEnumStringValueInRepeatedField.ProtobufOutput
Recommended.Editions_Proto2.JsonInput.IgnoreUnknownEnumStringValueInRepeatedPart.ProtobufOutput
Recommended.Editions_Proto3.JsonInput.IgnoreUnknownEnumStringValueInRepeatedPart.ProtobufOutput
Recommended.Proto2.JsonInput.IgnoreUnknownEnumStringValueInRepeatedPart.ProtobufOutput
Recommended.Proto3.JsonInput.IgnoreUnknownEnumStringValueInRepeatedPart.ProtobufOutput
Recommended.Editions_Proto2.JsonInput.IgnoreUnknownEnumStringValueInMapPart.ProtobufOutput
Recommended.Editions_Proto3.JsonInput.IgnoreUnknownEnumStringValueInMapPart.ProtobufOutput
Recommended.Proto2.JsonInput.IgnoreUnknownEnumStringValueInMapPart.ProtobufOutput
Recommended.Proto3.JsonInput.IgnoreUnknownEnumStringValueInMapPart.ProtobufOutput

@ -6,23 +6,3 @@
#
# TODO: insert links to corresponding bugs tracking the issue.
# Should we use GitHub issues or the Google-internal bug tracker?
Recommended.Proto3.JsonInput.IgnoreUnknownEnumStringValueInMapValue.ProtobufOutput
Recommended.Proto3.JsonInput.IgnoreUnknownEnumStringValueInOptionalField.ProtobufOutput
Recommended.Proto3.JsonInput.IgnoreUnknownEnumStringValueInRepeatedField.ProtobufOutput
Recommended.Editions_Proto3.JsonInput.IgnoreUnknownEnumStringValueInMapValue.ProtobufOutput
Recommended.Editions_Proto3.JsonInput.IgnoreUnknownEnumStringValueInOptionalField.ProtobufOutput
Recommended.Editions_Proto3.JsonInput.IgnoreUnknownEnumStringValueInRepeatedField.ProtobufOutput
Recommended.Editions_Proto2.JsonInput.IgnoreUnknownEnumStringValueInMapValue.ProtobufOutput
Recommended.Editions_Proto2.JsonInput.IgnoreUnknownEnumStringValueInOptionalField.ProtobufOutput
Recommended.Editions_Proto2.JsonInput.IgnoreUnknownEnumStringValueInRepeatedField.ProtobufOutput
Recommended.Proto2.JsonInput.IgnoreUnknownEnumStringValueInMapValue.ProtobufOutput
Recommended.Proto2.JsonInput.IgnoreUnknownEnumStringValueInOptionalField.ProtobufOutput
Recommended.Proto2.JsonInput.IgnoreUnknownEnumStringValueInRepeatedField.ProtobufOutput
Recommended.Editions_Proto2.JsonInput.IgnoreUnknownEnumStringValueInRepeatedPart.ProtobufOutput
Recommended.Editions_Proto3.JsonInput.IgnoreUnknownEnumStringValueInRepeatedPart.ProtobufOutput
Recommended.Proto2.JsonInput.IgnoreUnknownEnumStringValueInRepeatedPart.ProtobufOutput
Recommended.Proto3.JsonInput.IgnoreUnknownEnumStringValueInRepeatedPart.ProtobufOutput
Recommended.Editions_Proto2.JsonInput.IgnoreUnknownEnumStringValueInMapPart.ProtobufOutput
Recommended.Editions_Proto3.JsonInput.IgnoreUnknownEnumStringValueInMapPart.ProtobufOutput
Recommended.Proto2.JsonInput.IgnoreUnknownEnumStringValueInMapPart.ProtobufOutput
Recommended.Proto3.JsonInput.IgnoreUnknownEnumStringValueInMapPart.ProtobufOutput

@ -1,20 +0,0 @@
Recommended.Proto3.JsonInput.IgnoreUnknownEnumStringValueInMapValue.ProtobufOutput
Recommended.Proto3.JsonInput.IgnoreUnknownEnumStringValueInOptionalField.ProtobufOutput
Recommended.Proto3.JsonInput.IgnoreUnknownEnumStringValueInRepeatedField.ProtobufOutput
Recommended.Editions_Proto3.JsonInput.IgnoreUnknownEnumStringValueInMapValue.ProtobufOutput
Recommended.Editions_Proto3.JsonInput.IgnoreUnknownEnumStringValueInOptionalField.ProtobufOutput
Recommended.Editions_Proto3.JsonInput.IgnoreUnknownEnumStringValueInRepeatedField.ProtobufOutput
Recommended.Proto2.JsonInput.IgnoreUnknownEnumStringValueInMapValue.ProtobufOutput
Recommended.Proto2.JsonInput.IgnoreUnknownEnumStringValueInOptionalField.ProtobufOutput
Recommended.Proto2.JsonInput.IgnoreUnknownEnumStringValueInRepeatedField.ProtobufOutput
Recommended.Editions_Proto2.JsonInput.IgnoreUnknownEnumStringValueInMapValue.ProtobufOutput
Recommended.Editions_Proto2.JsonInput.IgnoreUnknownEnumStringValueInOptionalField.ProtobufOutput
Recommended.Editions_Proto2.JsonInput.IgnoreUnknownEnumStringValueInRepeatedField.ProtobufOutput
Recommended.Editions_Proto2.JsonInput.IgnoreUnknownEnumStringValueInRepeatedPart.ProtobufOutput
Recommended.Editions_Proto3.JsonInput.IgnoreUnknownEnumStringValueInRepeatedPart.ProtobufOutput
Recommended.Proto2.JsonInput.IgnoreUnknownEnumStringValueInRepeatedPart.ProtobufOutput
Recommended.Proto3.JsonInput.IgnoreUnknownEnumStringValueInRepeatedPart.ProtobufOutput
Recommended.Editions_Proto2.JsonInput.IgnoreUnknownEnumStringValueInMapPart.ProtobufOutput
Recommended.Editions_Proto3.JsonInput.IgnoreUnknownEnumStringValueInMapPart.ProtobufOutput
Recommended.Proto2.JsonInput.IgnoreUnknownEnumStringValueInMapPart.ProtobufOutput
Recommended.Proto3.JsonInput.IgnoreUnknownEnumStringValueInMapPart.ProtobufOutput

@ -1008,7 +1008,7 @@ class JsonFormatTest(JsonFormatBase):
# Proto3 accepts numeric unknown enums.
text = '{"enumValue": 12345}'
json_format.Parse(text, message)
# Proto2 does not accept unknown enums.
# Proto2 does not accept numeric unknown enums.
message = unittest_pb2.TestAllTypes()
self.assertRaisesRegex(
json_format.ParseError,
@ -1020,6 +1020,80 @@ class JsonFormatTest(JsonFormatBase):
message,
)
def testParseUnknownEnumStringValue_Scalar_Proto2(self):
message = json_format_pb2.TestNumbers()
text = '{"a": "UNKNOWN_STRING_VALUE"}'
json_format.Parse(text, message, ignore_unknown_fields=True)
self.assertFalse(message.HasField('a'))
def testParseErrorForUnknownEnumValue_ScalarWithoutIgnore_Proto2(self):
message = json_format_pb2.TestNumbers()
self.assertRaisesRegex(
json_format.ParseError,
'Invalid enum value',
json_format.Parse, '{"a": "UNKNOWN_STRING_VALUE"}', message)
def testParseUnknownEnumStringValue_Repeated_Proto2(self):
message = json_format_pb2.TestRepeatedEnum()
text = '{"repeatedEnum": ["UNKNOWN_STRING_VALUE", "BUFFER"]}'
json_format.Parse(text, message, ignore_unknown_fields=True)
self.assertEqual(len(message.repeated_enum), 1)
self.assertTrue(message.repeated_enum[0] == json_format_pb2.BUFFER)
def testParseUnknownEnumStringValue_Map_Proto2(self):
message = json_format_pb2.TestMapOfEnums()
text = '{"enumMap": {"key1": "BUFFER", "key2": "UNKNOWN_STRING_VALUE"}}'
json_format.Parse(text, message, ignore_unknown_fields=True)
self.assertTrue(message.enum_map['key1'] == json_format_pb2.BUFFER)
self.assertFalse('key2' in message.enum_map)
def testParseUnknownEnumStringValue_ExtensionField_Proto2(self):
message = json_format_pb2.TestMessageWithExtension()
text = """
{"[protobuf_unittest.TestExtension.enum_ext]": "UNKNOWN_STRING_VALUE"}
"""
json_format.Parse(text, message, ignore_unknown_fields=True)
self.assertFalse(json_format_pb2.TestExtension.enum_ext in
message.Extensions)
def testParseUnknownEnumStringValue_ExtensionFieldWithoutIgnore_Proto2(self):
message = json_format_pb2.TestMessageWithExtension()
text = """
{"[protobuf_unittest.TestExtension.enum_ext]": "UNKNOWN_STRING_VALUE"}
"""
self.assertRaisesRegex(
json_format.ParseError,
'Invalid enum value',
json_format.Parse, text, message)
def testParseUnknownEnumStringValue_Scalar_Proto3(self):
message = json_format_proto3_pb2.TestMessage()
text = '{"enumValue": "UNKNOWN_STRING_VALUE"}'
json_format.Parse(text, message, ignore_unknown_fields=True)
self.assertEqual(message.enum_value, 0)
def testParseUnknownEnumStringValue_Repeated_Proto3(self):
message = json_format_proto3_pb2.TestMessage()
text = '{"repeatedEnumValue": ["UNKNOWN_STRING_VALUE", "FOO"]}'
json_format.Parse(text, message, ignore_unknown_fields=True)
self.assertEqual(len(message.repeated_enum_value), 1)
self.assertTrue(message.repeated_enum_value[0] ==
json_format_proto3_pb2.FOO)
def testParseUnknownEnumStringValue_Map_Proto3(self):
message = json_format_proto3_pb2.MapOfEnums()
text = '{"map": {"key1": "FOO", "key2": "UNKNOWN_STRING_VALUE"}}'
json_format.Parse(text, message, ignore_unknown_fields=True)
self.assertTrue(message.map['key1'] == json_format_proto3_pb2.FOO)
self.assertFalse('key2' in message.map)
def testBytes(self):
message = json_format_proto3_pb2.TestMessage()
# Test url base64

@ -70,6 +70,12 @@ class ParseError(Error):
"""Thrown in case of parsing error."""
class EnumStringValueParseError(ParseError):
"""Thrown if unknown string enum value is encountered.
This exception is suppressed if ignore_unknown_fields is set.
"""
def MessageToJson(
message,
preserving_proto_field_name=False,
@ -658,11 +664,8 @@ class _Parser(object):
path, name, index
)
)
getattr(message, field.name).append(
_ConvertScalarFieldValue(
item, field, '{0}.{1}[{2}]'.format(path, name, index)
)
)
self._ConvertAndAppendScalar(
message, field, item, '{0}.{1}[{2}]'.format(path, name, index))
elif field.cpp_type == descriptor.FieldDescriptor.CPPTYPE_MESSAGE:
if field.is_extension:
sub_message = message.Extensions[field]
@ -672,17 +675,9 @@ class _Parser(object):
self.ConvertMessage(value, sub_message, '{0}.{1}'.format(path, name))
else:
if field.is_extension:
message.Extensions[field] = _ConvertScalarFieldValue(
value, field, '{0}.{1}'.format(path, name)
)
self._ConvertAndSetScalarExtension(message, field, value, '{0}.{1}'.format(path, name))
else:
setattr(
message,
field.name,
_ConvertScalarFieldValue(
value, field, '{0}.{1}'.format(path, name)
),
)
self._ConvertAndSetScalar(message, field, value, '{0}.{1}'.format(path, name))
except ParseError as e:
if field and field.containing_oneof is None:
raise ParseError(
@ -795,11 +790,7 @@ class _Parser(object):
def _ConvertWrapperMessage(self, value, message, path):
"""Convert a JSON representation into Wrapper message."""
field = message.DESCRIPTOR.fields_by_name['value']
setattr(
message,
'value',
_ConvertScalarFieldValue(value, field, path='{0}.value'.format(path)),
)
self._ConvertAndSetScalar(message, field, value, path='{0}.value'.format(path))
def _ConvertMapFieldValue(self, value, message, field, path):
"""Convert map field value for a message map field.
@ -832,9 +823,51 @@ class _Parser(object):
'{0}[{1}]'.format(path, key_value),
)
else:
getattr(message, field.name)[key_value] = _ConvertScalarFieldValue(
value[key], value_field, path='{0}[{1}]'.format(path, key_value)
)
self._ConvertAndSetScalarToMapKey(
message,
field,
key_value,
value[key],
path='{0}[{1}]'.format(path, key_value))
def _ConvertAndSetScalarExtension(self, message, extension_field, js_value, path):
"""Convert scalar from js_value and assign it to message.Extensions[extension_field]."""
try:
message.Extensions[extension_field] = _ConvertScalarFieldValue(
js_value, extension_field, path)
except EnumStringValueParseError:
if not self.ignore_unknown_fields:
raise
def _ConvertAndSetScalar(self, message, field, js_value, path):
"""Convert scalar from js_value and assign it to message.field."""
try:
setattr(
message,
field.name,
_ConvertScalarFieldValue(js_value, field, path))
except EnumStringValueParseError:
if not self.ignore_unknown_fields:
raise
def _ConvertAndAppendScalar(self, message, repeated_field, js_value, path):
"""Convert scalar from js_value and append it to message.repeated_field."""
try:
getattr(message, repeated_field.name).append(
_ConvertScalarFieldValue(js_value, repeated_field, path))
except EnumStringValueParseError:
if not self.ignore_unknown_fields:
raise
def _ConvertAndSetScalarToMapKey(self, message, map_field, converted_key, js_value, path):
"""Convert scalar from 'js_value' and add it to message.map_field[converted_key]."""
try:
getattr(message, map_field.name)[converted_key] = _ConvertScalarFieldValue(
js_value, map_field.message_type.fields_by_name['value'], path,
)
except EnumStringValueParseError:
if not self.ignore_unknown_fields:
raise
def _ConvertScalarFieldValue(value, field, path, require_str=False):
@ -851,6 +884,7 @@ def _ConvertScalarFieldValue(value, field, path, require_str=False):
Raises:
ParseError: In case of convert problems.
EnumStringValueParseError: In case of unknown enum string value.
"""
try:
if field.cpp_type in _INT_TYPES:
@ -882,7 +916,9 @@ def _ConvertScalarFieldValue(value, field, path, require_str=False):
number = int(value)
enum_value = field.enum_type.values_by_number.get(number, None)
except ValueError as e:
raise ParseError(
# Since parsing to integer failed and lookup in values_by_name didn't
# find this name, we have an enum string value which is unknown.
raise EnumStringValueParseError(
'Invalid enum value {0} for enum type {1}'.format(
value, field.enum_type.full_name
)
@ -897,6 +933,8 @@ def _ConvertScalarFieldValue(value, field, path, require_str=False):
else:
return number
return enum_value.number
except EnumStringValueParseError as e:
raise EnumStringValueParseError('{0} at {1}'.format(e, path)) from e
except ParseError as e:
raise ParseError('{0} at {1}'.format(e, path)) from e

@ -101,6 +101,7 @@ message TestMessageWithExtension {
message TestExtension {
extend TestMessageWithExtension {
optional TestExtension ext = 100;
optional EnumValue enum_ext = 101;
}
optional string value = 1;
}
@ -114,3 +115,11 @@ enum EnumValue {
message TestDefaultEnumValue {
optional EnumValue enum_value = 1 [default = DEFAULT];
}
message TestMapOfEnums {
map<string, EnumValue> enum_map = 1;
}
message TestRepeatedEnum {
repeated EnumValue repeated_enum = 1;
}

@ -253,6 +253,10 @@ message MapOfObjects {
map<string, M> map = 1;
}
message MapOfEnums {
map<string, EnumType> map = 1;
}
message MapIn {
string other = 1;
repeated string things = 2;

Loading…
Cancel
Save