From 86abf35ef5ee5b1004ec11bebb36d84c2ef6645e Mon Sep 17 00:00:00 2001 From: Anton Grbin Date: Tue, 26 Mar 2024 13:14:25 -0700 Subject: [PATCH] Python JSON parser: Ignore invalid enum string values if ignore_unknown_fields is set (#15887) # Motivation This PR fixes failing conformance tests for python with name `IgnoreUnknownEnumStringValue`. The JSON parsing spec was discussed in https://github.com/protocolbuffers/protobuf/issues/7392. Recent equivalent changes for other languages: * Swift: https://github.com/apple/swift-protobuf/pull/1345 * C#: https://github.com/protocolbuffers/protobuf/pull/15758 # Changes - 1st commit is a noop refactoring to make relevant _ConvertScalarFieldValue invocations localized - 2nd commit introduces the child exception of `ParseError` named `EnumStringValueParseError` which is suppressed if `ignore_unknown_fields` is set - 3rd commit updates the conformance test failure lists Closes #15887 COPYBARA_INTEGRATE_REVIEW=https://github.com/protocolbuffers/protobuf/pull/15887 from noom:anton/7392/fix-python-test fbcc93a232a8e7c858b7cb62c1eaef1c6a54fe20 PiperOrigin-RevId: 619288323 --- conformance/failure_list_python.txt | 20 ----- conformance/failure_list_python_cpp.txt | 20 ----- conformance/failure_list_python_upb.txt | 20 ----- .../protobuf/internal/json_format_test.py | 76 +++++++++++++++- python/google/protobuf/json_format.py | 86 +++++++++++++------ src/google/protobuf/util/json_format.proto | 9 ++ .../protobuf/util/json_format_proto3.proto | 4 + 7 files changed, 150 insertions(+), 85 deletions(-) diff --git a/conformance/failure_list_python.txt b/conformance/failure_list_python.txt index fd10110dc3..e69de29bb2 100644 --- a/conformance/failure_list_python.txt +++ b/conformance/failure_list_python.txt @@ -1,20 +0,0 @@ -Recommended.Proto3.JsonInput.IgnoreUnknownEnumStringValueInMapValue.ProtobufOutput -Recommended.Proto3.JsonInput.IgnoreUnknownEnumStringValueInOptionalField.ProtobufOutput -Recommended.Proto3.JsonInput.IgnoreUnknownEnumStringValueInRepeatedField.ProtobufOutput -Recommended.Editions_Proto3.JsonInput.IgnoreUnknownEnumStringValueInMapValue.ProtobufOutput -Recommended.Editions_Proto3.JsonInput.IgnoreUnknownEnumStringValueInOptionalField.ProtobufOutput -Recommended.Editions_Proto3.JsonInput.IgnoreUnknownEnumStringValueInRepeatedField.ProtobufOutput -Recommended.Editions_Proto2.JsonInput.IgnoreUnknownEnumStringValueInMapValue.ProtobufOutput -Recommended.Editions_Proto2.JsonInput.IgnoreUnknownEnumStringValueInOptionalField.ProtobufOutput -Recommended.Editions_Proto2.JsonInput.IgnoreUnknownEnumStringValueInRepeatedField.ProtobufOutput -Recommended.Proto2.JsonInput.IgnoreUnknownEnumStringValueInMapValue.ProtobufOutput -Recommended.Proto2.JsonInput.IgnoreUnknownEnumStringValueInOptionalField.ProtobufOutput -Recommended.Proto2.JsonInput.IgnoreUnknownEnumStringValueInRepeatedField.ProtobufOutput -Recommended.Editions_Proto2.JsonInput.IgnoreUnknownEnumStringValueInRepeatedPart.ProtobufOutput -Recommended.Editions_Proto3.JsonInput.IgnoreUnknownEnumStringValueInRepeatedPart.ProtobufOutput -Recommended.Proto2.JsonInput.IgnoreUnknownEnumStringValueInRepeatedPart.ProtobufOutput -Recommended.Proto3.JsonInput.IgnoreUnknownEnumStringValueInRepeatedPart.ProtobufOutput -Recommended.Editions_Proto2.JsonInput.IgnoreUnknownEnumStringValueInMapPart.ProtobufOutput -Recommended.Editions_Proto3.JsonInput.IgnoreUnknownEnumStringValueInMapPart.ProtobufOutput -Recommended.Proto2.JsonInput.IgnoreUnknownEnumStringValueInMapPart.ProtobufOutput -Recommended.Proto3.JsonInput.IgnoreUnknownEnumStringValueInMapPart.ProtobufOutput diff --git a/conformance/failure_list_python_cpp.txt b/conformance/failure_list_python_cpp.txt index 3d5ff185f6..5c93405160 100644 --- a/conformance/failure_list_python_cpp.txt +++ b/conformance/failure_list_python_cpp.txt @@ -6,23 +6,3 @@ # # TODO: insert links to corresponding bugs tracking the issue. # Should we use GitHub issues or the Google-internal bug tracker? -Recommended.Proto3.JsonInput.IgnoreUnknownEnumStringValueInMapValue.ProtobufOutput -Recommended.Proto3.JsonInput.IgnoreUnknownEnumStringValueInOptionalField.ProtobufOutput -Recommended.Proto3.JsonInput.IgnoreUnknownEnumStringValueInRepeatedField.ProtobufOutput -Recommended.Editions_Proto3.JsonInput.IgnoreUnknownEnumStringValueInMapValue.ProtobufOutput -Recommended.Editions_Proto3.JsonInput.IgnoreUnknownEnumStringValueInOptionalField.ProtobufOutput -Recommended.Editions_Proto3.JsonInput.IgnoreUnknownEnumStringValueInRepeatedField.ProtobufOutput -Recommended.Editions_Proto2.JsonInput.IgnoreUnknownEnumStringValueInMapValue.ProtobufOutput -Recommended.Editions_Proto2.JsonInput.IgnoreUnknownEnumStringValueInOptionalField.ProtobufOutput -Recommended.Editions_Proto2.JsonInput.IgnoreUnknownEnumStringValueInRepeatedField.ProtobufOutput -Recommended.Proto2.JsonInput.IgnoreUnknownEnumStringValueInMapValue.ProtobufOutput -Recommended.Proto2.JsonInput.IgnoreUnknownEnumStringValueInOptionalField.ProtobufOutput -Recommended.Proto2.JsonInput.IgnoreUnknownEnumStringValueInRepeatedField.ProtobufOutput -Recommended.Editions_Proto2.JsonInput.IgnoreUnknownEnumStringValueInRepeatedPart.ProtobufOutput -Recommended.Editions_Proto3.JsonInput.IgnoreUnknownEnumStringValueInRepeatedPart.ProtobufOutput -Recommended.Proto2.JsonInput.IgnoreUnknownEnumStringValueInRepeatedPart.ProtobufOutput -Recommended.Proto3.JsonInput.IgnoreUnknownEnumStringValueInRepeatedPart.ProtobufOutput -Recommended.Editions_Proto2.JsonInput.IgnoreUnknownEnumStringValueInMapPart.ProtobufOutput -Recommended.Editions_Proto3.JsonInput.IgnoreUnknownEnumStringValueInMapPart.ProtobufOutput -Recommended.Proto2.JsonInput.IgnoreUnknownEnumStringValueInMapPart.ProtobufOutput -Recommended.Proto3.JsonInput.IgnoreUnknownEnumStringValueInMapPart.ProtobufOutput diff --git a/conformance/failure_list_python_upb.txt b/conformance/failure_list_python_upb.txt index 7a90ac8d35..e69de29bb2 100644 --- a/conformance/failure_list_python_upb.txt +++ b/conformance/failure_list_python_upb.txt @@ -1,20 +0,0 @@ -Recommended.Proto3.JsonInput.IgnoreUnknownEnumStringValueInMapValue.ProtobufOutput -Recommended.Proto3.JsonInput.IgnoreUnknownEnumStringValueInOptionalField.ProtobufOutput -Recommended.Proto3.JsonInput.IgnoreUnknownEnumStringValueInRepeatedField.ProtobufOutput -Recommended.Editions_Proto3.JsonInput.IgnoreUnknownEnumStringValueInMapValue.ProtobufOutput -Recommended.Editions_Proto3.JsonInput.IgnoreUnknownEnumStringValueInOptionalField.ProtobufOutput -Recommended.Editions_Proto3.JsonInput.IgnoreUnknownEnumStringValueInRepeatedField.ProtobufOutput -Recommended.Proto2.JsonInput.IgnoreUnknownEnumStringValueInMapValue.ProtobufOutput -Recommended.Proto2.JsonInput.IgnoreUnknownEnumStringValueInOptionalField.ProtobufOutput -Recommended.Proto2.JsonInput.IgnoreUnknownEnumStringValueInRepeatedField.ProtobufOutput -Recommended.Editions_Proto2.JsonInput.IgnoreUnknownEnumStringValueInMapValue.ProtobufOutput -Recommended.Editions_Proto2.JsonInput.IgnoreUnknownEnumStringValueInOptionalField.ProtobufOutput -Recommended.Editions_Proto2.JsonInput.IgnoreUnknownEnumStringValueInRepeatedField.ProtobufOutput -Recommended.Editions_Proto2.JsonInput.IgnoreUnknownEnumStringValueInRepeatedPart.ProtobufOutput -Recommended.Editions_Proto3.JsonInput.IgnoreUnknownEnumStringValueInRepeatedPart.ProtobufOutput -Recommended.Proto2.JsonInput.IgnoreUnknownEnumStringValueInRepeatedPart.ProtobufOutput -Recommended.Proto3.JsonInput.IgnoreUnknownEnumStringValueInRepeatedPart.ProtobufOutput -Recommended.Editions_Proto2.JsonInput.IgnoreUnknownEnumStringValueInMapPart.ProtobufOutput -Recommended.Editions_Proto3.JsonInput.IgnoreUnknownEnumStringValueInMapPart.ProtobufOutput -Recommended.Proto2.JsonInput.IgnoreUnknownEnumStringValueInMapPart.ProtobufOutput -Recommended.Proto3.JsonInput.IgnoreUnknownEnumStringValueInMapPart.ProtobufOutput diff --git a/python/google/protobuf/internal/json_format_test.py b/python/google/protobuf/internal/json_format_test.py index d1a0777d15..6c5a5e32c8 100644 --- a/python/google/protobuf/internal/json_format_test.py +++ b/python/google/protobuf/internal/json_format_test.py @@ -1008,7 +1008,7 @@ class JsonFormatTest(JsonFormatBase): # Proto3 accepts numeric unknown enums. text = '{"enumValue": 12345}' json_format.Parse(text, message) - # Proto2 does not accept unknown enums. + # Proto2 does not accept numeric unknown enums. message = unittest_pb2.TestAllTypes() self.assertRaisesRegex( json_format.ParseError, @@ -1020,6 +1020,80 @@ class JsonFormatTest(JsonFormatBase): message, ) + def testParseUnknownEnumStringValue_Scalar_Proto2(self): + message = json_format_pb2.TestNumbers() + text = '{"a": "UNKNOWN_STRING_VALUE"}' + json_format.Parse(text, message, ignore_unknown_fields=True) + + self.assertFalse(message.HasField('a')) + + def testParseErrorForUnknownEnumValue_ScalarWithoutIgnore_Proto2(self): + message = json_format_pb2.TestNumbers() + self.assertRaisesRegex( + json_format.ParseError, + 'Invalid enum value', + json_format.Parse, '{"a": "UNKNOWN_STRING_VALUE"}', message) + + def testParseUnknownEnumStringValue_Repeated_Proto2(self): + message = json_format_pb2.TestRepeatedEnum() + text = '{"repeatedEnum": ["UNKNOWN_STRING_VALUE", "BUFFER"]}' + json_format.Parse(text, message, ignore_unknown_fields=True) + + self.assertEqual(len(message.repeated_enum), 1) + self.assertTrue(message.repeated_enum[0] == json_format_pb2.BUFFER) + + def testParseUnknownEnumStringValue_Map_Proto2(self): + message = json_format_pb2.TestMapOfEnums() + text = '{"enumMap": {"key1": "BUFFER", "key2": "UNKNOWN_STRING_VALUE"}}' + json_format.Parse(text, message, ignore_unknown_fields=True) + + self.assertTrue(message.enum_map['key1'] == json_format_pb2.BUFFER) + self.assertFalse('key2' in message.enum_map) + + def testParseUnknownEnumStringValue_ExtensionField_Proto2(self): + message = json_format_pb2.TestMessageWithExtension() + text = """ + {"[protobuf_unittest.TestExtension.enum_ext]": "UNKNOWN_STRING_VALUE"} + """ + json_format.Parse(text, message, ignore_unknown_fields=True) + + self.assertFalse(json_format_pb2.TestExtension.enum_ext in + message.Extensions) + + def testParseUnknownEnumStringValue_ExtensionFieldWithoutIgnore_Proto2(self): + message = json_format_pb2.TestMessageWithExtension() + text = """ + {"[protobuf_unittest.TestExtension.enum_ext]": "UNKNOWN_STRING_VALUE"} + """ + self.assertRaisesRegex( + json_format.ParseError, + 'Invalid enum value', + json_format.Parse, text, message) + + def testParseUnknownEnumStringValue_Scalar_Proto3(self): + message = json_format_proto3_pb2.TestMessage() + text = '{"enumValue": "UNKNOWN_STRING_VALUE"}' + + json_format.Parse(text, message, ignore_unknown_fields=True) + self.assertEqual(message.enum_value, 0) + + def testParseUnknownEnumStringValue_Repeated_Proto3(self): + message = json_format_proto3_pb2.TestMessage() + text = '{"repeatedEnumValue": ["UNKNOWN_STRING_VALUE", "FOO"]}' + json_format.Parse(text, message, ignore_unknown_fields=True) + + self.assertEqual(len(message.repeated_enum_value), 1) + self.assertTrue(message.repeated_enum_value[0] == + json_format_proto3_pb2.FOO) + + def testParseUnknownEnumStringValue_Map_Proto3(self): + message = json_format_proto3_pb2.MapOfEnums() + text = '{"map": {"key1": "FOO", "key2": "UNKNOWN_STRING_VALUE"}}' + json_format.Parse(text, message, ignore_unknown_fields=True) + + self.assertTrue(message.map['key1'] == json_format_proto3_pb2.FOO) + self.assertFalse('key2' in message.map) + def testBytes(self): message = json_format_proto3_pb2.TestMessage() # Test url base64 diff --git a/python/google/protobuf/json_format.py b/python/google/protobuf/json_format.py index 79f5078fa5..5ef75b9055 100644 --- a/python/google/protobuf/json_format.py +++ b/python/google/protobuf/json_format.py @@ -70,6 +70,12 @@ class ParseError(Error): """Thrown in case of parsing error.""" +class EnumStringValueParseError(ParseError): + """Thrown if unknown string enum value is encountered. + This exception is suppressed if ignore_unknown_fields is set. + """ + + def MessageToJson( message, preserving_proto_field_name=False, @@ -658,11 +664,8 @@ class _Parser(object): path, name, index ) ) - getattr(message, field.name).append( - _ConvertScalarFieldValue( - item, field, '{0}.{1}[{2}]'.format(path, name, index) - ) - ) + self._ConvertAndAppendScalar( + message, field, item, '{0}.{1}[{2}]'.format(path, name, index)) elif field.cpp_type == descriptor.FieldDescriptor.CPPTYPE_MESSAGE: if field.is_extension: sub_message = message.Extensions[field] @@ -672,17 +675,9 @@ class _Parser(object): self.ConvertMessage(value, sub_message, '{0}.{1}'.format(path, name)) else: if field.is_extension: - message.Extensions[field] = _ConvertScalarFieldValue( - value, field, '{0}.{1}'.format(path, name) - ) + self._ConvertAndSetScalarExtension(message, field, value, '{0}.{1}'.format(path, name)) else: - setattr( - message, - field.name, - _ConvertScalarFieldValue( - value, field, '{0}.{1}'.format(path, name) - ), - ) + self._ConvertAndSetScalar(message, field, value, '{0}.{1}'.format(path, name)) except ParseError as e: if field and field.containing_oneof is None: raise ParseError( @@ -795,11 +790,7 @@ class _Parser(object): def _ConvertWrapperMessage(self, value, message, path): """Convert a JSON representation into Wrapper message.""" field = message.DESCRIPTOR.fields_by_name['value'] - setattr( - message, - 'value', - _ConvertScalarFieldValue(value, field, path='{0}.value'.format(path)), - ) + self._ConvertAndSetScalar(message, field, value, path='{0}.value'.format(path)) def _ConvertMapFieldValue(self, value, message, field, path): """Convert map field value for a message map field. @@ -832,9 +823,51 @@ class _Parser(object): '{0}[{1}]'.format(path, key_value), ) else: - getattr(message, field.name)[key_value] = _ConvertScalarFieldValue( - value[key], value_field, path='{0}[{1}]'.format(path, key_value) - ) + self._ConvertAndSetScalarToMapKey( + message, + field, + key_value, + value[key], + path='{0}[{1}]'.format(path, key_value)) + + def _ConvertAndSetScalarExtension(self, message, extension_field, js_value, path): + """Convert scalar from js_value and assign it to message.Extensions[extension_field].""" + try: + message.Extensions[extension_field] = _ConvertScalarFieldValue( + js_value, extension_field, path) + except EnumStringValueParseError: + if not self.ignore_unknown_fields: + raise + + def _ConvertAndSetScalar(self, message, field, js_value, path): + """Convert scalar from js_value and assign it to message.field.""" + try: + setattr( + message, + field.name, + _ConvertScalarFieldValue(js_value, field, path)) + except EnumStringValueParseError: + if not self.ignore_unknown_fields: + raise + + def _ConvertAndAppendScalar(self, message, repeated_field, js_value, path): + """Convert scalar from js_value and append it to message.repeated_field.""" + try: + getattr(message, repeated_field.name).append( + _ConvertScalarFieldValue(js_value, repeated_field, path)) + except EnumStringValueParseError: + if not self.ignore_unknown_fields: + raise + + def _ConvertAndSetScalarToMapKey(self, message, map_field, converted_key, js_value, path): + """Convert scalar from 'js_value' and add it to message.map_field[converted_key].""" + try: + getattr(message, map_field.name)[converted_key] = _ConvertScalarFieldValue( + js_value, map_field.message_type.fields_by_name['value'], path, + ) + except EnumStringValueParseError: + if not self.ignore_unknown_fields: + raise def _ConvertScalarFieldValue(value, field, path, require_str=False): @@ -851,6 +884,7 @@ def _ConvertScalarFieldValue(value, field, path, require_str=False): Raises: ParseError: In case of convert problems. + EnumStringValueParseError: In case of unknown enum string value. """ try: if field.cpp_type in _INT_TYPES: @@ -882,7 +916,9 @@ def _ConvertScalarFieldValue(value, field, path, require_str=False): number = int(value) enum_value = field.enum_type.values_by_number.get(number, None) except ValueError as e: - raise ParseError( + # Since parsing to integer failed and lookup in values_by_name didn't + # find this name, we have an enum string value which is unknown. + raise EnumStringValueParseError( 'Invalid enum value {0} for enum type {1}'.format( value, field.enum_type.full_name ) @@ -897,6 +933,8 @@ def _ConvertScalarFieldValue(value, field, path, require_str=False): else: return number return enum_value.number + except EnumStringValueParseError as e: + raise EnumStringValueParseError('{0} at {1}'.format(e, path)) from e except ParseError as e: raise ParseError('{0} at {1}'.format(e, path)) from e diff --git a/src/google/protobuf/util/json_format.proto b/src/google/protobuf/util/json_format.proto index 7cb311139b..dfaa3422c4 100644 --- a/src/google/protobuf/util/json_format.proto +++ b/src/google/protobuf/util/json_format.proto @@ -101,6 +101,7 @@ message TestMessageWithExtension { message TestExtension { extend TestMessageWithExtension { optional TestExtension ext = 100; + optional EnumValue enum_ext = 101; } optional string value = 1; } @@ -114,3 +115,11 @@ enum EnumValue { message TestDefaultEnumValue { optional EnumValue enum_value = 1 [default = DEFAULT]; } + +message TestMapOfEnums { + map enum_map = 1; +} + +message TestRepeatedEnum { + repeated EnumValue repeated_enum = 1; +} diff --git a/src/google/protobuf/util/json_format_proto3.proto b/src/google/protobuf/util/json_format_proto3.proto index e631c2a193..dee5e0412f 100644 --- a/src/google/protobuf/util/json_format_proto3.proto +++ b/src/google/protobuf/util/json_format_proto3.proto @@ -253,6 +253,10 @@ message MapOfObjects { map map = 1; } +message MapOfEnums { + map map = 1; +} + message MapIn { string other = 1; repeated string things = 2;