Fix UnicodeDecodeError exception catching.

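`str_lines` is a generator object, so no line is actually decoded until Tokenizer() consumes it. The UnicodeDecodeError is therefore raised inside the Tokenizer() call, and the `except` clause has to wrap that call rather than the creation of the generator.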

PiperOrigin-RevId: 499824005
pull/11471/head
Protobuf Team Bot 2 years ago committed by Copybara-Service
parent b4811c3ffb
commit 258d67c0d3
  1. 6
      python/google/protobuf/internal/text_format_test.py
  2. 4
      python/google/protobuf/text_format.py

@ -786,6 +786,12 @@ class TextFormatParserTests(TextFormatBase):
self.assertEqual(message_module.TestAllTypes(), message) self.assertEqual(message_module.TestAllTypes(), message)
def testParseInvalidUtf8(self, message_module): def testParseInvalidUtf8(self, message_module):
message = message_module.TestAllTypes()
text = b'invalid<\xc3\xc3>'
with self.assertRaises(text_format.ParseError):
text_format.Parse(text, message)
def testParseInvalidUtf8Value(self, message_module):
message = message_module.TestAllTypes() message = message_module.TestAllTypes()
text = 'repeated_string: "\\xc3\\xc3"' text = 'repeated_string: "\\xc3\\xc3"'
with self.assertRaises(text_format.ParseError) as e: with self.assertRaises(text_format.ParseError) as e:

@ -859,9 +859,9 @@ class _Parser(object):
str_lines = ( str_lines = (
line if isinstance(line, str) else line.decode('utf-8') line if isinstance(line, str) else line.decode('utf-8')
for line in lines) for line in lines)
except UnicodeDecodeError as e:
raise self._StringParseError(e)
tokenizer = Tokenizer(str_lines) tokenizer = Tokenizer(str_lines)
except UnicodeDecodeError as e:
raise ParseError from e
if message: if message:
self.root_type = message.DESCRIPTOR.full_name self.root_type = message.DESCRIPTOR.full_name
while not tokenizer.AtEnd(): while not tokenizer.AtEnd():

Loading…
Cancel
Save