mtest: fix unencodable XML chars

Replace unencodable XML chars with their printable representation so
that xmllint can parse test output without errors.

Closes #9894

Co-authored-by: Tristan Partin <tristan@partin.io>
pull/11583/head
Nazir Bilal Yavuz 2 years ago committed by Dylan Baker
parent 61984bcfa3
commit bd3d2cf918
  1. 31
      mesonbuild/mtest.py
  2. 4
      test cases/unit/110 replace unencodable xml chars/meson.build
  3. 37
      test cases/unit/110 replace unencodable xml chars/script.py
  4. 51
      unittests/allplatformstests.py

@ -72,6 +72,26 @@ GNU_ERROR_RETURNCODE = 99
# Exit if 3 Ctrl-C's are received within one second # Exit if 3 Ctrl-C's are received within one second
MAX_CTRLC = 3 MAX_CTRLC = 3
# Inclusive code point ranges that are replaced with their printable
# representation before test output is used: control characters (C0, DEL and
# C1) other than TAB (0x09), LF (0x0A), CR (0x0D) and NEL (0x85), plus the
# Unicode noncharacters of the Basic Multilingual Plane.
UNENCODABLE_XML_UNICHRS: T.List[T.Tuple[int, int]] = [
    (0x00, 0x08), (0x0B, 0x0C), (0x0E, 0x1F), (0x7F, 0x84),
    (0x86, 0x9F), (0xFDD0, 0xFDEF), (0xFFFE, 0xFFFF)]
# The last two code points of each supplementary plane (U+1FFFE/U+1FFFF up to
# U+10FFFE/U+10FFFF) are noncharacters too.  No narrow-build guard is needed:
# sys.maxunicode has been 0x10FFFF on every Python since 3.3 (PEP 393).
UNENCODABLE_XML_UNICHRS.extend(
    ((plane << 16) | 0xFFFE, (plane << 16) | 0xFFFF)
    for plane in range(0x01, 0x11))
# One 'low-high' range per entry, ready to be placed inside a regex character
# class; none of the boundary characters is special inside [...].
UNENCODABLE_XML_CHR_RANGES = [f'{chr(low)}-{chr(high)}' for low, high in UNENCODABLE_XML_UNICHRS]
UNENCODABLE_XML_CHRS_RE = re.compile('([' + ''.join(UNENCODABLE_XML_CHR_RANGES) + '])')
def is_windows() -> bool:
    '''Return True when platform.system() reports Windows (case-insensitive).'''
    return platform.system().lower() == 'windows'
@ -1148,14 +1168,21 @@ class TestRunRust(TestRun):
TestRun.PROTOCOL_TO_CLASS[TestProtocol.RUST] = TestRunRust TestRun.PROTOCOL_TO_CLASS[TestProtocol.RUST] = TestRunRust
def replace_unencodable_xml_chars(original_str: str) -> str:
    '''
    Return original_str with every character matched by
    UNENCODABLE_XML_CHRS_RE replaced by its printable representation
    '''
    def printable(illegal_chr: 're.Match[str]') -> str:
        # repr() wraps the escaped character in quotes; [1:-1] strips the
        # quote at each end and keeps only the escape sequence
        return repr(illegal_chr.group())[1:-1]

    return UNENCODABLE_XML_CHRS_RE.sub(printable, original_str)
def decode(stream: T.Union[None, bytes]) -> str:
    '''
    Decode captured bytes to text, trying utf-8 first and falling back to
    iso-8859-1; None yields an empty string.  The decoded text is passed
    through replace_unencodable_xml_chars() before being returned.
    '''
    if stream is None:
        return ''
    try:
        text = stream.decode('utf-8')
    except UnicodeDecodeError:
        text = stream.decode('iso-8859-1', errors='ignore')
    return replace_unencodable_xml_chars(text)
async def read_decode(reader: asyncio.StreamReader, async def read_decode(reader: asyncio.StreamReader,
queue: T.Optional['asyncio.Queue[T.Optional[str]]'], queue: T.Optional['asyncio.Queue[T.Optional[str]]'],

@ -0,0 +1,4 @@
# Single-test project: script.py prints characters from the unencodable
# XML ranges so the resulting test log can be checked.
project('replace unencodable xml chars')

test('main', find_program('script.py'))

@ -0,0 +1,37 @@
#!/usr/bin/env python3

# Prints ordinary text followed by every character from the unencodable XML
# ranges, so the test harness output can be checked for proper escaping.


def print_best_effort(text):
    '''Print text, silently skipping it if stdout cannot encode it.

    Only UnicodeEncodeError is ignored; any other failure (or an interrupt)
    still propagates instead of being swallowed.
    '''
    try:
        print(text)
    except UnicodeEncodeError:
        pass


# Print base string(\nHello Meson\n) to see valid chars are not replaced
print('\n\x48\x65\x6c\x6c\x6f\x20\x4d\x65\x73\x6f\x6e\n')

# Print invalid input from all known unencodable chars
print(
    '\x00\x01\x02\x03\x04\x05\x06\x07\x08\x0b\x0c\x0e\x0f\x10\x11'
    '\x12\x13\x14\x15\x16\x17\x18\x19\x1a\x1b\x1c\x1d\x1e\x1f\x7f')

# Non-ASCII characters may not be encodable with the stdout encoding in use
print_best_effort(
    '\x80\x81\x82\x83\x84\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f'
    '\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e'
    '\x9f\ufdd0\ufdd1\ufdd2\ufdd3\ufdd4\ufdd5\ufdd6\ufdd7\ufdd8'
    '\ufdd9\ufdda\ufddb\ufddc\ufddd\ufdde\ufddf\ufde0\ufde1'
    '\ufde2\ufde3\ufde4\ufde5\ufde6\ufde7\ufde8\ufde9\ufdea'
    '\ufdeb\ufdec\ufded\ufdee\ufdef\ufffe\uffff')

# Supplementary-plane characters; sys.maxunicode is 0x10FFFF on every
# Python 3.3+, so no narrow-build check is needed before printing them
print_best_effort(
    '\U0001fffe\U0001ffff\U0002fffe\U0002ffff'
    '\U0003fffe\U0003ffff\U0004fffe\U0004ffff'
    '\U0005fffe\U0005ffff\U0006fffe\U0006ffff'
    '\U0007fffe\U0007ffff\U0008fffe\U0008ffff'
    '\U0009fffe\U0009ffff\U000afffe\U000affff'
    '\U000bfffe\U000bffff\U000cfffe\U000cffff'
    '\U000dfffe\U000dffff\U000efffe\U000effff'
    '\U000ffffe\U000fffff\U0010fffe\U0010ffff')

@ -59,6 +59,7 @@ from mesonbuild.linkers import linkers
from mesonbuild.dependencies.pkgconfig import PkgConfigDependency from mesonbuild.dependencies.pkgconfig import PkgConfigDependency
from mesonbuild.build import Target, ConfigurationData, Executable, SharedLibrary, StaticLibrary from mesonbuild.build import Target, ConfigurationData, Executable, SharedLibrary, StaticLibrary
from mesonbuild import mtest
import mesonbuild.modules.pkgconfig import mesonbuild.modules.pkgconfig
from mesonbuild.scripts import destdir_join from mesonbuild.scripts import destdir_join
@ -398,6 +399,56 @@ class AllPlatformTests(BasePlatformTests):
self.assertTrue(compdb[3]['file'].endswith("libfile4.c")) self.assertTrue(compdb[3]['file'].endswith("libfile4.c"))
# FIXME: We don't have access to the linker command # FIXME: We don't have access to the linker command
def test_replace_unencodable_xml_chars(self):
    '''
    mtest.decode() must leave ordinary text alone and turn every
    unencodable xml char into its printable representation
    https://github.com/mesonbuild/meson/issues/9894
    '''
    # Ordinary text, once spelled with hex escapes and once literally; both
    # are the same string, which decode() must return unchanged
    plain_escaped = '\n\x48\x65\x6c\x6c\x6f\x20\x4d\x65\x73\x6f\x6e\n'
    plain_expected = '\nHello Meson\n'
    # Every known unencodable char
    unencodable = (
        '\x00\x01\x02\x03\x04\x05\x06\x07\x08\x0b\x0c\x0e\x0f\x10\x11'
        '\x12\x13\x14\x15\x16\x17\x18\x19\x1a\x1b\x1c\x1d\x1e\x1f\x7f'
        '\x80\x81\x82\x83\x84\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f'
        '\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e'
        '\x9f\ufdd0\ufdd1\ufdd2\ufdd3\ufdd4\ufdd5\ufdd6\ufdd7\ufdd8'
        '\ufdd9\ufdda\ufddb\ufddc\ufddd\ufdde\ufddf\ufde0\ufde1'
        '\ufde2\ufde3\ufde4\ufde5\ufde6\ufde7\ufde8\ufde9\ufdea'
        '\ufdeb\ufdec\ufded\ufdee\ufdef\ufffe\uffff')
    if sys.maxunicode >= 0x10000:
        unencodable += (
            '\U0001fffe\U0001ffff\U0002fffe\U0002ffff'
            '\U0003fffe\U0003ffff\U0004fffe\U0004ffff'
            '\U0005fffe\U0005ffff\U0006fffe\U0006ffff'
            '\U0007fffe\U0007ffff\U0008fffe\U0008ffff'
            '\U0009fffe\U0009ffff\U000afffe\U000affff'
            '\U000bfffe\U000bffff\U000cfffe\U000cffff'
            '\U000dfffe\U000dffff\U000efffe\U000effff'
            '\U000ffffe\U000fffff\U0010fffe\U0010ffff')

    # repr() gives the escaped form wrapped in quotes; [1:-1] drops the quotes
    expected = ''.join((plain_expected, repr(unencodable)[1:-1], plain_expected))
    raw_stream = ''.join((plain_escaped, unencodable, plain_escaped)).encode()

    self.assertEqual(mtest.decode(raw_stream), expected)
def test_replace_unencodable_xml_chars_unit(self):
    '''
    Run the '110 replace unencodable xml chars' unit project and check
    that xmllint accepts the resulting testlog.junit.xml
    https://github.com/mesonbuild/meson/issues/9894
    '''
    if shutil.which('xmllint') is None:
        raise SkipTest('xmllint not installed')
    self.init(os.path.join(self.unit_test_dir, '110 replace unencodable xml chars'))
    self.run_tests()
    # check=True makes a non-zero xmllint exit status fail the test
    subprocess.run(['xmllint', Path(self.logdir, 'testlog.junit.xml')], check=True)
def test_run_target_files_path(self): def test_run_target_files_path(self):
''' '''
Test that run_targets are run from the correct directory Test that run_targets are run from the correct directory

Loading…
Cancel
Save