mtest: fix unencodable XML chars

Replace unencodable XML chars with their printable representation so
that xmllint can parse test outputs without error.

Closes #9894

Co-authored-by: Tristan Partin <tristan@partin.io>
pull/11583/head
Nazir Bilal Yavuz 2 years ago committed by Dylan Baker
parent 61984bcfa3
commit bd3d2cf918
  1. 31
      mesonbuild/mtest.py
  2. 4
      test cases/unit/110 replace unencodable xml chars/meson.build
  3. 37
      test cases/unit/110 replace unencodable xml chars/script.py
  4. 51
      unittests/allplatformstests.py

@ -72,6 +72,26 @@ GNU_ERROR_RETURNCODE = 99
# Exit if 3 Ctrl-C's are received within one second
MAX_CTRLC = 3

# Inclusive (low, high) code point ranges of the characters treated as
# unencodable in XML output; they get replaced by their printable
# representation.
UNENCODABLE_XML_UNICHRS: T.List[T.Tuple[int, int]] = [
    (0x00, 0x08), (0x0B, 0x0C), (0x0E, 0x1F), (0x7F, 0x84),
    (0x86, 0x9F), (0xFDD0, 0xFDEF), (0xFFFE, 0xFFFF),
]
# Not narrow build: also cover the last two code points of every
# plane from 0x01 up to and including 0x10.
if sys.maxunicode >= 0x10000:
    UNENCODABLE_XML_UNICHRS += [
        ((plane << 16) | 0xFFFE, (plane << 16) | 0xFFFF)
        for plane in range(0x01, 0x11)
    ]

# One "<low>-<high>" character-class fragment per range, joined into a
# single capturing character class.
UNENCODABLE_XML_CHR_RANGES = [
    fr'{chr(first)}-{chr(last)}' for first, last in UNENCODABLE_XML_UNICHRS
]
UNENCODABLE_XML_CHRS_RE = re.compile('([' + ''.join(UNENCODABLE_XML_CHR_RANGES) + '])')
def is_windows() -> bool:
    '''Return True when platform.system(), lower-cased, equals 'windows'.'''
    return platform.system().lower() == 'windows'
@ -1148,14 +1168,21 @@ class TestRunRust(TestRun):
TestRun.PROTOCOL_TO_CLASS[TestProtocol.RUST] = TestRunRust
def replace_unencodable_xml_chars(original_str: str) -> str:
    '''
    Return original_str with every character matched by
    UNENCODABLE_XML_CHRS_RE replaced by its printable representation.
    '''
    def printable(illegal_chr: 're.Match[str]') -> str:
        # repr() wraps the text in `'` characters; slice them off both
        # ends so only the escape sequence itself remains.
        quoted = repr(illegal_chr.group())
        return quoted[1:-1]

    return UNENCODABLE_XML_CHRS_RE.sub(printable, original_str)
def decode(stream: T.Union[None, bytes]) -> str:
    '''
    Decode captured bytes into text with unencodable XML characters
    replaced by their printable representation.

    None yields an empty string. UTF-8 is tried first; if that raises
    UnicodeDecodeError the bytes are decoded as ISO-8859-1 instead.
    '''
    if stream is None:
        return ''
    try:
        text = stream.decode('utf-8')
    except UnicodeDecodeError:
        text = stream.decode('iso-8859-1', errors='ignore')
    # Single exit point: both decoding paths must be sanitized, so no
    # early return may bypass the replacement.
    return replace_unencodable_xml_chars(text)
async def read_decode(reader: asyncio.StreamReader,
queue: T.Optional['asyncio.Queue[T.Optional[str]]'],

@ -0,0 +1,4 @@
# Single-test project: the 'main' test runs script.py, located with
# find_program().
project('replace unencodable xml chars')

test('main', find_program('script.py'))

@ -0,0 +1,37 @@
#!/usr/bin/env python3
import sys

# Print base string(\nHello Meson\n) to see valid chars are not replaced
print('\n\x48\x65\x6c\x6c\x6f\x20\x4d\x65\x73\x6f\x6e\n')

# Print invalid input from all known unencodable chars
print(
    '\x00\x01\x02\x03\x04\x05\x06\x07\x08\x0b\x0c\x0e\x0f\x10\x11'
    '\x12\x13\x14\x15\x16\x17\x18\x19\x1a\x1b\x1c\x1d\x1e\x1f\x7f')

# Cover for potential encoding issues: stdout may use an encoding that
# cannot represent these characters, in which case the line is skipped.
# Only the encoding failure is ignored so that other errors (and
# Ctrl-C) still surface.
try:
    print(
        '\x80\x81\x82\x83\x84\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f'
        '\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e'
        '\x9f\ufdd0\ufdd1\ufdd2\ufdd3\ufdd4\ufdd5\ufdd6\ufdd7\ufdd8'
        '\ufdd9\ufdda\ufddb\ufddc\ufddd\ufdde\ufddf\ufde0\ufde1'
        '\ufde2\ufde3\ufde4\ufde5\ufde6\ufde7\ufde8\ufde9\ufdea'
        '\ufdeb\ufdec\ufded\ufdee\ufdef\ufffe\uffff')
except UnicodeEncodeError:
    pass

# Cover for potential encoding issues, same as above, for the
# characters that only exist on non-narrow builds.
try:
    if sys.maxunicode >= 0x10000:
        print(
            '\U0001fffe\U0001ffff\U0002fffe\U0002ffff'
            '\U0003fffe\U0003ffff\U0004fffe\U0004ffff'
            '\U0005fffe\U0005ffff\U0006fffe\U0006ffff'
            '\U0007fffe\U0007ffff\U0008fffe\U0008ffff'
            '\U0009fffe\U0009ffff\U000afffe\U000affff'
            '\U000bfffe\U000bffff\U000cfffe\U000cffff'
            '\U000dfffe\U000dffff\U000efffe\U000effff'
            '\U000ffffe\U000fffff\U0010fffe\U0010ffff')
except UnicodeEncodeError:
    pass

@ -59,6 +59,7 @@ from mesonbuild.linkers import linkers
from mesonbuild.dependencies.pkgconfig import PkgConfigDependency
from mesonbuild.build import Target, ConfigurationData, Executable, SharedLibrary, StaticLibrary
from mesonbuild import mtest
import mesonbuild.modules.pkgconfig
from mesonbuild.scripts import destdir_join
@ -398,6 +399,56 @@ class AllPlatformTests(BasePlatformTests):
self.assertTrue(compdb[3]['file'].endswith("libfile4.c"))
# FIXME: We don't have access to the linker command
def test_replace_unencodable_xml_chars(self):
    '''
    Check that mtest.decode() turns every unencodable xml char into
    its printable representation and leaves ordinary text untouched
    https://github.com/mesonbuild/meson/issues/9894
    '''
    # The same harmless text (\nHello Meson\n) written twice: with hex
    # escapes for the raw input and literally for the expected output
    escaped_hello = '\n\x48\x65\x6c\x6c\x6f\x20\x4d\x65\x73\x6f\x6e\n'
    plain_hello = '\nHello Meson\n'

    # All known unencodable chars up to \uffff
    unencodable = (
        '\x00\x01\x02\x03\x04\x05\x06\x07\x08\x0b\x0c\x0e\x0f\x10\x11'
        '\x12\x13\x14\x15\x16\x17\x18\x19\x1a\x1b\x1c\x1d\x1e\x1f\x7f'
        '\x80\x81\x82\x83\x84\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f'
        '\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e'
        '\x9f\ufdd0\ufdd1\ufdd2\ufdd3\ufdd4\ufdd5\ufdd6\ufdd7\ufdd8'
        '\ufdd9\ufdda\ufddb\ufddc\ufddd\ufdde\ufddf\ufde0\ufde1'
        '\ufde2\ufde3\ufde4\ufde5\ufde6\ufde7\ufde8\ufde9\ufdea'
        '\ufdeb\ufdec\ufded\ufdee\ufdef\ufffe\uffff')
    # Plus the ones above \uffff when the build is not narrow
    if sys.maxunicode >= 0x10000:
        unencodable += (
            '\U0001fffe\U0001ffff\U0002fffe\U0002ffff'
            '\U0003fffe\U0003ffff\U0004fffe\U0004ffff'
            '\U0005fffe\U0005ffff\U0006fffe\U0006ffff'
            '\U0007fffe\U0007ffff\U0008fffe\U0008ffff'
            '\U0009fffe\U0009ffff\U000afffe\U000affff'
            '\U000bfffe\U000bffff\U000cfffe\U000cffff'
            '\U000dfffe\U000dffff\U000efffe\U000effff'
            '\U000ffffe\U000fffff\U0010fffe\U0010ffff')

    # [1:-1] strips the `'` characters repr() puts around the text
    expected = plain_hello + repr(unencodable)[1:-1] + plain_hello
    raw_stream = (escaped_hello + unencodable + escaped_hello).encode()
    self.assertEqual(mtest.decode(raw_stream), expected)
def test_replace_unencodable_xml_chars_unit(self):
    '''
    Run the '110 replace unencodable xml chars' unit test project and
    check that xmllint accepts the resulting testlog.junit.xml
    https://github.com/mesonbuild/meson/issues/9894
    '''
    xmllint_path = shutil.which('xmllint')
    if not xmllint_path:
        raise SkipTest('xmllint not installed')

    project_dir = os.path.join(self.unit_test_dir, '110 replace unencodable xml chars')
    self.init(project_dir)
    self.run_tests()

    # check=True makes a non-zero xmllint exit status raise
    junit_log = Path(self.logdir) / 'testlog.junit.xml'
    subprocess.run(['xmllint', junit_log], check=True)
def test_run_target_files_path(self):
'''
Test that run_targets are run from the correct directory

Loading…
Cancel
Save