From 7993747e139744fe6c8fb0f5cd324627629ec970 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Olivier=20Cr=C3=AAte?= Date: Fri, 4 Jan 2019 08:42:01 -0500 Subject: [PATCH 1/2] Save surrogates as-is in log files When python sees an invalid character in a filename for the current locale, instead of clobbering it, it saves it as an invalid codepoint called a surrogate. We need to explicitly instruct the encoder to write those out as-is. In the JSON file, we replace them instead to produce valid json. --- mesonbuild/mtest.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/mesonbuild/mtest.py b/mesonbuild/mtest.py index 8ce95383b..b4bd4f233 100644 --- a/mesonbuild/mtest.py +++ b/mesonbuild/mtest.py @@ -647,8 +647,8 @@ Timeout: %4d self.logfilename = logfile_base + '.txt' self.jsonlogfilename = logfile_base + '.json' - self.jsonlogfile = open(self.jsonlogfilename, 'w', encoding='utf-8') - self.logfile = open(self.logfilename, 'w', encoding='utf-8') + self.jsonlogfile = open(self.jsonlogfilename, 'w', encoding='utf-8', errors='replace') + self.logfile = open(self.logfilename, 'w', encoding='utf-8', errors='surrogateescape') self.logfile.write('Log of Meson test suite run on %s\n\n' % datetime.datetime.now().isoformat()) From 92b343f2f73ad02f8c06a3468719a0d892e9050e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Olivier=20Cr=C3=AAte?= Date: Fri, 4 Jan 2019 17:27:38 -0500 Subject: [PATCH 2/2] mesonmain: Force to output UTF-8 even when the locale isn't Otherwise Python gets all confused and it makes testing difficult. Also minimally emulate the behaviour of the normal object to make the rest of the code happy. --- mesonbuild/mesonmain.py | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) diff --git a/mesonbuild/mesonmain.py b/mesonbuild/mesonmain.py index c11d04410..69c3f9f57 100644 --- a/mesonbuild/mesonmain.py +++ b/mesonbuild/mesonmain.py @@ -17,6 +17,7 @@ import os.path import importlib import traceback import argparse +import codecs from . 
import mesonlib from . import mlog @@ -148,6 +149,17 @@ def run_script_command(script_name, script_args): mlog.exception(e) return 1 +def ensure_stdout_accepts_unicode(): + if sys.stdout.encoding and not sys.stdout.encoding.upper().startswith('UTF-'): + if sys.version_info >= (3, 7): + sys.stdout.reconfigure(errors='surrogateescape') + else: + sys.stdout = codecs.getwriter('utf-8')(sys.stdout.detach(), + errors='surrogateescape') + sys.stdout.encoding = 'UTF-8' + if not hasattr(sys.stdout, 'buffer'): + sys.stdout.buffer = sys.stdout.raw if hasattr(sys.stdout, 'raw') else sys.stdout + def run(original_args, mainfile): if sys.version_info < (3, 5): print('Meson works correctly only with python 3.5+.') @@ -155,6 +167,11 @@ def run(original_args, mainfile): print('Please update your environment') return 1 + # Meson gets confused if stdout can't output Unicode, if the + # locale isn't Unicode, just force stdout to accept it. This tries + # to emulate enough of PEP 540 to work elsewhere. + ensure_stdout_accepts_unicode() + # https://github.com/mesonbuild/meson/issues/3653 if sys.platform.lower() == 'msys': mlog.error('This python3 seems to be msys/python on MSYS2 Windows, which is known to have path semantics incompatible with Meson')