From ccc4ce28cc9077d77a0bc9e72b1177eba1be7186 Mon Sep 17 00:00:00 2001 From: Jon Turney Date: Sun, 28 Apr 2019 21:06:36 +0100 Subject: [PATCH] consistent invalid escape sequence behaviour * docs: document unrecognized escape sequence behaviour [skip ci] Document that unrecognized escape sequence behaviour is like Python, not C. * Don't try to decode invalid hex escape sequences Don't try to decode escape sequences which should contain a sequence of hex digits, but don't, as doing so throws a Python exception. These will be treated literally instead. * Extend test case to cover invalid escape sequences --- docs/markdown/Syntax.md | 3 +++ mesonbuild/mparser.py | 12 ++++++------ .../common/185 escape and unicode/meson.build | 14 ++++++++++++++ 3 files changed, 23 insertions(+), 6 deletions(-) diff --git a/docs/markdown/Syntax.md b/docs/markdown/Syntax.md index cf56dd3f4..aadb14afe 100644 --- a/docs/markdown/Syntax.md +++ b/docs/markdown/Syntax.md @@ -116,6 +116,9 @@ The full list of escape sequences is: As in python and C, up to three octal digits are accepted in `\ooo`. +Unrecognized escape sequences are left in the string unchanged, i.e., the +backslash is left in the string. + #### String concatenation Strings can be concatenated to form a new string using the `+` symbol. diff --git a/mesonbuild/mparser.py b/mesonbuild/mparser.py index 17783ce2d..4305a7cb4 100644 --- a/mesonbuild/mparser.py +++ b/mesonbuild/mparser.py @@ -21,12 +21,12 @@ from . import mlog # This is the regex for the supported escape sequences of a regular string # literal, like 'abc\x00' ESCAPE_SEQUENCE_SINGLE_RE = re.compile(r''' - ( \\U........ # 8-digit hex escapes - | \\u.... # 4-digit hex escapes - | \\x.. 
# 2-digit hex escapes - | \\[0-7]{1,3} # Octal escapes - | \\N\{[^}]+\} # Unicode characters by name - | \\[\\'abfnrtv] # Single-character escapes + ( \\U[A-Fa-f0-9]{8} # 8-digit hex escapes + | \\u[A-Fa-f0-9]{4} # 4-digit hex escapes + | \\x[A-Fa-f0-9]{2} # 2-digit hex escapes + | \\[0-7]{1,3} # Octal escapes + | \\N\{[^}]+\} # Unicode characters by name + | \\[\\'abfnrtv] # Single-character escapes )''', re.UNICODE | re.VERBOSE) class MesonUnicodeDecodeError(MesonException): diff --git a/test cases/common/185 escape and unicode/meson.build b/test cases/common/185 escape and unicode/meson.build index 65377b6a3..e4fe628ae 100644 --- a/test cases/common/185 escape and unicode/meson.build +++ b/test cases/common/185 escape and unicode/meson.build @@ -22,3 +22,17 @@ foreach l : find_file_list.stdout().strip('\x00').split('\x00') endforeach test('second', executable('second', found_files_hex + [gen_file])) + +# Unrecognized and malformed escape sequences are literal + +malformed = [ + [ '\c', 'c' ], + [ '\Uabcdefghi', 'Uabcdefghi'], + [ '\u123 ', 'u123 '], + [ '\xqr', 'xqr'], +] + +foreach m : malformed + assert(m[0].endswith(m[1]), 'bad escape sequence had unexpected end') + assert(m[0].startswith('\\'), 'bad escape sequence had unexpected start') +endforeach