From 36aab0f4b2a05a1ae4c8603e57b4c3c684fe8ea8 Mon Sep 17 00:00:00 2001 From: Tim 'mithro' Ansell Date: Sat, 3 Mar 2018 15:00:55 -0800 Subject: [PATCH] Complete python escape sequences aware strings Fixes #3169 --- docs/markdown/Syntax.md | 20 +++++++++-- mesonbuild/mparser.py | 34 ++++++++++++++++--- .../common/190 escape and unicode/file.c.in | 5 +++ .../common/190 escape and unicode/file.py | 10 ++++++ .../common/190 escape and unicode/find.py | 9 +++++ .../common/190 escape and unicode/fun.c | 3 ++ .../common/190 escape and unicode/main.c | 12 +++++++ .../common/190 escape and unicode/meson.build | 25 ++++++++++++++ test cases/common/33 try compile/meson.build | 4 +-- test cases/common/39 tryrun/meson.build | 4 +-- .../common/42 string operations/meson.build | 18 +++++----- 11 files changed, 125 insertions(+), 19 deletions(-) create mode 100644 test cases/common/190 escape and unicode/file.c.in create mode 100644 test cases/common/190 escape and unicode/file.py create mode 100644 test cases/common/190 escape and unicode/find.py create mode 100644 test cases/common/190 escape and unicode/fun.c create mode 100644 test cases/common/190 escape and unicode/main.c create mode 100644 test cases/common/190 escape and unicode/meson.build diff --git a/docs/markdown/Syntax.md b/docs/markdown/Syntax.md index 1005100fe..01c8c6e95 100644 --- a/docs/markdown/Syntax.md +++ b/docs/markdown/Syntax.md @@ -90,8 +90,24 @@ single quote do it like this: single quote = 'contains a \' character' ``` -Similarly `\n` gets converted to a newline and `\\` to a single -backslash. 
+The full list of escape sequences is: + +* `\\` Backslash +* `\'` Single quote +* `\a` Bell +* `\b` Backspace +* `\f` Formfeed +* `\n` Newline +* `\r` Carriage Return +* `\t` Horizontal Tab +* `\v` Vertical Tab +* `\ooo` Character with octal value ooo +* `\xhh` Character with hex value hh +* `\uxxxx` Character with 16-bit hex value xxxx +* `\Uxxxxxxxx` Character with 32-bit hex value xxxxxxxx +* `\N{name}` Character named name in the Unicode database + +As in Python and C, up to three octal digits are accepted in `\ooo`. #### String concatenation diff --git a/mesonbuild/mparser.py b/mesonbuild/mparser.py index 0e7524c3c..bf7c271d0 100644 --- a/mesonbuild/mparser.py +++ b/mesonbuild/mparser.py @@ -13,9 +13,36 @@ # limitations under the License. import re +import codecs from .mesonlib import MesonException from . import mlog +# This is the regex for the supported escape sequences of a regular string +# literal, like 'abc\x00' +ESCAPE_SEQUENCE_SINGLE_RE = re.compile(r''' + ( \\U........ # 8-digit hex escapes + | \\u.... # 4-digit hex escapes + | \\x.. # 2-digit hex escapes + | \\[0-7]{1,3} # Octal escapes + | \\N\{[^}]+\} # Unicode characters by name + | \\[\\'abfnrtv] # Single-character escapes + )''', re.UNICODE | re.VERBOSE) + +# This is the regex for the supported escape sequences of a multiline string +# literal, like '''abc\x00'''. The only difference is that single quote (') +# doesn't require escaping. +ESCAPE_SEQUENCE_MULTI_RE = re.compile(r''' + ( \\U........ # 8-digit hex escapes + | \\u.... # 4-digit hex escapes + | \\x.. 
# 2-digit hex escapes + | \\[0-7]{1,3} # Octal escapes + | \\N\{[^}]+\} # Unicode characters by name + | \\[\\abfnrtv] # Single-character escapes + )''', re.UNICODE | re.VERBOSE) + +def decode_match(match): + return codecs.decode(match.group(0), 'unicode_escape') + class ParseException(MesonException): def __init__(self, text, line, lineno, colno): # Format as error message, followed by the line with the error, followed by a caret to show the error column. @@ -112,7 +139,6 @@ class Lexer: par_count = 0 bracket_count = 0 col = 0 - newline_rx = re.compile(r'(? 1: lineno += len(lines) - 1 diff --git a/test cases/common/190 escape and unicode/file.c.in b/test cases/common/190 escape and unicode/file.c.in new file mode 100644 index 000000000..413ed4297 --- /dev/null +++ b/test cases/common/190 escape and unicode/file.c.in @@ -0,0 +1,5 @@ +#include <stdio.h> +const char* does_it_work() { + printf("{NAME}\n"); + return "yes it does"; +} diff --git a/test cases/common/190 escape and unicode/file.py b/test cases/common/190 escape and unicode/file.py new file mode 100644 index 000000000..af67a0950 --- /dev/null +++ b/test cases/common/190 escape and unicode/file.py @@ -0,0 +1,10 @@ +#!/usr/bin/env python3 + +import sys +import os + +with open(sys.argv[1]) as fh: + content = fh.read().replace("{NAME}", sys.argv[2]) + +with open(os.path.join(sys.argv[3]), 'w') as fh: + fh.write(content) diff --git a/test cases/common/190 escape and unicode/find.py b/test cases/common/190 escape and unicode/find.py new file mode 100644 index 000000000..34a3eb835 --- /dev/null +++ b/test cases/common/190 escape and unicode/find.py @@ -0,0 +1,9 @@ +#!/usr/bin/env python3 + +import os +import sys + +for fh in os.listdir('.'): + if os.path.isfile(fh): + if fh.endswith('.c'): + sys.stdout.write(fh + '\0') diff --git a/test cases/common/190 escape and unicode/fun.c b/test cases/common/190 escape and unicode/fun.c new file mode 100644 index 000000000..8eeb8ea00 --- /dev/null +++ b/test cases/common/190 escape 
and unicode/fun.c @@ -0,0 +1,3 @@ +int a_fun() { + return 1; +} diff --git a/test cases/common/190 escape and unicode/main.c b/test cases/common/190 escape and unicode/main.c new file mode 100644 index 000000000..0bcde169c --- /dev/null +++ b/test cases/common/190 escape and unicode/main.c @@ -0,0 +1,12 @@ +#include <string.h> + +const char* does_it_work(); + +int a_fun(); + +int main() { + if(strcmp(does_it_work(), "yes it does") != 0) { + return -a_fun(); + } + return 0; +} diff --git a/test cases/common/190 escape and unicode/meson.build b/test cases/common/190 escape and unicode/meson.build new file mode 100644 index 000000000..be1107326 --- /dev/null +++ b/test cases/common/190 escape and unicode/meson.build @@ -0,0 +1,25 @@ +project('180 escape', 'c') + +gen = generator(find_program('file.py'), arguments:['@INPUT@', 'erd\u0151', '@OUTPUT@'], output: '@BASENAME@') + +gen_file = gen.process('file.c.in') + +find_file_list = run_command(find_program('find.py')) +assert(find_file_list.returncode() == 0, 'Didn\'t find any files.') + +# Strings should support both octal \ooo and hex \xhh encodings + +found_files_oct = [] +foreach l : find_file_list.stdout().strip('\0').split('\000') + found_files_oct += [files(l)] +endforeach + +test('first', executable('first', found_files_oct + [gen_file])) + +found_files_hex = [] +foreach l : find_file_list.stdout().strip('\x00').split('\x00') + found_files_hex += [files(l)] +endforeach + +test('second', executable('second', found_files_hex + [gen_file])) + diff --git a/test cases/common/33 try compile/meson.build b/test cases/common/33 try compile/meson.build index 09ca395be..cb1037d30 100644 --- a/test cases/common/33 try compile/meson.build +++ b/test cases/common/33 try compile/meson.build @@ -1,11 +1,11 @@ project('try compile', 'c', 'cpp') code = '''#include <stdio.h> -void func() { printf("Something.\n"); } +void func() { printf("Something.\\n"); } ''' breakcode = '''#include -void func() { printf("This won't work.\n"); } +void func() { 
printf("This won't work.\\n"); } ''' foreach compiler : [meson.get_compiler('c'), meson.get_compiler('cpp')] diff --git a/test cases/common/39 tryrun/meson.build b/test cases/common/39 tryrun/meson.build index c64446ff0..daf5be798 100644 --- a/test cases/common/39 tryrun/meson.build +++ b/test cases/common/39 tryrun/meson.build @@ -13,8 +13,8 @@ endif ok_code = '''#include <stdio.h> int main(int argc, char **argv) { - printf("%s\n", "stdout"); - fprintf(stderr, "%s\n", "stderr"); + printf("%s\\n", "stdout"); + fprintf(stderr, "%s\\n", "stderr"); return 0; } ''' diff --git a/test cases/common/42 string operations/meson.build b/test cases/common/42 string operations/meson.build index a43de707e..1c289eb1b 100644 --- a/test cases/common/42 string operations/meson.build +++ b/test cases/common/42 string operations/meson.build @@ -77,21 +77,21 @@ assert('"1.1.20"'.strip('"') == '1.1.20', '" badly stripped') assert('"1.1.20"'.strip('".') == '1.1.20', '". badly stripped') assert('"1.1.20" '.strip('" ') == '1.1.20', '". badly stripped') -bs_b = '''\b''' -bs_bs_b = '''\\b''' +bs_c = '''\c''' +bs_bs_c = '''\\\c''' nl = ''' ''' -bs_n = '''\n''' +bs_n = '''\\n''' bs_nl = '''\ ''' -bs_bs_n = '''\\n''' -bs_bs_nl = '''\\ +bs_bs_n = '''\\\\n''' +bs_bs_nl = '''\\\\ ''' -assert('\b' == bs_b, 'Single backslash broken') -assert('\\b' == bs_b, 'Double backslash broken') -assert('\\\b' == bs_bs_b, 'Three backslash broken') -assert('\\\\b' == bs_bs_b, 'Four backslash broken') +assert('\c' == bs_c, 'Single backslash broken') +assert('\\c' == bs_c, 'Double backslash broken') +assert('\\\c' == bs_bs_c, 'Three backslash broken') +assert('\\\\c' == bs_bs_c, 'Four backslash broken') assert('\n' == nl, 'Newline escape broken') assert('\\n' == bs_n, 'Double backslash broken before n') assert('\\\n' == bs_nl, 'Three backslash broken before n')