Complete python escape sequences aware strings

Fixes #3169
pull/3421/head
Tim 'mithro' Ansell 7 years ago committed by Nirbheek Chauhan
parent 6089631a1b
commit 36aab0f4b2
  1. 20
      docs/markdown/Syntax.md
  2. 34
      mesonbuild/mparser.py
  3. 5
      test cases/common/190 escape and unicode/file.c.in
  4. 10
      test cases/common/190 escape and unicode/file.py
  5. 9
      test cases/common/190 escape and unicode/find.py
  6. 3
      test cases/common/190 escape and unicode/fun.c
  7. 12
      test cases/common/190 escape and unicode/main.c
  8. 25
      test cases/common/190 escape and unicode/meson.build
  9. 4
      test cases/common/33 try compile/meson.build
  10. 4
      test cases/common/39 tryrun/meson.build
  11. 18
      test cases/common/42 string operations/meson.build

@ -90,8 +90,24 @@ single quote do it like this:
single quote = 'contains a \' character'
```
Similarly `\n` gets converted to a newline and `\\` to a single
backslash.
The full list of escape sequences is:
* `\\` Backslash
* `\'` Single quote
* `\a` Bell
* `\b` Backspace
* `\f` Formfeed
* `\n` Newline
* `\r` Carriage Return
* `\t` Horizontal Tab
* `\v` Vertical Tab
* `\ooo` Character with octal value ooo
* `\xhh` Character with hex value hh
* `\uxxxx` Character with 16-bit hex value xxxx
* `\Uxxxxxxxx` Character with 32-bit hex value xxxxxxxx
* `\N{name}` Character named name in Unicode database
As in Python and C, up to three octal digits are accepted in `\ooo`.
#### String concatenation

@ -13,9 +13,36 @@
# limitations under the License.
import re
import codecs
from .mesonlib import MesonException
from . import mlog
# This is the regex for the supported escape sequences of a regular string
# literal, like 'abc\x00'. The hex escapes only match real hexadecimal digits,
# so malformed input such as '\xzz' is left untouched instead of being handed
# to the decoder (which would raise UnicodeDecodeError).
ESCAPE_SEQUENCE_SINGLE_RE = re.compile(r'''
    ( \\U[0-9A-Fa-f]{8}   # 8-digit hex escapes
    | \\u[0-9A-Fa-f]{4}   # 4-digit hex escapes
    | \\x[0-9A-Fa-f]{2}   # 2-digit hex escapes
    | \\[0-7]{1,3}        # Octal escapes
    | \\N\{[^}]+\}        # Unicode characters by name
    | \\[\\'abfnrtv]      # Single-character escapes
    )''', re.UNICODE | re.VERBOSE)

# This is the regex for the supported escape sequences of a multiline string
# literal, like '''abc\x00'''. The only difference is that single quote (')
# doesn't require escaping.
ESCAPE_SEQUENCE_MULTI_RE = re.compile(r'''
    ( \\U[0-9A-Fa-f]{8}   # 8-digit hex escapes
    | \\u[0-9A-Fa-f]{4}   # 4-digit hex escapes
    | \\x[0-9A-Fa-f]{2}   # 2-digit hex escapes
    | \\[0-7]{1,3}        # Octal escapes
    | \\N\{[^}]+\}        # Unicode characters by name
    | \\[\\abfnrtv]       # Single-character escapes
    )''', re.UNICODE | re.VERBOSE)


def decode_match(match):
    """Return the character denoted by one matched escape sequence.

    Intended as the replacement callback for ``re.sub`` with one of the
    ESCAPE_SEQUENCE_*_RE patterns above.

    :param match: match object whose whole text is a single escape sequence.
    :returns: the decoded text for that escape sequence.
    :raises UnicodeDecodeError: if the sequence is well-formed but cannot be
        decoded by the ``unicode_escape`` codec (for example an unknown
        character name inside ``\\N{...}``).
    """
    return codecs.decode(match.group(0), 'unicode_escape')
class ParseException(MesonException):
def __init__(self, text, line, lineno, colno):
# Format as error message, followed by the line with the error, followed by a caret to show the error column.
@ -112,7 +139,6 @@ class Lexer:
par_count = 0
bracket_count = 0
col = 0
newline_rx = re.compile(r'(?<!\\)((?:\\\\)*)\\n')
while loc < len(self.code):
matched = False
value = None
@ -145,12 +171,12 @@ class Lexer:
if match_text.find("\n") != -1:
mlog.warning("""Newline character in a string detected, use ''' (three single quotes) for multiline strings instead.
This will become a hard error in a future Meson release.""", self.getline(line_start), lineno, col)
value = match_text[1:-1].replace(r"\'", "'")
value = newline_rx.sub(r'\1\n', value)
value = value.replace(r" \\ ".strip(), r" \ ".strip())
value = match_text[1:-1]
value = ESCAPE_SEQUENCE_SINGLE_RE.sub(decode_match, value)
elif tid == 'multiline_string':
tid = 'string'
value = match_text[3:-3]
value = ESCAPE_SEQUENCE_MULTI_RE.sub(decode_match, value)
lines = match_text.split('\n')
if len(lines) > 1:
lineno += len(lines) - 1

@ -0,0 +1,5 @@
#include<stdio.h>
/*
 * Print the name that the build step substituted into the printf call and
 * return the fixed string that the caller in main.c compares against.
 * NOTE: keep the placeholder text out of comments; the generator script
 * replaces every occurrence in this file.
 */
const char* does_it_work(void) {
    printf("{NAME}\n");
    return "yes it does";
}

@ -0,0 +1,10 @@
#!/usr/bin/env python3
import sys
import os
def main(argv):
    """Copy a template file, replacing every ``{NAME}`` placeholder.

    :param argv: command-line style list: ``argv[1]`` is the template path,
        ``argv[2]`` the replacement text and ``argv[3]`` the output path.
    :raises IndexError: if fewer than three arguments follow the script name.
    """
    template_path = argv[1]
    name = argv[2]
    output_path = argv[3]

    with open(template_path) as fh:
        content = fh.read().replace("{NAME}", name)

    # The replacement text may contain characters that the locale's encoding
    # cannot represent; substitute them rather than aborting with
    # UnicodeEncodeError.
    with open(output_path, 'w', errors='replace') as fh:
        fh.write(content)


if __name__ == '__main__':
    main(sys.argv)

@ -0,0 +1,9 @@
#!/usr/bin/env python3
import os
import sys
# Collect every regular file in the current directory whose name ends in
# '.c', then emit the names on stdout, each one terminated by a NUL byte.
c_sources = [
    entry for entry in os.listdir('.')
    if entry.endswith('.c') and os.path.isfile(entry)
]
sys.stdout.write(''.join(name + '\0' for name in c_sources))

@ -0,0 +1,3 @@
/* Return the constant 1; main.c negates it to build its failure exit code. */
int a_fun(void) {
    return 1;
}

@ -0,0 +1,12 @@
#include <string.h>
/* Defined in the generated source and in fun.c respectively. Declared with
 * (void) so the compiler checks that they are called without arguments. */
const char* does_it_work(void);
int a_fun(void);

/*
 * Exit with 0 when does_it_work() returns the expected string, otherwise
 * with the negated result of a_fun().
 */
int main(void) {
    if(strcmp(does_it_work(), "yes it does") != 0) {
        return -a_fun();
    }
    return 0;
}
}

@ -0,0 +1,25 @@
# Test that \u, octal (\ooo) and hex (\xhh) escape sequences in string
# literals are decoded.
project('180 escape', 'c')
# file.py is invoked as: file.py @INPUT@ <name> @OUTPUT@. The name argument is
# spelled with a \u escape.
gen = generator(find_program('file.py'), arguments:['@INPUT@', 'erd\u0151', '@OUTPUT@'], output: '@BASENAME@')
gen_file = gen.process('file.c.in')
# find.py prints the names of the .c files it finds, each followed by a NUL
# character.
find_file_list = run_command(find_program('find.py'))
# NOTE: this only checks the exit status of find.py, not that the list is
# non-empty.
assert(find_file_list.returncode() == 0, 'Didn\'t find any files.')
# Strings should support both octal \ooo and hex \xhh encodings
found_files_oct = []
# Strip the trailing NUL and split on NUL, written as octal escapes.
foreach l : find_file_list.stdout().strip('\0').split('\000')
found_files_oct += [files(l)]
endforeach
test('first', executable('first', found_files_oct + [gen_file]))
found_files_hex = []
# Same split as above, with the NUL written as a hex escape.
foreach l : find_file_list.stdout().strip('\x00').split('\x00')
found_files_hex += [files(l)]
endforeach
test('second', executable('second', found_files_hex + [gen_file]))

@ -1,11 +1,11 @@
project('try compile', 'c', 'cpp')
code = '''#include<stdio.h>
void func() { printf("Something.\n"); }
void func() { printf("Something.\\n"); }
'''
breakcode = '''#include<nonexisting.h>
void func() { printf("This won't work.\n"); }
void func() { printf("This won't work.\\n"); }
'''
foreach compiler : [meson.get_compiler('c'), meson.get_compiler('cpp')]

@ -13,8 +13,8 @@ endif
ok_code = '''#include<stdio.h>
int main(int argc, char **argv) {
printf("%s\n", "stdout");
fprintf(stderr, "%s\n", "stderr");
printf("%s\\n", "stdout");
fprintf(stderr, "%s\\n", "stderr");
return 0;
}
'''

@ -77,21 +77,21 @@ assert('"1.1.20"'.strip('"') == '1.1.20', '" badly stripped')
assert('"1.1.20"'.strip('".') == '1.1.20', '". badly stripped')
assert('"1.1.20" '.strip('" ') == '1.1.20', '". badly stripped')
bs_b = '''\b'''
bs_bs_b = '''\\b'''
bs_c = '''\c'''
bs_bs_c = '''\\\c'''
nl = '''
'''
bs_n = '''\n'''
bs_n = '''\\n'''
bs_nl = '''\
'''
bs_bs_n = '''\\n'''
bs_bs_nl = '''\\
bs_bs_n = '''\\\\n'''
bs_bs_nl = '''\\\\
'''
assert('\b' == bs_b, 'Single backslash broken')
assert('\\b' == bs_b, 'Double backslash broken')
assert('\\\b' == bs_bs_b, 'Three backslash broken')
assert('\\\\b' == bs_bs_b, 'Four backslash broken')
assert('\c' == bs_c, 'Single backslash broken')
assert('\\c' == bs_c, 'Double backslash broken')
assert('\\\c' == bs_bs_c, 'Three backslash broken')
assert('\\\\c' == bs_bs_c, 'Four backslash broken')
assert('\n' == nl, 'Newline escape broken')
assert('\\n' == bs_n, 'Double backslash broken before n')
assert('\\\n' == bs_nl, 'Three backslash broken before n')

Loading…
Cancel
Save