parser: preserve escape chars in strings

use separate Node for multiline strings
pull/12152/head
Charles Brunet 2 years ago
parent a730a2fe21
commit 35936283d2
  1. 2
      mesonbuild/ast/interpreter.py
  2. 8
      mesonbuild/ast/introspection.py
  3. 12
      mesonbuild/ast/printer.py
  4. 6
      mesonbuild/ast/visitor.py
  5. 2
      mesonbuild/cmake/interpreter.py
  6. 2
      mesonbuild/coredata.py
  7. 2
      mesonbuild/interpreter/interpreter.py
  8. 4
      mesonbuild/interpreterbase/helpers.py
  9. 20
      mesonbuild/interpreterbase/interpreterbase.py
  10. 4
      mesonbuild/mintro.py
  11. 33
      mesonbuild/mparser.py
  12. 4
      mesonbuild/optinterpreter.py
  13. 24
      mesonbuild/rewriter.py

@ -239,7 +239,7 @@ class AstInterpreter(InterpreterBase):
def evaluate_dictstatement(self, node: mparser.DictNode) -> TYPE_nkwargs:
def resolve_key(node: mparser.BaseNode) -> str:
if isinstance(node, mparser.StringNode):
if isinstance(node, mparser.BaseStringNode):
return node.value
return '__AST_UNKNOWN__'
arguments, kwargs = self.reduce_arguments(node.args, key_resolver=resolve_key)

@ -27,7 +27,7 @@ from ..build import Executable, Jar, SharedLibrary, SharedModule, StaticLibrary
from ..compilers import detect_compiler_for
from ..interpreterbase import InvalidArguments
from ..mesonlib import MachineChoice, OptionKey
from ..mparser import BaseNode, ArithmeticNode, ArrayNode, ElementaryNode, IdNode, FunctionNode, StringNode
from ..mparser import BaseNode, ArithmeticNode, ArrayNode, ElementaryNode, IdNode, FunctionNode, BaseStringNode
from .interpreter import AstInterpreter
if T.TYPE_CHECKING:
@ -128,7 +128,7 @@ class IntrospectionInterpreter(AstInterpreter):
if not self.is_subproject() and 'subproject_dir' in kwargs:
spdirname = kwargs['subproject_dir']
if isinstance(spdirname, StringNode):
if isinstance(spdirname, BaseStringNode):
assert isinstance(spdirname.value, str)
self.subproject_dir = spdirname.value
if not self.is_subproject():
@ -174,7 +174,7 @@ class IntrospectionInterpreter(AstInterpreter):
for l in self.flatten_args(raw_langs):
if isinstance(l, str):
langs.append(l)
elif isinstance(l, StringNode):
elif isinstance(l, BaseStringNode):
langs.append(l.value)
for lang in sorted(langs, key=compilers.sort_clink):
@ -263,7 +263,7 @@ class IntrospectionInterpreter(AstInterpreter):
# Pop the first element if the function is a build target function
if isinstance(curr, FunctionNode) and curr.func_name in BUILD_TARGET_FUNCTIONS:
arg_nodes.pop(0)
elementary_nodes = [x for x in arg_nodes if isinstance(x, (str, StringNode))]
elementary_nodes = [x for x in arg_nodes if isinstance(x, (str, BaseStringNode))]
inqueue += [x for x in arg_nodes if isinstance(x, (FunctionNode, ArrayNode, IdNode, ArithmeticNode))]
if elementary_nodes:
res += [curr]

@ -84,7 +84,17 @@ class AstPrinter(AstVisitor):
def visit_FormatStringNode(self, node: mparser.FormatStringNode) -> None:
assert isinstance(node.value, str)
self.append("f'" + node.value + "'", node)
self.append("f'" + self.escape(node.value) + "'", node)
node.lineno = self.curr_line or node.lineno
# Print a multiline string literal: the node's value is wrapped in ''' ... '''
# and appended as-is (self.escape() is not applied to it here).
# NOTE(review): the parameter is annotated as mparser.StringNode, but
# mparser.MultilineStringNode derives from BaseStringNode, not StringNode --
# the annotation probably should name MultilineStringNode; confirm.
def visit_MultilineStringNode(self, node: mparser.StringNode) -> None:
assert isinstance(node.value, str)
self.append("'''" + node.value + "'''", node)
# Use the printer's current line when it is truthy, otherwise keep the node's own.
node.lineno = self.curr_line or node.lineno
# Print a multiline format string literal: the node's value is wrapped in
# f''' ... ''' and appended as-is (self.escape() is not applied to it here).
# NOTE(review): the parser class is named MultilineFormatStringNode and derives
# from MultilineStringNode, not FormatStringNode. If visitor dispatch is keyed
# on the node's class name, this method ("FormatMultiline...") would never be
# selected for it -- confirm against the node accept() implementation. The
# FormatStringNode annotation looks stale for the same reason.
def visit_FormatMultilineStringNode(self, node: mparser.FormatStringNode) -> None:
assert isinstance(node.value, str)
self.append("f'''" + node.value + "'''", node)
# Use the printer's current line when it is truthy, otherwise keep the node's own.
node.lineno = self.curr_line or node.lineno
def visit_ContinueNode(self, node: mparser.ContinueNode) -> None:

@ -43,6 +43,12 @@ class AstVisitor:
# Base-visitor hook for format string nodes; forwards to visit_default_func.
def visit_FormatStringNode(self, node: mparser.FormatStringNode) -> None:
self.visit_default_func(node)
# Base-visitor hook for multiline string nodes; forwards to visit_default_func.
# NOTE(review): annotated as mparser.StringNode, but MultilineStringNode derives
# from BaseStringNode rather than StringNode -- the annotation probably should
# name MultilineStringNode; confirm.
def visit_MultilineStringNode(self, node: mparser.StringNode) -> None:
self.visit_default_func(node)
# Base-visitor hook intended for multiline format string nodes; forwards to
# visit_default_func.
# NOTE(review): the node class is named MultilineFormatStringNode (word order
# differs from this method name) and it does not derive from FormatStringNode.
# If dispatch is by class name this hook would not be reached -- confirm, and
# consider whether the annotation should be MultilineFormatStringNode.
def visit_FormatMultilineStringNode(self, node: mparser.FormatStringNode) -> None:
self.visit_default_func(node)
# Base-visitor hook for continue nodes; forwards to visit_default_func.
def visit_ContinueNode(self, node: mparser.ContinueNode) -> None:
self.visit_default_func(node)

@ -960,7 +960,7 @@ class CMakeInterpreter:
return Token(tid, self.subdir.as_posix(), 0, 0, 0, None, val)
def string(value: str) -> StringNode:
return StringNode(token(val=value))
return StringNode(token(val=value), escape=False)
# Build an IdNode whose token carries `value`, using the enclosing token() helper.
def id_node(value: str) -> IdNode:
return IdNode(token(val=value))

@ -1098,7 +1098,7 @@ class MachineFileParser():
return section
def _evaluate_statement(self, node: mparser.BaseNode) -> T.Union[str, bool, int, T.List[str]]:
if isinstance(node, (mparser.StringNode)):
if isinstance(node, (mparser.BaseStringNode)):
return node.value
elif isinstance(node, mparser.BooleanNode):
return node.value

@ -536,7 +536,7 @@ class Interpreter(InterpreterBase, HoldableObject):
assert isinstance(kw, mparser.IdNode), 'for mypy'
if kw.value == 'meson_version':
# mypy does not understand "and isinstance"
if isinstance(val, mparser.StringNode):
if isinstance(val, mparser.BaseStringNode):
self.handle_meson_version(val.value, val)
def get_build_def_files(self) -> mesonlib.OrderedSet[str]:

@ -25,7 +25,7 @@ if T.TYPE_CHECKING:
from .baseobjects import TYPE_var, TYPE_kwargs, SubProject
def flatten(args: T.Union['TYPE_var', T.List['TYPE_var']]) -> T.List['TYPE_var']:
if isinstance(args, mparser.StringNode):
if isinstance(args, mparser.BaseStringNode):
assert isinstance(args.value, str)
return [args.value]
if not isinstance(args, collections.abc.Sequence):
@ -35,7 +35,7 @@ def flatten(args: T.Union['TYPE_var', T.List['TYPE_var']]) -> T.List['TYPE_var']
if isinstance(a, list):
rest = flatten(a)
result = result + rest
elif isinstance(a, mparser.StringNode):
elif isinstance(a, mparser.BaseStringNode):
result.append(a.value)
else:
result.append(a)

@ -196,8 +196,13 @@ class InterpreterBase:
self.assignment(cur)
elif isinstance(cur, mparser.MethodNode):
return self.method_call(cur)
elif isinstance(cur, mparser.StringNode):
return self._holderify(cur.value)
elif isinstance(cur, mparser.BaseStringNode):
if isinstance(cur, mparser.MultilineFormatStringNode):
return self.evaluate_multiline_fstring(cur)
elif isinstance(cur, mparser.FormatStringNode):
return self.evaluate_fstring(cur)
else:
return self._holderify(cur.value)
elif isinstance(cur, mparser.BooleanNode):
return self._holderify(cur.value)
elif isinstance(cur, mparser.IfClauseNode):
@ -230,11 +235,6 @@ class InterpreterBase:
return self.evaluate_indexing(cur)
elif isinstance(cur, mparser.TernaryNode):
return self.evaluate_ternary(cur)
elif isinstance(cur, mparser.FormatStringNode):
if isinstance(cur, mparser.MultilineFormatStringNode):
return self.evaluate_multiline_fstring(cur)
else:
return self.evaluate_fstring(cur)
elif isinstance(cur, mparser.ContinueNode):
raise ContinueRequest()
elif isinstance(cur, mparser.BreakNode):
@ -256,7 +256,7 @@ class InterpreterBase:
@FeatureNew('dict', '0.47.0')
def evaluate_dictstatement(self, cur: mparser.DictNode) -> InterpreterObject:
def resolve_key(key: mparser.BaseNode) -> str:
if not isinstance(key, mparser.StringNode):
if not isinstance(key, mparser.BaseStringNode):
FeatureNew.single_use('Dictionary entry using non literal key', '0.53.0', self.subproject)
key_holder = self.evaluate_statement(key)
if key_holder is None:
@ -428,9 +428,7 @@ class InterpreterBase:
return self.evaluate_fstring(node)
@FeatureNew('format strings', '0.58.0')
def evaluate_fstring(self, node: mparser.FormatStringNode) -> InterpreterObject:
assert isinstance(node, mparser.FormatStringNode)
def evaluate_fstring(self, node: T.Union[mparser.FormatStringNode, mparser.MultilineFormatStringNode]) -> InterpreterObject:
def replace(match: T.Match[str]) -> str:
var = str(match.group(1))
try:

@ -36,7 +36,7 @@ from .dependencies import Dependency
from . import environment
from .interpreterbase import ObjectHolder
from .mesonlib import OptionKey
from .mparser import FunctionNode, ArrayNode, ArgumentNode, StringNode
from .mparser import FunctionNode, ArrayNode, ArgumentNode, BaseStringNode
if T.TYPE_CHECKING:
import argparse
@ -194,7 +194,7 @@ def list_targets_from_source(intr: IntrospectionInterpreter) -> T.List[T.Dict[st
elif isinstance(n, ArgumentNode):
args = n.arguments
for j in args:
if isinstance(j, StringNode):
if isinstance(j, BaseStringNode):
assert isinstance(j.value, str)
res += [Path(j.value)]
elif isinstance(j, str):

@ -193,23 +193,15 @@ class Lexer:
elif tid == 'dblquote':
raise ParseException('Double quotes are not supported. Use single quotes.', self.getline(line_start), lineno, col)
elif tid in {'string', 'fstring'}:
# Handle here and not on the regexp to give a better error message.
if match_text.find("\n") != -1:
msg = ("Newline character in a string detected, use ''' (three single quotes) "
"for multiline strings instead.\n"
"This will become a hard error in a future Meson release.")
mlog.warning(mlog.code_line(msg, self.getline(line_start), col), location=BaseNode(lineno, col, filename))
value = match_text[2 if tid == 'fstring' else 1:-1]
value = ESCAPE_SEQUENCE_SINGLE_RE.sub(decode_match, value)
elif tid in {'multiline_string', 'multiline_fstring'}:
# For multiline strings, parse out the value and pass
# through the normal string logic.
# For multiline format strings, we have to emit a
# different AST node so we can add a feature check,
# but otherwise, it follows the normal fstring logic.
if tid == 'multiline_string':
value = match_text[3:-3]
tid = 'string'
else:
value = match_text[4:-3]
lines = match_text.split('\n')
@ -295,13 +287,30 @@ class NumberNode(ElementaryNode[int]):
self.value = int(token.value, base=0)
self.bytespan = token.bytespan
class StringNode(ElementaryNode[str]):
class BaseStringNode(ElementaryNode[str]):
pass
class FormatStringNode(ElementaryNode[str]):
# Single-line string literal node that keeps two views of the token text:
#   value     -- token.value with ESCAPE_SEQUENCE_SINGLE_RE matches rewritten by
#                decode_match when `escape` is true; token.value unchanged otherwise
#   raw_value -- always token.value exactly as received (excluded from the hash)
@dataclass(unsafe_hash=True)
class StringNode(BaseStringNode):
raw_value: str = field(hash=False)
# escape: pass False to skip escape decoding and store the token text unchanged.
def __init__(self, token: Token[str], escape: bool = True):
super().__init__(token)
self.value = ESCAPE_SEQUENCE_SINGLE_RE.sub(decode_match, token.value) if escape else token.value
self.raw_value = token.value
# Format-string literal node; adds nothing of its own, so it inherits
# StringNode's constructor (escape decoding and raw_value) unchanged.
class FormatStringNode(StringNode):
pass
class MultilineFormatStringNode(FormatStringNode):
# Multiline string literal node. Its value is assigned straight from
# token.value; no escape-sequence substitution is performed in this constructor.
@dataclass(unsafe_hash=True)
class MultilineStringNode(BaseStringNode):
def __init__(self, token: Token[str]):
super().__init__(token)
self.value = token.value
# Multiline format-string literal node; inherits MultilineStringNode unchanged.
# It is NOT a subclass of FormatStringNode/StringNode, so isinstance checks
# against those classes do not match it; test for this class (or
# BaseStringNode) explicitly.
class MultilineFormatStringNode(MultilineStringNode):
pass
class ContinueNode(ElementaryNode):
@ -819,6 +828,8 @@ class Parser:
return StringNode(t)
if self.accept('fstring'):
return FormatStringNode(t)
if self.accept('multiline_string'):
return MultilineStringNode(t)
if self.accept('multiline_fstring'):
return MultilineFormatStringNode(t)
return EmptyNode(self.current.lineno, self.current.colno, self.current.filename)

@ -115,7 +115,7 @@ class OptionInterpreter:
return arg
if isinstance(arg, mparser.ParenthesizedNode):
return self.reduce_single(arg.inner)
elif isinstance(arg, (mparser.StringNode, mparser.BooleanNode,
elif isinstance(arg, (mparser.BaseStringNode, mparser.BooleanNode,
mparser.NumberNode)):
return arg.value
elif isinstance(arg, mparser.ArrayNode):
@ -123,7 +123,7 @@ class OptionInterpreter:
elif isinstance(arg, mparser.DictNode):
d = {}
for k, v in arg.args.kwargs.items():
if not isinstance(k, mparser.StringNode):
if not isinstance(k, mparser.BaseStringNode):
raise OptionException('Dictionary keys must be a string literal')
d[k.value] = self.reduce_single(v)
return d

@ -28,7 +28,7 @@ from .ast import IntrospectionInterpreter, BUILD_TARGET_FUNCTIONS, AstConditionL
from mesonbuild.mesonlib import MesonException, setup_vsenv
from . import mlog, environment
from functools import wraps
from .mparser import Token, ArrayNode, ArgumentNode, AssignmentNode, BooleanNode, ElementaryNode, IdNode, FunctionNode, StringNode
from .mparser import Token, ArrayNode, ArgumentNode, AssignmentNode, BaseStringNode, BooleanNode, ElementaryNode, IdNode, FunctionNode, StringNode
import json, os, re, sys
import typing as T
@ -267,12 +267,12 @@ class MTypeStrList(MTypeList):
return StringNode(Token('', '', 0, 0, 0, None, str(value)))
def _check_is_equal(self, node, value) -> bool:
if isinstance(node, StringNode):
if isinstance(node, BaseStringNode):
return node.value == value
return False
def _check_regex_matches(self, node, regex: str) -> bool:
if isinstance(node, StringNode):
if isinstance(node, BaseStringNode):
return re.match(regex, node.value) is not None
return False
@ -292,7 +292,7 @@ class MTypeIDList(MTypeList):
return False
def _check_regex_matches(self, node, regex: str) -> bool:
if isinstance(node, StringNode):
if isinstance(node, BaseStringNode):
return re.match(regex, node.value) is not None
return False
@ -652,7 +652,7 @@ class Rewriter:
src_list = []
for i in target['sources']:
for j in arg_list_from_node(i):
if isinstance(j, StringNode):
if isinstance(j, BaseStringNode):
src_list += [j.value]
# Generate the new String nodes
@ -686,7 +686,7 @@ class Rewriter:
def find_node(src):
for i in target['sources']:
for j in arg_list_from_node(i):
if isinstance(j, StringNode):
if isinstance(j, BaseStringNode):
if j.value == src:
return i, j
return None, None
@ -745,7 +745,7 @@ class Rewriter:
extra_files_list = []
for i in target['extra_files']:
for j in arg_list_from_node(i):
if isinstance(j, StringNode):
if isinstance(j, BaseStringNode):
extra_files_list += [j.value]
# Generate the new String nodes
@ -776,7 +776,7 @@ class Rewriter:
def find_node(src):
for i in target['extra_files']:
for j in arg_list_from_node(i):
if isinstance(j, StringNode):
if isinstance(j, BaseStringNode):
if j.value == src:
return i, j
return None, None
@ -845,12 +845,12 @@ class Rewriter:
src_list = []
for i in target['sources']:
for j in arg_list_from_node(i):
if isinstance(j, StringNode):
if isinstance(j, BaseStringNode):
src_list += [j.value]
extra_files_list = []
for i in target['extra_files']:
for j in arg_list_from_node(i):
if isinstance(j, StringNode):
if isinstance(j, BaseStringNode):
extra_files_list += [j.value]
test_data = {
'name': target['name'],
@ -865,8 +865,8 @@ class Rewriter:
alphanum_key = lambda key: [convert(c) for c in re.split('([0-9]+)', key)]
path_sorter = lambda key: ([(key.count('/') <= idx, alphanum_key(x)) for idx, x in enumerate(key.split('/'))])
unknown = [x for x in i.arguments if not isinstance(x, StringNode)]
sources = [x for x in i.arguments if isinstance(x, StringNode)]
unknown = [x for x in i.arguments if not isinstance(x, BaseStringNode)]
sources = [x for x in i.arguments if isinstance(x, BaseStringNode)]
sources = sorted(sources, key=lambda x: path_sorter(x.value))
i.arguments = unknown + sources

Loading…
Cancel
Save