From 35936283d24ed5a0aa76b184a7489d637d3e49c4 Mon Sep 17 00:00:00 2001 From: Charles Brunet Date: Wed, 23 Aug 2023 09:24:05 -0400 Subject: [PATCH] parser: preserve escape chars in strings use separate Node for multiline strings --- mesonbuild/ast/interpreter.py | 2 +- mesonbuild/ast/introspection.py | 8 ++--- mesonbuild/ast/printer.py | 12 ++++++- mesonbuild/ast/visitor.py | 6 ++++ mesonbuild/cmake/interpreter.py | 2 +- mesonbuild/coredata.py | 2 +- mesonbuild/interpreter/interpreter.py | 2 +- mesonbuild/interpreterbase/helpers.py | 4 +-- mesonbuild/interpreterbase/interpreterbase.py | 20 +++++------ mesonbuild/mintro.py | 4 +-- mesonbuild/mparser.py | 33 ++++++++++++------- mesonbuild/optinterpreter.py | 4 +-- mesonbuild/rewriter.py | 24 +++++++------- 13 files changed, 74 insertions(+), 49 deletions(-) diff --git a/mesonbuild/ast/interpreter.py b/mesonbuild/ast/interpreter.py index 9e098d0be..501be7dfe 100644 --- a/mesonbuild/ast/interpreter.py +++ b/mesonbuild/ast/interpreter.py @@ -239,7 +239,7 @@ class AstInterpreter(InterpreterBase): def evaluate_dictstatement(self, node: mparser.DictNode) -> TYPE_nkwargs: def resolve_key(node: mparser.BaseNode) -> str: - if isinstance(node, mparser.StringNode): + if isinstance(node, mparser.BaseStringNode): return node.value return '__AST_UNKNOWN__' arguments, kwargs = self.reduce_arguments(node.args, key_resolver=resolve_key) diff --git a/mesonbuild/ast/introspection.py b/mesonbuild/ast/introspection.py index e8055c529..50aa7444c 100644 --- a/mesonbuild/ast/introspection.py +++ b/mesonbuild/ast/introspection.py @@ -27,7 +27,7 @@ from ..build import Executable, Jar, SharedLibrary, SharedModule, StaticLibrary from ..compilers import detect_compiler_for from ..interpreterbase import InvalidArguments from ..mesonlib import MachineChoice, OptionKey -from ..mparser import BaseNode, ArithmeticNode, ArrayNode, ElementaryNode, IdNode, FunctionNode, StringNode +from ..mparser import BaseNode, ArithmeticNode, ArrayNode, ElementaryNode, IdNode, FunctionNode, BaseStringNode from .interpreter import AstInterpreter if T.TYPE_CHECKING: @@ -128,7 +128,7 @@ class IntrospectionInterpreter(AstInterpreter): if not self.is_subproject() and 'subproject_dir' in kwargs: spdirname = kwargs['subproject_dir'] - if isinstance(spdirname, StringNode): + if isinstance(spdirname, BaseStringNode): assert isinstance(spdirname.value, str) self.subproject_dir = spdirname.value if not self.is_subproject(): @@ -174,7 +174,7 @@ class IntrospectionInterpreter(AstInterpreter): for l in self.flatten_args(raw_langs): if isinstance(l, str): langs.append(l) - elif isinstance(l, StringNode): + elif isinstance(l, BaseStringNode): langs.append(l.value) for lang in sorted(langs, key=compilers.sort_clink): @@ -263,7 +263,7 @@ class IntrospectionInterpreter(AstInterpreter): # Pop the first element if the function is a build target function if isinstance(curr, FunctionNode) and curr.func_name in BUILD_TARGET_FUNCTIONS: arg_nodes.pop(0) - elementary_nodes = [x for x in arg_nodes if isinstance(x, (str, StringNode))] + elementary_nodes = [x for x in arg_nodes if isinstance(x, (str, BaseStringNode))] inqueue += [x for x in arg_nodes if isinstance(x, (FunctionNode, ArrayNode, IdNode, ArithmeticNode))] if elementary_nodes: res += [curr] diff --git a/mesonbuild/ast/printer.py b/mesonbuild/ast/printer.py index ebf63afeb..4626e5eed 100644 --- a/mesonbuild/ast/printer.py +++ b/mesonbuild/ast/printer.py @@ -84,7 +84,17 @@ class AstPrinter(AstVisitor): def visit_FormatStringNode(self, node: mparser.FormatStringNode) -> None: assert isinstance(node.value, str) - self.append("f'" + node.value + "'", node) + self.append("f'" + self.escape(node.value) + "'", node) + node.lineno = self.curr_line or node.lineno + + def visit_MultilineStringNode(self, node: mparser.StringNode) -> None: + assert isinstance(node.value, str) + self.append("'''" + node.value + "'''", node) + node.lineno = self.curr_line or node.lineno + + def visit_FormatMultilineStringNode(self, node: mparser.FormatStringNode) -> None: + assert isinstance(node.value, str) + self.append("f'''" + node.value + "'''", node) node.lineno = self.curr_line or node.lineno def visit_ContinueNode(self, node: mparser.ContinueNode) -> None: diff --git a/mesonbuild/ast/visitor.py b/mesonbuild/ast/visitor.py index 51a662024..69b93c7f0 100644 --- a/mesonbuild/ast/visitor.py +++ b/mesonbuild/ast/visitor.py @@ -43,6 +43,12 @@ class AstVisitor: def visit_FormatStringNode(self, node: mparser.FormatStringNode) -> None: self.visit_default_func(node) + def visit_MultilineStringNode(self, node: mparser.StringNode) -> None: + self.visit_default_func(node) + + def visit_FormatMultilineStringNode(self, node: mparser.FormatStringNode) -> None: + self.visit_default_func(node) + def visit_ContinueNode(self, node: mparser.ContinueNode) -> None: self.visit_default_func(node) diff --git a/mesonbuild/cmake/interpreter.py b/mesonbuild/cmake/interpreter.py index f577ba774..80dfff50a 100644 --- a/mesonbuild/cmake/interpreter.py +++ b/mesonbuild/cmake/interpreter.py @@ -960,7 +960,7 @@ class CMakeInterpreter: return Token(tid, self.subdir.as_posix(), 0, 0, 0, None, val) def string(value: str) -> StringNode: - return StringNode(token(val=value)) + return StringNode(token(val=value), escape=False) def id_node(value: str) -> IdNode: return IdNode(token(val=value)) diff --git a/mesonbuild/coredata.py b/mesonbuild/coredata.py index f151c7b81..f91c5837f 100644 --- a/mesonbuild/coredata.py +++ b/mesonbuild/coredata.py @@ -1098,7 +1098,7 @@ class MachineFileParser(): return section def _evaluate_statement(self, node: mparser.BaseNode) -> T.Union[str, bool, int, T.List[str]]: - if isinstance(node, (mparser.StringNode)): + if isinstance(node, (mparser.BaseStringNode)): return node.value elif isinstance(node, mparser.BooleanNode): return node.value diff --git a/mesonbuild/interpreter/interpreter.py b/mesonbuild/interpreter/interpreter.py index 9b005cb78..0814739f5 100644 --- a/mesonbuild/interpreter/interpreter.py +++ b/mesonbuild/interpreter/interpreter.py @@ -536,7 +536,7 @@ class Interpreter(InterpreterBase, HoldableObject): assert isinstance(kw, mparser.IdNode), 'for mypy' if kw.value == 'meson_version': # mypy does not understand "and isinstance" - if isinstance(val, mparser.StringNode): + if isinstance(val, mparser.BaseStringNode): self.handle_meson_version(val.value, val) def get_build_def_files(self) -> mesonlib.OrderedSet[str]: diff --git a/mesonbuild/interpreterbase/helpers.py b/mesonbuild/interpreterbase/helpers.py index f2ee1b1a9..917969b26 100644 --- a/mesonbuild/interpreterbase/helpers.py +++ b/mesonbuild/interpreterbase/helpers.py @@ -25,7 +25,7 @@ if T.TYPE_CHECKING: from .baseobjects import TYPE_var, TYPE_kwargs, SubProject def flatten(args: T.Union['TYPE_var', T.List['TYPE_var']]) -> T.List['TYPE_var']: - if isinstance(args, mparser.StringNode): + if isinstance(args, mparser.BaseStringNode): assert isinstance(args.value, str) return [args.value] if not isinstance(args, collections.abc.Sequence): @@ -35,7 +35,7 @@ def flatten(args: T.Union['TYPE_var', T.List['TYPE_var']]) -> T.List['TYPE_var'] if isinstance(a, list): rest = flatten(a) result = result + rest - elif isinstance(a, mparser.StringNode): + elif isinstance(a, mparser.BaseStringNode): result.append(a.value) else: result.append(a) diff --git a/mesonbuild/interpreterbase/interpreterbase.py b/mesonbuild/interpreterbase/interpreterbase.py index f35da33bd..eab96efb1 100644 --- a/mesonbuild/interpreterbase/interpreterbase.py +++ b/mesonbuild/interpreterbase/interpreterbase.py @@ -196,8 +196,13 @@ class InterpreterBase: self.assignment(cur) elif isinstance(cur, mparser.MethodNode): return self.method_call(cur) - elif isinstance(cur, mparser.StringNode): - return self._holderify(cur.value) + elif isinstance(cur, mparser.BaseStringNode): + if isinstance(cur, mparser.MultilineFormatStringNode): + return self.evaluate_multiline_fstring(cur) + elif isinstance(cur, mparser.FormatStringNode): + return self.evaluate_fstring(cur) + else: + return self._holderify(cur.value) elif isinstance(cur, mparser.BooleanNode): return self._holderify(cur.value) elif isinstance(cur, mparser.IfClauseNode): @@ -230,11 +235,6 @@ class InterpreterBase: return self.evaluate_indexing(cur) elif isinstance(cur, mparser.TernaryNode): return self.evaluate_ternary(cur) - elif isinstance(cur, mparser.FormatStringNode): - if isinstance(cur, mparser.MultilineFormatStringNode): - return self.evaluate_multiline_fstring(cur) - else: - return self.evaluate_fstring(cur) elif isinstance(cur, mparser.ContinueNode): raise ContinueRequest() elif isinstance(cur, mparser.BreakNode): @@ -256,7 +256,7 @@ class InterpreterBase: @FeatureNew('dict', '0.47.0') def evaluate_dictstatement(self, cur: mparser.DictNode) -> InterpreterObject: def resolve_key(key: mparser.BaseNode) -> str: - if not isinstance(key, mparser.StringNode): + if not isinstance(key, mparser.BaseStringNode): FeatureNew.single_use('Dictionary entry using non literal key', '0.53.0', self.subproject) key_holder = self.evaluate_statement(key) if key_holder is None: @@ -428,9 +428,7 @@ class InterpreterBase: return self.evaluate_fstring(node) @FeatureNew('format strings', '0.58.0') - def evaluate_fstring(self, node: mparser.FormatStringNode) -> InterpreterObject: - assert isinstance(node, mparser.FormatStringNode) - + def evaluate_fstring(self, node: T.Union[mparser.FormatStringNode, mparser.MultilineFormatStringNode]) -> InterpreterObject: def replace(match: T.Match[str]) -> str: var = str(match.group(1)) try: diff --git a/mesonbuild/mintro.py b/mesonbuild/mintro.py index 039153532..cf4a53ab0 100644 --- a/mesonbuild/mintro.py +++ b/mesonbuild/mintro.py @@ -36,7 +36,7 @@ from .dependencies import Dependency from . import environment from .interpreterbase import ObjectHolder from .mesonlib import OptionKey -from .mparser import FunctionNode, ArrayNode, ArgumentNode, StringNode +from .mparser import FunctionNode, ArrayNode, ArgumentNode, BaseStringNode if T.TYPE_CHECKING: import argparse @@ -194,7 +194,7 @@ def list_targets_from_source(intr: IntrospectionInterpreter) -> T.List[T.Dict[st elif isinstance(n, ArgumentNode): args = n.arguments for j in args: - if isinstance(j, StringNode): + if isinstance(j, BaseStringNode): assert isinstance(j.value, str) res += [Path(j.value)] elif isinstance(j, str): diff --git a/mesonbuild/mparser.py b/mesonbuild/mparser.py index f9b896759..614479466 100644 --- a/mesonbuild/mparser.py +++ b/mesonbuild/mparser.py @@ -193,23 +193,15 @@ class Lexer: elif tid == 'dblquote': raise ParseException('Double quotes are not supported. Use single quotes.', self.getline(line_start), lineno, col) elif tid in {'string', 'fstring'}: - # Handle here and not on the regexp to give a better error message. if match_text.find("\n") != -1: msg = ("Newline character in a string detected, use ''' (three single quotes) " "for multiline strings instead.\n" "This will become a hard error in a future Meson release.") mlog.warning(mlog.code_line(msg, self.getline(line_start), col), location=BaseNode(lineno, col, filename)) value = match_text[2 if tid == 'fstring' else 1:-1] - value = ESCAPE_SEQUENCE_SINGLE_RE.sub(decode_match, value) elif tid in {'multiline_string', 'multiline_fstring'}: - # For multiline strings, parse out the value and pass - # through the normal string logic. - # For multiline format strings, we have to emit a - # different AST node so we can add a feature check, - # but otherwise, it follows the normal fstring logic. if tid == 'multiline_string': value = match_text[3:-3] - tid = 'string' else: value = match_text[4:-3] lines = match_text.split('\n') @@ -295,13 +287,30 @@ class NumberNode(ElementaryNode[int]): self.value = int(token.value, base=0) self.bytespan = token.bytespan -class StringNode(ElementaryNode[str]): +class BaseStringNode(ElementaryNode[str]): pass -class FormatStringNode(ElementaryNode[str]): +@dataclass(unsafe_hash=True) +class StringNode(BaseStringNode): + + raw_value: str = field(hash=False) + + def __init__(self, token: Token[str], escape: bool = True): + super().__init__(token) + self.value = ESCAPE_SEQUENCE_SINGLE_RE.sub(decode_match, token.value) if escape else token.value + self.raw_value = token.value + +class FormatStringNode(StringNode): pass -class MultilineFormatStringNode(FormatStringNode): +@dataclass(unsafe_hash=True) +class MultilineStringNode(BaseStringNode): + + def __init__(self, token: Token[str]): + super().__init__(token) + self.value = token.value + +class MultilineFormatStringNode(MultilineStringNode): pass class ContinueNode(ElementaryNode): @@ -819,6 +828,8 @@ class Parser: return StringNode(t) if self.accept('fstring'): return FormatStringNode(t) + if self.accept('multiline_string'): + return MultilineStringNode(t) if self.accept('multiline_fstring'): return MultilineFormatStringNode(t) return EmptyNode(self.current.lineno, self.current.colno, self.current.filename) diff --git a/mesonbuild/optinterpreter.py b/mesonbuild/optinterpreter.py index 37ef55ab0..9eeb3fdc6 100644 --- a/mesonbuild/optinterpreter.py +++ b/mesonbuild/optinterpreter.py @@ -115,7 +115,7 @@ class OptionInterpreter: return arg if isinstance(arg, mparser.ParenthesizedNode): return self.reduce_single(arg.inner) - elif isinstance(arg, (mparser.StringNode, mparser.BooleanNode, + elif isinstance(arg, (mparser.BaseStringNode, mparser.BooleanNode, mparser.NumberNode)): return arg.value elif isinstance(arg, mparser.ArrayNode): @@ -123,7 +123,7 @@ class OptionInterpreter: elif isinstance(arg, mparser.DictNode): d = {} for k, v in arg.args.kwargs.items(): - if not isinstance(k, mparser.StringNode): + if not isinstance(k, mparser.BaseStringNode): raise OptionException('Dictionary keys must be a string literal') d[k.value] = self.reduce_single(v) return d diff --git a/mesonbuild/rewriter.py b/mesonbuild/rewriter.py index a9b2e881c..0d7b111c4 100644 --- a/mesonbuild/rewriter.py +++ b/mesonbuild/rewriter.py @@ -28,7 +28,7 @@ from .ast import IntrospectionInterpreter, BUILD_TARGET_FUNCTIONS, AstConditionL from mesonbuild.mesonlib import MesonException, setup_vsenv from . import mlog, environment from functools import wraps -from .mparser import Token, ArrayNode, ArgumentNode, AssignmentNode, BooleanNode, ElementaryNode, IdNode, FunctionNode, StringNode +from .mparser import Token, ArrayNode, ArgumentNode, AssignmentNode, BaseStringNode, BooleanNode, ElementaryNode, IdNode, FunctionNode, StringNode import json, os, re, sys import typing as T @@ -267,12 +267,12 @@ class MTypeStrList(MTypeList): return StringNode(Token('', '', 0, 0, 0, None, str(value))) def _check_is_equal(self, node, value) -> bool: - if isinstance(node, StringNode): + if isinstance(node, BaseStringNode): return node.value == value return False def _check_regex_matches(self, node, regex: str) -> bool: - if isinstance(node, StringNode): + if isinstance(node, BaseStringNode): return re.match(regex, node.value) is not None return False @@ -292,7 +292,7 @@ class MTypeIDList(MTypeList): return False def _check_regex_matches(self, node, regex: str) -> bool: - if isinstance(node, StringNode): + if isinstance(node, BaseStringNode): return re.match(regex, node.value) is not None return False @@ -652,7 +652,7 @@ class Rewriter: src_list = [] for i in target['sources']: for j in arg_list_from_node(i): - if isinstance(j, StringNode): + if isinstance(j, BaseStringNode): src_list += [j.value] # Generate the new String nodes @@ -686,7 +686,7 @@ class Rewriter: def find_node(src): for i in target['sources']: for j in arg_list_from_node(i): - if isinstance(j, StringNode): + if isinstance(j, BaseStringNode): if j.value == src: return i, j return None, None @@ -745,7 +745,7 @@ class Rewriter: extra_files_list = [] for i in target['extra_files']: for j in arg_list_from_node(i): - if isinstance(j, StringNode): + if isinstance(j, BaseStringNode): extra_files_list += [j.value] # Generate the new String nodes @@ -776,7 +776,7 @@ class Rewriter: def find_node(src): for i in target['extra_files']: for j in arg_list_from_node(i): - if isinstance(j, StringNode): + if isinstance(j, BaseStringNode): if j.value == src: return i, j return None, None @@ -845,12 +845,12 @@ class Rewriter: src_list = [] for i in target['sources']: for j in arg_list_from_node(i): - if isinstance(j, StringNode): + if isinstance(j, BaseStringNode): src_list += [j.value] extra_files_list = [] for i in target['extra_files']: for j in arg_list_from_node(i): - if isinstance(j, StringNode): + if isinstance(j, BaseStringNode): extra_files_list += [j.value] test_data = { 'name': target['name'], @@ -865,8 +865,8 @@ class Rewriter: alphanum_key = lambda key: [convert(c) for c in re.split('([0-9]+)', key)] path_sorter = lambda key: ([(key.count('/') <= idx, alphanum_key(x)) for idx, x in enumerate(key.split('/'))]) - unknown = [x for x in i.arguments if not isinstance(x, StringNode)] - sources = [x for x in i.arguments if isinstance(x, StringNode)] + unknown = [x for x in i.arguments if not isinstance(x, BaseStringNode)] + sources = [x for x in i.arguments if isinstance(x, BaseStringNode)] sources = sorted(sources, key=lambda x: path_sorter(x.value)) i.arguments = unknown + sources