parser: preserve whitespaces and comments

1 year ago · 11ef2a536c
parent 5b29eff8ad
commit 11ef2a536c
1 changed files with 60 additions and 7 deletions
--- a/mesonbuild/mparser.py
+++ b/mesonbuild/mparser.py
@ -116,7 +116,7 @@ class Lexer:
            self.keywords.update({'testcase', 'endtestcase'})
        self.token_specification = [
            # Need to be sorted longest to shortest.
-            ('ignore', re.compile(r'[ \t]')),
+            ('whitespace', re.compile(r'[ \t]+')),
            ('multiline_fstring', re.compile(r"f'''(.|\n)*?'''", re.M)),
            ('fstring', re.compile(r"f'([^'\\]|(\\.))*'")),
            ('id', re.compile('[_a-zA-Z][_0-9a-zA-Z]*')),
@ -178,9 +178,7 @@ class Lexer:
                    span_end = loc
                    bytespan = (span_start, span_end)
                    value = mo.group()
-                    if tid in {'ignore', 'comment'}:
-                        break
-                    elif tid == 'lparen':
+                    if tid == 'lparen':
                        par_count += 1
                    elif tid == 'rparen':
                        par_count -= 1
@ -210,12 +208,12 @@ class Lexer:
                    elif tid == 'eol_cont':
                        lineno += 1
                        line_start = loc
-                        break
+                        tid = 'whitespace'
                    elif tid == 'eol':
                        lineno += 1
                        line_start = loc
                        if par_count > 0 or bracket_count > 0 or curl_count > 0:
-                            break
+                            tid = 'whitespace'
                    elif tid == 'id':
                        if value in self.keywords:
                            tid = value
@ -235,6 +233,7 @@ class BaseNode:
    filename: str = field(hash=False)
    end_lineno: int = field(hash=False)
    end_colno: int = field(hash=False)
+    whitespaces: T.Optional[WhitespaceNode] = field(hash=False)

    def __init__(self, lineno: int, colno: int, filename: str,
                 end_lineno: T.Optional[int] = None, end_colno: T.Optional[int] = None) -> None:
@ -257,6 +256,26 @@ class BaseNode:
            if callable(func):
                func(self)

+    def append_whitespaces(self, token: Token) -> None:
+        if self.whitespaces is None:
+            self.whitespaces = WhitespaceNode(token)
+        else:
+            self.whitespaces.append(token)
+
+
+@dataclass(unsafe_hash=True)
+class WhitespaceNode(BaseNode):
+
+    value: str
+
+    def __init__(self, token: Token[str]):
+        super().__init__(token.lineno, token.colno, token.filename)
+        self.value = ''
+        self.append(token)
+
+    def append(self, token: Token[str]) -> None:
+        self.value += token.value
+
@dataclass(unsafe_hash=True)
 class ElementaryNode(T.Generic[TV_TokenTypes], BaseNode):

@ -456,6 +475,7 @@ class UMinusNode(UnaryOperatorNode):
@dataclass(unsafe_hash=True)
 class CodeBlockNode(BaseNode):

+    pre_whitespaces: T.Optional[WhitespaceNode] = field(hash=False)
    lines: T.List[BaseNode] = field(hash=False)

    def __init__(self, token: Token[TV_TokenTypes]):
@ -463,6 +483,14 @@ class CodeBlockNode(BaseNode):
        self.pre_whitespaces = None
        self.lines = []

+    def append_whitespaces(self, token: Token) -> None:
+        if self.lines:
+            self.lines[-1].append_whitespaces(token)
+        elif self.pre_whitespaces is None:
+            self.pre_whitespaces = WhitespaceNode(token)
+        else:
+            self.pre_whitespaces.append(token)
+
@dataclass(unsafe_hash=True)
 class IndexNode(BaseNode):

@ -669,12 +697,16 @@ class Parser:
        self.stream = self.lexer.lex(filename)
        self.current: Token = Token('eof', '', 0, 0, 0, (0, 0), None)
        self.previous = self.current
+        self.current_ws: T.List[Token] = []

        self.getsym()
        self.in_ternary = False

    def create_node(self, node_type: T.Type[BaseNodeT], *args: T.Any, **kwargs: T.Any) -> BaseNodeT:
        node = node_type(*args, **kwargs)
+        for ws_token in self.current_ws:
+            node.append_whitespaces(ws_token)
+        self.current_ws = []
        return node

    def getsym(self) -> None:
@ -682,6 +714,12 @@ class Parser:
        try:
            self.current = next(self.stream)

+            while self.current.tid in {'eol', 'comment', 'whitespace'}:
+                self.current_ws.append(self.current)
+                if self.current.tid == 'eol':
+                    break
+                self.current = next(self.stream)
+
        except StopIteration:
            self.current = Token('eof', '', self.current.line_start, self.current.lineno, self.current.colno + self.current.bytespan[1] - self.current.bytespan[0], (0, 0), None)

@ -782,11 +820,17 @@ class Parser:
                operator = self.create_node(SymbolNode, self.previous)
                return self.create_node(ComparisonNode, operator_type, left, operator, self.e5())
        if self.accept('not'):
+            ws = self.current_ws.copy()
            not_token = self.previous
            if self.accept('in'):
                in_token = self.previous
+                self.current_ws = self.current_ws[len(ws):]  # remove whitespaces between not and in
+                temp_node = EmptyNode(in_token.lineno, in_token.colno, in_token.filename)
+                for w in ws:
+                    temp_node.append_whitespaces(w)
+
                not_token.bytespan = (not_token.bytespan[0], in_token.bytespan[1])
-                not_token.value += in_token.value
+                not_token.value += temp_node.whitespaces.value + in_token.value
                operator = self.create_node(SymbolNode, not_token)
                return self.create_node(ComparisonNode, 'notin', left, operator, self.e5())
        return left
@ -1054,6 +1098,10 @@ class Parser:

        try:
            while cond:
+                for ws_token in self.current_ws:
+                    block.append_whitespaces(ws_token)
+                self.current_ws = []
+
                curline = self.line()

                if not isinstance(curline, EmptyNode):
@ -1065,4 +1113,9 @@ class Parser:
            e.ast = block
            raise

+        # Remaining whitespaces will not be caught since there are no more nodes
+        for ws_token in self.current_ws:
+            block.append_whitespaces(ws_token)
+        self.current_ws = []
+
        return block