The Meson Build System
http://mesonbuild.com/
You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
1111 lines
40 KiB
1111 lines
40 KiB
# SPDX-License-Identifier: Apache-2.0 |
|
# Copyright 2014-2017 The Meson development team |
|
|
|
from __future__ import annotations |
|
from dataclasses import dataclass, field |
|
import re |
|
import codecs |
|
import os |
|
import typing as T |
|
|
|
from .mesonlib import MesonException |
|
from . import mlog |
|
|
|
if T.TYPE_CHECKING: |
|
from typing_extensions import Literal |
|
|
|
from .ast import AstVisitor |
|
|
|
BaseNodeT = T.TypeVar('BaseNodeT', bound='BaseNode') |
|
|
|
# Matches each escape sequence that is honoured inside a regular
# (single-quoted) string literal, for example the \x00 in 'abc\x00'.
ESCAPE_SEQUENCE_SINGLE_RE = re.compile(
    r'''
    (   \\U[A-Fa-f0-9]{8}   # 8-digit hex escapes
    |   \\u[A-Fa-f0-9]{4}   # 4-digit hex escapes
    |   \\x[A-Fa-f0-9]{2}   # 2-digit hex escapes
    |   \\[0-7]{1,3}        # Octal escapes
    |   \\N\{[^}]+\}        # Unicode characters by name
    |   \\[\\'abfnrtv]      # Single-character escapes
    )''',
    re.VERBOSE | re.UNICODE,
)
|
|
|
def decode_match(match: T.Match[str]) -> str:
    """Return the text denoted by the escape sequence captured in *match*.

    The whole matched text is encoded to bytes and decoded with the
    ``unicode_escape`` codec.
    """
    escaped = match.group(0).encode()
    return codecs.decode(escaped, 'unicode_escape')
|
|
|
class ParseException(MesonException):
    """Error raised by the lexer and parser, carrying a source position.

    Attributes:
        ast: ``None`` unless a caller stores a partially built tree on the
            instance (``Parser.parse`` does so before re-raising).
        lineno: line number the error refers to.
        colno: column the error refers to.
    """

    ast: T.Optional[CodeBlockNode] = None

    def __init__(self, text: str, line: str, lineno: int, colno: int) -> None:
        # The rendered message is the error text, followed by the offending
        # line, followed by a caret that marks the error column.
        rendered = mlog.code_line(text, line, colno)
        super().__init__(rendered)
        # Assigned after the base initialiser has run.
        self.lineno = lineno
        self.colno = colno
|
|
|
class BlockParseException(ParseException):
    """Parse error for a block whose closing token is missing or wrong.

    The message shows both where the error was detected and where the block
    was opened. Two layouts exist, depending on whether both positions are on
    the same line.
    """

    def __init__(
                self,
                text: str,
                line: str,
                lineno: int,
                colno: int,
                start_line: str,
                start_lineno: int,
                start_colno: int,
                ) -> None:
        if lineno == start_lineno:
            # Same line: error message, the line itself, then a marker row
            # with a caret at the block start, underscores in between and a
            # caret at the block end.
            marker = ' ' * start_colno + '^' + '_' * (colno - start_colno - 1) + '^'
            message = '\n'.join((text, line, marker))
        else:
            # Different lines: error message, the error line, a caret under
            # the error column, a note saying where the block started, the
            # line of the block start and a caret under the block start.
            message = '\n'.join((
                text,
                line,
                ' ' * colno + '^',
                'For a block that started at %d,%d' % (start_lineno, start_colno),
                start_line,
                ' ' * start_colno + '^',
            ))
        # ParseException.__init__ is skipped on purpose: the message is
        # already fully formatted, so the grandparent initialiser is used.
        MesonException.__init__(self, message)
        self.lineno = lineno
        self.colno = colno
|
|
|
# Types a token (and an elementary node) value may have.
TV_TokenTypes = T.TypeVar('TV_TokenTypes', int, str, bool)


@dataclass(eq=False)
class Token(T.Generic[TV_TokenTypes]):
    """A single lexeme produced by the lexer.

    Equality looks at the token id only: a token compares equal to another
    token with the same ``tid`` and to a plain string equal to its ``tid``.
    """

    # Token kind, e.g. 'id', 'string', 'lparen' or a keyword.
    tid: str
    # File the token was read from.
    filename: str
    # Offset into the source text at which the token's line begins.
    line_start: int
    # Line number of the token.
    lineno: int
    # Offset of the token relative to line_start.
    colno: int
    # (start, end) offsets of the matched text within the source text.
    bytespan: T.Tuple[int, int]
    # Payload of the token.
    value: TV_TokenTypes

    def __eq__(self, other: object) -> bool:
        if isinstance(other, Token):
            return self.tid == other.tid
        if isinstance(other, str):
            return self.tid == other
        return NotImplemented
|
|
|
class Lexer:
    """Tokenizer for Meson build definition source text."""

    def __init__(self, code: str):
        """Store *code* and build the keyword sets and token patterns."""
        self.code = code
        self.keywords = {'true', 'false', 'if', 'else', 'elif',
                         'endif', 'and', 'or', 'not', 'foreach', 'endforeach',
                         'in', 'continue', 'break'}
        # Identifiers that still lex as 'id' but trigger a warning.
        self.future_keywords = {'return'}
        self.in_unit_test = 'MESON_RUNNING_IN_PROJECT_TESTS' in os.environ
        if self.in_unit_test:
            self.keywords.update({'testcase', 'endtestcase'})
        # Patterns are tried in list order and the first match wins.
        self.token_specification = [
            # Need to be sorted longest to shortest.
            ('whitespace', re.compile(r'[ \t]+')),
            ('multiline_fstring', re.compile(r"f'''(.|\n)*?'''", re.M)),
            ('fstring', re.compile(r"f'([^'\\]|(\\.))*'")),
            ('id', re.compile('[_a-zA-Z][_0-9a-zA-Z]*')),
            ('number', re.compile(r'0[bB][01]+|0[oO][0-7]+|0[xX][0-9a-fA-F]+|0|[1-9]\d*')),
            ('eol_cont', re.compile(r'\\[ \t]*(#.*)?\n')),
            ('eol', re.compile(r'\n')),
            ('multiline_string', re.compile(r"'''(.|\n)*?'''", re.M)),
            ('comment', re.compile(r'#.*')),
            ('lparen', re.compile(r'\(')),
            ('rparen', re.compile(r'\)')),
            ('lbracket', re.compile(r'\[')),
            ('rbracket', re.compile(r'\]')),
            ('lcurl', re.compile(r'\{')),
            ('rcurl', re.compile(r'\}')),
            ('dblquote', re.compile(r'"')),
            ('string', re.compile(r"'([^'\\]|(\\.))*'")),
            ('comma', re.compile(r',')),
            ('plusassign', re.compile(r'\+=')),
            ('dot', re.compile(r'\.')),
            ('plus', re.compile(r'\+')),
            ('dash', re.compile(r'-')),
            ('star', re.compile(r'\*')),
            ('percent', re.compile(r'%')),
            ('fslash', re.compile(r'/')),
            ('colon', re.compile(r':')),
            ('equal', re.compile(r'==')),
            ('nequal', re.compile(r'!=')),
            ('assign', re.compile(r'=')),
            ('le', re.compile(r'<=')),
            ('lt', re.compile(r'<')),
            ('ge', re.compile(r'>=')),
            ('gt', re.compile(r'>')),
            ('questionmark', re.compile(r'\?')),
        ]

    def getline(self, line_start: int) -> str:
        """Return the source line beginning at offset *line_start*.

        The line runs up to, but not including, the next newline. When the
        text has no further newline (last line without a trailing newline)
        the line runs to the end of the text.
        """
        line_end = self.code.find('\n', line_start)
        if line_end == -1:
            # str.find signals "not found" with -1; slicing with -1 would
            # silently cut off the last character of the line.
            line_end = len(self.code)
        return self.code[line_start:line_end]

    def lex(self, filename: str) -> T.Generator[Token, None, None]:
        """Yield the tokens of the stored source text one by one.

        *filename* is recorded on every token and used as the location of
        warnings.

        Raises:
            ParseException: on a double quote, or when no token pattern
                matches at the current position.
        """
        line_start = 0
        lineno = 1
        loc = 0
        # Nesting depth of (), [] and {}; a newline inside any of them is
        # reported as whitespace instead of 'eol'.
        par_count = 0
        bracket_count = 0
        curl_count = 0
        col = 0
        while loc < len(self.code):
            matched = False
            value: str = ''
            for (tid, reg) in self.token_specification:
                mo = reg.match(self.code, loc)
                if mo:
                    # Remember the position at which the token starts; the
                    # branches below may advance lineno/line_start.
                    curline = lineno
                    curline_start = line_start
                    col = mo.start() - line_start
                    matched = True
                    span_start = loc
                    loc = mo.end()
                    span_end = loc
                    bytespan = (span_start, span_end)
                    value = mo.group()
                    if tid == 'lparen':
                        par_count += 1
                    elif tid == 'rparen':
                        par_count -= 1
                    elif tid == 'lbracket':
                        bracket_count += 1
                    elif tid == 'rbracket':
                        bracket_count -= 1
                    elif tid == 'lcurl':
                        curl_count += 1
                    elif tid == 'rcurl':
                        curl_count -= 1
                    elif tid == 'dblquote':
                        raise ParseException('Double quotes are not supported. Use single quotes.', self.getline(line_start), lineno, col)
                    elif tid in {'string', 'fstring'}:
                        if value.find("\n") != -1:
                            msg = ("Newline character in a string detected, use ''' (three single quotes) "
                                   "for multiline strings instead.\n"
                                   "This will become a hard error in a future Meson release.")
                            mlog.warning(mlog.code_line(msg, self.getline(line_start), col), location=BaseNode(lineno, col, filename))
                        # Strip the quotes (and the leading f of an f-string).
                        value = value[2 if tid == 'fstring' else 1:-1]
                    elif tid in {'multiline_string', 'multiline_fstring'}:
                        # Strip the triple quotes (and the leading f).
                        value = value[4 if tid == 'multiline_fstring' else 3:-3]
                        lines = value.split('\n')
                        if len(lines) > 1:
                            lineno += len(lines) - 1
                            # value no longer contains the closing ''', so
                            # those three characters must be subtracted as
                            # well to land on the start of the last line.
                            line_start = mo.end() - len(lines[-1]) - 3
                    elif tid == 'eol_cont':
                        # Backslash line continuation: counts as a new line
                        # but is reported as whitespace.
                        lineno += 1
                        line_start = loc
                        tid = 'whitespace'
                    elif tid == 'eol':
                        lineno += 1
                        line_start = loc
                        if par_count > 0 or bracket_count > 0 or curl_count > 0:
                            tid = 'whitespace'
                    elif tid == 'id':
                        if value in self.keywords:
                            # Keywords use their own text as token id.
                            tid = value
                        else:
                            if value in self.future_keywords:
                                mlog.warning(f"Identifier '{value}' will become a reserved keyword in a future release. Please rename it.",
                                             location=BaseNode(lineno, col, filename))
                    yield Token(tid, filename, curline_start, curline, col, bytespan, value)
                    break
            if not matched:
                raise ParseException('lexer', self.getline(line_start), lineno, col)
|
|
|
@dataclass
class BaseNode:
    """Common base of all syntax tree nodes: a source span plus whitespace.

    When no end position is given, the end equals the start position.
    """

    lineno: int
    colno: int
    filename: str = field(hash=False)
    end_lineno: int = field(hash=False)
    end_colno: int = field(hash=False)
    # Whitespace/comment tokens attached to this node, or None.
    whitespaces: T.Optional[WhitespaceNode] = field(hash=False)

    def __init__(self, lineno: int, colno: int, filename: str,
                 end_lineno: T.Optional[int] = None, end_colno: T.Optional[int] = None) -> None:
        self.lineno = lineno
        self.colno = colno
        self.filename = filename
        self.end_lineno = lineno if end_lineno is None else end_lineno
        self.end_colno = colno if end_colno is None else end_colno
        self.whitespaces = None

        # Attributes for the visitors
        self.level = 0
        self.ast_id = ''
        self.condition_level = 0

    def accept(self, visitor: 'AstVisitor') -> None:
        """Call ``visitor.visit_<ClassName>(self)`` if the visitor has such a callable."""
        handler = getattr(visitor, f'visit_{type(self).__name__}', None)
        if callable(handler):
            handler(self)

    def append_whitespaces(self, token: Token) -> None:
        """Attach *token* to this node's whitespace, creating the holder on first use."""
        if self.whitespaces is not None:
            self.whitespaces.append(token)
            return
        self.whitespaces = WhitespaceNode(token)
|
|
|
|
|
@dataclass(unsafe_hash=True)
class WhitespaceNode(BaseNode):
    """Accumulated text of the whitespace tokens attached to a node.

    The node is positioned at the first token it is built from.
    """

    value: str

    def __init__(self, token: Token[str]):
        super().__init__(token.lineno, token.colno, token.filename)
        # Start empty and let append() add the first token's text.
        self.value = ''
        self.append(token)

    def append(self, token: Token[str]) -> None:
        """Concatenate the text of *token* onto the stored value."""
        self.value = self.value + token.value
|
|
|
@dataclass(unsafe_hash=True)
class ElementaryNode(T.Generic[TV_TokenTypes], BaseNode):
    """Node built from exactly one token; keeps the token's value and span."""

    value: TV_TokenTypes
    # (start, end) offsets copied from the token; excluded from the hash.
    bytespan: T.Tuple[int, int] = field(hash=False)

    def __init__(self, token: Token[TV_TokenTypes]):
        super().__init__(token.lineno, token.colno, token.filename)
        self.bytespan = token.bytespan
        self.value = token.value
|
|
|
class BooleanNode(ElementaryNode[bool]):
    """Elementary node holding a boolean value."""
|
|
|
class IdNode(ElementaryNode[str]):
    """Elementary node holding an identifier name."""
|
|
|
@dataclass(unsafe_hash=True)
class NumberNode(ElementaryNode[int]):
    """Integer literal; keeps both the parsed value and the original text."""

    # Text of the literal exactly as written; excluded from the hash.
    raw_value: str = field(hash=False)

    def __init__(self, token: Token[str]):
        # ElementaryNode.__init__ is bypassed; value is set from the parsed
        # integer below instead of the token text.
        BaseNode.__init__(self, token.lineno, token.colno, token.filename)
        literal = token.value
        self.raw_value = literal
        # Base 0 lets int() pick the radix from a 0b/0o/0x prefix.
        self.value = int(literal, 0)
        self.bytespan = token.bytespan
|
|
|
class BaseStringNode(ElementaryNode[str]):
    """Common base of all string literal nodes."""
|
|
|
@dataclass(unsafe_hash=True)
class StringNode(BaseStringNode):
    """Single-quoted string literal.

    ``value`` has escape sequences decoded unless *escape* is false;
    ``raw_value`` always keeps the token text unchanged.
    """

    raw_value: str = field(hash=False)

    def __init__(self, token: Token[str], escape: bool = True):
        super().__init__(token)
        text = token.value
        if escape:
            self.value = ESCAPE_SEQUENCE_SINGLE_RE.sub(decode_match, text)
        else:
            self.value = text
        self.raw_value = text
|
|
|
class FormatStringNode(StringNode):
    """String node created for 'fstring' tokens."""
|
|
|
@dataclass(unsafe_hash=True)
class MultilineStringNode(BaseStringNode):
    """Triple-quoted string literal; the token text is stored without escape decoding."""

    def __init__(self, token: Token[str]):
        super().__init__(token)
        # Explicitly store the token text as-is.
        text = token.value
        self.value = text
|
|
|
class MultilineFormatStringNode(MultilineStringNode):
    """Multi-line string node created for 'multiline_fstring' tokens."""
|
|
|
class ContinueNode(ElementaryNode):
    """Elementary node type named for the ``continue`` keyword."""
|
|
|
class BreakNode(ElementaryNode):
    """Elementary node type named for the ``break`` keyword."""
|
|
|
class SymbolNode(ElementaryNode[str]):
    """Elementary node for a punctuation or keyword token such as '(' or 'if'."""
|
|
|
@dataclass(unsafe_hash=True)
class ArgumentNode(BaseNode):
    """Positional and keyword arguments of a call, array or dict literal."""

    # Positional arguments in source order.
    arguments: T.List[BaseNode] = field(hash=False)
    # Symbol nodes of the separating commas.
    commas: T.List[SymbolNode] = field(hash=False)
    # Symbol nodes of the ':' between a key and its value.
    columns: T.List[SymbolNode] = field(hash=False)
    # Keyword arguments / dict entries, keyed by the key node.
    kwargs: T.Dict[BaseNode, BaseNode] = field(hash=False)

    def __init__(self, token: Token[TV_TokenTypes]):
        super().__init__(token.lineno, token.colno, token.filename)
        self.arguments = []
        self.commas = []
        self.columns = []
        self.kwargs = {}
        # Becomes True once a positional argument is added after a keyword one.
        self.order_error = False

    def prepend(self, statement: BaseNode) -> None:
        """Insert *statement* as the first positional argument (empty nodes are dropped)."""
        if self.num_kwargs() > 0:
            self.order_error = True
        if isinstance(statement, EmptyNode):
            return
        self.arguments = [statement, *self.arguments]

    def append(self, statement: BaseNode) -> None:
        """Add *statement* as the last positional argument (empty nodes are dropped)."""
        if self.num_kwargs() > 0:
            self.order_error = True
        if isinstance(statement, EmptyNode):
            return
        self.arguments.append(statement)

    def set_kwarg(self, name: IdNode, value: BaseNode) -> None:
        """Store keyword argument *name*, warning when the name was already used."""
        duplicate = False
        for existing in self.kwargs:
            if isinstance(existing, IdNode) and name.value == existing.value:
                duplicate = True
                break
        if duplicate:
            mlog.warning(f'Keyword argument "{name.value}" defined multiple times.', location=self)
            mlog.warning('This will be an error in future Meson releases.')
        self.kwargs[name] = value

    def set_kwarg_no_check(self, name: BaseNode, value: BaseNode) -> None:
        """Store a key/value pair without the duplicate-name check."""
        self.kwargs[name] = value

    def num_args(self) -> int:
        """Return the number of positional arguments."""
        return len(self.arguments)

    def num_kwargs(self) -> int:
        """Return the number of keyword arguments."""
        return len(self.kwargs)

    def incorrect_order(self) -> bool:
        """Return whether a positional argument followed a keyword argument."""
        return self.order_error

    def __len__(self) -> int:
        # Only positional arguments are counted.
        return self.num_args() # Fixme
|
|
|
@dataclass(unsafe_hash=True)
class ArrayNode(BaseNode):
    """Array literal: opening bracket, elements, closing bracket.

    The node spans from the opening bracket to one column past the closing one.
    """

    lbracket: SymbolNode
    args: ArgumentNode
    rbracket: SymbolNode

    def __init__(self, lbracket: SymbolNode, args: ArgumentNode, rbracket: SymbolNode):
        super().__init__(
            lbracket.lineno,
            lbracket.colno,
            args.filename,
            end_lineno=rbracket.lineno,
            end_colno=rbracket.colno + 1,
        )
        self.lbracket = lbracket
        self.args = args
        self.rbracket = rbracket
|
|
|
@dataclass(unsafe_hash=True)
class DictNode(BaseNode):
    """Dict literal: opening brace, key/value pairs, closing brace.

    The node spans from the opening brace to one column past the closing one.
    """

    lcurl: SymbolNode
    args: ArgumentNode
    rcurl: SymbolNode

    def __init__(self, lcurl: SymbolNode, args: ArgumentNode, rcurl: SymbolNode):
        super().__init__(
            lcurl.lineno,
            lcurl.colno,
            args.filename,
            end_lineno=rcurl.lineno,
            end_colno=rcurl.colno + 1,
        )
        self.lcurl = lcurl
        self.args = args
        self.rcurl = rcurl
|
|
|
class EmptyNode(BaseNode):
    """Placeholder node carrying only a position."""
|
|
|
@dataclass(unsafe_hash=True)
class BinaryOperatorNode(BaseNode):
    """Binary expression; positioned at its left operand."""

    left: BaseNode
    operator: SymbolNode
    right: BaseNode

    def __init__(self, left: BaseNode, operator: SymbolNode, right: BaseNode):
        super().__init__(left.lineno, left.colno, left.filename)
        self.left, self.operator, self.right = left, operator, right
|
|
|
class OrNode(BinaryOperatorNode):
    """Binary node for the ``or`` operator."""
|
|
|
class AndNode(BinaryOperatorNode):
    """Binary node for the ``and`` operator."""
|
|
|
@dataclass(unsafe_hash=True)
class ComparisonNode(BinaryOperatorNode):
    """Binary node for a comparison; ``ctype`` names the comparison operator."""

    ctype: COMPARISONS

    def __init__(self, ctype: COMPARISONS, left: BaseNode, operator: SymbolNode, right: BaseNode):
        super().__init__(left, operator, right)
        # Stored after the operands have been set by the base class.
        self.ctype = ctype
|
|
|
@dataclass(unsafe_hash=True)
class ArithmeticNode(BinaryOperatorNode):
    """Binary node for arithmetic; ``operation`` names the operation (e.g. 'add')."""

    # TODO: use a Literal for operation
    operation: str

    def __init__(self, operation: str, left: BaseNode, operator: SymbolNode, right: BaseNode):
        super().__init__(left, operator, right)
        # Stored after the operands have been set by the base class.
        self.operation = operation
|
|
|
@dataclass(unsafe_hash=True)
class UnaryOperatorNode(BaseNode):
    """Unary expression; positioned at the token passed as *token*."""

    operator: SymbolNode
    value: BaseNode

    def __init__(self, token: Token[TV_TokenTypes], operator: SymbolNode, value: BaseNode):
        super().__init__(token.lineno, token.colno, token.filename)
        self.operator, self.value = operator, value
|
|
|
class NotNode(UnaryOperatorNode):
    """Unary node for the ``not`` operator."""
|
|
|
class UMinusNode(UnaryOperatorNode):
    """Unary node for arithmetic negation (leading dash)."""
|
|
|
@dataclass(unsafe_hash=True)
class CodeBlockNode(BaseNode):
    """Sequence of statements."""

    # Whitespace seen before the first statement, or None.
    pre_whitespaces: T.Optional[WhitespaceNode] = field(hash=False)
    # The statements of the block, in source order.
    lines: T.List[BaseNode] = field(hash=False)

    def __init__(self, token: Token[TV_TokenTypes]):
        super().__init__(token.lineno, token.colno, token.filename)
        self.pre_whitespaces = None
        self.lines = []

    def append_whitespaces(self, token: Token) -> None:
        """Attach *token* to the last statement, or to the block's leading
        whitespace while the block has no statement yet."""
        if self.lines:
            self.lines[-1].append_whitespaces(token)
            return
        if self.pre_whitespaces is not None:
            self.pre_whitespaces.append(token)
            return
        self.pre_whitespaces = WhitespaceNode(token)
|
|
|
@dataclass(unsafe_hash=True)
class IndexNode(BaseNode):
    """Subscript expression ``iobject[index]``; positioned at the indexed object."""

    iobject: BaseNode
    lbracket: SymbolNode
    index: BaseNode
    rbracket: SymbolNode

    def __init__(self, iobject: BaseNode, lbracket: SymbolNode, index: BaseNode, rbracket: SymbolNode):
        super().__init__(iobject.lineno, iobject.colno, iobject.filename)
        self.iobject, self.index = iobject, index
        self.lbracket, self.rbracket = lbracket, rbracket
|
|
|
@dataclass(unsafe_hash=True)
class MethodNode(BaseNode):
    """Method call ``source_object.name(args)``.

    The node starts at the method name and ends one column past the closing
    parenthesis.
    """

    source_object: BaseNode
    dot: SymbolNode
    name: IdNode
    lpar: SymbolNode
    args: ArgumentNode
    rpar: SymbolNode

    def __init__(self, source_object: BaseNode, dot: SymbolNode, name: IdNode, lpar: SymbolNode, args: ArgumentNode, rpar: SymbolNode):
        super().__init__(
            name.lineno,
            name.colno,
            name.filename,
            end_lineno=rpar.lineno,
            end_colno=rpar.colno + 1,
        )
        self.source_object = source_object
        self.dot = dot
        self.name = name
        self.lpar = lpar
        self.args = args
        self.rpar = rpar
|
|
|
@dataclass(unsafe_hash=True)
class FunctionNode(BaseNode):
    """Function call ``func_name(args)``.

    The node starts at the function name; its end is taken from the closing
    parenthesis' end position, plus one column.
    """

    func_name: IdNode
    lpar: SymbolNode
    args: ArgumentNode
    rpar: SymbolNode

    def __init__(self, func_name: IdNode, lpar: SymbolNode, args: ArgumentNode, rpar: SymbolNode):
        super().__init__(
            func_name.lineno,
            func_name.colno,
            func_name.filename,
            end_lineno=rpar.end_lineno,
            end_colno=rpar.end_colno + 1,
        )
        self.func_name = func_name
        self.lpar = lpar
        self.args = args
        self.rpar = rpar
|
|
|
@dataclass(unsafe_hash=True)
class AssignmentNode(BaseNode):
    """Assignment ``var_name <operator> value``; positioned at the variable name."""

    var_name: IdNode
    operator: SymbolNode
    value: BaseNode

    def __init__(self, var_name: IdNode, operator: SymbolNode, value: BaseNode):
        super().__init__(var_name.lineno, var_name.colno, var_name.filename)
        self.var_name, self.operator, self.value = var_name, operator, value
|
|
|
class PlusAssignmentNode(AssignmentNode):
    """Assignment node for the ``+=`` operator."""
|
|
|
@dataclass(unsafe_hash=True)
class ForeachClauseNode(BaseNode):
    """A ``foreach`` loop; positioned at the ``foreach`` keyword."""

    foreach_: SymbolNode = field(hash=False)
    # Loop variable names.
    varnames: T.List[IdNode] = field(hash=False)
    # Commas separating the loop variables.
    commas: T.List[SymbolNode] = field(hash=False)
    # The ':' between the loop variables and the iterated expression.
    column: SymbolNode = field(hash=False)
    items: BaseNode
    block: CodeBlockNode
    endforeach: SymbolNode = field(hash=False)

    def __init__(self, foreach_: SymbolNode, varnames: T.List[IdNode], commas: T.List[SymbolNode], column: SymbolNode, items: BaseNode, block: CodeBlockNode, endforeach: SymbolNode):
        super().__init__(foreach_.lineno, foreach_.colno, foreach_.filename)
        # Keyword symbols delimiting the loop.
        self.foreach_ = foreach_
        self.endforeach = endforeach
        # Loop header.
        self.varnames = varnames
        self.commas = commas
        self.column = column
        self.items = items
        # Loop body.
        self.block = block
|
|
|
|
|
@dataclass(unsafe_hash=True)
class IfNode(BaseNode):
    """One conditional branch: keyword symbol, condition and body.

    The position is copied from *linenode*.
    """

    if_: SymbolNode
    condition: BaseNode
    block: CodeBlockNode

    def __init__(self, linenode: BaseNode, if_node: SymbolNode, condition: BaseNode, block: CodeBlockNode):
        super().__init__(linenode.lineno, linenode.colno, linenode.filename)
        self.if_, self.condition, self.block = if_node, condition, block
|
|
|
@dataclass(unsafe_hash=True)
class ElseNode(BaseNode):
    """The ``else`` branch of an if clause; positioned at its body block."""

    else_: SymbolNode
    block: CodeBlockNode

    def __init__(self, else_: SymbolNode, block: CodeBlockNode):
        super().__init__(block.lineno, block.colno, block.filename)
        self.else_, self.block = else_, block
|
|
|
@dataclass(unsafe_hash=True)
class IfClauseNode(BaseNode):
    """Complete if construct: its branches, the else part and the closing symbol.

    A new instance has no branches, an ``EmptyNode`` as else part and
    ``endif`` set to ``None``; presumably the parser fills these in afterwards
    (not visible in this part of the file).
    """

    ifs: T.List[IfNode] = field(hash=False)
    elseblock: T.Union[EmptyNode, ElseNode]
    endif: SymbolNode

    def __init__(self, linenode: BaseNode):
        lineno, colno, filename = linenode.lineno, linenode.colno, linenode.filename
        super().__init__(lineno, colno, filename)
        self.ifs = []
        self.elseblock = EmptyNode(lineno, colno, filename)
        self.endif = None
|
|
|
@dataclass(unsafe_hash=True)
class TestCaseClauseNode(BaseNode):
    """A ``testcase`` ... ``endtestcase`` construct; positioned at its condition."""

    testcase: SymbolNode
    condition: BaseNode
    block: CodeBlockNode
    endtestcase: SymbolNode

    def __init__(self, testcase: SymbolNode, condition: BaseNode, block: CodeBlockNode, endtestcase: SymbolNode):
        super().__init__(condition.lineno, condition.colno, condition.filename)
        # Keyword symbols delimiting the construct.
        self.testcase, self.endtestcase = testcase, endtestcase
        self.condition, self.block = condition, block
|
|
|
@dataclass(unsafe_hash=True)
class TernaryNode(BaseNode):
    """Ternary expression ``condition ? trueblock : falseblock``.

    The node is positioned at its condition. ``column`` holds the ':' symbol.
    """

    condition: BaseNode
    questionmark: SymbolNode
    trueblock: BaseNode
    column: SymbolNode
    falseblock: BaseNode

    def __init__(self, condition: BaseNode, questionmark: SymbolNode, trueblock: BaseNode, column: SymbolNode, falseblock: BaseNode):
        super().__init__(condition.lineno, condition.colno, condition.filename)
        self.condition = condition
        # Operator symbols.
        self.questionmark, self.column = questionmark, column
        # The two alternatives.
        self.trueblock, self.falseblock = trueblock, falseblock
|
|
|
|
|
@dataclass(unsafe_hash=True)
class ParenthesizedNode(BaseNode):
    """Expression wrapped in parentheses.

    The node spans from the opening parenthesis to one column past the
    closing one; the file name is taken from the inner expression.
    """

    lpar: SymbolNode = field(hash=False)
    inner: BaseNode
    rpar: SymbolNode = field(hash=False)

    def __init__(self, lpar: SymbolNode, inner: BaseNode, rpar: SymbolNode):
        super().__init__(
            lpar.lineno,
            lpar.colno,
            inner.filename,
            end_lineno=rpar.lineno,
            end_colno=rpar.colno + 1,
        )
        self.lpar, self.inner, self.rpar = lpar, inner, rpar
|
|
|
|
|
if T.TYPE_CHECKING:
    # Spellings a comparison type may take; only defined for type checkers.
    COMPARISONS = Literal['==', '!=', '<', '<=', '>=', '>', 'in', 'notin']

# Maps a token id to the comparison type stored on a ComparisonNode.
# Parser.e4 walks this mapping and tries to accept each key as a token id.
# NOTE(review): no token pattern or keyword in the lexer is named 'not in';
# Parser.e4 handles 'not' followed by 'in' in a separate branch.
comparison_map: T.Mapping[str, COMPARISONS] = {
    'equal': '==',
    'nequal': '!=',
    'lt': '<',
    'le': '<=',
    'gt': '>',
    'ge': '>=',
    'in': 'in',
    'not in': 'notin',
}
|
|
|
# Recursive descent parser for Meson's definition language. |
|
# Very basic apart from the fact that we have many precedence |
|
# levels so there are not enough words to describe them all. |
|
# Enter numbering: |
|
# |
|
# 1 assignment |
|
# 2 or |
|
# 3 and |
|
# 4 comparison |
|
# 5 arithmetic |
|
# 6 negation |
|
# 7 funcall, method call |
|
# 8 parentheses |
|
# 9 plain token |
|
|
|
class Parser: |
|
def __init__(self, code: str, filename: str):
    """Create a lexer for *code* and load the first significant token."""
    self.lexer = Lexer(code)
    self.stream = self.lexer.lex(filename)
    # Placeholder token so that previous/current are always Token objects.
    placeholder: Token = Token('eof', '', 0, 0, 0, (0, 0), None)
    self.current: Token = placeholder
    self.previous = placeholder
    # Whitespace tokens seen since the last node was created.
    self.current_ws: T.List[Token] = []
    # True while the branches of a ternary expression are being parsed.
    self.in_ternary = False

    self.getsym()
|
|
|
def create_node(self, node_type: T.Type[BaseNodeT], *args: T.Any, **kwargs: T.Any) -> BaseNodeT: |
|
node = node_type(*args, **kwargs) |
|
for ws_token in self.current_ws: |
|
node.append_whitespaces(ws_token) |
|
self.current_ws = [] |
|
return node |
|
|
|
def getsym(self) -> None: |
|
self.previous = self.current |
|
try: |
|
self.current = next(self.stream) |
|
|
|
while self.current.tid in {'eol', 'comment', 'whitespace'}: |
|
self.current_ws.append(self.current) |
|
if self.current.tid == 'eol': |
|
break |
|
self.current = next(self.stream) |
|
|
|
except StopIteration: |
|
self.current = Token('eof', '', self.current.line_start, self.current.lineno, self.current.colno + self.current.bytespan[1] - self.current.bytespan[0], (0, 0), None) |
|
|
|
def getline(self) -> str: |
|
return self.lexer.getline(self.current.line_start) |
|
|
|
def accept(self, s: str) -> bool: |
|
if self.current.tid == s: |
|
self.getsym() |
|
return True |
|
return False |
|
|
|
def accept_any(self, tids: T.Tuple[str, ...]) -> str: |
|
tid = self.current.tid |
|
if tid in tids: |
|
self.getsym() |
|
return tid |
|
return '' |
|
|
|
def expect(self, s: str) -> bool: |
|
if self.accept(s): |
|
return True |
|
raise ParseException(f'Expecting {s} got {self.current.tid}.', self.getline(), self.current.lineno, self.current.colno) |
|
|
|
def block_expect(self, s: str, block_start: Token) -> bool: |
|
if self.accept(s): |
|
return True |
|
raise BlockParseException(f'Expecting {s} got {self.current.tid}.', self.getline(), self.current.lineno, self.current.colno, self.lexer.getline(block_start.line_start), block_start.lineno, block_start.colno) |
|
|
|
def parse(self) -> CodeBlockNode: |
|
block = self.codeblock() |
|
try: |
|
self.expect('eof') |
|
except ParseException as e: |
|
e.ast = block |
|
raise |
|
return block |
|
|
|
def statement(self) -> BaseNode: |
|
return self.e1() |
|
|
|
def e1(self) -> BaseNode: |
|
left = self.e2() |
|
if self.accept('plusassign'): |
|
operator = self.create_node(SymbolNode, self.previous) |
|
value = self.e1() |
|
if not isinstance(left, IdNode): |
|
raise ParseException('Plusassignment target must be an id.', self.getline(), left.lineno, left.colno) |
|
assert isinstance(left.value, str) |
|
return self.create_node(PlusAssignmentNode, left, operator, value) |
|
elif self.accept('assign'): |
|
operator = self.create_node(SymbolNode, self.previous) |
|
value = self.e1() |
|
if not isinstance(left, IdNode): |
|
raise ParseException('Assignment target must be an id.', |
|
self.getline(), left.lineno, left.colno) |
|
assert isinstance(left.value, str) |
|
return self.create_node(AssignmentNode, left, operator, value) |
|
elif self.accept('questionmark'): |
|
if self.in_ternary: |
|
raise ParseException('Nested ternary operators are not allowed.', |
|
self.getline(), left.lineno, left.colno) |
|
|
|
qm_node = self.create_node(SymbolNode, self.previous) |
|
self.in_ternary = True |
|
trueblock = self.e1() |
|
self.expect('colon') |
|
column_node = self.create_node(SymbolNode, self.previous) |
|
falseblock = self.e1() |
|
self.in_ternary = False |
|
return self.create_node(TernaryNode, left, qm_node, trueblock, column_node, falseblock) |
|
return left |
|
|
|
def e2(self) -> BaseNode: |
|
left = self.e3() |
|
while self.accept('or'): |
|
operator = self.create_node(SymbolNode, self.previous) |
|
if isinstance(left, EmptyNode): |
|
raise ParseException('Invalid or clause.', |
|
self.getline(), left.lineno, left.colno) |
|
left = self.create_node(OrNode, left, operator, self.e3()) |
|
return left |
|
|
|
def e3(self) -> BaseNode: |
|
left = self.e4() |
|
while self.accept('and'): |
|
operator = self.create_node(SymbolNode, self.previous) |
|
if isinstance(left, EmptyNode): |
|
raise ParseException('Invalid and clause.', |
|
self.getline(), left.lineno, left.colno) |
|
left = self.create_node(AndNode, left, operator, self.e4()) |
|
return left |
|
|
|
def e4(self) -> BaseNode:
    # Level 4: comparisons. At most one comparison operator is parsed per
    # call; the result is a ComparisonNode, or the left operand unchanged
    # when no comparison operator follows.
    left = self.e5()
    # Try every token id in comparison_map; the first one accepted wins.
    for nodename, operator_type in comparison_map.items():
        if self.accept(nodename):
            operator = self.create_node(SymbolNode, self.previous)
            return self.create_node(ComparisonNode, operator_type, left, operator, self.e5())
    if self.accept('not'):
        # Snapshot of the whitespace pending after 'not' was consumed.
        ws = self.current_ws.copy()
        not_token = self.previous
        if self.accept('in'):
            in_token = self.previous
            self.current_ws = self.current_ws[len(ws):]  # remove whitespaces between not and in
            # A throw-away node is used only to concatenate the text of
            # the whitespace tokens in ws.
            temp_node = EmptyNode(in_token.lineno, in_token.colno, in_token.filename)
            for w in ws:
                temp_node.append_whitespaces(w)

            # Merge 'not', the whitespace in between and 'in' into the
            # 'not' token, which then represents the whole operator.
            not_token.bytespan = (not_token.bytespan[0], in_token.bytespan[1])
            not_token.value += temp_node.whitespaces.value + in_token.value
            operator = self.create_node(SymbolNode, not_token)
            return self.create_node(ComparisonNode, 'notin', left, operator, self.e5())
    # NOTE(review): when 'not' was accepted but no 'in' follows, the 'not'
    # token stays consumed and left is returned as is.
    return left
|
|
|
def e5(self) -> BaseNode: |
|
return self.e5addsub() |
|
|
|
def e5addsub(self) -> BaseNode: |
|
op_map = { |
|
'plus': 'add', |
|
'dash': 'sub', |
|
} |
|
left = self.e5muldiv() |
|
while True: |
|
op = self.accept_any(tuple(op_map.keys())) |
|
if op: |
|
operator = self.create_node(SymbolNode, self.previous) |
|
left = self.create_node(ArithmeticNode, op_map[op], left, operator, self.e5muldiv()) |
|
else: |
|
break |
|
return left |
|
|
|
def e5muldiv(self) -> BaseNode: |
|
op_map = { |
|
'percent': 'mod', |
|
'star': 'mul', |
|
'fslash': 'div', |
|
} |
|
left = self.e6() |
|
while True: |
|
op = self.accept_any(tuple(op_map.keys())) |
|
if op: |
|
operator = self.create_node(SymbolNode, self.previous) |
|
left = self.create_node(ArithmeticNode, op_map[op], left, operator, self.e6()) |
|
else: |
|
break |
|
return left |
|
|
|
def e6(self) -> BaseNode: |
|
if self.accept('not'): |
|
operator = self.create_node(SymbolNode, self.previous) |
|
return self.create_node(NotNode, self.current, operator, self.e7()) |
|
if self.accept('dash'): |
|
operator = self.create_node(SymbolNode, self.previous) |
|
return self.create_node(UMinusNode, self.current, operator, self.e7()) |
|
return self.e7() |
|
|
|
def e7(self) -> BaseNode: |
|
left = self.e8() |
|
block_start = self.current |
|
if self.accept('lparen'): |
|
lpar = self.create_node(SymbolNode, block_start) |
|
args = self.args() |
|
self.block_expect('rparen', block_start) |
|
rpar = self.create_node(SymbolNode, self.previous) |
|
if not isinstance(left, IdNode): |
|
raise ParseException('Function call must be applied to plain id', |
|
self.getline(), left.lineno, left.colno) |
|
assert isinstance(left.value, str) |
|
left = self.create_node(FunctionNode, left, lpar, args, rpar) |
|
go_again = True |
|
while go_again: |
|
go_again = False |
|
if self.accept('dot'): |
|
go_again = True |
|
left = self.method_call(left) |
|
if self.accept('lbracket'): |
|
go_again = True |
|
left = self.index_call(left) |
|
return left |
|
|
|
def e8(self) -> BaseNode: |
|
block_start = self.current |
|
if self.accept('lparen'): |
|
lpar = self.create_node(SymbolNode, block_start) |
|
e = self.statement() |
|
self.block_expect('rparen', block_start) |
|
rpar = self.create_node(SymbolNode, self.previous) |
|
return ParenthesizedNode(lpar, e, rpar) |
|
elif self.accept('lbracket'): |
|
lbracket = self.create_node(SymbolNode, block_start) |
|
args = self.args() |
|
self.block_expect('rbracket', block_start) |
|
rbracket = self.create_node(SymbolNode, self.previous) |
|
return self.create_node(ArrayNode, lbracket, args, rbracket) |
|
elif self.accept('lcurl'): |
|
lcurl = self.create_node(SymbolNode, block_start) |
|
key_values = self.key_values() |
|
self.block_expect('rcurl', block_start) |
|
rcurl = self.create_node(SymbolNode, self.previous) |
|
return self.create_node(DictNode, lcurl, key_values, rcurl) |
|
else: |
|
return self.e9() |
|
|
|
def e9(self) -> BaseNode:
    """Parse level 9: a single plain token.

    Handles booleans, ids, numbers and the four string kinds. When the
    current token is none of these, nothing is consumed and an ``EmptyNode``
    at the current position is returned.
    """
    t = self.current

    # Boolean keywords: the token's text is replaced by the actual bool.
    for keyword, flag in (('true', True), ('false', False)):
        if self.accept(keyword):
            t.value = flag
            return self.create_node(BooleanNode, t)

    plain_kinds = (
        ('id', IdNode),
        ('number', NumberNode),
        ('string', StringNode),
        ('fstring', FormatStringNode),
        ('multiline_string', MultilineStringNode),
        ('multiline_fstring', MultilineFormatStringNode),
    )
    for tid, node_type in plain_kinds:
        if self.accept(tid):
            return self.create_node(node_type, t)

    return EmptyNode(self.current.lineno, self.current.colno, self.current.filename)
|
|
|
def key_values(self) -> ArgumentNode:
    """Parse the ``key: value`` pairs of a dict literal.

    Parsing stops at the first empty statement or after a pair that is not
    followed by a comma.

    Raises:
        ParseException: when an entry is not followed by a colon.
    """
    s = self.statement()
    a = self.create_node(ArgumentNode, self.current)

    while not isinstance(s, EmptyNode):
        if not self.accept('colon'):
            raise ParseException('Only key:value pairs are valid in dict construction.',
                                 self.getline(), s.lineno, s.colno)
        a.columns.append(self.create_node(SymbolNode, self.previous))
        a.set_kwarg_no_check(s, self.statement())
        if not self.accept('comma'):
            break
        a.commas.append(self.create_node(SymbolNode, self.previous))
        s = self.statement()
    return a
|
|
|
def args(self) -> ArgumentNode:
    """Parse a comma separated list of positional and keyword arguments.

    Parsing stops at the first empty statement or after an argument that is
    not followed by a comma.

    Raises:
        ParseException: when the key of a keyword argument is not a plain
            identifier.
    """
    s = self.statement()
    a = self.create_node(ArgumentNode, self.current)

    while not isinstance(s, EmptyNode):
        if self.accept('comma'):
            # Positional argument followed by more arguments.
            a.commas.append(self.create_node(SymbolNode, self.previous))
            a.append(s)
        elif self.accept('colon'):
            # Keyword argument: s is the key, the next statement the value.
            a.columns.append(self.create_node(SymbolNode, self.previous))
            if not isinstance(s, IdNode):
                raise ParseException('Dictionary key must be a plain identifier.',
                                     self.getline(), s.lineno, s.colno)
            a.set_kwarg(s, self.statement())
            if not self.accept('comma'):
                break
            a.commas.append(self.create_node(SymbolNode, self.previous))
        else:
            # Last positional argument.
            a.append(s)
            break
        s = self.statement()
    return a
|
|
|
def method_call(self, source_object: BaseNode) -> MethodNode:
    """Parse ``.name(args)`` applied to ``source_object``.

    ``self.previous`` is used as the dot symbol. If another dot follows
    the closing parenthesis, the freshly built MethodNode becomes the
    source object of a chained call.

    Raises:
        ParseException: if what follows the dot is not an IdNode. A
            dedicated message is used when both sides of the dot are
            NumberNodes (i.e. the input looks like a float literal).
    """
    dot_sym = self.create_node(SymbolNode, self.previous)
    name_node = self.e9()

    if not isinstance(name_node, IdNode):
        looks_like_float = isinstance(source_object, NumberNode) and isinstance(name_node, NumberNode)
        if looks_like_float:
            raise ParseException('meson does not support float numbers',
                                 self.getline(), source_object.lineno, source_object.colno)
        raise ParseException('Method name must be plain id',
                             self.getline(), self.current.lineno, self.current.colno)
    assert isinstance(name_node.value, str)

    self.expect('lparen')
    open_paren = self.create_node(SymbolNode, self.previous)
    call_args = self.args()
    # The closing paren symbol is captured from self.current *before* expect() checks it.
    close_paren = self.create_node(SymbolNode, self.current)
    self.expect('rparen')

    call = self.create_node(MethodNode, source_object, dot_sym, name_node,
                            open_paren, call_args, close_paren)
    return self.method_call(call) if self.accept('dot') else call
|
|
|
def index_call(self, source_object: BaseNode) -> IndexNode:
    """Parse ``[statement]`` applied to ``source_object``.

    ``self.previous`` is used as the opening bracket symbol; a closing
    ``rbracket`` token is required after the index statement.
    """
    open_bracket = self.create_node(SymbolNode, self.previous)
    index = self.statement()
    self.expect('rbracket')
    close_bracket = self.create_node(SymbolNode, self.previous)
    return self.create_node(IndexNode, source_object, open_bracket, index, close_bracket)
|
|
|
def foreachblock(self) -> ForeachClauseNode:
    """Parse the remainder of a foreach clause.

    ``self.previous`` is used as the ``foreach`` keyword symbol. One loop
    variable is required; a second one is read if a comma follows the
    first. After the colon come the iterated statement and the body.

    The ``endforeach`` symbol is built from ``self.current`` without
    consuming it.
    """
    keyword = self.create_node(SymbolNode, self.previous)

    self.expect('id')
    assert isinstance(self.previous.value, str)
    loop_vars = [self.create_node(IdNode, self.previous)]
    separators = []

    # Optional second loop variable.
    if self.accept('comma'):
        separators.append(self.create_node(SymbolNode, self.previous))
        self.expect('id')
        assert isinstance(self.previous.value, str)
        loop_vars.append(self.create_node(IdNode, self.previous))

    self.expect('colon')
    colon = self.create_node(SymbolNode, self.previous)
    iterable = self.statement()
    body = self.codeblock()
    closing = self.create_node(SymbolNode, self.current)
    return self.create_node(ForeachClauseNode, keyword, loop_vars, separators,
                            colon, iterable, body, closing)
|
|
|
def ifblock(self) -> IfClauseNode:
    """Parse the remainder of an if clause.

    ``self.previous`` is used as the ``if`` keyword symbol. The condition
    and first body are parsed here; any ``elif`` branches and the optional
    ``else`` branch are delegated to elseifblock() and elseblock().

    The ``endif`` symbol is built from ``self.current`` without consuming it.
    """
    keyword = self.create_node(SymbolNode, self.previous)
    cond = self.statement()
    node = self.create_node(IfClauseNode, cond)
    self.expect('eol')
    body = self.codeblock()

    first_branch = self.create_node(IfNode, node, keyword, cond, body)
    node.ifs.append(first_branch)

    self.elseifblock(node)
    node.elseblock = self.elseblock()
    node.endif = self.create_node(SymbolNode, self.current)
    return node
|
|
|
def elseifblock(self, clause: IfClauseNode) -> None:
    """Append one IfNode to ``clause.ifs`` for every consecutive ``elif`` branch."""
    while True:
        if not self.accept('elif'):
            return
        keyword = self.create_node(SymbolNode, self.previous)
        cond = self.statement()
        self.expect('eol')
        body = self.codeblock()
        # The condition is passed twice: as the first argument and as the condition itself.
        branch = self.create_node(IfNode, cond, keyword, cond, body)
        clause.ifs.append(branch)
|
|
|
def elseblock(self) -> T.Union[ElseNode, EmptyNode]:
    """Parse an optional ``else`` branch.

    Returns an ElseNode when an ``else`` token is accepted, otherwise an
    EmptyNode positioned at the current token.
    """
    if not self.accept('else'):
        tok = self.current
        return EmptyNode(tok.lineno, tok.colno, tok.filename)

    keyword = self.create_node(SymbolNode, self.previous)
    self.expect('eol')
    body = self.codeblock()
    return ElseNode(keyword, body)
|
|
|
def testcaseblock(self) -> TestCaseClauseNode:
    """Parse the remainder of a testcase clause.

    ``self.previous`` is used as the ``testcase`` keyword symbol. The
    condition statement must be followed by an end-of-line token, then
    the body is parsed as a code block.

    The ``endtestcase`` symbol is built from ``self.current`` without
    consuming it.
    """
    testcase = self.create_node(SymbolNode, self.previous)
    condition = self.statement()
    self.expect('eol')
    block = self.codeblock()
    # Build the closing symbol through create_node(), as foreachblock() and
    # ifblock() do for their closing symbols, instead of instantiating
    # SymbolNode directly.
    endtestcase = self.create_node(SymbolNode, self.current)
    return self.create_node(TestCaseClauseNode, testcase, condition, block, endtestcase)
|
|
|
def line(self) -> BaseNode:
    """Parse one line: a block construct, ``continue``/``break``, or a statement.

    Returns an EmptyNode if the current token is an end-of-line. For
    ``if``, ``foreach`` and (only when the lexer is in unit-test mode)
    ``testcase``, the matching closing keyword is enforced through
    block_expect() using the token the line started with.
    """
    start_token = self.current

    if self.current == 'eol':
        return EmptyNode(self.current.lineno, self.current.colno, self.current.filename)

    if self.accept('if'):
        if_clause = self.ifblock()
        self.block_expect('endif', start_token)
        return if_clause

    if self.accept('foreach'):
        foreach_clause = self.foreachblock()
        self.block_expect('endforeach', start_token)
        return foreach_clause

    # NOTE(review): the two nodes below are built from self.current after
    # accept() succeeded, whereas other call sites use self.previous for the
    # token that was just accepted -- confirm the position is intended.
    if self.accept('continue'):
        return self.create_node(ContinueNode, self.current)

    if self.accept('break'):
        return self.create_node(BreakNode, self.current)

    if self.lexer.in_unit_test:
        if self.accept('testcase'):
            testcase_clause = self.testcaseblock()
            self.block_expect('endtestcase', start_token)
            return testcase_clause

    return self.statement()
|
|
|
def codeblock(self) -> CodeBlockNode:
    """Parse consecutive lines, separated by end-of-line tokens, into a CodeBlockNode.

    Non-empty lines are appended to ``block.lines``. Pending whitespace
    tokens in ``self.current_ws`` are attached to the block before each
    line is parsed and once more after the last line.

    Raises:
        ParseException: re-raised from line parsing, with its ``ast``
            attribute set to the partially built block.
    """
    block = self.create_node(CodeBlockNode, self.current)

    def flush_pending_whitespace() -> None:
        # Hand every pending whitespace token to the block, then reset the buffer.
        for pending in self.current_ws:
            block.append_whitespaces(pending)
        self.current_ws = []

    try:
        while True:
            flush_pending_whitespace()

            parsed = self.line()
            if not isinstance(parsed, EmptyNode):
                block.lines.append(parsed)

            if not self.accept('eol'):
                break
    except ParseException as e:
        # Expose what was parsed so far to whoever handles the error.
        e.ast = block
        raise

    # Remaining whitespace would not be caught otherwise, since no more nodes follow.
    flush_pending_whitespace()

    return block
|
|
|