# SPDX-License-Identifier: Apache-2.0
# Copyright 2014-2017 The Meson development team
from __future__ import annotations
from dataclasses import dataclass , field
import re
import codecs
import os
import typing as T
from . mesonlib import MesonException
from . import mlog
if T . TYPE_CHECKING :
from typing_extensions import Literal
from . ast import AstVisitor
# Type variable bound to BaseNode. Parser.create_node() uses it so that the
# returned node has the same type as the node class passed in.
BaseNodeT = T . TypeVar ( ' BaseNodeT ' , bound = ' BaseNode ' )
# Regex for the escape sequences supported inside a regular string literal,
# like 'abc\x00'. StringNode substitutes every match with the result of
# decode_match(). The pattern is compiled with re.VERBOSE, so the "# ..."
# notes inside the pattern text are regex comments, not Python comments.
ESCAPE_SEQUENCE_SINGLE_RE = re . compile ( r '''
( \\U [ A - Fa - f0 - 9 ] { 8 } # 8-digit hex escapes
| \\u [ A - Fa - f0 - 9 ] { 4 } # 4-digit hex escapes
| \\x [ A - Fa - f0 - 9 ] { 2 } # 2-digit hex escapes
| \\[ 0 - 7 ] { 1 , 3 } # Octal escapes
| \\N \{ [ ^ } ] + \} # Unicode characters by name
| \\[ \\' abfnrtv] # Single-character escapes
) ''' , re.UNICODE | re.VERBOSE)
def decode_match(match: T.Match[str]) -> str:
    """Return the decoded form of one matched escape sequence.

    The whole matched text is encoded to bytes and then decoded again with
    the codec named below, which is what turns the backslash escape into the
    character it stands for.
    """
    escaped_text = match.group(0)
    raw_bytes = escaped_text.encode()
    return codecs.decode(raw_bytes, ' unicode_escape ')
class ParseException(MesonException):
    """Parse error that remembers the line and column it was raised for.

    ``ast`` defaults to None at class level; Parser.parse() stores the block
    parsed so far on the exception instance before re-raising it.
    """

    ast: T.Optional[CodeBlockNode] = None

    def __init__(self, text: str, line: str, lineno: int, colno: int) -> None:
        # The message is built by mlog.code_line() from the error text, the
        # offending source line and the column (used to show the error column).
        message = mlog.code_line(text, line, colno)
        super().__init__(message)
        self.lineno, self.colno = lineno, colno
class BlockParseException(ParseException):
    """ParseException variant that also shows where the unfinished block began."""

    def __init__(
        self,
        text: str,
        line: str,
        lineno: int,
        colno: int,
        start_line: str,
        start_lineno: int,
        start_colno: int,
    ) -> None:
        # ParseException.__init__ is deliberately bypassed: the message is
        # assembled here and passed straight to MesonException.
        if lineno != start_lineno:
            # Block start and end are on different lines. Layout:
            #   error message
            #   the line with the error
            #   a caret under the error column
            #   a note saying where the block started
            #   the line of the block start
            #   a caret under the block start column
            error_caret = ' %s ^ ' % (' ' * colno)
            start_caret = " %s ^ " % (' ' * start_colno)
            message = " %s \n %s \n %s \n For a block that started at %d , %d \n %s \n %s " % (
                text, line, error_caret, start_lineno, start_colno, start_line, start_caret)
        else:
            # Block start and end share one line. Layout:
            #   error message
            #   the line with the error
            #   caret at block start, underscores, caret at block end
            span_marker = ' {} ^ {} ^ '.format(' ' * start_colno, ' _ ' * (colno - start_colno - 1))
            message = " {} \n {} \n {} ".format(text, line, span_marker)
        MesonException.__init__(self, message)
        self.lineno = lineno
        self.colno = colno
# Value types a token (and the elementary nodes built from it) may carry.
TV_TokenTypes = T.TypeVar(' TV_TokenTypes ', int, str, bool)


@dataclass(eq=False)
class Token(T.Generic[TV_TokenTypes]):
    """One lexed token together with its position in the source text.

    Equality looks at the token id only: a Token equals a string with the
    same id, and equals any other Token with the same id.
    """

    tid: str                      # token id (kind of token)
    filename: str
    line_start: int               # offset in the code where the token's line begins
    lineno: int
    colno: int
    bytespan: T.Tuple[int, int]   # (start, end) offsets of the matched text
    value: TV_TokenTypes

    def __eq__(self, other: object) -> bool:
        if isinstance(other, Token):
            other_tid = other.tid
        elif isinstance(other, str):
            other_tid = other
        else:
            return NotImplemented
        return self.tid == other_tid
class Lexer:
    """Turns source text into a stream of Token objects.

    The token patterns are tried in the order given by ``token_specification``
    at the current offset; the first pattern that matches produces the token.
    """

    def __init__(self, code: str):
        self.code = code
        self.keywords = {' true ', ' false ', ' if ', ' else ', ' elif ',
                         ' endif ', ' and ', ' or ', ' not ', ' foreach ', ' endforeach ',
                         ' in ', ' continue ', ' break '}
        # Identifiers that only trigger a warning for now (see lex()).
        self.future_keywords = {' return '}
        self.in_unit_test = ' MESON_RUNNING_IN_PROJECT_TESTS ' in os.environ
        if self.in_unit_test:
            self.keywords.update({' testcase ', ' endtestcase '})
        self.token_specification = [
            # Need to be sorted longest to shortest.
            (' whitespace ', re.compile(r' [ \ t]+ ')),
            (' multiline_fstring ', re.compile(r" f ' ' ' (.| \ n)*? ' ' ' ", re.M)),
            (' fstring ', re.compile(r" f ' ([^ ' \\ ]|( \\ .))* ' ")),
            (' id ', re.compile(' [_a-zA-Z][_0-9a-zA-Z]* ')),
            (' number ', re.compile(r' 0[bB][01]+|0[oO][0-7]+|0[xX][0-9a-fA-F]+|0|[1-9] \ d* ')),
            (' eol_cont ', re.compile(r' \\ [ \ t]*(#.*)? \ n ')),
            (' eol ', re.compile(r' \ n ')),
            (' multiline_string ', re.compile(r" ' ' ' (.| \ n)*? ' ' ' ", re.M)),
            (' comment ', re.compile(r' #.* ')),
            (' lparen ', re.compile(r' \ ( ')),
            (' rparen ', re.compile(r' \ ) ')),
            (' lbracket ', re.compile(r' \ [ ')),
            (' rbracket ', re.compile(r' \ ] ')),
            (' lcurl ', re.compile(r' \ { ')),
            (' rcurl ', re.compile(r' \ } ')),
            (' dblquote ', re.compile(r' " ')),
            (' string ', re.compile(r" ' ([^ ' \\ ]|( \\ .))* ' ")),
            (' comma ', re.compile(r' , ')),
            (' plusassign ', re.compile(r' \ += ')),
            (' dot ', re.compile(r' \ . ')),
            (' plus ', re.compile(r' \ + ')),
            (' dash ', re.compile(r' - ')),
            (' star ', re.compile(r' \ * ')),
            (' percent ', re.compile(r' % ')),
            (' fslash ', re.compile(r' / ')),
            (' colon ', re.compile(r' : ')),
            (' equal ', re.compile(r' == ')),
            (' nequal ', re.compile(r' != ')),
            (' assign ', re.compile(r' = ')),
            (' le ', re.compile(r' <= ')),
            (' lt ', re.compile(r' < ')),
            (' ge ', re.compile(r' >= ')),
            (' gt ', re.compile(r' > ')),
            (' questionmark ', re.compile(r' \ ? ')),
        ]

    def getline(self, line_start: int) -> str:
        """Return the text of the line that begins at offset ``line_start``.

        The line ends just before the next line terminator. If there is no
        terminator after ``line_start`` the line runs to the end of the code.
        """
        line_end = self.code.find(' \n ', line_start)
        if line_end == -1:
            # str.find() reports "not found" as -1; using that as a slice end
            # would silently cut the final character off the last line.
            return self.code[line_start:]
        return self.code[line_start:line_end]

    def lex(self, filename: str) -> T.Generator[Token, None, None]:
        """Yield the tokens of ``self.code`` in order.

        ``filename`` is stored on every token and used for warning locations.

        Raises ParseException when a double quote token is matched or when no
        pattern matches at the current offset.
        """
        line_start = 0
        lineno = 1
        loc = 0
        # Nesting depth of (), [] and {}; line ends inside any of them are
        # reported as whitespace instead of end-of-line tokens.
        par_count = 0
        bracket_count = 0
        curl_count = 0
        col = 0
        while loc < len(self.code):
            matched = False
            value: str = ' '
            for (tid, reg) in self.token_specification:
                mo = reg.match(self.code, loc)
                if mo:
                    # Remember the position of the token start; lineno and
                    # line_start may be advanced below for multi-line tokens.
                    curline = lineno
                    curline_start = line_start
                    col = mo.start() - line_start
                    matched = True
                    span_start = loc
                    loc = mo.end()
                    span_end = loc
                    bytespan = (span_start, span_end)
                    value = mo.group()
                    if tid == ' lparen ':
                        par_count += 1
                    elif tid == ' rparen ':
                        par_count -= 1
                    elif tid == ' lbracket ':
                        bracket_count += 1
                    elif tid == ' rbracket ':
                        bracket_count -= 1
                    elif tid == ' lcurl ':
                        curl_count += 1
                    elif tid == ' rcurl ':
                        curl_count -= 1
                    elif tid == ' dblquote ':
                        raise ParseException(' Double quotes are not supported. Use single quotes. ', self.getline(line_start), lineno, col)
                    elif tid in {' string ', ' fstring '}:
                        if value.find(" \n ") != -1:
                            msg = (" Newline character in a string detected, use ' ' ' (three single quotes) "
                                   " for multiline strings instead. \n "
                                   " This will become a hard error in a future Meson release. ")
                            mlog.warning(mlog.code_line(msg, self.getline(line_start), col), location=BaseNode(lineno, col, filename))
                        # Strip the delimiters (one extra leading character for the f prefix).
                        value = value[2 if tid == ' fstring ' else 1:-1]
                    elif tid in {' multiline_string ', ' multiline_fstring '}:
                        # Strip the triple-quote delimiters (plus the f prefix).
                        value = value[4 if tid == ' multiline_fstring ' else 3:-3]
                        lines = value.split(' \n ')
                        if len(lines) > 1:
                            lineno += len(lines) - 1
                            line_start = mo.end() - len(lines[-1])
                    elif tid == ' eol_cont ':
                        # A continued line still counts as a new source line
                        # but is handed on as whitespace.
                        lineno += 1
                        line_start = loc
                        tid = ' whitespace '
                    elif tid == ' eol ':
                        lineno += 1
                        line_start = loc
                        if par_count > 0 or bracket_count > 0 or curl_count > 0:
                            tid = ' whitespace '
                    elif tid == ' id ':
                        if value in self.keywords:
                            # Keywords use their own text as the token id.
                            tid = value
                        else:
                            if value in self.future_keywords:
                                mlog.warning(f" Identifier ' { value } ' will become a reserved keyword in a future release. Please rename it. ",
                                             location=BaseNode(lineno, col, filename))
                    yield Token(tid, filename, curline_start, curline, col, bytespan, value)
                    break
            if not matched:
                raise ParseException(' lexer ', self.getline(line_start), lineno, col)
@dataclass
class BaseNode:
    """Base class of all AST nodes: a source position plus visitor support."""

    lineno: int
    colno: int
    filename: str = field(hash=False)
    end_lineno: int = field(hash=False)
    end_colno: int = field(hash=False)
    whitespaces: T.Optional[WhitespaceNode] = field(hash=False)

    def __init__(self, lineno: int, colno: int, filename: str,
                 end_lineno: T.Optional[int] = None, end_colno: T.Optional[int] = None) -> None:
        """Store the position; a missing end position defaults to the start."""
        if end_lineno is None:
            end_lineno = lineno
        if end_colno is None:
            end_colno = colno
        self.lineno = lineno
        self.colno = colno
        self.filename = filename
        self.end_lineno = end_lineno
        self.end_colno = end_colno
        self.whitespaces = None

        # Attributes for the visitors
        self.level = 0
        self.ast_id = ' '
        self.condition_level = 0

    def accept(self, visitor: 'AstVisitor') -> None:
        """Call the visitor method named after this node's class, if any.

        Nothing happens when the visitor has no such attribute or when the
        attribute is not callable.
        """
        fname = ' visit_ {} '.format(type(self).__name__)
        handler = getattr(visitor, fname, None)
        if callable(handler):
            handler(self)

    def append_whitespaces(self, token: Token) -> None:
        """Attach a whitespace token, creating the WhitespaceNode on first use."""
        if self.whitespaces is not None:
            self.whitespaces.append(token)
            return
        self.whitespaces = WhitespaceNode(token)
@dataclass(unsafe_hash=True)
class WhitespaceNode(BaseNode):
    """Node that accumulates the text of consecutive whitespace tokens."""

    value: str

    def __init__(self, token: Token[str]):
        # Positioned at the first token; further tokens only extend the text.
        super().__init__(token.lineno, token.colno, token.filename)
        self.value = ' '
        self.append(token)

    def append(self, token: Token[str]) -> None:
        """Add the text of ``token`` to the end of the stored value."""
        self.value += token.value
@dataclass(unsafe_hash=True)
class ElementaryNode(T.Generic[TV_TokenTypes], BaseNode):
    """Leaf node that carries the value and byte span of a single token."""

    value: TV_TokenTypes
    bytespan: T.Tuple[int, int] = field(hash=False)

    def __init__(self, token: Token[TV_TokenTypes]):
        super().__init__(token.lineno, token.colno, token.filename)
        self.bytespan = token.bytespan
        self.value = token.value
class BooleanNode(ElementaryNode[bool]):
    """Elementary node whose value is a bool."""
class IdNode(ElementaryNode[str]):
    """Elementary node whose value is the text of an identifier token."""
@dataclass(unsafe_hash=True)
class NumberNode(ElementaryNode[int]):
    """Integer literal; keeps both the parsed value and the original text."""

    raw_value: str = field(hash=False)

    def __init__(self, token: Token[str]):
        # BaseNode.__init__ is called directly: ElementaryNode.__init__ would
        # store the token text as the value, but the value must be an int.
        BaseNode.__init__(self, token.lineno, token.colno, token.filename)
        literal_text = token.value
        self.raw_value = literal_text
        # base=0 lets int() work out the base from the literal's prefix.
        self.value = int(literal_text, base=0)
        self.bytespan = token.bytespan
class BaseStringNode(ElementaryNode[str]):
    """Common base of the string literal node types."""
@dataclass(unsafe_hash=True)
class StringNode(BaseStringNode):
    """String literal node.

    ``value`` is the token text with escape sequences decoded (unless
    ``escape`` is false); ``raw_value`` is always the unmodified token text.
    """

    raw_value: str = field(hash=False)

    def __init__(self, token: Token[str], escape: bool = True):
        super().__init__(token)
        if escape:
            self.value = ESCAPE_SEQUENCE_SINGLE_RE.sub(decode_match, token.value)
        else:
            self.value = token.value
        self.raw_value = token.value
class FormatStringNode(StringNode):
    """StringNode subtype that the parser creates for fstring tokens."""
@dataclass(unsafe_hash=True)
class MultilineStringNode(BaseStringNode):
    """Multiline string literal; the token text is stored without any decoding."""

    def __init__(self, token: Token[str]):
        super().__init__(token)
        # Same value the base initializer stored; no escape processing here.
        self.value = token.value
class MultilineFormatStringNode(MultilineStringNode):
    """MultilineStringNode subtype that the parser creates for multiline fstring tokens."""
class ContinueNode(ElementaryNode):
    """Elementary node type; presumably stands for a continue statement (its creation is not shown here)."""
class BreakNode(ElementaryNode):
    """Elementary node type; presumably stands for a break statement (its creation is not shown here)."""
class SymbolNode(ElementaryNode[str]):
    """Elementary node for punctuation and keyword tokens (operators, brackets, commas, ...)."""
@dataclass(unsafe_hash=True)
class ArgumentNode(BaseNode):
    """Positional and keyword arguments of a call, array or dict literal."""

    arguments: T.List[BaseNode] = field(hash=False)
    commas: T.List[SymbolNode] = field(hash=False)
    columns: T.List[SymbolNode] = field(hash=False)   # symbol nodes of the colon tokens
    kwargs: T.Dict[BaseNode, BaseNode] = field(hash=False)

    def __init__(self, token: Token[TV_TokenTypes]):
        super().__init__(token.lineno, token.colno, token.filename)
        self.arguments = []
        self.commas = []
        self.columns = []
        self.kwargs = {}
        # Set once a positional argument is added after a keyword argument.
        self.order_error = False

    def prepend(self, statement: BaseNode) -> None:
        """Put ``statement`` in front of the positional arguments (EmptyNode is ignored)."""
        if self.num_kwargs() > 0:
            self.order_error = True
        if isinstance(statement, EmptyNode):
            return
        self.arguments = [statement] + self.arguments

    def append(self, statement: BaseNode) -> None:
        """Add ``statement`` after the positional arguments (EmptyNode is ignored)."""
        if self.num_kwargs() > 0:
            self.order_error = True
        if isinstance(statement, EmptyNode):
            return
        self.arguments += [statement]

    def set_kwarg(self, name: IdNode, value: BaseNode) -> None:
        """Store a keyword argument, warning when the same name was already set."""
        already_defined = False
        for existing in self.kwargs:
            if isinstance(existing, IdNode) and name.value == existing.value:
                already_defined = True
                break
        if already_defined:
            mlog.warning(f' Keyword argument " { name . value } " defined multiple times. ', location=self)
            mlog.warning(' This will be an error in future Meson releases. ')
        self.kwargs[name] = value

    def set_kwarg_no_check(self, name: BaseNode, value: BaseNode) -> None:
        """Store a key/value pair without the duplicate-name warning."""
        self.kwargs[name] = value

    def num_args(self) -> int:
        return len(self.arguments)

    def num_kwargs(self) -> int:
        return len(self.kwargs)

    def incorrect_order(self) -> bool:
        return self.order_error

    def __len__(self) -> int:
        # Counts positional arguments only.
        return self.num_args() # Fixme
@dataclass(unsafe_hash=True)
class ArrayNode(BaseNode):
    """Array literal: opening bracket, arguments, closing bracket."""

    lbracket: SymbolNode
    args: ArgumentNode
    rbracket: SymbolNode

    def __init__(self, lbracket: SymbolNode, args: ArgumentNode, rbracket: SymbolNode):
        # Starts at the opening bracket and ends one column past the closing one.
        super().__init__(lbracket.lineno, lbracket.colno, args.filename,
                         end_lineno=rbracket.lineno, end_colno=rbracket.colno + 1)
        self.rbracket = rbracket
        self.args = args
        self.lbracket = lbracket
@dataclass(unsafe_hash=True)
class DictNode(BaseNode):
    """Dict literal: opening curly brace, key/value arguments, closing curly brace."""

    lcurl: SymbolNode
    args: ArgumentNode
    rcurl: SymbolNode

    def __init__(self, lcurl: SymbolNode, args: ArgumentNode, rcurl: SymbolNode):
        # Starts at the opening brace and ends one column past the closing one.
        super().__init__(lcurl.lineno, lcurl.colno, args.filename,
                         end_lineno=rcurl.lineno, end_colno=rcurl.colno + 1)
        self.rcurl = rcurl
        self.args = args
        self.lcurl = lcurl
class EmptyNode(BaseNode):
    """Placeholder node; the parser returns it when no expression could be parsed."""
@dataclass(unsafe_hash=True)
class BinaryOperatorNode(BaseNode):
    """Base for nodes made of a left operand, an operator symbol and a right operand."""

    left: BaseNode
    operator: SymbolNode
    right: BaseNode

    def __init__(self, left: BaseNode, operator: SymbolNode, right: BaseNode):
        # The node is positioned at its left operand.
        super().__init__(left.lineno, left.colno, left.filename)
        self.right = right
        self.operator = operator
        self.left = left
class OrNode(BinaryOperatorNode):
    """Binary node the parser builds for an `or` expression."""
class AndNode(BinaryOperatorNode):
    """Binary node the parser builds for an `and` expression."""
@dataclass(unsafe_hash=True)
class ComparisonNode(BinaryOperatorNode):
    """Binary node for a comparison; ``ctype`` names which comparison it is."""

    ctype: COMPARISONS

    def __init__(self, ctype: COMPARISONS, left: BaseNode, operator: SymbolNode, right: BaseNode):
        super().__init__(left, operator, right)
        self.ctype = ctype
@dataclass(unsafe_hash=True)
class ArithmeticNode(BinaryOperatorNode):
    """Binary node for arithmetic; ``operation`` is the operation name chosen by the parser."""

    # TODO: use a Literal for operation
    operation: str

    def __init__(self, operation: str, left: BaseNode, operator: SymbolNode, right: BaseNode):
        super().__init__(left, operator, right)
        self.operation = operation
@dataclass(unsafe_hash=True)
class UnaryOperatorNode(BaseNode):
    """Base for nodes made of an operator symbol applied to one operand."""

    operator: SymbolNode
    value: BaseNode

    def __init__(self, token: Token[TV_TokenTypes], operator: SymbolNode, value: BaseNode):
        # The position comes from ``token``, not from the operator symbol.
        super().__init__(token.lineno, token.colno, token.filename)
        self.value = value
        self.operator = operator
class NotNode(UnaryOperatorNode):
    """Unary node the parser builds for a `not` prefix."""
class UMinusNode(UnaryOperatorNode):
    """Unary node the parser builds for a leading dash (unary minus)."""
@dataclass(unsafe_hash=True)
class CodeBlockNode(BaseNode):
    """A sequence of statement nodes plus any whitespace seen before the first one."""

    pre_whitespaces: T.Optional[WhitespaceNode] = field(hash=False)
    lines: T.List[BaseNode] = field(hash=False)

    def __init__(self, token: Token[TV_TokenTypes]):
        super().__init__(token.lineno, token.colno, token.filename)
        self.lines = []
        self.pre_whitespaces = None

    def append_whitespaces(self, token: Token) -> None:
        """Attach whitespace to the last statement, or keep it as leading whitespace."""
        if self.lines:
            self.lines[-1].append_whitespaces(token)
            return
        # No statement yet: collect the token as whitespace preceding the block.
        if self.pre_whitespaces is None:
            self.pre_whitespaces = WhitespaceNode(token)
            return
        self.pre_whitespaces.append(token)
@dataclass(unsafe_hash=True)
class IndexNode(BaseNode):
    """Index expression: indexed object, opening bracket, index, closing bracket."""

    iobject: BaseNode
    lbracket: SymbolNode
    index: BaseNode
    rbracket: SymbolNode

    def __init__(self, iobject: BaseNode, lbracket: SymbolNode, index: BaseNode, rbracket: SymbolNode):
        # Positioned at the indexed object; no explicit end position is passed.
        super().__init__(iobject.lineno, iobject.colno, iobject.filename)
        self.rbracket = rbracket
        self.index = index
        self.lbracket = lbracket
        self.iobject = iobject
@dataclass(unsafe_hash=True)
class MethodNode(BaseNode):
    """Method call: source object, dot, method name and parenthesized arguments."""

    source_object: BaseNode
    dot: SymbolNode
    name: IdNode
    lpar: SymbolNode
    args: ArgumentNode
    rpar: SymbolNode

    def __init__(self, source_object: BaseNode, dot: SymbolNode, name: IdNode, lpar: SymbolNode, args: ArgumentNode, rpar: SymbolNode):
        # Positioned at the method name (not at the source object) and ending
        # one column past the closing parenthesis.
        super().__init__(name.lineno, name.colno, name.filename,
                         end_lineno=rpar.lineno, end_colno=rpar.colno + 1)
        self.rpar = rpar
        self.args = args
        self.lpar = lpar
        self.name = name
        self.dot = dot
        self.source_object = source_object
@dataclass(unsafe_hash=True)
class FunctionNode(BaseNode):
    """Function call: function name and parenthesized arguments."""

    func_name: IdNode
    lpar: SymbolNode
    args: ArgumentNode
    rpar: SymbolNode

    def __init__(self, func_name: IdNode, lpar: SymbolNode, args: ArgumentNode, rpar: SymbolNode):
        # Positioned at the function name. The end is derived from the closing
        # parenthesis' own end position, plus one column.
        super().__init__(func_name.lineno, func_name.colno, func_name.filename,
                         end_lineno=rpar.end_lineno, end_colno=rpar.end_colno + 1)
        self.rpar = rpar
        self.args = args
        self.lpar = lpar
        self.func_name = func_name
@dataclass(unsafe_hash=True)
class AssignmentNode(BaseNode):
    """Assignment: target name, operator symbol and assigned value."""

    var_name: IdNode
    operator: SymbolNode
    value: BaseNode

    def __init__(self, var_name: IdNode, operator: SymbolNode, value: BaseNode):
        # Positioned at the assignment target.
        super().__init__(var_name.lineno, var_name.colno, var_name.filename)
        self.value = value
        self.operator = operator
        self.var_name = var_name
class PlusAssignmentNode(AssignmentNode):
    """Assignment node the parser builds for the plus-assign operator."""
@dataclass(unsafe_hash=True)
class ForeachClauseNode(BaseNode):
    """A foreach loop: loop variables, iterated items and the loop body."""

    foreach_: SymbolNode = field(hash=False)
    varnames: T.List[IdNode] = field(hash=False)
    commas: T.List[SymbolNode] = field(hash=False)
    column: SymbolNode = field(hash=False)
    items: BaseNode
    block: CodeBlockNode
    endforeach: SymbolNode = field(hash=False)

    def __init__(self, foreach_: SymbolNode, varnames: T.List[IdNode], commas: T.List[SymbolNode], column: SymbolNode, items: BaseNode, block: CodeBlockNode, endforeach: SymbolNode):
        # Positioned at the opening foreach symbol.
        super().__init__(foreach_.lineno, foreach_.colno, foreach_.filename)
        self.endforeach = endforeach
        self.block = block
        self.items = items
        self.column = column
        self.commas = commas
        self.varnames = varnames
        self.foreach_ = foreach_
@dataclass(unsafe_hash=True)
class IfNode(BaseNode):
    """One conditional branch: keyword symbol, condition and body."""

    if_: SymbolNode
    condition: BaseNode
    block: CodeBlockNode

    def __init__(self, linenode: BaseNode, if_node: SymbolNode, condition: BaseNode, block: CodeBlockNode):
        # ``linenode`` only supplies the position of this node.
        super().__init__(linenode.lineno, linenode.colno, linenode.filename)
        self.block = block
        self.condition = condition
        self.if_ = if_node
@dataclass(unsafe_hash=True)
class ElseNode(BaseNode):
    """The else branch: keyword symbol and body."""

    else_: SymbolNode
    block: CodeBlockNode

    def __init__(self, else_: SymbolNode, block: CodeBlockNode):
        # Positioned at the body block, not at the else symbol.
        super().__init__(block.lineno, block.colno, block.filename)
        self.block = block
        self.else_ = else_
@dataclass(unsafe_hash=True)
class IfClauseNode(BaseNode):
    """A whole if construct: its branches, the else part and the closing symbol.

    A new instance starts with no branches, an EmptyNode as else part and
    ``endif`` set to None; the caller is expected to fill these in.
    """

    ifs: T.List[IfNode] = field(hash=False)
    elseblock: T.Union[EmptyNode, ElseNode]
    endif: SymbolNode

    def __init__(self, linenode: BaseNode):
        position = (linenode.lineno, linenode.colno, linenode.filename)
        super().__init__(*position)
        self.ifs = []
        self.elseblock = EmptyNode(*position)
        self.endif = None
@dataclass(unsafe_hash=True)
class TestCaseClauseNode(BaseNode):
    """A testcase construct: opening symbol, condition, body and closing symbol."""

    testcase: SymbolNode
    condition: BaseNode
    block: CodeBlockNode
    endtestcase: SymbolNode

    def __init__(self, testcase: SymbolNode, condition: BaseNode, block: CodeBlockNode, endtestcase: SymbolNode):
        # Positioned at the condition, not at the opening symbol.
        super().__init__(condition.lineno, condition.colno, condition.filename)
        self.endtestcase = endtestcase
        self.block = block
        self.condition = condition
        self.testcase = testcase
@dataclass(unsafe_hash=True)
class TernaryNode(BaseNode):
    """Ternary expression: condition, question mark, true branch, colon, false branch."""

    condition: BaseNode
    questionmark: SymbolNode
    trueblock: BaseNode
    column: SymbolNode
    falseblock: BaseNode

    def __init__(self, condition: BaseNode, questionmark: SymbolNode, trueblock: BaseNode, column: SymbolNode, falseblock: BaseNode):
        # Positioned at the condition.
        super().__init__(condition.lineno, condition.colno, condition.filename)
        self.falseblock = falseblock
        self.column = column
        self.trueblock = trueblock
        self.questionmark = questionmark
        self.condition = condition
@dataclass(unsafe_hash=True)
class ParenthesizedNode(BaseNode):
    """An expression wrapped in parentheses."""

    lpar: SymbolNode = field(hash=False)
    inner: BaseNode
    rpar: SymbolNode = field(hash=False)

    def __init__(self, lpar: SymbolNode, inner: BaseNode, rpar: SymbolNode):
        # Starts at the opening parenthesis and ends one column past the closing one.
        super().__init__(lpar.lineno, lpar.colno, inner.filename,
                         end_lineno=rpar.lineno, end_colno=rpar.colno + 1)
        self.rpar = rpar
        self.inner = inner
        self.lpar = lpar
# COMPARISONS only exists for type checkers; at runtime the name is not defined
# and is used in annotations only.
if T . TYPE_CHECKING :
COMPARISONS = Literal [ ' == ' , ' != ' , ' < ' , ' <= ' , ' >= ' , ' > ' , ' in ' , ' notin ' ]
# Maps a token id to the comparison type stored in ComparisonNode.ctype.
# Parser.e4() tries the keys in this order with accept(); it additionally has a
# dedicated branch that handles a `not` token followed by an `in` token.
comparison_map : T . Mapping [ str , COMPARISONS ] = {
' equal ' : ' == ' ,
' nequal ' : ' != ' ,
' lt ' : ' < ' ,
' le ' : ' <= ' ,
' gt ' : ' > ' ,
' ge ' : ' >= ' ,
' in ' : ' in ' ,
' not in ' : ' notin ' ,
}
# Recursive descent parser for Meson's definition language.
# Very basic apart from the fact that we have many precedence
# levels so there are not enough words to describe them all.
# Enter numbering:
#
# 1 assignment
# 2 or
# 3 and
# 4 comparison
# 5 arithmetic
# 6 negation
# 7 funcall, method call
# 8 parentheses
# 9 plain token
class Parser :
def __init__(self, code: str, filename: str):
    """Set up the lexer for ``code`` and load the first token.

    ``current`` and ``previous`` start out as the same placeholder token;
    getsym() then advances ``current`` to the first real token.
    """
    self.lexer = Lexer(code)
    self.stream = self.lexer.lex(filename)
    placeholder: Token = Token(' eof ', ' ', 0, 0, 0, (0, 0), None)
    self.current = placeholder
    self.previous = placeholder
    # Whitespace tokens seen since the last node was created.
    self.current_ws: T.List[Token] = []
    self.getsym()
    self.in_ternary = False
def create_node ( self , node_type : T . Type [ BaseNodeT ] , * args : T . Any , * * kwargs : T . Any ) - > BaseNodeT :
node = node_type ( * args , * * kwargs )
for ws_token in self . current_ws :
node . append_whitespaces ( ws_token )
self . current_ws = [ ]
return node
def getsym ( self ) - > None :
self . previous = self . current
try :
self . current = next ( self . stream )
while self . current . tid in { ' eol ' , ' comment ' , ' whitespace ' } :
self . current_ws . append ( self . current )
if self . current . tid == ' eol ' :
break
self . current = next ( self . stream )
except StopIteration :
self . current = Token ( ' eof ' , ' ' , self . current . line_start , self . current . lineno , self . current . colno + self . current . bytespan [ 1 ] - self . current . bytespan [ 0 ] , ( 0 , 0 ) , None )
def getline ( self ) - > str :
return self . lexer . getline ( self . current . line_start )
def accept ( self , s : str ) - > bool :
if self . current . tid == s :
self . getsym ( )
return True
return False
typing: fix some broken Sequence annotations
T.Sequence is a questionable concept. The idea is to hammer out generic,
maximally forgiving APIs that operate on protocols, which is a fancy way
of saying "I don't care if you use tuples or lists". This is rarely
needed, actually, and in exchange for this fancy behavior you get free
bugs.
Specifically, `somestr` is of type `T.Sequence[str]`, and also
`somestr[0]` is another string of type you guessed it. It's ~~turtles~~
strings all the way down.
It's worth noting that trying to code for "protocols" is a broken
concept if the contents have semantic meaning, e.g. it operates on
"the install tags of this object" rather than "an iterable that supports
efficient element access".
The other way to use T.Sequence is "I don't like that T.List is
invariant, but also I don't like that T.Tuple makes you specify exact
ordering". This sort of works. In fact it probably does work as long as
you don't allow str in your sequences, which of course everyone allows
anyway.
Use of Sequence has cute side effects, such as actually passing lists
around, knowing that you are going to get a list and knowing that you
need to pass it on as a list, and then having to re-allocate as
`list(mylist)` "because the type annotations say it could be a str or
tuple".
Except it cannot be a str, because if it is then the application is
fatally flawed and logic errors occur to disastrous end user effects,
and the type annotations:
- do not enforce their promises of annotating types
- fail to live up to "minimal runtime penalties" due to all the `list()` calls
Shun this broken concept, by hardening the type annotations. As it turns
out, we do not actually need any of this covariance or protocol-ism for
a list of strings! The whole attempt was a slow, buggy waste of time.
2 years ago
def accept_any ( self , tids : T . Tuple [ str , . . . ] ) - > str :
tid = self . current . tid
if tid in tids :
self . getsym ( )
return tid
return ' '
def expect ( self , s : str ) - > bool :
if self . accept ( s ) :
return True
raise ParseException ( f ' Expecting { s } got { self . current . tid } . ' , self . getline ( ) , self . current . lineno , self . current . colno )
def block_expect ( self , s : str , block_start : Token ) - > bool :
if self . accept ( s ) :
return True
raise BlockParseException ( f ' Expecting { s } got { self . current . tid } . ' , self . getline ( ) , self . current . lineno , self . current . colno , self . lexer . getline ( block_start . line_start ) , block_start . lineno , block_start . colno )
def parse(self) -> CodeBlockNode:
    """Parse the whole input and return its code block.

    If input remains after the block, the ParseException raised for it gets
    the block parsed so far stored in its ``ast`` attribute and is re-raised.
    """
    block = self.codeblock()
    try:
        self.expect(' eof ')
    except ParseException as e:
        # Hand the partial result to whoever catches the error.
        e.ast = block
        raise
    else:
        return block
def statement(self) -> BaseNode:
    """Parse one statement, starting at the lowest precedence level (e1)."""
    node = self.e1()
    return node
def e1(self) -> BaseNode:
    """Parse level 1: assignment, plus-assignment and the ternary operator.

    Raises ParseException when an assignment target is not a plain id or when
    a ternary is found while another ternary is being parsed.
    """
    left = self.e2()
    # Tried in this order; both kinds are handled the same way apart from the
    # node type built and the error text.
    assignment_kinds = (
        (' plusassign ', PlusAssignmentNode, ' Plusassignment target must be an id. '),
        (' assign ', AssignmentNode, ' Assignment target must be an id. '),
    )
    for tid, node_type, complaint in assignment_kinds:
        if self.accept(tid):
            operator = self.create_node(SymbolNode, self.previous)
            # The right-hand side is parsed before the target is validated.
            value = self.e1()
            if not isinstance(left, IdNode):
                raise ParseException(complaint, self.getline(), left.lineno, left.colno)
            assert isinstance(left.value, str)
            return self.create_node(node_type, left, operator, value)
    if not self.accept(' questionmark '):
        return left
    if self.in_ternary:
        raise ParseException(' Nested ternary operators are not allowed. ',
                             self.getline(), left.lineno, left.colno)
    qm_node = self.create_node(SymbolNode, self.previous)
    # The flag stays set while both branches are parsed.
    self.in_ternary = True
    trueblock = self.e1()
    self.expect(' colon ')
    column_node = self.create_node(SymbolNode, self.previous)
    falseblock = self.e1()
    self.in_ternary = False
    return self.create_node(TernaryNode, left, qm_node, trueblock, column_node, falseblock)
def e2(self) -> BaseNode:
    """Parse level 2: a chain of `or` expressions, built left to right.

    Raises ParseException when the left-hand side of an `or` is empty.
    """
    result = self.e3()
    while self.accept(' or '):
        or_symbol = self.create_node(SymbolNode, self.previous)
        if isinstance(result, EmptyNode):
            raise ParseException(' Invalid or clause. ',
                                 self.getline(), result.lineno, result.colno)
        rhs = self.e3()
        result = self.create_node(OrNode, result, or_symbol, rhs)
    return result
def e3(self) -> BaseNode:
    """Parse level 3: a chain of `and` expressions, built left to right.

    Raises ParseException when the left-hand side of an `and` is empty.
    """
    result = self.e4()
    while self.accept(' and '):
        and_symbol = self.create_node(SymbolNode, self.previous)
        if isinstance(result, EmptyNode):
            raise ParseException(' Invalid and clause. ',
                                 self.getline(), result.lineno, result.colno)
        rhs = self.e4()
        result = self.create_node(AndNode, result, and_symbol, rhs)
    return result
# Parse level 4: a single (non-chained) comparison.
# Returns a ComparisonNode when a comparison operator follows the left operand,
# otherwise the left operand unchanged.
def e4 ( self ) - > BaseNode :
left = self . e5 ( )
# Try each token id of comparison_map in order; the first one accepted
# decides the comparison type.
for nodename , operator_type in comparison_map . items ( ) :
if self . accept ( nodename ) :
operator = self . create_node ( SymbolNode , self . previous )
return self . create_node ( ComparisonNode , operator_type , left , operator , self . e5 ( ) )
# A `not` token followed by an `in` token is merged into one operator symbol.
# NOTE(review): when `not` is accepted but `in` does not follow, the `not`
# token has already been consumed and `left` is returned as is.
if self . accept ( ' not ' ) :
# Snapshot of the whitespace pending right after `not` was consumed.
ws = self . current_ws . copy ( )
not_token = self . previous
if self . accept ( ' in ' ) :
in_token = self . previous
self . current_ws = self . current_ws [ len ( ws ) : ] # remove whitespaces between not and in
# A throwaway node is used only to join the snapshot tokens into one string.
temp_node = EmptyNode ( in_token . lineno , in_token . colno , in_token . filename )
for w in ws :
temp_node . append_whitespaces ( w )
# Extend the `not` token in place so it covers everything up to the end of `in`.
not_token . bytespan = ( not_token . bytespan [ 0 ] , in_token . bytespan [ 1 ] )
not_token . value + = temp_node . whitespaces . value + in_token . value
operator = self . create_node ( SymbolNode , not_token )
return self . create_node ( ComparisonNode , ' notin ' , left , operator , self . e5 ( ) )
return left
def e5(self) -> BaseNode:
    """Parse level 5 (arithmetic); starts with addition and subtraction."""
    node = self.e5addsub()
    return node
def e5addsub(self) -> BaseNode:
    """Parse a left-associative chain of plus and dash operations."""
    # Token id -> operation name stored on the ArithmeticNode.
    operations = {
        ' plus ': ' add ',
        ' dash ': ' sub ',
    }
    accepted_tids = tuple(operations)
    result = self.e5muldiv()
    op = self.accept_any(accepted_tids)
    while op:
        operator = self.create_node(SymbolNode, self.previous)
        rhs = self.e5muldiv()
        result = self.create_node(ArithmeticNode, operations[op], result, operator, rhs)
        op = self.accept_any(accepted_tids)
    return result
def e5muldiv(self) -> BaseNode:
    """Parse a left-associative chain of percent, star and slash operations."""
    # Token id -> operation name stored on the ArithmeticNode.
    operations = {
        ' percent ': ' mod ',
        ' star ': ' mul ',
        ' fslash ': ' div ',
    }
    accepted_tids = tuple(operations)
    result = self.e6()
    op = self.accept_any(accepted_tids)
    while op:
        operator = self.create_node(SymbolNode, self.previous)
        rhs = self.e6()
        result = self.create_node(ArithmeticNode, operations[op], result, operator, rhs)
        op = self.accept_any(accepted_tids)
    return result
def e6(self) -> BaseNode:
    """Parse level 6: an optional `not` or dash prefix in front of a level-7 expression."""
    prefixes = (
        (' not ', NotNode),
        (' dash ', UMinusNode),
    )
    for tid, node_type in prefixes:
        if self.accept(tid):
            operator = self.create_node(SymbolNode, self.previous)
            # The node takes its position from the token after the prefix,
            # captured before the operand is parsed.
            following = self.current
            operand = self.e7()
            return self.create_node(node_type, following, operator, operand)
    return self.e7()
def e7(self) -> BaseNode:
    """Parse level 7: function calls, then any chain of method calls and indexing.

    Raises ParseException when a call is applied to something other than a
    plain id; an unclosed argument list is reported by block_expect().
    """
    left = self.e8()
    block_start = self.current
    if self.accept(' lparen '):
        lpar = self.create_node(SymbolNode, block_start)
        args = self.args()
        self.block_expect(' rparen ', block_start)
        rpar = self.create_node(SymbolNode, self.previous)
        # The callee is validated only after the arguments were parsed.
        if not isinstance(left, IdNode):
            raise ParseException(' Function call must be applied to plain id ',
                                 self.getline(), left.lineno, left.colno)
        assert isinstance(left.value, str)
        left = self.create_node(FunctionNode, left, lpar, args, rpar)
    # Each pass may consume one method call followed by one index access;
    # stop after the first pass that consumes neither.
    while True:
        saw_dot = self.accept(' dot ')
        if saw_dot:
            left = self.method_call(left)
        saw_bracket = self.accept(' lbracket ')
        if saw_bracket:
            left = self.index_call(left)
        if not (saw_dot or saw_bracket):
            break
    return left
def e8(self) -> BaseNode:
    """Parse level 8: parenthesized expressions, array literals and dict literals.

    Anything else is passed on to e9(). A missing closing token is reported
    by block_expect() together with the position of the opening token.
    """
    opening = self.current
    if self.accept(' lparen '):
        lpar = self.create_node(SymbolNode, opening)
        inner = self.statement()
        self.block_expect(' rparen ', opening)
        rpar = self.create_node(SymbolNode, self.previous)
        # Built directly rather than through create_node().
        return ParenthesizedNode(lpar, inner, rpar)
    if self.accept(' lbracket '):
        lbracket = self.create_node(SymbolNode, opening)
        elements = self.args()
        self.block_expect(' rbracket ', opening)
        rbracket = self.create_node(SymbolNode, self.previous)
        return self.create_node(ArrayNode, lbracket, elements, rbracket)
    if self.accept(' lcurl '):
        lcurl = self.create_node(SymbolNode, opening)
        entries = self.key_values()
        self.block_expect(' rcurl ', opening)
        rcurl = self.create_node(SymbolNode, self.previous)
        return self.create_node(DictNode, lcurl, entries, rcurl)
    return self.e9()
def e9(self) -> BaseNode:
    """Parse a single terminal: a boolean, identifier, number or string.

    Token kinds are tried in a fixed order.  The first one accepted decides
    the node class, and the node is built from the token that was current
    on entry.  If none is accepted, an ``EmptyNode`` located at the current
    token is returned.
    """
    tok = self.current

    # Boolean keywords: the token's value is overwritten with the Python
    # bool before the node is created from it.
    for tid, flag in ((' true ', True), (' false ', False)):
        if self.accept(tid):
            tok.value = flag
            return self.create_node(BooleanNode, tok)

    # The remaining terminals map one-to-one onto a node class.
    terminals = (
        (' id ', IdNode),
        (' number ', NumberNode),
        (' string ', StringNode),
        (' fstring ', FormatStringNode),
        (' multiline_string ', MultilineStringNode),
        (' multiline_fstring ', MultilineFormatStringNode),
    )
    for tid, node_cls in terminals:
        if self.accept(tid):
            return self.create_node(node_cls, tok)

    here = self.current
    return EmptyNode(here.lineno, here.colno, here.filename)
def key_values(self) -> ArgumentNode:
    """Parse a comma-separated list of ``key : value`` pairs.

    Parsing stops at the first empty statement, or after a pair that is not
    followed by a comma.  Colon and comma symbols are recorded on the
    returned ``ArgumentNode``.

    Raises:
        ParseException: if a non-empty entry is not followed by a colon.
    """
    key = self.statement()
    pairs = self.create_node(ArgumentNode, self.current)

    while not isinstance(key, EmptyNode):
        if not self.accept(' colon '):
            raise ParseException(' Only key:value pairs are valid in dict construction. ',
                                 self.getline(), key.lineno, key.colno)
        pairs.columns.append(self.create_node(SymbolNode, self.previous))
        pairs.set_kwarg_no_check(key, self.statement())
        if not self.accept(' comma '):
            break
        pairs.commas.append(self.create_node(SymbolNode, self.previous))
        key = self.statement()

    return pairs
def args(self) -> ArgumentNode:
    """Parse a comma-separated argument list.

    Each entry is a statement.  A colon after an entry makes it the key of
    a keyword argument whose value is the next statement.  Parsing stops at
    the first empty statement, or after an entry not followed by a comma.

    Raises:
        ParseException: if the key of a keyword argument is not an
            ``IdNode``.
    """
    item = self.statement()
    arglist = self.create_node(ArgumentNode, self.current)

    while not isinstance(item, EmptyNode):
        if self.accept(' comma '):
            # Positional argument followed by more entries.
            arglist.commas.append(self.create_node(SymbolNode, self.previous))
            arglist.append(item)
        elif self.accept(' colon '):
            # Keyword argument: ``item`` is the key.
            arglist.columns.append(self.create_node(SymbolNode, self.previous))
            if not isinstance(item, IdNode):
                raise ParseException(' Dictionary key must be a plain identifier. ',
                                     self.getline(), item.lineno, item.colno)
            arglist.set_kwarg(item, self.statement())
            if not self.accept(' comma '):
                break
            arglist.commas.append(self.create_node(SymbolNode, self.previous))
        else:
            # Final positional argument, with no trailing comma.
            arglist.append(item)
            break
        item = self.statement()

    return arglist
def method_call(self, source_object: BaseNode) -> MethodNode:
    """Parse one or more chained method calls applied to ``source_object``.

    Called after the dot has been consumed.  Each call parsed becomes the
    receiver of the next one for as long as another dot follows.

    Raises:
        ParseException: if the token after the dot is not an identifier.
            A dedicated message is used when both the receiver and that
            token are numbers.
    """
    receiver = source_object
    while True:
        dot = self.create_node(SymbolNode, self.previous)
        name = self.e9()
        if not isinstance(name, IdNode):
            number_dot_number = isinstance(receiver, NumberNode) and isinstance(name, NumberNode)
            if number_dot_number:
                raise ParseException(' meson does not support float numbers ',
                                     self.getline(), receiver.lineno, receiver.colno)
            raise ParseException(' Method name must be plain id ',
                                 self.getline(), self.current.lineno, self.current.colno)
        assert isinstance(name.value, str)

        self.expect(' lparen ')
        lpar = self.create_node(SymbolNode, self.previous)
        call_args = self.args()
        # The closing symbol is captured from the current token before
        # expect() checks it.
        rpar = self.create_node(SymbolNode, self.current)
        self.expect(' rparen ')

        call = self.create_node(MethodNode, receiver, dot, name, lpar, call_args, rpar)
        if not self.accept(' dot '):
            return call
        receiver = call
def index_call(self, source_object: BaseNode) -> IndexNode:
    """Parse an index operation applied to ``source_object``.

    Called after the left bracket has been consumed.  The index is a single
    statement followed by the closing bracket.
    """
    open_sym = self.create_node(SymbolNode, self.previous)
    subscript = self.statement()
    self.expect(' rbracket ')
    close_sym = self.create_node(SymbolNode, self.previous)
    return self.create_node(IndexNode, source_object, open_sym, subscript, close_sym)
def foreachblock(self) -> ForeachClauseNode:
    """Parse the header and body of a foreach loop.

    Called after the foreach keyword has been consumed.  It reads one or
    two loop variable names, a colon, the iterated statement and the body.
    The closing symbol is built from the current token without consuming
    it.
    """
    keyword = self.create_node(SymbolNode, self.previous)

    self.expect(' id ')
    assert isinstance(self.previous.value, str)
    loop_vars = [self.create_node(IdNode, self.previous)]
    separators = []

    # An optional second loop variable, separated by a comma.
    if self.accept(' comma '):
        separators.append(self.create_node(SymbolNode, self.previous))
        self.expect(' id ')
        assert isinstance(self.previous.value, str)
        loop_vars.append(self.create_node(IdNode, self.previous))

    self.expect(' colon ')
    colon = self.create_node(SymbolNode, self.previous)
    iterable = self.statement()
    body = self.codeblock()
    closing = self.create_node(SymbolNode, self.current)
    return self.create_node(ForeachClauseNode, keyword, loop_vars, separators, colon, iterable, body, closing)
def ifblock(self) -> IfClauseNode:
    """Parse an if clause with its optional elif and else branches.

    Called after the if keyword has been consumed.  The closing symbol is
    built from the current token without consuming it.
    """
    keyword = self.create_node(SymbolNode, self.previous)
    cond = self.statement()
    result = self.create_node(IfClauseNode, cond)
    self.expect(' eol ')
    body = self.codeblock()

    first_branch = self.create_node(IfNode, result, keyword, cond, body)
    result.ifs.append(first_branch)

    self.elseifblock(result)
    result.elseblock = self.elseblock()
    result.endif = self.create_node(SymbolNode, self.current)
    return result
def elseifblock(self, clause: IfClauseNode) -> None:
    """Parse zero or more elif branches and append them to ``clause.ifs``."""
    while True:
        if not self.accept(' elif '):
            return
        keyword = self.create_node(SymbolNode, self.previous)
        cond = self.statement()
        self.expect(' eol ')
        body = self.codeblock()
        # The condition is passed as both the first and the third argument.
        clause.ifs.append(self.create_node(IfNode, cond, keyword, cond, body))
def elseblock(self) -> T.Union[ElseNode, EmptyNode]:
    """Parse an optional else branch.

    Returns an ``ElseNode`` when the else keyword is present.  Otherwise
    returns an ``EmptyNode`` located at the current token.
    """
    if not self.accept(' else '):
        here = self.current
        return EmptyNode(here.lineno, here.colno, here.filename)

    keyword = self.create_node(SymbolNode, self.previous)
    self.expect(' eol ')
    body = self.codeblock()
    # Constructed directly rather than through create_node.
    return ElseNode(keyword, body)
def testcaseblock(self) -> TestCaseClauseNode:
    """Parse the condition and body of a testcase block.

    Called after the testcase keyword has been consumed.  The closing
    symbol is built from the current token without consuming it.
    """
    keyword = self.create_node(SymbolNode, self.previous)
    cond = self.statement()
    self.expect(' eol ')
    body = self.codeblock()
    # NOTE(review): unlike the other block parsers, this closing symbol is
    # constructed directly instead of through create_node.
    closing = SymbolNode(self.current)
    return self.create_node(TestCaseClauseNode, keyword, cond, body, closing)
def line(self) -> BaseNode:
    """Parse one line: a block construct, a loop control word or a statement.

    Returns an ``EmptyNode`` when the current token is an end-of-line.  For
    if, foreach and testcase blocks, the block is parsed and its closing
    keyword is then required.  The testcase form is only tried when
    ``self.lexer.in_unit_test`` is set.
    """
    opening = self.current
    if self.current == ' eol ':
        return EmptyNode(self.current.lineno, self.current.colno, self.current.filename)

    if self.accept(' if '):
        if_clause = self.ifblock()
        self.block_expect(' endif ', opening)
        return if_clause

    if self.accept(' foreach '):
        foreach_clause = self.foreachblock()
        self.block_expect(' endforeach ', opening)
        return foreach_clause

    # Loop control keywords; the node is built from the token that follows
    # the keyword.
    for tid, node_cls in ((' continue ', ContinueNode), (' break ', BreakNode)):
        if self.accept(tid):
            return self.create_node(node_cls, self.current)

    if self.lexer.in_unit_test:
        if self.accept(' testcase '):
            testcase_clause = self.testcaseblock()
            self.block_expect(' endtestcase ', opening)
            return testcase_clause

    return self.statement()
def codeblock(self) -> CodeBlockNode:
    """Parse consecutive lines into a ``CodeBlockNode``.

    Lines are read for as long as each is followed by an end-of-line token.
    Empty lines are not stored.  If a ``ParseException`` escapes, the block
    built so far is attached to it as ``ast`` before it is re-raised.
    """
    block = self.create_node(CodeBlockNode, self.current)

    def absorb_pending_whitespace() -> None:
        # Move the whitespace tokens collected so far onto the block.
        for ws_token in self.current_ws:
            block.append_whitespaces(ws_token)
        self.current_ws = []

    try:
        while True:
            absorb_pending_whitespace()
            parsed = self.line()
            if not isinstance(parsed, EmptyNode):
                block.lines.append(parsed)
            if not self.accept(' eol '):
                break
    except ParseException as e:
        e.ast = block
        raise

    # No further nodes will be created for this block, so any remaining
    # whitespace has to be collected here.
    absorb_pending_whitespace()
    return block