cargo/cfg: Add a parser for the rust/cargo cfg() expressions

This uses a recursive descent parser + lexer to create an IR from cfg()
expressions, which it then converts into meson IR.
pull/11733/head
Dylan Baker 3 years ago
parent 5e59e5a9e4
commit 4017dab484
  1. 276
      mesonbuild/cargo/cfg.py
  2. 2
      run_unittests.py
  3. 125
      unittests/cargotests.py

@ -0,0 +1,276 @@
# SPDX-License-Identifier: Apache-2.0
# Copyright © 2022-2023 Intel Corporation
"""Rust CFG parser.
Rust uses its `cfg()` format in cargo.
This may have the following functions:
- all()
- any()
- not()
Expressions are additionally made up of `identifier [ = str]`, where the `= str`
part is optional, so you could have examples like:
```
[target.'cfg(unix)'.dependencies]
[target.'cfg(target_arch = "x86_64")'.dependencies]
[target.'cfg(all(target_arch = "x86_64", target_arch = "x86"))'.dependencies]
```
"""
from __future__ import annotations
import dataclasses
import enum
import functools
import typing as T
from . import builder
from .. import mparser
from ..mesonlib import MesonBugException
if T.TYPE_CHECKING:
    # These aliases are only referenced from annotations in this file, so they
    # are only defined for the type checker.

    # Generic item type used by lookahead()
    _T = T.TypeVar('_T')
    # A single lexer token: its type and its text (None when the token carries no text)
    _LEX_TOKEN = T.Tuple['TokenType', T.Optional[str]]
    # The token stream produced by lexer()
    _LEX_STREAM = T.Iterable[_LEX_TOKEN]
    # A token stream wrapped by lookahead(): (current token, next token or None)
    _LEX_STREAM_AH = T.Iterator[T.Tuple[_LEX_TOKEN, T.Optional[_LEX_TOKEN]]]
class TokenType(enum.Enum):

    """The kinds of token that lexer() produces."""

    LPAREN = enum.auto()      # `(`
    RPAREN = enum.auto()      # `)`
    STRING = enum.auto()      # double-quoted text; the token carries the text without the quotes
    IDENTIFIER = enum.auto()  # any other bare word; the token carries the word
    ALL = enum.auto()         # the bare word `all`
    ANY = enum.auto()         # the bare word `any`
    NOT = enum.auto()         # the bare word `not`
    COMMA = enum.auto()       # `,`
    EQUAL = enum.auto()       # `=`
def lexer(raw: str) -> _LEX_STREAM:
    """Lex a cfg() expression.

    Everything between a pair of double quotes is taken verbatim as a single
    STRING token, including whitespace and the characters `( ) , =`, and an
    empty pair of quotes produces an empty STRING token. Outside of quotes,
    whitespace separates tokens and is otherwise dropped.

    :param raw: The raw cfg() expression
    :return: An iterable of tokens
    """
    punctuation = {
        '(': TokenType.LPAREN,
        ')': TokenType.RPAREN,
        ',': TokenType.COMMA,
        '=': TokenType.EQUAL,
    }
    keywords = {
        'any': TokenType.ANY,
        'all': TokenType.ALL,
        'not': TokenType.NOT,
    }

    buffer: T.List[str] = []
    is_string: bool = False

    for s in raw:
        if is_string:
            # Inside quotes only the closing quote is special; everything
            # else, including separators, is part of the string.
            if s == '"':
                yield (TokenType.STRING, ''.join(buffer))
                buffer.clear()
                is_string = False
            else:
                buffer.append(s)
            continue

        if s == '"' or s.isspace() or s in punctuation:
            # A separator ends whatever bare word was being collected
            val = ''.join(buffer)
            buffer.clear()
            if val in keywords:
                yield (keywords[val], None)
            elif val:
                yield (TokenType.IDENTIFIER, val)

            if s == '"':
                is_string = True
            elif s in punctuation:
                yield (punctuation[s], None)
            continue

        buffer.append(s)

    if buffer:
        # Trailing text with nothing after it to terminate it. This is
        # normally an identifier; the text of an unterminated string also ends
        # up here and is emitted as an identifier, as it was before.
        yield (TokenType.IDENTIFIER, ''.join(buffer))
def lookahead(iter: T.Iterator[_T]) -> T.Iterator[T.Tuple[_T, T.Optional[_T]]]:
    """Get the current value of the iterable, and the next if possible.

    The underlying iterator is always read one item ahead of the pair being
    yielded. For the final item the second element of the pair is None. Items
    that are themselves None are passed through like any other item and do not
    end the iteration early.

    :param iter: The iterable to look into
    :yield: A tuple of the current value, and, if possible, the next
    :return: nothing
    """
    try:
        current = next(iter)
    except StopIteration:
        # This is an empty iterator, there's nothing to look ahead to
        return

    for following in iter:
        yield current, following
        current = following

    # The last item has nothing after it
    yield current, None
@dataclasses.dataclass
class IR:

    """Base IR node for Cargo CFG."""

    # The file name given to parse(); ir_to_meson() hands it on to the builder helpers
    filename: str
@dataclasses.dataclass
class String(IR):

    """IR node for a STRING token."""

    # The text of the string, without the surrounding quotes
    value: str
@dataclasses.dataclass
class Identifier(IR):

    """IR node for an IDENTIFIER token, such as `target_arch`."""

    # The identifier's name
    value: str
@dataclasses.dataclass
class Equal(IR):

    """IR node for an `lhs = rhs` comparison."""

    # Left hand side; _parse() puts the Identifier here
    lhs: IR
    # Right hand side; the value the identifier is compared against
    rhs: IR
@dataclasses.dataclass
class Any(IR):

    """IR node for `any(...)`; ir_to_meson() joins the arguments with builder.or_()."""

    # The parsed arguments, in source order
    args: T.List[IR]
@dataclasses.dataclass
class All(IR):

    """IR node for `all(...)`; ir_to_meson() joins the arguments with builder.and_()."""

    # The parsed arguments, in source order
    args: T.List[IR]
@dataclasses.dataclass
class Not(IR):

    """IR node for `not(...)`."""

    # The parsed expression being negated
    value: IR
def _advance(ast: _LEX_STREAM_AH) -> T.Tuple[_LEX_TOKEN, T.Optional[_LEX_TOKEN]]:
    """Pop the next (token, lookahead) pair, failing cleanly at the end of input."""
    try:
        return next(ast)
    except StopIteration:
        raise MesonBugException('Unexpected end of Cargo cfg() expression') from None


def _parse_args(ast: _LEX_STREAM_AH, ntoken: T.Optional[TokenType], filename: str) -> T.List[IR]:
    """Parse a parenthesized, comma separated list of expressions.

    :param ast: The token stream, positioned just before the opening paren
    :param ntoken: The type of the upcoming token, which must be LPAREN
    :param filename: The name of the file being parsed
    :return: The parsed arguments, in source order
    :raises MesonBugException: if the list is malformed or truncated
    """
    if ntoken is not TokenType.LPAREN:
        raise MesonBugException(f'Expected "(" in Cargo cfg() expression, but got: {ntoken}')

    # Consume the LPAREN; `peek` is then the first token inside the parens
    _, peek = _advance(ast)

    args: T.List[IR] = []
    while True:
        if peek is not None and peek[0] is TokenType.RPAREN:
            # Either an empty list, or a trailing comma before the paren
            _advance(ast)
            return args

        # Each argument is a complete expression in its own right, so nested
        # any()/all()/not() consume their own parentheses here.
        args.append(_parse(ast, filename))

        (sep, _), peek = _advance(ast)
        if sep is TokenType.RPAREN:
            return args
        if sep is not TokenType.COMMA:
            raise MesonBugException(f'Expected "," or ")" in Cargo cfg() expression, but got: {sep}')


def _parse(ast: _LEX_STREAM_AH, filename: str) -> IR:
    """Parse one complete expression from the front of the token stream.

    Exactly the tokens that make up the expression are consumed; anything
    after it is left in the stream.

    :param ast: The token stream, as produced by lookahead()
    :param filename: The name of the file being parsed
    :return: The IR node for the expression
    :raises MesonBugException: if the tokens do not form a valid expression
    """
    (token, value), n_stream = _advance(ast)
    ntoken = n_stream[0] if n_stream is not None else None

    if token is TokenType.IDENTIFIER:
        ident = Identifier(filename, value)
        if ntoken is not TokenType.EQUAL:
            # A bare identifier, such as `unix`
            return ident
        _advance(ast)  # advance the iterator to get rid of the EQUAL
        (rtoken, rvalue), _ = _advance(ast)
        if rtoken is not TokenType.STRING:
            raise MesonBugException(f'Expected a string after "=" in Cargo cfg() expression, but got: {rtoken}')
        return Equal(filename, ident, String(filename, rvalue))

    if token is TokenType.STRING:
        return String(filename, value)

    if token in {TokenType.ANY, TokenType.ALL}:
        type_ = All if token is TokenType.ALL else Any
        return type_(filename, _parse_args(ast, ntoken, filename))

    if token is TokenType.NOT:
        args = _parse_args(ast, ntoken, filename)
        if len(args) != 1:
            raise MesonBugException(f'Cargo cfg not() takes exactly one argument, but got {len(args)}')
        return Not(filename, args[0])

    raise MesonBugException(f'Unhandled Cargo token: {token}')
def parse(ast: _LEX_STREAM, filename: str) -> IR:
    """Parse a stream of tokens into cfg IR.

    The tokens are wrapped with lookahead() so that the parser can see one
    token past the current one.

    :param ast: An iterable of Tokens
    :param filename: The name of the file being parsed
    :return: The IR node for the parsed expression
    """
    return _parse(lookahead(iter(ast)), filename)
@functools.singledispatch
def ir_to_meson(ir: T.Any) -> mparser.BaseNode:
    """Convert a cfg IR node into Meson AST.

    This is the fallback used when no conversion has been registered for the
    type of `ir`; the per-node conversions are registered separately.

    :param ir: The IR node to convert
    :return: The equivalent Meson AST node
    :raises NotImplementedError: if the type of `ir` has no registered conversion
    """
    raise NotImplementedError(
        f'No Meson conversion registered for Cargo cfg IR node of type {type(ir).__name__}')
@ir_to_meson.register
def _(ir: String) -> mparser.BaseNode:
    """Convert a String node using builder.string()."""
    text, origin = ir.value, ir.filename
    return builder.string(text, origin)
@ir_to_meson.register
def _(ir: Identifier) -> mparser.BaseNode:
    """Convert an Identifier node into a method call on `host_machine`.

    :raises MesonBugException: if the identifier is not one of the handled names
    """
    # cfg identifier -> name of the host_machine method that replaces it
    methods = {
        "target_arch": 'cpu_family',
        "target_os": 'system',
        "target_family": 'system',
        "target_endian": 'endian',
    }
    host_machine = builder.identifier('host_machine', ir.filename)
    method_name = methods.get(ir.value)
    if method_name is None:
        raise MesonBugException(f"Unhandled Cargo identifier: {ir.value}")
    return builder.method(method_name, host_machine)
@ir_to_meson.register
def _(ir: Equal) -> mparser.BaseNode:
    """Convert both sides of an Equal node and join them with builder.equal()."""
    left = ir_to_meson(ir.lhs)
    right = ir_to_meson(ir.rhs)
    return builder.equal(left, right)
@ir_to_meson.register
def _(ir: Not) -> mparser.BaseNode:
    """Convert the operand of a Not node and wrap it with builder.not_()."""
    operand = ir_to_meson(ir.value)
    return builder.not_(operand, ir.filename)
@ir_to_meson.register
def _(ir: Any) -> mparser.BaseNode:
    """Convert an Any node into a chain of builder.or_() nodes.

    The arguments are folded from the right, so `any(a, b, c)` becomes
    `or_(a, or_(b, c))`. A single argument is converted on its own, with no
    `or` wrapped around it.

    :raises MesonBugException: if the node has no arguments
    """
    if not ir.args:
        # NOTE(review): an empty any() is valid cfg syntax, but there is no
        # obvious node to emit for it from here, so fail with a clear message.
        raise MesonBugException('Cargo cfg any() with no arguments is not supported')

    args = reversed(ir.args)
    cur = ir_to_meson(next(args))
    for a in args:
        cur = builder.or_(ir_to_meson(a), cur)
    return cur
@ir_to_meson.register
def _(ir: All) -> mparser.BaseNode:
    """Convert an All node into a chain of builder.and_() nodes.

    The arguments are folded from the right, so `all(a, b, c)` becomes
    `and_(a, and_(b, c))`. A single argument is converted on its own, with no
    `and` wrapped around it.

    :raises MesonBugException: if the node has no arguments
    """
    if not ir.args:
        # NOTE(review): an empty all() is valid cfg syntax, but there is no
        # obvious node to emit for it from here, so fail with a clear message.
        raise MesonBugException('Cargo cfg all() with no arguments is not supported')

    args = reversed(ir.args)
    cur = ir_to_meson(next(args))
    for a in args:
        cur = builder.and_(ir_to_meson(a), cur)
    return cur

@ -36,7 +36,7 @@ from mesonbuild.mesonlib import python_command, setup_vsenv
import mesonbuild.modules.pkgconfig
from unittests.allplatformstests import AllPlatformTests
from unittests.cargotests import CargoVersionTest
from unittests.cargotests import CargoVersionTest, CargoCfgTest
from unittests.darwintests import DarwinTests
from unittests.failuretests import FailureTests
from unittests.linuxcrosstests import LinuxCrossArmTests, LinuxCrossMingwTests

@ -5,6 +5,8 @@ from __future__ import annotations
import unittest
import typing as T
from mesonbuild.cargo import builder, cfg
from mesonbuild.cargo.cfg import TokenType
from mesonbuild.cargo.version import convert
@ -59,3 +61,126 @@ class CargoVersionTest(unittest.TestCase):
with self.subTest():
self.assertListEqual(convert(data), expected)
class CargoCfgTest(unittest.TestCase):

    """Tests for the lexer, the parser and the Meson conversion in mesonbuild.cargo.cfg."""

    def test_lex(self) -> None:
        """lexer() produces the expected tokens for each expression."""
        cases: T.List[T.Tuple[str, T.List[T.Tuple[TokenType, T.Optional[str]]]]] = [
            ('"unix"', [(TokenType.STRING, 'unix')]),
            ('unix', [(TokenType.IDENTIFIER, 'unix')]),
            ('not(unix)', [
                (TokenType.NOT, None),
                (TokenType.LPAREN, None),
                (TokenType.IDENTIFIER, 'unix'),
                (TokenType.RPAREN, None),
            ]),
            ('any(unix, windows)', [
                (TokenType.ANY, None),
                (TokenType.LPAREN, None),
                (TokenType.IDENTIFIER, 'unix'),
                (TokenType.COMMA, None),
                (TokenType.IDENTIFIER, 'windows'),
                (TokenType.RPAREN, None),
            ]),
            ('target_arch = "x86_64"', [
                (TokenType.IDENTIFIER, 'target_arch'),
                (TokenType.EQUAL, None),
                (TokenType.STRING, 'x86_64'),
            ]),
            ('all(target_arch = "x86_64", unix)', [
                (TokenType.ALL, None),
                (TokenType.LPAREN, None),
                (TokenType.IDENTIFIER, 'target_arch'),
                (TokenType.EQUAL, None),
                (TokenType.STRING, 'x86_64'),
                (TokenType.COMMA, None),
                (TokenType.IDENTIFIER, 'unix'),
                (TokenType.RPAREN, None),
            ]),
        ]
        for data, expected in cases:
            # Label the subtest so that a failure reports which expression broke
            with self.subTest(data=data):
                self.assertListEqual(list(cfg.lexer(data)), expected)

    def test_parse(self) -> None:
        """parse() builds the expected IR for each expression."""
        cases = [
            ('target_os = "windows"', cfg.Equal('', cfg.Identifier('', "target_os"), cfg.String('', "windows"))),
            ('target_arch = "x86"', cfg.Equal('', cfg.Identifier('', "target_arch"), cfg.String('', "x86"))),
            ('target_family = "unix"', cfg.Equal('', cfg.Identifier('', "target_family"), cfg.String('', "unix"))),
            ('any(target_arch = "x86", target_arch = "x86_64")',
                cfg.Any(
                    '', [
                        cfg.Equal('', cfg.Identifier('', "target_arch"), cfg.String('', "x86")),
                        cfg.Equal('', cfg.Identifier('', "target_arch"), cfg.String('', "x86_64")),
                    ])),
            ('all(target_arch = "x86", target_os = "linux")',
                cfg.All(
                    '', [
                        cfg.Equal('', cfg.Identifier('', "target_arch"), cfg.String('', "x86")),
                        cfg.Equal('', cfg.Identifier('', "target_os"), cfg.String('', "linux")),
                    ])),
            ('not(all(target_arch = "x86", target_os = "linux"))',
                cfg.Not(
                    '',
                    cfg.All(
                        '', [
                            cfg.Equal('', cfg.Identifier('', "target_arch"), cfg.String('', "x86")),
                            cfg.Equal('', cfg.Identifier('', "target_os"), cfg.String('', "linux")),
                        ]))),
        ]
        for data, expected in cases:
            # Label the subtest so that a failure reports which expression broke
            with self.subTest(data=data):
                self.assertEqual(cfg.parse(iter(cfg.lexer(data)), ''), expected)

    def test_ir_to_meson(self) -> None:
        """ir_to_meson() builds the expected Meson AST for each expression."""
        HOST_MACHINE = builder.identifier('host_machine', '')

        cases = [
            ('target_os = "windows"',
             builder.equal(builder.method('system', HOST_MACHINE),
                           builder.string('windows', ''))),
            ('target_arch = "x86"',
             builder.equal(builder.method('cpu_family', HOST_MACHINE),
                           builder.string('x86', ''))),
            ('target_family = "unix"',
             builder.equal(builder.method('system', HOST_MACHINE),
                           builder.string('unix', ''))),
            ('not(target_arch = "x86")',
             builder.not_(builder.equal(
                 builder.method('cpu_family', HOST_MACHINE),
                 builder.string('x86', '')), '')),
            ('any(target_arch = "x86", target_arch = "x86_64")',
             builder.or_(
                 builder.equal(builder.method('cpu_family', HOST_MACHINE),
                               builder.string('x86', '')),
                 builder.equal(builder.method('cpu_family', HOST_MACHINE),
                               builder.string('x86_64', '')))),
            ('any(target_arch = "x86", target_arch = "x86_64", target_arch = "aarch64")',
             builder.or_(
                 builder.equal(builder.method('cpu_family', HOST_MACHINE),
                               builder.string('x86', '')),
                 builder.or_(
                     builder.equal(builder.method('cpu_family', HOST_MACHINE),
                                   builder.string('x86_64', '')),
                     builder.equal(builder.method('cpu_family', HOST_MACHINE),
                                   builder.string('aarch64', ''))))),
            ('all(target_arch = "x86", target_arch = "x86_64")',
             builder.and_(
                 builder.equal(builder.method('cpu_family', HOST_MACHINE),
                               builder.string('x86', '')),
                 builder.equal(builder.method('cpu_family', HOST_MACHINE),
                               builder.string('x86_64', '')))),
            ('all(target_arch = "x86", target_arch = "x86_64", target_arch = "aarch64")',
             builder.and_(
                 builder.equal(builder.method('cpu_family', HOST_MACHINE),
                               builder.string('x86', '')),
                 builder.and_(
                     builder.equal(builder.method('cpu_family', HOST_MACHINE),
                                   builder.string('x86_64', '')),
                     builder.equal(builder.method('cpu_family', HOST_MACHINE),
                                   builder.string('aarch64', ''))))),
        ]
        for data, expected in cases:
            # Label the subtest so that a failure reports which expression broke
            with self.subTest(data=data):
                value = cfg.ir_to_meson(cfg.parse(iter(cfg.lexer(data)), ''))
                self.assertEqual(value, expected)

Loading…
Cancel
Save