|
|
|
@ -14,6 +14,7 @@ |
|
|
|
|
|
|
|
|
|
import enum |
|
|
|
|
import os.path |
|
|
|
|
import string |
|
|
|
|
import typing as T |
|
|
|
|
|
|
|
|
|
from .. import coredata |
|
|
|
@ -45,7 +46,131 @@ class CudaCompiler(Compiler): |
|
|
|
|
# Prefix NVCC uses to hand a single option down to the host linker.
LINKER_PREFIX = '-Xlinker='

# Language identifier of this compiler class.
language = 'cuda'

# Flag prefixes, per build phase, that are listed as understood without
# translation in that phase.
_universal_flags = {
    _Phase.COMPILER: ['-I', '-D', '-U', '-E'],
    _Phase.LINKER: ['-l', '-L'],
}  # type: T.Dict[_Phase, T.List[str]]
|
|
|
|
# NVCC flags taking no arguments. A flag found in this set is forwarded to
# NVCC verbatim. Both the --long and the -short spelling are listed; the
# trailing comment on each row is the section of the CUDA Toolkit 11.2.1
# NVCC reference that documents the option.
_FLAG_PASSTHRU_NOARGS = {
    # NVCC --long-option,                   NVCC -short-option              CUDA Toolkit 11.2.1 Reference
    '--objdir-as-tempdir',                  '-objtemp',                     # 4.2.1.2
    '--generate-dependency-targets',        '-MP',                          # 4.2.1.12
    '--allow-unsupported-compiler',         '-allow-unsupported-compiler',  # 4.2.1.14 (also 4.2.5.18)
    '--link',                                                               # 4.2.2.1
    '--lib',                                '-lib',                         # 4.2.2.2
    '--device-link',                        '-dlink',                       # 4.2.2.3
    '--device-c',                           '-dc',                          # 4.2.2.4
    '--device-w',                           '-dw',                          # 4.2.2.5
    '--cuda',                               '-cuda',                        # 4.2.2.6
    '--compile',                            '-c',                           # 4.2.2.7
    '--fatbin',                             '-fatbin',                      # 4.2.2.8
    '--cubin',                              '-cubin',                       # 4.2.2.9
    '--ptx',                                '-ptx',                         # 4.2.2.10
    '--preprocess',                         '-E',                           # 4.2.2.11
    '--generate-dependencies',              '-M',                           # 4.2.2.12
    '--generate-nonsystem-dependencies',    '-MM',                          # 4.2.2.13
    '--generate-dependencies-with-compile', '-MD',                          # 4.2.2.14
    '--generate-nonsystem-dependencies-with-compile', '-MMD',               # 4.2.2.15
    '--run',                                                                # 4.2.2.16
    '--profile',                            '-pg',                          # 4.2.3.1
    '--debug',                              '-g',                           # 4.2.3.2
    '--device-debug',                       '-G',                           # 4.2.3.3
    '--extensible-whole-program',           '-ewp',                         # 4.2.3.4
    '--generate-line-info',                 '-lineinfo',                    # 4.2.3.5
    '--dlink-time-opt',                     '-dlto',                        # 4.2.3.8
    '--no-exceptions',                      '-noeh',                        # 4.2.3.11
    '--shared',                             '-shared',                      # 4.2.3.12
    '--no-host-device-initializer-list',    '-nohdinitlist',                # 4.2.3.15
    '--expt-relaxed-constexpr',             '-expt-relaxed-constexpr',      # 4.2.3.16
    '--extended-lambda',                    '-extended-lambda',             # 4.2.3.17
    '--expt-extended-lambda',               '-expt-extended-lambda',        # 4.2.3.18
    '--m32',                                '-m32',                         # 4.2.3.20
    '--m64',                                '-m64',                         # 4.2.3.21
    '--forward-unknown-to-host-compiler',   '-forward-unknown-to-host-compiler',  # 4.2.5.1
    '--forward-unknown-to-host-linker',     '-forward-unknown-to-host-linker',    # 4.2.5.2
    '--dont-use-profile',                   '-noprof',                      # 4.2.5.3
    '--dryrun',                             '-dryrun',                      # 4.2.5.5
    '--verbose',                            '-v',                           # 4.2.5.6
    '--keep',                               '-keep',                        # 4.2.5.7
    '--save-temps',                         '-save-temps',                  # 4.2.5.9
    '--clean-targets',                      '-clean',                       # 4.2.5.10
    '--no-align-double',                                                    # 4.2.5.16
    '--no-device-link',                     '-nodlink',                     # 4.2.5.17
    # 4.2.5.18 documents --allow-unsupported-compiler a second time; it is
    # already listed under 4.2.1.14 and a set cannot hold it twice.
    '--use_fast_math',                      '-use_fast_math',               # 4.2.7.7
    '--extra-device-vectorization',         '-extra-device-vectorization',  # 4.2.7.12
    '--compile-as-tools-patch',             '-astoolspatch',                # 4.2.7.13
    '--keep-device-functions',              '-keep-device-functions',       # 4.2.7.14
    '--disable-warnings',                   '-w',                           # 4.2.8.1
    '--source-in-ptx',                      '-src-in-ptx',                  # 4.2.8.2
    '--restrict',                           '-restrict',                    # 4.2.8.3
    '--Wno-deprecated-gpu-targets',         '-Wno-deprecated-gpu-targets',  # 4.2.8.4
    '--Wno-deprecated-declarations',        '-Wno-deprecated-declarations', # 4.2.8.5
    '--Wreorder',                           '-Wreorder',                    # 4.2.8.6
    '--Wdefault-stream-launch',             '-Wdefault-stream-launch',      # 4.2.8.7
    '--Wext-lambda-captures-this',          '-Wext-lambda-captures-this',   # 4.2.8.8
    '--display-error-number',               '-err-no',                      # 4.2.8.10
    '--resource-usage',                     '-res-usage',                   # 4.2.8.14
    '--help',                               '-h',                           # 4.2.8.15
    '--version',                            '-V',                           # 4.2.8.16
    '--list-gpu-code',                      '-code-ls',                     # 4.2.8.20
    '--list-gpu-arch',                      '-arch-ls',                     # 4.2.8.21
}  # type: T.Set[str]
|
|
|
|
# NVCC options that take a value: either a single argument or a
# comma-separated list. Keys are the --long spellings, values the -short
# spellings (the short ones read more like GCC). The trailing comment on each
# row is the section of the CUDA Toolkit 11.2.1 NVCC reference.
_FLAG_LONG2SHORT_WITHARGS = {
    # 4.2.1: file and path specifications
    '--output-file':                '-o',                           # 4.2.1.1
    '--pre-include':                '-include',                     # 4.2.1.3
    '--library':                    '-l',                           # 4.2.1.4
    '--define-macro':               '-D',                           # 4.2.1.5
    '--undefine-macro':             '-U',                           # 4.2.1.6
    '--include-path':               '-I',                           # 4.2.1.7
    '--system-include':             '-isystem',                     # 4.2.1.8
    '--library-path':               '-L',                           # 4.2.1.9
    '--output-directory':           '-odir',                        # 4.2.1.10
    '--dependency-output':          '-MF',                          # 4.2.1.11
    '--compiler-bindir':            '-ccbin',                       # 4.2.1.13
    '--archiver-binary':            '-arbin',                       # 4.2.1.15
    '--cudart':                     '-cudart',                      # 4.2.1.16
    '--cudadevrt':                  '-cudadevrt',                   # 4.2.1.17
    '--libdevice-directory':        '-ldir',                        # 4.2.1.18
    '--target-directory':           '-target-dir',                  # 4.2.1.19
    # 4.2.3: compiler/linker behaviour
    '--optimization-info':          '-opt-info',                    # 4.2.3.6
    '--optimize':                   '-O',                           # 4.2.3.7
    '--ftemplate-backtrace-limit':  '-ftemplate-backtrace-limit',   # 4.2.3.9
    '--ftemplate-depth':            '-ftemplate-depth',             # 4.2.3.10
    '--x':                          '-x',                           # 4.2.3.13
    '--std':                        '-std',                         # 4.2.3.14
    '--machine':                    '-m',                           # 4.2.3.19
    # 4.2.4: options forwarded to internal tools
    '--compiler-options':           '-Xcompiler',                   # 4.2.4.1
    '--linker-options':             '-Xlinker',                     # 4.2.4.2
    '--archive-options':            '-Xarchive',                    # 4.2.4.3
    '--ptxas-options':              '-Xptxas',                      # 4.2.4.4
    '--nvlink-options':             '-Xnvlink',                     # 4.2.4.5
    # 4.2.5: driver guidance
    '--threads':                    '-t',                           # 4.2.5.4
    '--keep-dir':                   '-keep-dir',                    # 4.2.5.8
    '--run-args':                   '-run-args',                    # 4.2.5.11
    '--input-drive-prefix':         '-idp',                         # 4.2.5.12
    '--dependency-drive-prefix':    '-ddp',                         # 4.2.5.13
    '--drive-prefix':               '-dp',                          # 4.2.5.14
    '--dependency-target-name':     '-MT',                          # 4.2.5.15
    # 4.2.6: CUDA compilation steering
    '--default-stream':             '-default-stream',              # 4.2.6.1
    # 4.2.7: GPU code generation steering
    '--gpu-architecture':           '-arch',                        # 4.2.7.1
    '--gpu-code':                   '-code',                        # 4.2.7.2
    '--generate-code':              '-gencode',                     # 4.2.7.3
    '--relocatable-device-code':    '-rdc',                         # 4.2.7.4
    '--entries':                    '-e',                           # 4.2.7.5
    '--maxrregcount':               '-maxrregcount',                # 4.2.7.6
    '--ftz':                        '-ftz',                         # 4.2.7.8
    '--prec-div':                   '-prec-div',                    # 4.2.7.9
    '--prec-sqrt':                  '-prec-sqrt',                   # 4.2.7.10
    '--fmad':                       '-fmad',                        # 4.2.7.11
    # 4.2.8: generic tool options
    '--Werror':                     '-Werror',                      # 4.2.8.9
    '--diag-error':                 '-diag-error',                  # 4.2.8.11
    '--diag-suppress':              '-diag-suppress',               # 4.2.8.12
    '--diag-warn':                  '-diag-warn',                   # 4.2.8.13
    '--options-file':               '-optf',                        # 4.2.8.17
    '--time':                       '-time',                        # 4.2.8.18
    '--qpp-config':                 '-qpp-config',                  # 4.2.8.19
}

# The same table inverted: -short spelling to --long spelling.
_FLAG_SHORT2LONG_WITHARGS = {
    short_name: long_name
    for long_name, short_name in _FLAG_LONG2SHORT_WITHARGS.items()
}
|
|
|
|
|
|
|
|
|
def __init__(self, exelist: T.List[str], version: str, for_machine: MachineChoice, |
|
|
|
|
is_cross: bool, exe_wrapper: T.Optional['ExternalProgram'], |
|
|
|
@ -60,23 +185,261 @@ class CudaCompiler(Compiler): |
|
|
|
|
self.warn_args = {level: self._to_host_flags(flags) for level, flags in host_compiler.warn_args.items()} |
|
|
|
|
|
|
|
|
|
@classmethod
def _shield_nvcc_list_arg(cls, arg: str, listmode: bool = True) -> str:
    """
    Shield an argument against both splitting by NVCC's list-argument
    parse logic, and interpretation by any shell.

    NVCC seems to consider every comma , that is neither escaped by \\ nor inside
    a double-quoted string a split-point. Single-quotes do not provide protection
    against splitting; In fact, after splitting they are \\-escaped. Unfortunately,
    double-quotes don't protect against shell expansion. What follows is a
    complex dance to accommodate everybody.

    arg is the raw argument text. listmode says whether commas in arg should
    be treated as NVCC list separators (and therefore escaped); with
    listmode=False commas are left alone. The shielded text is returned.
    """

    SQ = "'"
    DQ = '"'
    CM = ","
    BS = "\\"
    DQSQ = DQ+SQ+DQ
    quotable = set(string.whitespace+'"$`\\')

    if CM not in arg or not listmode:
        if SQ not in arg:
            # If any of the special characters "$`\ or whitespace are present, single-quote.
            # Otherwise return bare.
            if set(arg).intersection(quotable):
                return SQ+arg+SQ
            else:
                return arg  # Easy case: no splits, no quoting.
        else:
            # There are single quotes. Double-quote them, and single-quote the
            # strings between them.
            pieces = [cls._shield_nvcc_list_arg(s) for s in arg.split(SQ)]
            # Interleave the pieces with DQSQs: build [p0, DQSQ, p1, DQSQ, ...]
            # and drop only the trailing DQSQ. Slicing the list of pairs
            # instead would also throw away the last piece, i.e. everything
            # after the final single quote.
            l = sum([[s, DQSQ] for s in pieces], [])[:-1]

            # The list l now has the structure of shielded strings interleaved
            # with double-quoted single-quotes.
            #
            # Plain concatenation would result in the tripling of the length of
            # a string made up only of single quotes. See if we can merge some
            # DQSQs together first.
            def isdqsq(x: str) -> bool:
                # A (possibly already merged) run of single quotes wrapped in
                # one pair of double quotes, e.g. "'" or "'''".
                return x.startswith(DQ) and x.endswith(DQ) and x[1:-1].strip(SQ) == ''
            for i in range(1, len(l)-2, 2):
                if isdqsq(l[i]) and l[i+1] == '' and isdqsq(l[i+2]):
                    # Fold this run into the next one: "'" + "'" -> "''".
                    l[i+2] = l[i][:-1]+l[i+2][1:]
                    l[i] = ''

            # With DQSQs merged, simply concatenate everything together and return.
            return ''.join(l)
    else:
        # A comma is present, and list mode was active.
        # We apply (what we guess is) the (primitive) NVCC splitting rule:
        l = ['']
        instring = False
        argit = iter(arg)
        for c in argit:
            if c == CM and not instring:
                l.append('')
            elif c == DQ:
                l[-1] += c
                instring = not instring
            elif c == BS:
                # Keep the escaped character, drop the backslash itself; a
                # trailing lone backslash ends the scan.
                try:
                    l[-1] += next(argit)
                except StopIteration:
                    break
            else:
                l[-1] += c

        # Shield individual strings, without listmode, then return them with
        # escaped commas between them.
        l = [cls._shield_nvcc_list_arg(s, listmode=False) for s in l]
        return r'\,'.join(l)
|
|
|
|
|
|
|
|
|
@classmethod
def _to_host_flags(cls, flags: T.List[str], phase: _Phase = _Phase.COMPILER) -> T.List[str]:
    """
    Translate generic "GCC-speak" plus particular "NVCC-speak" flags to NVCC flags.

    NVCC's "short" flags have broad similarities to the GCC standard, but have
    gratuitous, irritating differences.

    flags is the argument list to translate. phase selects the -X<phase>=
    wrapper used for '/'-prefixed arguments. A new list is returned; flags is
    not modified. An AssertionError is raised when a flag that requires a
    value is the last element of flags.
    """

    xflags = []  # type: T.List[str]
    flagit = iter(flags)

    for flag in flagit:
        # The CUDA Toolkit Documentation, in 4.1. Command Option Types and Notation,
        # specifies that NVCC does not parse the standard flags as GCC does. It has
        # its own strategy, to wit:
        #
        #     nvcc recognizes three types of command options: boolean options, single
        #     value options, and list options.
        #
        #     Boolean options do not have an argument; they are either specified on a
        #     command line or not. Single value options must be specified at most once,
        #     and list options may be repeated. Examples of each of these option types
        #     are, respectively: --verbose (switch to verbose mode), --output-file
        #     (specify output file), and --include-path (specify include path).
        #
        #     Single value options and list options must have arguments, which must
        #     follow the name of the option itself by either one or more spaces or an
        #     equals character. When a one-character short name such as -I, -l, and -L
        #     is used, the value of the option may also immediately follow the option
        #     itself without being separated by spaces or an equal character. The
        #     individual values of list options may be separated by commas in a single
        #     instance of the option, or the option may be repeated, or any
        #     combination of these two cases.
        #
        # One strange consequence of this choice is that directory and filenames that
        # contain commas (',') cannot be passed to NVCC (at least, not as easily as
        # in GCC). Another strange consequence is that it is legal to supply flags
        # such as
        #
        #     -lpthread,rt,dl,util
        #     -l pthread,rt,dl,util
        #     -l=pthread,rt,dl,util
        #
        # and each of the above alternatives is equivalent to GCC-speak
        #
        #     -lpthread -lrt -ldl -lutil
        #     -l pthread -l rt -l dl -l util
        #     -l=pthread -l=rt -l=dl -l=util
        #
        # *With the exception of commas in the name*, GCC-speak for these list flags
        # is a strict subset of NVCC-speak, so we passthrough those flags.
        #
        # The -D macro-define flag is documented as somehow shielding commas from
        # splitting a definition. Balanced parentheses, braces and single-quotes
        # around the comma are not sufficient, but balanced double-quotes are. The
        # shielding appears to work with -l, -I, -L flags as well, for instance.
        #
        # Since our goal is to replicate GCC-speak as much as possible, we check for
        # commas in all list-arguments and shield them with double-quotes. We make
        # an exception for -D (where this would be value-changing) and -U (because
        # it isn't possible to define a macro with a comma in the name).

        # Reset for every flag. Without this, a flag that is missing its
        # argument would silently reuse the value of an earlier flag (or hit an
        # unbound local on the first one) instead of tripping the assert below.
        val = None  # type: T.Optional[str]

        if flag in cls._FLAG_PASSTHRU_NOARGS:
            xflags.append(flag)
            continue

        # Handle breakup of flag-values into a flag-part and value-part.
        if flag[:1] not in '-/':
            # This is not a flag. It's probably a file input. Pass it through.
            xflags.append(flag)
            continue
        elif flag[:1] == '/':
            # This is ambiguously either an MSVC-style /switch or an absolute path
            # to a file. For some magical reason the following works acceptably in
            # both cases.
            wrap = '"' if ',' in flag else ''
            xflags.append(f'-X{phase.value}={wrap}{flag}{wrap}')
            continue
        elif len(flag) >= 2 and flag[0] == '-' and flag[1] in 'IDULlmOxte':
            # This is a single-letter short option. These options (with the
            # exception of -o) are allowed to receive their argument with neither
            # space nor = sign before them. Detect and separate them in that event.
            if flag[2:3] == '':            # -I something
                val = next(flagit, None)
            elif flag[2:3] == '=':         # -I=something
                val = flag[3:]
            else:                          # -Isomething
                val = flag[2:]
            flag = flag[:2]                # -I
        elif flag in cls._FLAG_LONG2SHORT_WITHARGS or \
             flag in cls._FLAG_SHORT2LONG_WITHARGS:
            # This is either -o or a multi-letter flag, and it is receiving its
            # value isolated.
            val = next(flagit, None)       # -o something
        elif flag.split('=', 1)[0] in cls._FLAG_LONG2SHORT_WITHARGS or \
             flag.split('=', 1)[0] in cls._FLAG_SHORT2LONG_WITHARGS:
            # This is either -o or a multi-letter flag, and it is receiving its
            # value after an = sign.
            flag, val = flag.split('=', 1)  # -o=something
        else:
            # This is a flag, and it's foreign to NVCC.
            #
            # We do not know whether this GCC-speak flag takes an isolated
            # argument. Assuming it does not (the vast majority indeed don't),
            # wrap this argument in an -Xcompiler flag and send it down to NVCC.
            if flag == '-ffast-math':
                xflags.append('-use_fast_math')
                xflags.append('-Xcompiler='+flag)
            elif flag == '-fno-fast-math':
                xflags.append('-ftz=false')
                xflags.append('-prec-div=true')
                xflags.append('-prec-sqrt=true')
                xflags.append('-Xcompiler='+flag)
            elif flag == '-freciprocal-math':
                xflags.append('-prec-div=false')
                xflags.append('-Xcompiler='+flag)
            elif flag == '-fno-reciprocal-math':
                xflags.append('-prec-div=true')
                xflags.append('-Xcompiler='+flag)
            else:
                xflags.append('-Xcompiler='+cls._shield_nvcc_list_arg(flag))
                # The above should securely handle GCC's -Wl, -Wa, -Wp, arguments.
            continue

        # Only trips when a value-taking flag was the last element of flags.
        assert val is not None, f'NVCC flag {flag!r} is missing its argument'

        # Take care of the various NVCC-supported flags that need special handling.
        flag = cls._FLAG_LONG2SHORT_WITHARGS.get(flag, flag)

        if flag in {'-include', '-isystem', '-I', '-L', '-l'}:
            # These flags are known to GCC, but list-valued in NVCC. They potentially
            # require double-quoting to prevent NVCC interpreting the flags as lists
            # when GCC would not have done so.
            #
            # We avoid doing this quoting for -D to avoid redefining macros and for
            # -U because it isn't possible to define a macro with a comma in the name.
            # -U with comma arguments is impossible in GCC-speak (and thus unambiguous
            # in NVCC-speak, albeit unportable).
            if len(flag) == 2:
                xflags.append(flag+cls._shield_nvcc_list_arg(val))
            else:
                xflags.append(flag)
                xflags.append(cls._shield_nvcc_list_arg(val))
        elif flag == '-O':
            # Handle optimization levels GCC knows about that NVCC does not.
            if val == 'fast':
                xflags.append('-O3')
                xflags.append('-use_fast_math')
                xflags.append('-Xcompiler')
                xflags.append(flag+val)
            elif val in {'s', 'g', 'z'}:
                xflags.append('-Xcompiler')
                xflags.append(flag+val)
            else:
                xflags.append(flag+val)
        elif flag in {'-D', '-U', '-m', '-t'}:
            xflags.append(flag+val)       # For style, keep glued.
        elif flag in {'-std'}:
            xflags.append(flag+'='+val)   # For style, keep glued.
        else:
            xflags.append(flag)
            xflags.append(val)

    return xflags
|
|
|
|
|
|
|
|
|
def needs_static_linker(self) -> bool:
    """Report that this compiler does not need a separate static linker."""
    return False
|
|
|
|
|
|
|
|
|
def thread_link_flags(self, environment: 'Environment') -> T.List[str]:
    """
    Return the host compiler's thread link flags translated for NVCC.

    The flags are link-time flags, so they are translated in the linker
    phase rather than the default compiler phase.
    """
    host_flags = self.host_compiler.thread_link_flags(environment)
    return self._to_host_flags(host_flags, _Phase.LINKER)
|
|
|
|
|
|
|
|
|
def sanity_check(self, work_dir: str, environment: 'Environment') -> None: |
|
|
|
|
def sanity_check(self, work_dir: str, env: 'Environment') -> None: |
|
|
|
|
mlog.debug('Sanity testing ' + self.get_display_language() + ' compiler:', ' '.join(self.exelist)) |
|
|
|
|
mlog.debug('Is cross compiler: %s.' % str(self.is_cross)) |
|
|
|
|
|
|
|
|
@ -120,7 +483,18 @@ class CudaCompiler(Compiler): |
|
|
|
|
# environment set up properly. Of course, this only works for native |
|
|
|
|
# builds; For cross builds we must still use the exe_wrapper (if any). |
|
|
|
|
self.detected_cc = '' |
|
|
|
|
flags = ['-w', '-cudart', 'static', source_name] |
|
|
|
|
flags = [] |
|
|
|
|
|
|
|
|
|
# Disable warnings, compile with statically-linked runtime for minimum |
|
|
|
|
# reliance on the system. |
|
|
|
|
flags += ['-w', '-cudart', 'static', source_name] |
|
|
|
|
|
|
|
|
|
# Use the -ccbin option, if available, even during sanity checking. |
|
|
|
|
# Otherwise, on systems where CUDA does not support the default compiler, |
|
|
|
|
# NVCC becomes unusable. |
|
|
|
|
flags += self.get_ccbin_args(env.coredata.options) |
|
|
|
|
|
|
|
|
|
# If cross-compiling, we can't run the sanity check, only compile it. |
|
|
|
|
if self.is_cross and self.exe_wrapper is None: |
|
|
|
|
# Linking cross built apps is painful. You can't really |
|
|
|
|
# tell if you should use -nostdlib or not and for example |
|
|
|
@ -200,10 +574,14 @@ class CudaCompiler(Compiler): |
|
|
|
|
|
|
|
|
|
def get_options(self) -> 'KeyedOptionDictType':
    """
    Return the base-class options extended with the CUDA-specific ones.

    Two options are added for this compiler's machine and language: 'std'
    (a combo option, default 'none') and 'ccbindir' (a string option,
    default empty).
    """
    opts = super().get_options()
    std_key      = OptionKey('std',      machine=self.for_machine, lang=self.language)
    ccbindir_key = OptionKey('ccbindir', machine=self.for_machine, lang=self.language)
    # A single update registers both options; an earlier, narrower 'std'
    # definition would only be overwritten by this one.
    opts.update({
        std_key:      coredata.UserComboOption('C++ language standard to use with CUDA',
                                               ['none', 'c++03', 'c++11', 'c++14', 'c++17'], 'none'),
        ccbindir_key: coredata.UserStringOption('CUDA non-default toolchain directory to use (-ccbin)',
                                                ''),
    })
    return opts
|
|
|
|
|
|
|
|
|
def _to_host_compiler_options(self, options: 'KeyedOptionDictType') -> 'KeyedOptionDictType': |
|
|
|
@ -211,7 +589,7 @@ class CudaCompiler(Compiler): |
|
|
|
|
return OptionOverrideProxy(overrides, self.host_compiler.get_options()) |
|
|
|
|
|
|
|
|
|
def get_option_compile_args(self, options: 'KeyedOptionDictType') -> T.List[str]: |
|
|
|
|
args = [] |
|
|
|
|
args = self.get_ccbin_args(options) |
|
|
|
|
# On Windows, the version of the C++ standard used by nvcc is dictated by |
|
|
|
|
# the combination of CUDA version and MSVC version; the --std= is thus ignored |
|
|
|
|
# and attempting to use it will result in a warning: https://stackoverflow.com/a/51272091/741027 |
|
|
|
@ -223,27 +601,17 @@ class CudaCompiler(Compiler): |
|
|
|
|
|
|
|
|
|
return args + self._to_host_flags(self.host_compiler.get_option_compile_args(self._to_host_compiler_options(options))) |
|
|
|
|
|
|
|
|
|
@classmethod
def _cook_link_args(cls, args: T.List[str]) -> T.List[str]:
    """
    Prepare link args for nvcc.

    A leading GNU-style '-Wl,' prefix is stripped from each argument, every
    space is replaced, and the result is translated in the linker phase.

    NOTE(review): each space is replaced by a lone backslash, so the space
    itself is dropped. If the intent was to escape whitespace, a backslash
    followed by a space was probably meant -- confirm before changing.
    """
    def cook(raw: str) -> str:
        if raw.startswith('-Wl,'):
            raw = raw.replace('-Wl,', '', 1)
        return raw.replace(' ', '\\')

    return cls._to_host_flags([cook(one) for one in args], _Phase.LINKER)
|
|
|
|
|
|
|
|
|
def get_option_link_args(self, options: 'KeyedOptionDictType') -> T.List[str]:
    """
    Return the link arguments implied by the given options.

    The result is the -ccbin arguments (if any) followed by the host
    compiler's option link args, translated in the linker phase.
    """
    args = self.get_ccbin_args(options)
    host_args = self.host_compiler.get_option_link_args(self._to_host_compiler_options(options))
    return args + self._to_host_flags(host_args, _Phase.LINKER)
|
|
|
|
|
|
|
|
|
def get_soname_args(self, env: 'Environment', prefix: str, shlib_name: str,
                    suffix: str, soversion: str,
                    darwin_versions: T.Tuple[str, str],
                    is_shared_module: bool) -> T.List[str]:
    """
    Return the host compiler's soname arguments translated for NVCC.

    All parameters are forwarded unchanged to the host compiler; its answer
    is translated in the linker phase.
    """
    host_args = self.host_compiler.get_soname_args(
        env, prefix, shlib_name, suffix, soversion, darwin_versions,
        is_shared_module)
    return self._to_host_flags(host_args, _Phase.LINKER)
|
|
|
|
|
|
|
|
|
def get_compile_only_args(self) -> T.List[str]:
    """Return the arguments for a compile-only invocation."""
    compile_only = ['-c']
    return compile_only
|
|
|
@ -275,26 +643,26 @@ class CudaCompiler(Compiler): |
|
|
|
|
def get_include_args(self, path: str, is_system: bool) -> T.List[str]:
    """
    Return the argument that adds path to the include search path.

    An empty path means the current directory. System include directories
    use '-isystem=<path>', all others '-I<path>'.
    """
    if path == '':
        path = '.'
    return ['-isystem=' + path] if is_system else ['-I' + path]
|
|
|
|
|
|
|
|
|
def get_compile_debugfile_args(self, rel_obj: str, pch: bool = False) -> T.List[str]:
    """Return the host compiler's compile-time debug-file arguments, translated for NVCC."""
    host_args = self.host_compiler.get_compile_debugfile_args(rel_obj, pch)
    return self._to_host_flags(host_args)
|
|
|
|
|
|
|
|
|
def get_link_debugfile_args(self, targetfile: str) -> T.List[str]:
    """Return the host compiler's link-time debug-file arguments, translated in the linker phase."""
    host_args = self.host_compiler.get_link_debugfile_args(targetfile)
    return self._to_host_flags(host_args, _Phase.LINKER)
|
|
|
|
|
|
|
|
|
def get_depfile_suffix(self) -> str:
    """Return the file suffix used for dependency files."""
    suffix = 'd'
    return suffix
|
|
|
|
|
|
|
|
|
def get_buildtype_linker_args(self, buildtype: str) -> T.List[str]:
    """Return the host compiler's linker arguments for buildtype, translated in the linker phase."""
    host_args = self.host_compiler.get_buildtype_linker_args(buildtype)
    return self._to_host_flags(host_args, _Phase.LINKER)
|
|
|
|
|
|
|
|
|
def build_rpath_args(self, env: 'Environment', build_dir: str, from_dir: str,
                     rpath_paths: str, build_rpath: str,
                     install_rpath: str) -> T.Tuple[T.List[str], T.Set[bytes]]:
    """
    Return the rpath arguments and the rpath directories to remove.

    Both values come from the host compiler; only the argument list is
    translated (in the linker phase), the second element is passed through.
    """
    (rpath_args, rpath_dirs_to_remove) = self.host_compiler.build_rpath_args(
        env, build_dir, from_dir, rpath_paths, build_rpath, install_rpath)
    return (self._to_host_flags(rpath_args, _Phase.LINKER), rpath_dirs_to_remove)
|
|
|
|
|
|
|
|
|
def linker_to_compiler_args(self, args: T.List[str]) -> T.List[str]:
    """Return args unchanged (the very same list object)."""
    return args
|
|
|
@ -310,7 +678,7 @@ class CudaCompiler(Compiler): |
|
|
|
|
return ['-o', target] |
|
|
|
|
|
|
|
|
|
def get_std_exe_link_args(self) -> T.List[str]:
    """Return the host compiler's standard executable link arguments, translated in the linker phase."""
    host_args = self.host_compiler.get_std_exe_link_args()
    return self._to_host_flags(host_args, _Phase.LINKER)
|
|
|
|
|
|
|
|
|
def find_library(self, libname: str, env: 'Environment', extra_dirs: T.List[str], |
|
|
|
|
libtype: LibType = LibType.PREFER_SHARED) -> T.Optional[T.List[str]]: |
|
|
|
@ -326,13 +694,21 @@ class CudaCompiler(Compiler): |
|
|
|
|
host_crt_compile_args = self.host_compiler.get_crt_compile_args(crt_val, buildtype) |
|
|
|
|
if any(arg in ['/MDd', '/MD', '/MTd'] for arg in host_crt_compile_args): |
|
|
|
|
host_link_arg_overrides += ['/NODEFAULTLIB:LIBCMT.lib'] |
|
|
|
|
return self._cook_link_args(host_link_arg_overrides + self.host_compiler.get_crt_link_args(crt_val, buildtype)) |
|
|
|
|
return self._to_host_flags(host_link_arg_overrides + self.host_compiler.get_crt_link_args(crt_val, buildtype), _Phase.LINKER) |
|
|
|
|
|
|
|
|
|
def get_target_link_args(self, target: 'BuildTarget') -> T.List[str]:
    """Return the base-class link arguments for target, translated in the linker phase."""
    base_args = super().get_target_link_args(target)
    return self._to_host_flags(base_args, _Phase.LINKER)
|
|
|
|
|
|
|
|
|
def get_dependency_compile_args(self, dep: 'Dependency') -> T.List[str]:
    """Return the base-class compile arguments for dep, translated for NVCC."""
    base_args = super().get_dependency_compile_args(dep)
    return self._to_host_flags(base_args)
|
|
|
|
|
|
|
|
|
def get_dependency_link_args(self, dep: 'Dependency') -> T.List[str]:
    """Return the base-class link arguments for dep, translated in the linker phase."""
    base_args = super().get_dependency_link_args(dep)
    return self._to_host_flags(base_args, _Phase.LINKER)
|
|
|
|
|
|
|
|
|
def get_ccbin_args(self, options: 'KeyedOptionDictType') -> T.List[str]:
    """
    Return the -ccbin argument selected by the 'ccbindir' option.

    The option is looked up for this compiler's machine and language. An
    empty list is returned when its value is not a string or is empty;
    otherwise a one-element list holding the shielded '-ccbin=<dir>'
    argument (shielded with list mode off).
    """
    lookup = OptionKey('ccbindir', machine=self.for_machine, lang=self.language)
    configured = options[lookup].value
    if not isinstance(configured, str) or configured == '':
        return []
    return [self._shield_nvcc_list_arg('-ccbin='+configured, False)]
|
|
|
|