CUDA support on Windows

pull/5957/head
Aleksey Gurtovoy 5 years ago committed by Dylan Baker
parent 1670fca36f
commit 6ac5db50c9
  1. 26
      mesonbuild/backend/backends.py
  2. 8
      mesonbuild/backend/ninjabackend.py
  3. 2
      mesonbuild/build.py
  4. 13
      mesonbuild/compilers/compilers.py
  5. 119
      mesonbuild/compilers/cuda.py
  6. 6
      mesonbuild/compilers/mixins/clike.py
  7. 4
      mesonbuild/environment.py
  8. 20
      mesonbuild/linkers.py
  9. 30
      mesonbuild/mesonlib.py
  10. 20
      test cases/cuda/6 std/main.cu
  11. 4
      test cases/cuda/6 std/meson.build
  12. 20
      test cases/cuda/7 static vs runtime/main.cu
  13. 4
      test cases/cuda/7 static vs runtime/meson.build
  14. 20
      test cases/cuda/8 release/main.cu
  15. 4
      test cases/cuda/8 release/meson.build
  16. 20
      test cases/cuda/9 optimize for space/main.cu
  17. 4
      test cases/cuda/9 optimize for space/meson.build

@ -20,7 +20,7 @@ from .. import mesonlib
from .. import mlog
import json
import subprocess
from ..mesonlib import MachineChoice, MesonException, OrderedSet
from ..mesonlib import MachineChoice, MesonException, OrderedSet, OptionOverrideProxy
from ..mesonlib import classify_unity_sources
from ..mesonlib import File
from ..compilers import CompilerArgs, VisualStudioLikeCompiler
@ -105,28 +105,6 @@ class TestSerialisation:
self.protocol = protocol
self.priority = priority
class OptionProxy:
def __init__(self, value):
self.value = value
class OptionOverrideProxy:
'''Mimic an option list but transparently override
selected option values.'''
def __init__(self, overrides, *options):
self.overrides = overrides
self.options = options
def __getitem__(self, option_name):
for opts in self.options:
if option_name in opts:
return self._get_override(option_name, opts[option_name])
raise KeyError('Option not found', option_name)
def _get_override(self, option_name, base_opt):
if option_name in self.overrides:
return OptionProxy(base_opt.validate_value(self.overrides[option_name]))
return base_opt
def get_backend_from_name(backend, build):
if backend == 'ninja':
from . import ninjabackend
@ -650,7 +628,7 @@ class Backend:
elif isinstance(dep, dependencies.ExternalLibrary):
commands += dep.get_link_args('vala')
else:
commands += dep.get_compile_args()
commands += compiler.get_dependency_compile_args(dep)
# Qt needs -fPIC for executables
# XXX: We should move to -fPIC for all executables
if isinstance(target, build.Executable):

@ -1882,8 +1882,6 @@ https://gcc.gnu.org/bugzilla/show_bug.cgi?id=47485'''))
return compiler.get_no_stdinc_args()
def get_compile_debugfile_args(self, compiler, target, objfile):
if not isinstance(compiler, VisualStudioLikeCompiler):
return []
# The way MSVC uses PDB files is documented exactly nowhere so
# the following is what we have been able to decipher via
# reverse engineering.
@ -2511,16 +2509,16 @@ https://gcc.gnu.org/bugzilla/show_bug.cgi?id=47485'''))
# For 'automagic' deps: Boost and GTest. Also dependency('threads').
# pkg-config puts the thread flags itself via `Cflags:`
commands += target.link_args
commands += linker.get_target_link_args(target)
# External deps must be last because target link libraries may depend on them.
for dep in target.get_external_deps():
# Extend without reordering or de-dup to preserve `-L -l` sets
# https://github.com/mesonbuild/meson/issues/1718
commands.extend_preserving_lflags(dep.get_link_args())
commands.extend_preserving_lflags(linker.get_dependency_link_args(dep))
for d in target.get_dependencies():
if isinstance(d, build.StaticLibrary):
for dep in d.get_external_deps():
commands.extend_preserving_lflags(dep.get_link_args())
commands.extend_preserving_lflags(linker.get_dependency_link_args(dep))
# Add link args specific to this BuildTarget type that must not be overridden by dependencies
commands += self.get_target_type_link_args_post_dependencies(target, linker)

@ -1247,7 +1247,7 @@ You probably should put it in link_with instead.''')
'''
linker, _ = self.get_clink_dynamic_linker_and_stdlibs()
# Mixing many languages with MSVC is not supported yet so ignore stdlibs.
if linker and linker.get_id() in {'msvc', 'clang-cl', 'intel-cl', 'llvm', 'dmd'}:
if linker and linker.get_id() in {'msvc', 'clang-cl', 'intel-cl', 'llvm', 'dmd', 'nvcc'}:
return True
return False

@ -222,8 +222,8 @@ cuda_optimization_args = {'0': [],
'g': ['-O0'],
'1': ['-O1'],
'2': ['-O2'],
'3': ['-O3', '-Otime'],
's': ['-O3', '-Ospace']
'3': ['-O3'],
's': ['-O3']
}
cuda_debug_args = {False: [],
@ -1144,6 +1144,15 @@ class Compiler:
env, prefix, shlib_name, suffix, soversion,
darwin_versions, is_shared_module)
def get_target_link_args(self, target):
    """Return the link arguments declared on ``target``.

    This implementation hands back ``target.link_args`` itself, without
    copying or transforming it.
    """
    declared_link_args = target.link_args
    return declared_link_args
def get_dependency_compile_args(self, dep):
    """Return the compile arguments contributed by the dependency ``dep``.

    This implementation forwards the result of ``dep.get_compile_args()``
    unchanged.
    """
    dependency_args = dep.get_compile_args()
    return dependency_args
def get_dependency_link_args(self, dep):
    """Return the link arguments contributed by the dependency ``dep``.

    This implementation forwards the result of ``dep.get_link_args()``
    unchanged.
    """
    dependency_args = dep.get_link_args()
    return dependency_args
@enum.unique
class CompilerType(enum.Enum):

@ -14,9 +14,11 @@
import os.path
import typing
from functools import partial
from .. import coredata
from .. import mlog
from ..mesonlib import EnvironmentException, MachineChoice, Popen_safe
from ..mesonlib import EnvironmentException, MachineChoice, Popen_safe, OptionOverrideProxy, is_windows
from .compilers import (Compiler, cuda_buildtype_args, cuda_optimization_args,
cuda_debug_args)
@ -28,19 +30,29 @@ class CudaCompiler(Compiler):
LINKER_PREFIX = '-Xlinker='
def __init__(self, exelist, version, for_machine: MachineChoice, is_cross, exe_wrapper=None, **kwargs):
_universal_flags = {'compiler': ['-I', '-D', '-U', '-E'], 'linker': ['-l', '-L']}
def __init__(self, exelist, version, for_machine: MachineChoice, is_cross, exe_wrapper, host_compiler, **kwargs):
if not hasattr(self, 'language'):
self.language = 'cuda'
super().__init__(exelist, version, for_machine, **kwargs)
self.is_cross = is_cross
self.exe_wrapper = exe_wrapper
self.host_compiler = host_compiler
self.base_options = host_compiler.base_options
self.id = 'nvcc'
default_warn_args = []
self.warn_args = {'0': [],
'1': default_warn_args,
'2': default_warn_args + ['-Xcompiler=-Wextra'],
'3': default_warn_args + ['-Xcompiler=-Wextra',
'-Xcompiler=-Wpedantic']}
self.warn_args = {level: self._to_host_flags(flags) for level, flags in host_compiler.warn_args.items()}
@classmethod
def _to_host_flags(cls, flags, phase='compiler'):
    """Run every entry of ``flags`` through ``_to_host_flag`` for ``phase``.

    Returns a new list with one converted entry per input flag, in the
    same order.
    """
    return [cls._to_host_flag(flag, phase=phase) for flag in flags]
@classmethod
def _to_host_flag(cls, flag, phase):
    """Wrap a single host-tool flag so it is forwarded via ``-X<phase>=``.

    ``flag`` is returned unchanged when it does not start with ``-`` or
    ``/``, or when its first two characters are listed in
    ``cls._universal_flags[phase]``. Any other flag is returned as
    ``-X<phase>=<flag>``.

    ``phase`` must be a key of ``cls._universal_flags``; an unknown phase
    raises ``KeyError`` for flags that start with ``-`` or ``/``.
    """
    # Slice instead of indexing so an empty string is passed through
    # unchanged rather than raising IndexError.
    if flag[:1] not in ('-', '/') or flag[:2] in cls._universal_flags[phase]:
        return flag
    return '-X{}={}'.format(phase, flag)
def needs_static_linker(self):
return False
@ -55,7 +67,7 @@ class CudaCompiler(Compiler):
return []
def thread_link_flags(self, environment):
return self._cook_link_args(super().thread_link_flags())
return self._to_host_flags(self.host_compiler.thread_link_flags(environment))
def sanity_check(self, work_dir, environment):
mlog.debug('Sanity testing ' + self.get_display_language() + ' compiler:', ' '.join(self.exelist))
@ -163,16 +175,48 @@ class CudaCompiler(Compiler):
int main () {{ return 0; }}'''
return self.compiles(t.format(**fargs), env, extra_args, dependencies)
@staticmethod
def _cook_link_args(args: typing.List[str]) -> typing.List[str]:
"""Fixup arguments."""
return [a.replace(' ', '\\') for a in args]
def get_options(self):
    """Return the inherited options extended with the ``cuda_std`` option.

    ``cuda_std`` is a combo option whose choices are 'none', 'c++03',
    'c++11' and 'c++14', with 'none' as the default.
    """
    options = super().get_options()
    std_choices = ['none', 'c++03', 'c++11', 'c++14']
    cuda_std = coredata.UserComboOption('C++ language standard to use',
                                        std_choices,
                                        'none')
    options.update({'cuda_std': cuda_std})
    return options
def _to_host_compiler_options(self, options):
    """Build an option view for the host compiler from ``options``.

    The current value of every entry in ``options`` is used as an override
    on top of ``self.host_compiler.get_options()``.
    """
    overrides = {}
    # ``copy()`` is called before ``items()``; ``options`` therefore only
    # needs to provide ``copy()`` returning a mapping.
    for name, opt in options.copy().items():
        overrides[name] = opt.value
    host_options = self.host_compiler.get_options()
    return OptionOverrideProxy(overrides, host_options)
def get_option_compile_args(self, options):
    """Return compile arguments derived from ``options``.

    The result is the optional ``--std=`` argument followed by the host
    compiler's option arguments converted with ``_to_host_flags``.
    """
    std_args = []
    # On Windows, the version of the C++ standard used by nvcc is dictated by
    # the combination of CUDA version and MSVC version; --std= is thus ignored
    # and attempting to use it will result in a warning: https://stackoverflow.com/a/51272091/741027
    if not is_windows():
        cuda_std = options['cuda_std'].value
        if cuda_std != 'none':
            std_args = ['--std=' + cuda_std]
    host_options = self._to_host_compiler_options(options)
    host_args = self.host_compiler.get_option_compile_args(host_options)
    return std_args + self._to_host_flags(host_args)
@classmethod
def _cook_link_args(cls, args: typing.List[str]) -> typing.List[str]:
    """Prepare link args for nvcc.

    Each argument loses a leading GNU-style ``-Wl,`` prefix (if present),
    has its space characters rewritten, and is finally converted with
    ``_to_host_flags`` for the 'linker' phase.
    """
    gnu_prefix = '-Wl,'
    prepared = []  # type: typing.List[str]
    for raw_arg in args:
        # strip GNU-style -Wl prefix
        bare_arg = raw_arg[len(gnu_prefix):] if raw_arg.startswith(gnu_prefix) else raw_arg
        # Each space is replaced by a single backslash (the space itself is dropped).
        # NOTE(review): intended as whitespace escaping -- confirm this is the form nvcc expects.
        prepared.append(bare_arg.replace(' ', '\\'))
    return cls._to_host_flags(prepared, 'linker')
def get_option_link_args(self, options):
    """Return link arguments derived from ``options``.

    ``options`` is first converted with ``_to_host_compiler_options``; the
    host compiler's option link arguments for it are then passed through
    ``_cook_link_args``.
    """
    host_options = self._to_host_compiler_options(options)
    host_link_args = self.host_compiler.get_option_link_args(host_options)
    return self._cook_link_args(host_link_args)
def name_string(self):
return ' '.join(self.exelist)
def get_soname_args(self, *args):
return self._cook_link_args(super().get_soname_args(*args))
return self._cook_link_args(self.host_compiler.get_soname_args(*args))
def get_dependency_gen_args(self, outtarget, outfile):
return []
@ -184,6 +228,9 @@ class CudaCompiler(Compiler):
return ['-O0']
def get_optimization_args(self, optimization_level):
# alternatively, consider simply redirecting this to the host compiler, which would
# give us more control over options like "optimize for space" (which nvcc doesn't support):
# return self._to_host_flags(self.host_compiler.get_optimization_args(optimization_level))
return cuda_optimization_args[optimization_level]
def get_debug_args(self, is_debug):
@ -196,30 +243,45 @@ class CudaCompiler(Compiler):
return self.warn_args[level]
def get_buildtype_args(self, buildtype):
return cuda_buildtype_args[buildtype]
# nvcc doesn't support msvc's "Edit and Continue" PDB format; "downgrade" to
# a regular PDB to avoid cl's warning to that effect (D9025 : overriding '/ZI' with '/Zi')
host_args = ['/Zi' if arg == '/ZI' else arg for arg in self.host_compiler.get_buildtype_args(buildtype)]
return cuda_buildtype_args[buildtype] + self._to_host_flags(host_args)
def get_include_args(self, path, is_system):
if path == '':
path = '.'
return ['-I' + path]
def get_compile_debugfile_args(self, rel_obj, **kwargs):
return self._to_host_flags(self.host_compiler.get_compile_debugfile_args(rel_obj, **kwargs))
def get_link_debugfile_args(self, targetfile):
return self._cook_link_args(self.host_compiler.get_link_debugfile_args(targetfile))
def depfile_for_object(self, objfile):
return objfile + '.' + self.get_depfile_suffix()
def get_depfile_suffix(self):
return 'd'
def get_linker_debug_crt_args(self) -> typing.List[str]:
return self._cook_link_args(self.host_compiler.get_linker_debug_crt_args())
def get_buildtype_linker_args(self, buildtype):
return self._cook_link_args(self.host_compiler.get_buildtype_linker_args(buildtype))
def build_rpath_args(self, env: 'Environment', build_dir: str, from_dir: str,
rpath_paths: str, build_rpath: str,
install_rpath: str) -> typing.List[str]:
return self._cook_link_args(super().build_rpath_args(
return self._cook_link_args(self.host_compiler.build_rpath_args(
env, build_dir, from_dir, rpath_paths, build_rpath, install_rpath))
def linker_to_compiler_args(self, args):
return args
def get_pic_args(self):
return ['-Xcompiler=-fPIC']
return self._to_host_flags(self.host_compiler.get_pic_args())
def compute_parameters_with_absolute_paths(self, parameter_list, build_dir):
return []
@ -228,4 +290,25 @@ class CudaCompiler(Compiler):
return ['-o', target]
def get_std_exe_link_args(self) -> typing.List[str]:
return []
return self._cook_link_args(self.host_compiler.get_std_exe_link_args())
def get_crt_compile_args(self, crt_val, buildtype):
return self._to_host_flags(self.host_compiler.get_crt_compile_args(crt_val, buildtype))
def get_crt_link_args(self, crt_val, buildtype):
    """Return the link arguments that select the C runtime library.

    When the host compiler's CRT compile arguments contain '/MDd', '/MD'
    or '/MTd', '/NODEFAULTLIB:LIBCMT.lib' is placed in front of the host
    compiler's CRT link arguments. The combined list is passed through
    ``_cook_link_args``.
    """
    # nvcc defaults to static, release version of msvc runtime and provides no
    # native option to override it; override it with /NODEFAULTLIB
    non_default_crt_flags = ['/MDd', '/MD', '/MTd']
    overrides = []
    for compile_arg in self.host_compiler.get_crt_compile_args(crt_val, buildtype):
        if compile_arg in non_default_crt_flags:
            overrides.append('/NODEFAULTLIB:LIBCMT.lib')
            break
    host_link_args = self.host_compiler.get_crt_link_args(crt_val, buildtype)
    return self._cook_link_args(overrides + host_link_args)
def get_target_link_args(self, target):
return self._cook_link_args(super().get_target_link_args(target))
def get_dependency_compile_args(self, dep):
return self._to_host_flags(super().get_dependency_compile_args(dep))
def get_dependency_link_args(self, dep):
return self._cook_link_args(super().get_dependency_link_args(dep))

@ -1056,6 +1056,12 @@ class CLikeCompiler:
raise mesonlib.MesonException('Cannot find frameworks with non-clang compiler')
return self.find_framework_impl(name, env, extra_dirs, allow_system)
def get_crt_compile_args(self, crt_val: str, buildtype: str) -> typing.List[str]:
    """Return compile arguments selecting the C runtime.

    This implementation ignores both parameters and contributes no
    arguments.
    """
    no_args = []  # type: typing.List[str]
    return no_args
def get_crt_link_args(self, crt_val: str, buildtype: str) -> typing.List[str]:
    """Return link arguments selecting the C runtime.

    This implementation ignores both parameters and contributes no
    arguments.
    """
    no_args = []  # type: typing.List[str]
    return no_args
def thread_flags(self, env):
host_m = env.machines[self.for_machine]
if host_m.is_haiku() or host_m.is_darwin():

@ -1000,6 +1000,7 @@ class Environment:
def detect_cuda_compiler(self, for_machine):
popen_exceptions = {}
is_cross = not self.machines.matches_build_machine(for_machine)
compilers, ccache, exe_wrap = self._get_compilers('cuda', for_machine)
for compiler in compilers:
if isinstance(compiler, str):
@ -1028,8 +1029,9 @@ class Environment:
# Luckily, the "V" also makes it very simple to extract
# the full version:
version = out.strip().split('V')[-1]
cpp_compiler = self.detect_cpp_compiler(for_machine)
linker = CudaLinker(compiler, for_machine, 'nvlink', CudaCompiler.LINKER_PREFIX, version=CudaLinker.parse_version())
return CudaCompiler(ccache + compiler, version, for_machine, exe_wrap, linker=linker)
return CudaCompiler(ccache + compiler, version, for_machine, is_cross, exe_wrap, host_compiler=cpp_compiler, linker=linker)
raise EnvironmentException('Could not find suitable CUDA compiler: "' + ' '.join(compilers) + '"')
def detect_fortran_compiler(self, for_machine: MachineChoice):

@ -938,6 +938,23 @@ class CudaLinker(DynamicLinker):
# we need the most verbose version output. Luckily starting with V
return out.strip().split('V')[-1]
def get_accepts_rsp(self) -> bool:
    """Report whether response files may be used; always ``False`` here."""
    # nvcc does not support response files
    accepts_response_files = False
    return accepts_response_files
def get_lib_prefix(self) -> str:
    """Return the prefix to put in front of a library on the link line.

    Empty everywhere except on Windows, where ``CudaCompiler.LINKER_PREFIX``
    is returned.
    """
    if mesonlib.is_windows():
        # nvcc doesn't recognize Meson's default .a extension for static libraries on
        # Windows and passes it to cl as an object file, resulting in 'warning D9024 :
        # unrecognized source file type 'xxx.a', object file assumed'.
        #
        # nvcc's --library= option doesn't help: it takes the library name without the
        # extension and assumes that the extension on Windows is .lib; prefixing the
        # library with -Xlinker= seems to work.
        from .compilers import CudaCompiler
        return CudaCompiler.LINKER_PREFIX
    return ''
def get_output_args(self, outname: str) -> typing.List[str]:
return ['-o', outname]
@ -954,3 +971,6 @@ class CudaLinker(DynamicLinker):
suffix: str, soversion: str, darwin_versions: typing.Tuple[str, str],
is_shared_module: bool) -> typing.List[str]:
return []
def get_std_shared_lib_args(self) -> typing.List[str]:
    """Return the standard arguments for linking a shared library: ``['-shared']``."""
    shared_flag = '-shared'
    return [shared_flag]

@ -1495,3 +1495,33 @@ def get_wine_shortpath(winecmd, wine_paths):
len(wine_path)))
return wine_path.strip(';')
class OptionProxy:
    """Minimal stand-in for an option object: it only carries ``value``."""

    def __init__(self, value):
        # The only attribute is the (already validated) value.
        self.value = value
class OptionOverrideProxy:
    '''Mimic an option list but transparently override
    selected option values.

    ``overrides`` maps option names to replacement values. ``options`` are
    the underlying option mappings, searched in the order given; when a
    name occurs in several of them, the first one wins.'''

    def __init__(self, overrides, *options):
        self.overrides = overrides
        self.options = options

    def __getitem__(self, option_name):
        """Return the (possibly overridden) option named ``option_name``.

        Raises ``KeyError`` when no underlying mapping contains the name.
        """
        for opts in self.options:
            if option_name in opts:
                return self._get_override(option_name, opts[option_name])
        raise KeyError('Option not found', option_name)

    def _get_override(self, option_name, base_opt):
        """Return ``base_opt``, or a proxy holding its validated override."""
        if option_name in self.overrides:
            return OptionProxy(base_opt.validate_value(self.overrides[option_name]))
        return base_opt

    def copy(self):
        """Return a plain dict of every option with overrides applied."""
        result = {}
        for opts in self.options:
            for option_name in opts:
                # Keep the first occurrence so that a name present in several
                # option sets resolves exactly as it does in __getitem__.
                if option_name not in result:
                    result[option_name] = self._get_override(option_name, opts[option_name])
        return result

@ -0,0 +1,20 @@
#include <cuda_runtime.h>
#include <iostream>
// Returns the device count written by cudaGetDeviceCount().
// The return type is deduced from `result`, i.e. int.
// The status returned by cudaGetDeviceCount() is not checked; `result` starts
// at 0, so 0 is returned if the call leaves it untouched.
auto cuda_devices() {
int result = 0;
cudaGetDeviceCount(&result);
return result;
}
// Prints how many CUDA devices cuda_devices() reports.
// Exits with status 0 in both cases, including when no device is found.
int main() {
    int n = cuda_devices();
    if (n == 0) {
        std::cout << "No Cuda hardware found. Exiting.\n";
        return 0;
    }

    // Keep a space between the count and the following word
    // ("Found 2 Cuda devices." rather than "Found 2Cuda devices.").
    std::cout << "Found " << n << " Cuda devices.\n";
    return 0;
}

@ -0,0 +1,4 @@
# CUDA-only project with cuda_std defaulting to c++14: builds main.cu into
# the executable `prog` and registers it as the test `cudatest`.
project('C++ std', 'cuda', version : '1.0.0', default_options : ['cuda_std=c++14'])
exe = executable('prog', 'main.cu')
test('cudatest', exe)

@ -0,0 +1,20 @@
#include <cuda_runtime.h>
#include <iostream>
// Returns the device count written by cudaGetDeviceCount().
// The status returned by cudaGetDeviceCount() is not checked; `result` starts
// at 0, so 0 is returned if the call leaves it untouched.
int cuda_devices() {
int result = 0;
cudaGetDeviceCount(&result);
return result;
}
// Prints how many CUDA devices cuda_devices() reports.
// Exits with status 0 in both cases, including when no device is found.
int main() {
    int n = cuda_devices();
    if (n == 0) {
        std::cout << "No Cuda hardware found. Exiting.\n";
        return 0;
    }

    // Keep a space between the count and the following word
    // ("Found 2 Cuda devices." rather than "Found 2Cuda devices.").
    std::cout << "Found " << n << " Cuda devices.\n";
    return 0;
}

@ -0,0 +1,4 @@
# CUDA-only project with b_vscrt defaulting to mtd: builds main.cu into
# the executable `prog` and registers it as the test `cudatest`.
project('static msvc runtime', 'cuda', version : '1.0.0', default_options : ['b_vscrt=mtd'])
exe = executable('prog', 'main.cu')
test('cudatest', exe)

@ -0,0 +1,20 @@
#include <cuda_runtime.h>
#include <iostream>
// Returns the device count written by cudaGetDeviceCount().
// The status returned by cudaGetDeviceCount() is not checked; `result` starts
// at 0, so 0 is returned if the call leaves it untouched.
int cuda_devices() {
int result = 0;
cudaGetDeviceCount(&result);
return result;
}
// Prints how many CUDA devices cuda_devices() reports.
// Exits with status 0 in both cases, including when no device is found.
int main() {
    int n = cuda_devices();
    if (n == 0) {
        std::cout << "No Cuda hardware found. Exiting.\n";
        return 0;
    }

    // Keep a space between the count and the following word
    // ("Found 2 Cuda devices." rather than "Found 2Cuda devices.").
    std::cout << "Found " << n << " Cuda devices.\n";
    return 0;
}

@ -0,0 +1,4 @@
# CUDA-only project with buildtype defaulting to release: builds main.cu into
# the executable `prog` and registers it as the test `cudatest`.
project('release', 'cuda', version : '1.0.0', default_options : ['buildtype=release'])
exe = executable('prog', 'main.cu')
test('cudatest', exe)

@ -0,0 +1,20 @@
#include <cuda_runtime.h>
#include <iostream>
// Returns the device count written by cudaGetDeviceCount().
// The status returned by cudaGetDeviceCount() is not checked; `result` starts
// at 0, so 0 is returned if the call leaves it untouched.
int cuda_devices() {
int result = 0;
cudaGetDeviceCount(&result);
return result;
}
// Prints how many CUDA devices cuda_devices() reports.
// Exits with status 0 in both cases, including when no device is found.
int main() {
    int n = cuda_devices();
    if (n == 0) {
        std::cout << "No Cuda hardware found. Exiting.\n";
        return 0;
    }

    // Keep a space between the count and the following word
    // ("Found 2 Cuda devices." rather than "Found 2Cuda devices.").
    std::cout << "Found " << n << " Cuda devices.\n";
    return 0;
}

@ -0,0 +1,4 @@
# CUDA-only project with optimization defaulting to s: builds main.cu into
# the executable `prog` and registers it as the test `cudatest`.
project('optimize for space', 'cuda', version : '1.0.0', default_options : ['optimization=s'])
exe = executable('prog', 'main.cu')
test('cudatest', exe)
Loading…
Cancel
Save