CUDA support on Windows

5 years ago · 6ac5db50c9
parent 1670fca36f
commit 6ac5db50c9
17 changed files with 273 additions and 51 deletions
--- a/mesonbuild/backend/backends.py
+++ b/mesonbuild/backend/backends.py
@ -20,7 +20,7 @@ from .. import mesonlib
 from .. import mlog
 import json
 import subprocess
-from ..mesonlib import MachineChoice, MesonException, OrderedSet
+from ..mesonlib import MachineChoice, MesonException, OrderedSet, OptionOverrideProxy
 from ..mesonlib import classify_unity_sources
 from ..mesonlib import File
 from ..compilers import CompilerArgs, VisualStudioLikeCompiler
@ -105,28 +105,6 @@ class TestSerialisation:
        self.protocol = protocol
        self.priority = priority

-class OptionProxy:
-    def __init__(self, value):
-        self.value = value
-
-class OptionOverrideProxy:
-    '''Mimic an option list but transparently override
-    selected option values.'''
-    def __init__(self, overrides, *options):
-        self.overrides = overrides
-        self.options = options
-
-    def __getitem__(self, option_name):
-        for opts in self.options:
-            if option_name in opts:
-                return self._get_override(option_name, opts[option_name])
-        raise KeyError('Option not found', option_name)
-
-    def _get_override(self, option_name, base_opt):
-        if option_name in self.overrides:
-            return OptionProxy(base_opt.validate_value(self.overrides[option_name]))
-        return base_opt
-
 def get_backend_from_name(backend, build):
    if backend == 'ninja':
        from . import ninjabackend
@ -650,7 +628,7 @@ class Backend:
                elif isinstance(dep, dependencies.ExternalLibrary):
                    commands += dep.get_link_args('vala')
            else:
-                commands += dep.get_compile_args()
+                commands += compiler.get_dependency_compile_args(dep)
            # Qt needs -fPIC for executables
            # XXX: We should move to -fPIC for all executables
            if isinstance(target, build.Executable):
--- a/mesonbuild/backend/ninjabackend.py
+++ b/mesonbuild/backend/ninjabackend.py
@ -1882,8 +1882,6 @@ https://gcc.gnu.org/bugzilla/show_bug.cgi?id=47485'''))
        return compiler.get_no_stdinc_args()

    def get_compile_debugfile_args(self, compiler, target, objfile):
-        if not isinstance(compiler, VisualStudioLikeCompiler):
-            return []
        # The way MSVC uses PDB files is documented exactly nowhere so
        # the following is what we have been able to decipher via
        # reverse engineering.
@ -2511,16 +2509,16 @@ https://gcc.gnu.org/bugzilla/show_bug.cgi?id=47485'''))
            # For 'automagic' deps: Boost and GTest. Also dependency('threads').
            # pkg-config puts the thread flags itself via `Cflags:`

-            commands += target.link_args
+            commands += linker.get_target_link_args(target)
            # External deps must be last because target link libraries may depend on them.
            for dep in target.get_external_deps():
                # Extend without reordering or de-dup to preserve `-L -l` sets
                # https://github.com/mesonbuild/meson/issues/1718
-                commands.extend_preserving_lflags(dep.get_link_args())
+                commands.extend_preserving_lflags(linker.get_dependency_link_args(dep))
            for d in target.get_dependencies():
                if isinstance(d, build.StaticLibrary):
                    for dep in d.get_external_deps():
-                        commands.extend_preserving_lflags(dep.get_link_args())
+                        commands.extend_preserving_lflags(linker.get_dependency_link_args(dep))

        # Add link args specific to this BuildTarget type that must not be overridden by dependencies
        commands += self.get_target_type_link_args_post_dependencies(target, linker)
--- a/mesonbuild/build.py
+++ b/mesonbuild/build.py
@ -1247,7 +1247,7 @@ You probably should put it in link_with instead.''')
        '''
        linker, _ = self.get_clink_dynamic_linker_and_stdlibs()
        # Mixing many languages with MSVC is not supported yet so ignore stdlibs.
-        if linker and linker.get_id() in {'msvc', 'clang-cl', 'intel-cl', 'llvm', 'dmd'}:
+        if linker and linker.get_id() in {'msvc', 'clang-cl', 'intel-cl', 'llvm', 'dmd', 'nvcc'}:
            return True
        return False

--- a/mesonbuild/compilers/compilers.py
+++ b/mesonbuild/compilers/compilers.py
@ -222,8 +222,8 @@ cuda_optimization_args = {'0': [],
                          'g': ['-O0'],
                          '1': ['-O1'],
                          '2': ['-O2'],
-                          '3': ['-O3', '-Otime'],
-                          's': ['-O3', '-Ospace']
+                          '3': ['-O3'],
+                          's': ['-O3']
                          }

 cuda_debug_args = {False: [],
@ -1144,6 +1144,15 @@ class Compiler:
            env, prefix, shlib_name, suffix, soversion,
            darwin_versions, is_shared_module)

+    def get_target_link_args(self, target):
+        '''Return the link arguments stored on ``target`` (``target.link_args``).'''
+        return target.link_args
+
+    def get_dependency_compile_args(self, dep):
+        '''Return the compile arguments reported by ``dep.get_compile_args()``.'''
+        return dep.get_compile_args()
+
+    def get_dependency_link_args(self, dep):
+        '''Return the link arguments reported by ``dep.get_link_args()``.'''
+        return dep.get_link_args()
+

@enum.unique
 class CompilerType(enum.Enum):
--- a/mesonbuild/compilers/cuda.py
+++ b/mesonbuild/compilers/cuda.py
@ -14,9 +14,11 @@

 import os.path
 import typing
+from functools import partial

+from .. import coredata
 from .. import mlog
-from ..mesonlib import EnvironmentException, MachineChoice, Popen_safe
+from ..mesonlib import EnvironmentException, MachineChoice, Popen_safe, OptionOverrideProxy, is_windows
 from .compilers import (Compiler, cuda_buildtype_args, cuda_optimization_args,
                        cuda_debug_args)

@ -28,19 +30,29 @@ class CudaCompiler(Compiler):

    LINKER_PREFIX = '-Xlinker='

-    def __init__(self, exelist, version, for_machine: MachineChoice, is_cross, exe_wrapper=None, **kwargs):
+    _universal_flags = {'compiler': ['-I', '-D', '-U', '-E'], 'linker': ['-l', '-L']}
+
+    def __init__(self, exelist, version, for_machine: MachineChoice, is_cross, exe_wrapper, host_compiler, **kwargs):
        if not hasattr(self, 'language'):
            self.language = 'cuda'
        super().__init__(exelist, version, for_machine, **kwargs)
        self.is_cross = is_cross
        self.exe_wrapper = exe_wrapper
+        self.host_compiler = host_compiler
+        self.base_options = host_compiler.base_options
        self.id = 'nvcc'
-        default_warn_args = []
-        self.warn_args = {'0': [],
-                          '1': default_warn_args,
-                          '2': default_warn_args + ['-Xcompiler=-Wextra'],
-                          '3': default_warn_args + ['-Xcompiler=-Wextra',
-                                                    '-Xcompiler=-Wpedantic']}
+        self.warn_args = {level: self._to_host_flags(flags) for level, flags in host_compiler.warn_args.items()}
+
+    @classmethod
+    def _to_host_flags(cls, flags, phase='compiler'):
+        '''Apply ``_to_host_flag`` to every entry of ``flags`` and return a list.
+
+        ``phase`` is forwarded unchanged to ``_to_host_flag``; it defaults
+        to 'compiler'.
+        '''
+        return list(map(partial(cls._to_host_flag, phase=phase), flags))
+
+    @classmethod
+    def _to_host_flag(cls, flag, phase):
+        '''Translate one host-toolchain argument for the given ``phase``.
+
+        Arguments that do not start with '-' or '/' (including the empty
+        string) and flags whose first two characters are listed in
+        ``_universal_flags[phase]`` are returned unchanged. Anything else
+        is wrapped as ``-X<phase>=<flag>``.
+        '''
+        # str.startswith() is safe on an empty string, unlike flag[0].
+        if not flag.startswith(('-', '/')) or flag[:2] in cls._universal_flags[phase]:
+            return flag
+
+        return '-X{}={}'.format(phase, flag)

    def needs_static_linker(self):
        return False
@ -55,7 +67,7 @@ class CudaCompiler(Compiler):
        return []

    def thread_link_flags(self, environment):
-        return self._cook_link_args(super().thread_link_flags())
+        return self._to_host_flags(self.host_compiler.thread_link_flags(environment))

    def sanity_check(self, work_dir, environment):
        mlog.debug('Sanity testing ' + self.get_display_language() + ' compiler:', ' '.join(self.exelist))
@ -163,16 +175,48 @@ class CudaCompiler(Compiler):
        int main () {{ return 0; }}'''
        return self.compiles(t.format(**fargs), env, extra_args, dependencies)

-    @staticmethod
-    def _cook_link_args(args: typing.List[str]) -> typing.List[str]:
-        """Fixup arguments."""
-        return [a.replace(' ', '\\') for a in args]
+    def get_options(self):
+        '''Return the inherited options extended with a ``cuda_std`` combo option.
+
+        ``cuda_std`` accepts 'none', 'c++03', 'c++11' or 'c++14' and
+        defaults to 'none'.
+        '''
+        opts = super().get_options()
+        opts.update({'cuda_std': coredata.UserComboOption('C++ language standard to use',
+                                                          ['none', 'c++03', 'c++11', 'c++14'],
+                                                          'none')})
+        return opts
+
+    def _to_host_compiler_options(self, options):
+        '''Present the host compiler's options with values taken from ``options``.
+
+        The current value of every entry in ``options`` is used as an
+        override on top of ``self.host_compiler.get_options()``.
+        '''
+        overrides = {name: opt.value for name, opt in options.copy().items()}
+        return OptionOverrideProxy(overrides, self.host_compiler.get_options())
+
+    def get_option_compile_args(self, options):
+        '''Build the compile arguments implied by ``options``.
+
+        The result is nvcc's own ``--std=`` flag (only when not on Windows
+        and ``cuda_std`` is not 'none') followed by the host compiler's
+        option-derived compile arguments passed through ``_to_host_flags``.
+        '''
+        args = []
+        # On Windows, the version of the C++ standard used by nvcc is dictated by
+        # the combination of CUDA version and MSVC version; the --std= is thus ignored
+        # and attempting to use it will result in a warning: https://stackoverflow.com/a/51272091/741027
+        if not is_windows():
+            std = options['cuda_std']
+            if std.value != 'none':
+                args.append('--std=' + std.value)
+
+        return args + self._to_host_flags(self.host_compiler.get_option_compile_args(self._to_host_compiler_options(options)))
+
+    @classmethod
+    def _cook_link_args(cls, args: typing.List[str]) -> typing.List[str]:
+        '''Prepare link args for nvcc.
+
+        Each argument has a leading ``-Wl,`` removed and every space replaced
+        by a backslash; the results are then passed through
+        ``_to_host_flags`` for the 'linker' phase.
+        '''
+        cooked = []  # type: typing.List[str]
+        for arg in args:
+            if arg.startswith('-Wl,'): # strip GNU-style -Wl prefix
+                arg = arg.replace('-Wl,', '', 1)
+            # NOTE(review): the space is replaced by a backslash, not preceded by one - confirm this is intended.
+            arg = arg.replace(' ', '\\') # escape whitespace
+            cooked.append(arg)
+        return cls._to_host_flags(cooked, 'linker')
+
+    def get_option_link_args(self, options):
+        return self._cook_link_args(self.host_compiler.get_option_link_args(self._to_host_compiler_options(options)))

    def name_string(self):
        return ' '.join(self.exelist)

    def get_soname_args(self, *args):
-        return self._cook_link_args(super().get_soname_args(*args))
+        return self._cook_link_args(self.host_compiler.get_soname_args(*args))

    def get_dependency_gen_args(self, outtarget, outfile):
        return []
@ -184,6 +228,9 @@ class CudaCompiler(Compiler):
        return ['-O0']

    def get_optimization_args(self, optimization_level):
+        # alternatively, consider simply redirecting this to the host compiler, which would
+        # give us more control over options like "optimize for space" (which nvcc doesn't support):
+        # return self._to_host_flags(self.host_compiler.get_optimization_args(optimization_level))
        return cuda_optimization_args[optimization_level]

    def get_debug_args(self, is_debug):
@ -196,30 +243,45 @@ class CudaCompiler(Compiler):
        return self.warn_args[level]

    def get_buildtype_args(self, buildtype):
-        return cuda_buildtype_args[buildtype]
+        # nvcc doesn't support msvc's "Edit and Continue" PDB format; "downgrade" to
+        # a regular PDB to avoid cl's warning to that effect (D9025 : overriding '/ZI' with '/Zi')
+        host_args = ['/Zi' if arg == '/ZI' else arg for arg in self.host_compiler.get_buildtype_args(buildtype)]
+        return cuda_buildtype_args[buildtype] + self._to_host_flags(host_args)

    def get_include_args(self, path, is_system):
        if path == '':
            path = '.'
        return ['-I' + path]

+    def get_compile_debugfile_args(self, rel_obj, **kwargs):
+        return self._to_host_flags(self.host_compiler.get_compile_debugfile_args(rel_obj, **kwargs))
+
+    def get_link_debugfile_args(self, targetfile):
+        return self._cook_link_args(self.host_compiler.get_link_debugfile_args(targetfile))
+
    def depfile_for_object(self, objfile):
        return objfile + '.' + self.get_depfile_suffix()

    def get_depfile_suffix(self):
        return 'd'

+    def get_linker_debug_crt_args(self) -> typing.List[str]:
+        return self._cook_link_args(self.host_compiler.get_linker_debug_crt_args())
+
+    def get_buildtype_linker_args(self, buildtype):
+        return self._cook_link_args(self.host_compiler.get_buildtype_linker_args(buildtype))
+
    def build_rpath_args(self, env: 'Environment', build_dir: str, from_dir: str,
                         rpath_paths: str, build_rpath: str,
                         install_rpath: str) -> typing.List[str]:
-        return self._cook_link_args(super().build_rpath_args(
+        return self._cook_link_args(self.host_compiler.build_rpath_args(
            env, build_dir, from_dir, rpath_paths, build_rpath, install_rpath))

    def linker_to_compiler_args(self, args):
        return args

    def get_pic_args(self):
-        return ['-Xcompiler=-fPIC']
+        return self._to_host_flags(self.host_compiler.get_pic_args())

    def compute_parameters_with_absolute_paths(self, parameter_list, build_dir):
        return []
@ -228,4 +290,25 @@ class CudaCompiler(Compiler):
        return ['-o', target]

    def get_std_exe_link_args(self) -> typing.List[str]:
-        return []
+        return self._cook_link_args(self.host_compiler.get_std_exe_link_args())
+
+    def get_crt_compile_args(self, crt_val, buildtype):
+        return self._to_host_flags(self.host_compiler.get_crt_compile_args(crt_val, buildtype))
+
+    def get_crt_link_args(self, crt_val, buildtype):
+        '''Return the C runtime link arguments, cooked for nvcc.
+
+        The host compiler's CRT link arguments are used; when its CRT
+        compile arguments contain '/MDd', '/MD' or '/MTd', they are preceded
+        by '/NODEFAULTLIB:LIBCMT.lib'. The combined list goes through
+        ``_cook_link_args``.
+        '''
+        # nvcc defaults to static, release version of msvc runtime and provides no
+        # native option to override it; override it with /NODEFAULTLIB
+        host_link_arg_overrides = []
+        host_crt_compile_args = self.host_compiler.get_crt_compile_args(crt_val, buildtype)
+        if any(arg in ['/MDd', '/MD', '/MTd'] for arg in host_crt_compile_args):
+            host_link_arg_overrides += ['/NODEFAULTLIB:LIBCMT.lib']
+        return self._cook_link_args(host_link_arg_overrides + self.host_compiler.get_crt_link_args(crt_val, buildtype))
+
+    def get_target_link_args(self, target):
+        return self._cook_link_args(super().get_target_link_args(target))
+
+    def get_dependency_compile_args(self, dep):
+        return self._to_host_flags(super().get_dependency_compile_args(dep))
+
+    def get_dependency_link_args(self, dep):
+        return self._cook_link_args(super().get_dependency_link_args(dep))
--- a/mesonbuild/compilers/mixins/clike.py
+++ b/mesonbuild/compilers/mixins/clike.py
@ -1056,6 +1056,12 @@ class CLikeCompiler:
            raise mesonlib.MesonException('Cannot find frameworks with non-clang compiler')
        return self.find_framework_impl(name, env, extra_dirs, allow_system)

+    def get_crt_compile_args(self, crt_val: str, buildtype: str) -> typing.List[str]:
+        return []
+
+    def get_crt_link_args(self, crt_val: str, buildtype: str) -> typing.List[str]:
+        return []
+
    def thread_flags(self, env):
        host_m = env.machines[self.for_machine]
        if host_m.is_haiku() or host_m.is_darwin():
--- a/mesonbuild/environment.py
+++ b/mesonbuild/environment.py
@ -1000,6 +1000,7 @@ class Environment:

    def detect_cuda_compiler(self, for_machine):
        popen_exceptions = {}
+        is_cross = not self.machines.matches_build_machine(for_machine)
        compilers, ccache, exe_wrap = self._get_compilers('cuda', for_machine)
        for compiler in compilers:
            if isinstance(compiler, str):
@ -1028,8 +1029,9 @@ class Environment:
            # Luckily, the "V" also makes it very simple to extract
            # the full version:
            version = out.strip().split('V')[-1]
+            cpp_compiler = self.detect_cpp_compiler(for_machine)
            linker = CudaLinker(compiler, for_machine, 'nvlink', CudaCompiler.LINKER_PREFIX, version=CudaLinker.parse_version())
-            return CudaCompiler(ccache + compiler, version, for_machine, exe_wrap, linker=linker)
+            return CudaCompiler(ccache + compiler, version, for_machine, is_cross, exe_wrap, host_compiler=cpp_compiler, linker=linker)
        raise EnvironmentException('Could not find suitable CUDA compiler: "' + ' '.join(compilers) + '"')

    def detect_fortran_compiler(self, for_machine: MachineChoice):
--- a/mesonbuild/linkers.py
+++ b/mesonbuild/linkers.py
@ -938,6 +938,23 @@ class CudaLinker(DynamicLinker):
        # we need the most verbose version output. Luckily starting with V
        return out.strip().split('V')[-1]

+    def get_accepts_rsp(self) -> bool:
+        # nvcc does not support response files
+        return False
+
+    def get_lib_prefix(self) -> str:
+        '''Return the library prefix: '' off Windows, ``CudaCompiler.LINKER_PREFIX`` on Windows.'''
+        if not mesonlib.is_windows():
+            return ''
+        # nvcc doesn't recognize Meson's default .a extension for static libraries on
+        # Windows and passes it to cl as an object file, resulting in 'warning D9024 :
+        # unrecognized source file type 'xxx.a', object file assumed'.
+        #
+        # nvcc's --library= option doesn't help: it takes the library name without the
+        # extension and assumes that the extension on Windows is .lib; prefixing the
+        # library with -Xlinker= seems to work.
+        # NOTE(review): imported locally, presumably to avoid a circular import - confirm.
+        from .compilers import CudaCompiler
+        return CudaCompiler.LINKER_PREFIX
+
    def get_output_args(self, outname: str) -> typing.List[str]:
        return ['-o', outname]

@ -954,3 +971,6 @@ class CudaLinker(DynamicLinker):
                        suffix: str, soversion: str, darwin_versions: typing.Tuple[str, str],
                        is_shared_module: bool) -> typing.List[str]:
        return []
+
+    def get_std_shared_lib_args(self) -> typing.List[str]:
+        return ['-shared']
--- a/mesonbuild/mesonlib.py
+++ b/mesonbuild/mesonlib.py
@ -1495,3 +1495,33 @@ def get_wine_shortpath(winecmd, wine_paths):
                len(wine_path)))

    return wine_path.strip(';')
+
+
+class OptionProxy:
+    '''Minimal option stand-in that only exposes a ``value`` attribute.'''
+    def __init__(self, value):
+        self.value = value
+
+class OptionOverrideProxy:
+    '''Mimic an option list but transparently override
+    selected option values.
+
+    ``overrides`` maps option names to replacement values. ``options`` is
+    one or more option collections that are searched in the order given.
+    '''
+    def __init__(self, overrides, *options):
+        self.overrides = overrides
+        self.options = options
+
+    def __getitem__(self, option_name):
+        '''Return the option from the first collection that defines it.
+
+        Raises KeyError when no collection contains ``option_name``.
+        '''
+        for opts in self.options:
+            if option_name in opts:
+                return self._get_override(option_name, opts[option_name])
+        raise KeyError('Option not found', option_name)
+
+    def _get_override(self, option_name, base_opt):
+        '''Return ``base_opt``, or an OptionProxy holding the validated override.'''
+        if option_name in self.overrides:
+            return OptionProxy(base_opt.validate_value(self.overrides[option_name]))
+        return base_opt
+
+    def copy(self):
+        '''Return a plain dict of every known option with overrides applied.
+
+        When several collections define the same name, the first one wins,
+        so the result agrees with what ``__getitem__`` returns.
+        '''
+        result = {}
+        for opts in self.options:
+            for option_name in opts:
+                # Keep the earliest definition to match __getitem__'s lookup order.
+                if option_name not in result:
+                    result[option_name] = self._get_override(option_name, opts[option_name])
+        return result
--- a/cases/cuda/6
+++ b/cases/cuda/6
@ -0,0 +1,20 @@
+#include <cuda_runtime.h>
+#include <iostream>
+
+auto cuda_devices() {
+    int result = 0;
+    cudaGetDeviceCount(&result);
+    return result;
+}
+
+
+// Report how many CUDA devices are visible; always exits with status 0.
+int main() {
+    int n = cuda_devices();
+    if (n == 0) {
+        std::cout << "No Cuda hardware found. Exiting.\n";
+        return 0;
+    }
+
+    // Leading space keeps the count separate from the following word.
+    std::cout << "Found " << n << " Cuda devices.\n";
+    return 0;
+}
--- a/std/meson.build
+++ b/std/meson.build
@ -0,0 +1,4 @@
+project('C++ std', 'cuda', version : '1.0.0', default_options : ['cuda_std=c++14'])
+
+exe = executable('prog', 'main.cu')
+test('cudatest', exe)
--- a/runtime/main.cu
+++ b/runtime/main.cu
@ -0,0 +1,20 @@
+#include <cuda_runtime.h>
+#include <iostream>
+
+int cuda_devices() {
+    int result = 0;
+    cudaGetDeviceCount(&result);
+    return result;
+}
+
+
+// Report how many CUDA devices are visible; always exits with status 0.
+int main() {
+    int n = cuda_devices();
+    if (n == 0) {
+        std::cout << "No Cuda hardware found. Exiting.\n";
+        return 0;
+    }
+
+    // Leading space keeps the count separate from the following word.
+    std::cout << "Found " << n << " Cuda devices.\n";
+    return 0;
+}
--- a/runtime/meson.build
+++ b/runtime/meson.build
@ -0,0 +1,4 @@
+project('static msvc runtime', 'cuda', version : '1.0.0', default_options : ['b_vscrt=mtd'])
+
+exe = executable('prog', 'main.cu')
+test('cudatest', exe)
--- a/release/main.cu
+++ b/release/main.cu
@ -0,0 +1,20 @@
+#include <cuda_runtime.h>
+#include <iostream>
+
+int cuda_devices() {
+    int result = 0;
+    cudaGetDeviceCount(&result);
+    return result;
+}
+
+
+// Report how many CUDA devices are visible; always exits with status 0.
+int main() {
+    int n = cuda_devices();
+    if (n == 0) {
+        std::cout << "No Cuda hardware found. Exiting.\n";
+        return 0;
+    }
+
+    // Leading space keeps the count separate from the following word.
+    std::cout << "Found " << n << " Cuda devices.\n";
+    return 0;
+}
--- a/release/meson.build
+++ b/release/meson.build
@ -0,0 +1,4 @@
+project('release', 'cuda', version : '1.0.0', default_options : ['buildtype=release'])
+
+exe = executable('prog', 'main.cu')
+test('cudatest', exe)
--- a/space/main.cu
+++ b/space/main.cu
@ -0,0 +1,20 @@
+#include <cuda_runtime.h>
+#include <iostream>
+
+int cuda_devices() {
+    int result = 0;
+    cudaGetDeviceCount(&result);
+    return result;
+}
+
+
+// Report how many CUDA devices are visible; always exits with status 0.
+int main() {
+    int n = cuda_devices();
+    if (n == 0) {
+        std::cout << "No Cuda hardware found. Exiting.\n";
+        return 0;
+    }
+
+    // Leading space keeps the count separate from the following word.
+    std::cout << "Found " << n << " Cuda devices.\n";
+    return 0;
+}
--- a/space/meson.build
+++ b/space/meson.build
@ -0,0 +1,4 @@
+project('optimize for space', 'cuda', version : '1.0.0', default_options : ['optimization=s'])
+
+exe = executable('prog', 'main.cu')
+test('cudatest', exe)