Merge pull request #8994 from SoapGentoo/cuda-fixes

Cuda fixes
3 years ago · 06bf056264
parent 793175f665 5c45a26c27
commit 06bf056264
5 changed files with 49 additions and 17 deletions
--- a/mesonbuild/compilers/cuda.py
+++ b/mesonbuild/compilers/cuda.py
@ -290,8 +290,7 @@ class CudaCompiler(Compiler):
                raise ValueError("-Xcompiler flag merging failed, unknown argument form!")
        return xflags

-    @classmethod
-    def _to_host_flags(cls, flags: T.List[str], phase: _Phase = _Phase.COMPILER) -> T.List[str]:
+    def _to_host_flags(self, flags: T.List[str], phase: _Phase = _Phase.COMPILER) -> T.List[str]:
        """
        Translate generic "GCC-speak" plus particular "NVCC-speak" flags to NVCC flags.

@ -353,7 +352,7 @@ class CudaCompiler(Compiler):
            # an exception for -D (where this would be value-changing) and -U (because
            # it isn't possible to define a macro with a comma in the name).

-            if flag in cls._FLAG_PASSTHRU_NOARGS:
+            if flag in self._FLAG_PASSTHRU_NOARGS:
                xflags.append(flag)
                continue

@ -384,19 +383,23 @@ class CudaCompiler(Compiler):
                else:                            # -Isomething
                    val = flag[2:]
                flag = flag[:2]                  # -I
-            elif flag in cls._FLAG_LONG2SHORT_WITHARGS or \
-                 flag in cls._FLAG_SHORT2LONG_WITHARGS:
+            elif flag in self._FLAG_LONG2SHORT_WITHARGS or \
+                 flag in self._FLAG_SHORT2LONG_WITHARGS:
                # This is either -o or a multi-letter flag, and it is receiving its
                # value isolated.
                try:
                    val = next(flagit)           # -o something
                except StopIteration:
                    pass
-            elif flag.split('=',1)[0] in cls._FLAG_LONG2SHORT_WITHARGS or \
-                 flag.split('=',1)[0] in cls._FLAG_SHORT2LONG_WITHARGS:
+            elif flag.split('=',1)[0] in self._FLAG_LONG2SHORT_WITHARGS or \
+                 flag.split('=',1)[0] in self._FLAG_SHORT2LONG_WITHARGS:
                # This is either -o or a multi-letter flag, and it is receiving its
                # value after an = sign.
                flag, val = flag.split('=',1)    # -o=something
+            # Some dependencies (e.g., BoostDependency) add unspaced "-isystem/usr/include" arguments
+            elif flag.startswith('-isystem'):
+                val = flag[8:].strip()
+                flag = flag[:8]
            else:
                # This is a flag, and it's foreign to NVCC.
                #
@ -418,7 +421,7 @@ class CudaCompiler(Compiler):
                    xflags.append('-prec-div=true')
                    xflags.append('-Xcompiler='+flag)
                else:
-                    xflags.append('-Xcompiler='+cls._shield_nvcc_list_arg(flag))
+                    xflags.append('-Xcompiler='+self._shield_nvcc_list_arg(flag))
                    # The above should securely handle GCC's -Wl, -Wa, -Wp, arguments.
                continue

@ -427,7 +430,7 @@ class CudaCompiler(Compiler):


            # Take care of the various NVCC-supported flags that need special handling.
-            flag = cls._FLAG_LONG2SHORT_WITHARGS.get(flag,flag)
+            flag = self._FLAG_LONG2SHORT_WITHARGS.get(flag,flag)

            if   flag in {'-include','-isystem','-I','-L','-l'}:
                # These flags are known to GCC, but list-valued in NVCC. They potentially
@ -439,10 +442,14 @@ class CudaCompiler(Compiler):
                # -U with comma arguments is impossible in GCC-speak (and thus unambiguous
                # in NVCC-speak, albeit unportable).
                if len(flag) == 2:
-                    xflags.append(flag+cls._shield_nvcc_list_arg(val))
+                    xflags.append(flag+self._shield_nvcc_list_arg(val))
+                elif flag == '-isystem' and val in self.host_compiler.get_default_include_dirs():
+                    # like GnuLikeCompiler, we have to filter out include directories specified
+                    # with -isystem that overlap with the host compiler's search path
+                    pass
                else:
                    xflags.append(flag)
-                    xflags.append(cls._shield_nvcc_list_arg(val))
+                    xflags.append(self._shield_nvcc_list_arg(val))
            elif flag == '-O':
                # Handle optimization levels GCC knows about that NVCC does not.
                if   val == 'fast':
@ -463,7 +470,7 @@ class CudaCompiler(Compiler):
                xflags.append(flag)
                xflags.append(val)

-        return cls._merge_flags(xflags)
+        return self._merge_flags(xflags)

    def needs_static_linker(self) -> bool:
        return False
@ -759,3 +766,12 @@ class CudaCompiler(Compiler):
            return [self._shield_nvcc_list_arg('-ccbin='+ccbindir, False)]
        else:
            return []
+
+    def get_profile_generate_args(self) -> T.List[str]:
+        return ['-Xcompiler=' + x for x in self.host_compiler.get_profile_generate_args()]
+
+    def get_profile_use_args(self) -> T.List[str]:
+        return ['-Xcompiler=' + x for x in self.host_compiler.get_profile_use_args()]
+
+    def get_disable_assert_args(self) -> T.List[str]:
+        return self.host_compiler.get_disable_assert_args()
--- a/mesonbuild/linkers/linkers.py
+++ b/mesonbuild/linkers/linkers.py
@ -1397,8 +1397,6 @@ class CudaLinker(PosixDynamicLinkerMixin, DynamicLinker):
        return False

    def get_lib_prefix(self) -> str:
-        if not mesonlib.is_windows():
-            return ''
        # nvcc doesn't recognize Meson's default .a extension for static libraries on
        # Windows and passes it to cl as an object file, resulting in 'warning D9024 :
        # unrecognized source file type 'xxx.a', object file assumed'.
@ -1406,6 +1404,12 @@ class CudaLinker(PosixDynamicLinkerMixin, DynamicLinker):
        # nvcc's --library= option doesn't help: it takes the library name without the
        # extension and assumes that the extension on Windows is .lib; prefixing the
        # library with -Xlinker= seems to work.
+        #
+        # On Linux, we have to rely on -Xlinker= too, since nvcc/nvlink chokes on
+        # versioned shared libraries:
+        #
+        #   nvcc fatal : Don't know what to do with 'subprojects/foo/libbar.so.0.1.2'
+        #
        from ..compilers import CudaCompiler
        return CudaCompiler.LINKER_PREFIX

--- a/shared/shared/meson.build
+++ b/shared/shared/meson.build
@ -1,5 +1,7 @@
 libkernels = shared_library('kernels', 'kernels.cu',
                            cuda_args: ['-DTAG_IS_SHARED=1', '-DTAG_IS_BUILDING=1'],
-                            gnu_symbol_visibility: 'hidden')
+                            gnu_symbol_visibility: 'hidden',
+                            soversion : 1,
+                            version : '1.2.3')
 libkernels = declare_dependency(compile_args: ['-DTAG_IS_SHARED=1'],
                                link_with:    libkernels)
--- a/release/main.cu
+++ b/release/main.cu
@ -1,6 +1,10 @@
 #include <cuda_runtime.h>
 #include <iostream>

+#ifndef NDEBUG
+#error "NDEBUG not defined, this is a Meson bug"
+#endif
+
 int cuda_devices(void) {
    int result = 0;
    cudaGetDeviceCount(&result);
--- a/release/meson.build
+++ b/release/meson.build
@ -1,4 +1,10 @@
-project('release', 'cuda', version : '1.0.0', default_options : ['buildtype=release'])
+project('release', 'cpp', 'cuda', version : '1.0.0', default_options : ['buildtype=release', 'b_ndebug=if-release'])

-exe = executable('prog', 'main.cu')
+# We don't actually need boost, but it serves as a common dependency
+# that has the potential to add "-isystem/usr/include" to the compile
+# line. By making it optional, we test that system search paths get
+# removed without unnecessarily failing the test if boost is absent.
+boost_dep = dependency('boost', include_type : 'system', required : false)
+
+exe = executable('prog', 'main.cu', dependencies : boost_dep)
 test('cudatest', exe)