From e632a816276f8263530fac661025eb4b7da35ef3 Mon Sep 17 00:00:00 2001 From: Olexa Bilaniuk Date: Sun, 24 Feb 2019 08:56:42 -0500 Subject: [PATCH 1/5] Rework CUDA sanity check. Partially addresses #4961. Also make the sanity check do double duty as a GPU architecture detection test. --- mesonbuild/compilers/cuda.py | 150 +++++++++++++++++++---------------- 1 file changed, 81 insertions(+), 69 deletions(-) diff --git a/mesonbuild/compilers/cuda.py b/mesonbuild/compilers/cuda.py index 66dcf3383..8f3486443 100644 --- a/mesonbuild/compilers/cuda.py +++ b/mesonbuild/compilers/cuda.py @@ -47,35 +47,97 @@ class CudaCompiler(Compiler): return [] def sanity_check(self, work_dir, environment): - source_name = os.path.join(work_dir, 'sanitycheckcuda.cu') - binary_name = os.path.join(work_dir, 'sanitycheckcuda') - extra_flags = self.get_cross_extra_flags(environment, link=False) - if self.is_cross: - extra_flags += self.get_compile_only_args() - - code = ''' -__global__ void kernel (void) { - -} + mlog.debug('Sanity testing ' + self.get_display_language() + ' compiler:', ' '.join(self.exelist)) + mlog.debug('Is cross compiler: %s.' % str(self.is_cross)) - int main(int argc,char** argv){ + sname = 'sanitycheckcuda.cu' + code = r''' + #include + #include + + __global__ void kernel (void) {} + + int main(void){ + struct cudaDeviceProp prop; + int count, i; + cudaError_t ret = cudaGetDeviceCount(&count); + if(ret != cudaSuccess){ + fprintf(stderr, "%d\n", (int)ret); + }else{ + for(i=0;i Date: Sun, 24 Feb 2019 08:58:56 -0500 Subject: [PATCH 2/5] Solve NVCC -Werror problem. Partially addresses #4961. 
--- mesonbuild/compilers/cuda.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/mesonbuild/compilers/cuda.py b/mesonbuild/compilers/cuda.py index 8f3486443..d1964fdbf 100644 --- a/mesonbuild/compilers/cuda.py +++ b/mesonbuild/compilers/cuda.py @@ -188,7 +188,7 @@ class CudaCompiler(Compiler): return cuda_debug_args[is_debug] def get_werror_args(self): - return ['-Werror'] + return ['-Werror=cross-execution-space-call,deprecated-declarations,reorder'] def get_linker_exelist(self): return self.exelist[:] From e54fd996bbc251e195ab4f451993d02783b267c4 Mon Sep 17 00:00:00 2001 From: Olexa Bilaniuk Date: Sun, 24 Feb 2019 09:00:25 -0500 Subject: [PATCH 3/5] Allow 'Auto'-mode flags to use the compiler's detected GPU architectures. --- mesonbuild/modules/unstable_cuda.py | 16 +++++++++++++--- 1 file changed, 13 insertions(+), 3 deletions(-) diff --git a/mesonbuild/modules/unstable_cuda.py b/mesonbuild/modules/unstable_cuda.py index 941b15a71..15f3a172f 100644 --- a/mesonbuild/modules/unstable_cuda.py +++ b/mesonbuild/modules/unstable_cuda.py @@ -77,10 +77,18 @@ class CudaModule(ExtensionModule): @staticmethod def _break_arch_string(s): - s = re.sub('[ \t,;]+', ';', s) + s = re.sub('[ \t\r\n,;]+', ';', s) s = s.strip(';').split(';') return s + @staticmethod + def _detected_cc_from_compiler(c): + if isinstance(c, CompilerHolder): + c = c.compiler + if isinstance(c, CudaCompiler): + return c.detected_cc + return '' + @staticmethod def _version_from_compiler(c): if isinstance(c, CompilerHolder): @@ -97,7 +105,8 @@ class CudaModule(ExtensionModule): if len(args) < 1: raise argerror else: - cuda_version = self._version_from_compiler(args[0]) + compiler = args[0] + cuda_version = self._version_from_compiler(compiler) if cuda_version == 'unknown': raise argerror @@ -108,7 +117,8 @@ class CudaModule(ExtensionModule): raise InvalidArguments('''The special architectures 'All', 'Common' and 'Auto' must appear alone, as a positional argument!''') arch_list = 
arch_list[0] if len(arch_list) == 1 else arch_list - detected = flatten([kwargs.get('detected', [])]) + detected = kwargs.get('detected', self._detected_cc_from_compiler(compiler)) + detected = flatten([detected]) detected = [self._break_arch_string(a) for a in detected] detected = flatten(detected) if not set(detected).isdisjoint({'All', 'Common', 'Auto'}): From 6d15594d441723f0f575c0b9e67f35c4b46a6120 Mon Sep 17 00:00:00 2001 From: Olexa Bilaniuk Date: Sun, 24 Feb 2019 09:14:41 -0500 Subject: [PATCH 4/5] Correct Volta+Tegra -> Xavier. --- mesonbuild/modules/unstable_cuda.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/mesonbuild/modules/unstable_cuda.py b/mesonbuild/modules/unstable_cuda.py index 15f3a172f..1a749737a 100644 --- a/mesonbuild/modules/unstable_cuda.py +++ b/mesonbuild/modules/unstable_cuda.py @@ -158,7 +158,7 @@ class CudaModule(ExtensionModule): cuda_limit_gpu_architecture = '7.0' # noqa: E221 if version_compare(cuda_version, '>=9.0'): - cuda_known_gpu_architectures += ['Volta', 'Volta+Tegra'] # noqa: E221 + cuda_known_gpu_architectures += ['Volta', 'Xavier'] # noqa: E221 cuda_common_gpu_architectures += ['7.0', '7.0+PTX'] # noqa: E221 cuda_all_gpu_architectures += ['7.0', '7.0+PTX', '7.2', '7.2+PTX'] # noqa: E221 @@ -225,7 +225,7 @@ class CudaModule(ExtensionModule): 'Pascal': (['6.0', '6.1'], ['6.1']), 'Pascal+Tegra': (['6.2'], []), 'Volta': (['7.0'], ['7.0']), - 'Volta+Tegra': (['7.2'], []), + 'Xavier': (['7.2'], []), 'Turing': (['7.5'], ['7.5']), }.get(arch_name, (None, None)) From 104397a4293f78d3cbdd84f380cefb84ca54ec99 Mon Sep 17 00:00:00 2001 From: Olexa Bilaniuk Date: Sun, 24 Feb 2019 09:24:50 -0500 Subject: [PATCH 5/5] [skip ci] Update the CUDA module documentation. 
--- docs/markdown/Cuda-module.md | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/docs/markdown/Cuda-module.md b/docs/markdown/Cuda-module.md index caa175646..f161eacd5 100644 --- a/docs/markdown/Cuda-module.md +++ b/docs/markdown/Cuda-module.md @@ -71,6 +71,14 @@ mixed with architecture names or compute capabilities. Their interpretation is: | `'Common'` | Relatively common CCs supported by given NVCC compiler. Generally excludes Tegra and Tesla devices. | | `'Auto'` | The CCs provided by the `detected:` keyword, filtered for support by given NVCC compiler. | +As a special case, when `nvcc_arch_flags()` is invoked with + +- an NVCC `compiler` object `nvcc`, +- `'Auto'` mode and +- no `detected:` keyword, + +Meson uses `nvcc`'s architecture auto-detection results. + The supported architecture names and their corresponding compute capabilities are: @@ -85,7 +93,7 @@ are: | `'Pascal'` | 6.0, 6.1 | | `'Pascal+Tegra'` | 6.2 | | `'Volta'` | 7.0 | -| `'Volta+Tegra'` | 7.2 | +| `'Xavier'` | 7.2 | | `'Turing'` | 7.5 |