Merge pull request #4972 from obilaniu/cudafixes

CUDA fixes
Merged by Jussi Pakkanen (committed via GitHub)
commit 41fb5c2960
3 changed files:

1. docs/markdown/Cuda-module.md (10 lines changed)
2. mesonbuild/compilers/cuda.py (152 lines changed)
3. mesonbuild/modules/unstable_cuda.py (20 lines changed)

@@ -71,6 +71,14 @@ mixed with architecture names or compute capabilities. Their interpretation is:
 | `'Common'` | Relatively common CCs supported by given NVCC compiler. Generally excludes Tegra and Tesla devices. |
 | `'Auto'`   | The CCs provided by the `detected:` keyword, filtered for support by given NVCC compiler. |
 
+As a special case, when `nvcc_arch_flags()` is invoked with
+
+- an NVCC `compiler` object `nvcc`,
+- `'Auto'` mode and
+- no `detected:` keyword,
+
+Meson uses `nvcc`'s architecture auto-detection results.
+
 The supported architecture names and their corresponding compute capabilities
 are:
@@ -85,7 +93,7 @@ are:
 | `'Pascal'`       | 6.0, 6.1 |
 | `'Pascal+Tegra'` | 6.2      |
 | `'Volta'`        | 7.0      |
-| `'Volta+Tegra'`  | 7.2      |
+| `'Xavier'`       | 7.2      |
 | `'Turing'`       | 7.5      |
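
Reading the documented fallback end to end: an explicit `detected:` keyword wins, otherwise `'Auto'` falls back to whatever compute capabilities the NVCC compiler object auto-detected, and the result is filtered against the CCs that NVCC release supports. A minimal Python sketch of that order (the helper name `resolve_auto_ccs` and the sample CC lists are illustrative, not part of the module):

```python
import re

def resolve_auto_ccs(detected, compiler_detected_cc, known_ccs):
    # Illustrative only: the documented 'Auto' fallback.
    # 1. an explicit detected: value wins,
    # 2. otherwise use the CCs the CUDA compiler auto-detected at
    #    sanity-check time (e.g. '6.1\n7.5\n'),
    # 3. keep only CCs the given NVCC release actually supports.
    if detected is None:
        detected = compiler_detected_cc
    ccs = [c for c in re.split(r'[ \t\r\n,;]+', detected) if c]
    return [c for c in ccs if c in known_ccs]

# An NVCC that supports CCs up to 7.2, on a machine reporting 6.1 and 7.5 devices:
print(resolve_auto_ccs(None, '6.1\n7.5\n', ['3.0', '5.0', '6.0', '6.1', '7.0', '7.2']))  # ['6.1']
```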

@@ -47,35 +47,97 @@ class CudaCompiler(Compiler):
         return []
 
     def sanity_check(self, work_dir, environment):
-        source_name = os.path.join(work_dir, 'sanitycheckcuda.cu')
-        binary_name = os.path.join(work_dir, 'sanitycheckcuda')
-        extra_flags = self.get_cross_extra_flags(environment, link=False)
-        if self.is_cross:
-            extra_flags += self.get_compile_only_args()
-
-        code = '''
-__global__ void kernel (void) {
-}
-
-int main(int argc,char** argv){
-    return 0;
-}
-'''
+        mlog.debug('Sanity testing ' + self.get_display_language() + ' compiler:', ' '.join(self.exelist))
+        mlog.debug('Is cross compiler: %s.' % str(self.is_cross))
+
+        sname = 'sanitycheckcuda.cu'
+        code = r'''
+        #include <cuda_runtime.h>
+        #include <stdio.h>
+
+        __global__ void kernel (void) {}
+
+        int main(void){
+            struct cudaDeviceProp prop;
+            int count, i;
+            cudaError_t ret = cudaGetDeviceCount(&count);
+
+            if(ret != cudaSuccess){
+                fprintf(stderr, "%d\n", (int)ret);
+            }else{
+                for(i=0;i<count;i++){
+                    if(cudaGetDeviceProperties(&prop, i) == cudaSuccess){
+                        fprintf(stdout, "%d.%d\n", prop.major, prop.minor);
+                    }
+                }
+            }
+
+            fflush(stderr);
+            fflush(stdout);
+
+            return 0;
+        }
+        '''
+
+        binname = sname.rsplit('.', 1)[0]
+        binname += '_cross' if self.is_cross else ''
+        source_name = os.path.join(work_dir, sname)
+        binary_name = os.path.join(work_dir, binname + '.exe')
         with open(source_name, 'w') as ofile:
             ofile.write(code)
-        pc = subprocess.Popen(self.exelist + extra_flags + [source_name, '-o', binary_name])
-        pc.wait()
+
+        # The Sanity Test for CUDA language will serve as both a sanity test
+        # and a native-build GPU architecture detection test, useful later.
+        #
+        # For this second purpose, NVCC has very handy flags, --run and
+        # --run-args, that allow one to run an application with the
+        # environment set up properly. Of course, this only works for native
+        # builds; for cross builds we must still use the exe_wrapper (if any).
+        self.detected_cc = ''
+        flags = ['-w', '-cudart', 'static', source_name]
+        if self.is_cross and self.exe_wrapper is None:
+            # Linking cross built apps is painful. You can't really
+            # tell if you should use -nostdlib or not and for example
+            # on OSX the compiler binary is the same but you need
+            # a ton of compiler flags to differentiate between
+            # arm and x86_64. So just compile.
+            flags += self.get_compile_only_args()
+        flags += self.get_output_args(binary_name)
+
+        # Compile sanity check
+        cmdlist = self.exelist + flags
+        mlog.debug('Sanity check compiler command line: ', ' '.join(cmdlist))
+        pc, stdo, stde = Popen_safe(cmdlist, cwd=work_dir)
+        mlog.debug('Sanity check compile stdout: ')
+        mlog.debug(stdo)
+        mlog.debug('-----\nSanity check compile stderr:')
+        mlog.debug(stde)
+        mlog.debug('-----')
         if pc.returncode != 0:
-            raise EnvironmentException('Cuda compiler %s can not compile programs.' % self.name_string())
+            raise EnvironmentException('Compiler {0} can not compile programs.'.format(self.name_string()))
+
+        # Run sanity check (if possible)
         if self.is_cross:
-            # Can't check if the binaries run so we have to assume they do
-            return
-        pe = subprocess.Popen(binary_name)
+            if self.exe_wrapper is None:
+                return
+            else:
+                cmdlist = self.exe_wrapper + [binary_name]
+        else:
+            cmdlist = self.exelist + ['--run', '"' + binary_name + '"']
+        mlog.debug('Sanity check run command line: ', ' '.join(cmdlist))
+        pe, stdo, stde = Popen_safe(cmdlist, cwd=work_dir)
+        mlog.debug('Sanity check run stdout: ')
+        mlog.debug(stdo)
+        mlog.debug('-----\nSanity check run stderr:')
+        mlog.debug(stde)
+        mlog.debug('-----')
         pe.wait()
         if pe.returncode != 0:
-            raise EnvironmentException('Executables created by Cuda compiler %s are not runnable.' % self.name_string())
+            raise EnvironmentException('Executables created by {0} compiler {1} are not runnable.'.format(self.language, self.name_string()))
+
+        # Interpret the result of the sanity test.
+        # As mentioned above, it is not only a sanity test but also a GPU
+        # architecture detection test.
+        if stde == '':
+            self.detected_cc = stdo
+        else:
+            mlog.debug('cudaGetDeviceCount() returned ' + stde)
 
     def get_compiler_check_args(self):
         return super().get_compiler_check_args() + []
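
For a native build, the sequence above reduces to two NVCC invocations: compile the probe with warnings suppressed against the static CUDA runtime, then execute it through NVCC's `--run` so the environment is set up correctly. A rough sketch of the resulting command lines and of how the probe's output is interpreted (the plain `nvcc` executable name and the `-o <file>` output arguments are assumed for illustration):

```python
# Illustrative only: roughly what the new sanity_check() runs on a native
# build, assuming a plain 'nvcc' exelist and '-o <file>' output arguments.
exelist = ['nvcc']
compile_cmd = exelist + ['-w', '-cudart', 'static', 'sanitycheckcuda.cu',
                         '-o', 'sanitycheckcuda.exe']
run_cmd = exelist + ['--run', '"sanitycheckcuda.exe"']
print(' '.join(compile_cmd))  # nvcc -w -cudart static sanitycheckcuda.cu -o sanitycheckcuda.exe
print(' '.join(run_cmd))      # nvcc --run "sanitycheckcuda.exe"

# The probe prints one "major.minor" line per visible GPU on stdout, or a
# nonzero cudaError_t code on stderr; only a clean stderr populates detected_cc.
def interpret(stdout, stderr):
    return stdout if stderr == '' else ''

print(repr(interpret('6.1\n7.5\n', '')))  # '6.1\n7.5\n'
```
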
@@ -92,56 +154,6 @@ __global__ void kernel (void) {
         int main () {{ return 0; }}'''
         return self.compiles(t.format(**fargs), env, extra_args, dependencies)
 
-    def sanity_check_impl(self, work_dir, environment, sname, code):
-        mlog.debug('Sanity testing ' + self.get_display_language() + ' compiler:', ' '.join(self.exelist))
-        mlog.debug('Is cross compiler: %s.' % str(self.is_cross))
-
-        extra_flags = []
-        source_name = os.path.join(work_dir, sname)
-        binname = sname.rsplit('.', 1)[0]
-        if self.is_cross:
-            binname += '_cross'
-            if self.exe_wrapper is None:
-                # Linking cross built apps is painful. You can't really
-                # tell if you should use -nostdlib or not and for example
-                # on OSX the compiler binary is the same but you need
-                # a ton of compiler flags to differentiate between
-                # arm and x86_64. So just compile.
-                extra_flags += self.get_cross_extra_flags(environment, link=False)
-                extra_flags += self.get_compile_only_args()
-            else:
-                extra_flags += self.get_cross_extra_flags(environment, link=True)
-        # Is a valid executable output for all toolchains and platforms
-        binname += '.exe'
-        # Write binary check source
-        binary_name = os.path.join(work_dir, binname)
-        with open(source_name, 'w') as ofile:
-            ofile.write(code)
-        # Compile sanity check
-        cmdlist = self.exelist + extra_flags + [source_name] + self.get_output_args(binary_name)
-        pc, stdo, stde = Popen_safe(cmdlist, cwd=work_dir)
-        mlog.debug('Sanity check compiler command line:', ' '.join(cmdlist))
-        mlog.debug('Sanity check compile stdout:')
-        mlog.debug(stdo)
-        mlog.debug('-----\nSanity check compile stderr:')
-        mlog.debug(stde)
-        mlog.debug('-----')
-        if pc.returncode != 0:
-            raise EnvironmentException('Compiler {0} can not compile programs.'.format(self.name_string()))
-        # Run sanity check
-        if self.is_cross:
-            if self.exe_wrapper is None:
-                # Can't check if the binaries run so we have to assume they do
-                return
-            cmdlist = self.exe_wrapper + [binary_name]
-        else:
-            cmdlist = [binary_name]
-        mlog.debug('Running test binary command: ' + ' '.join(cmdlist))
-        pe = subprocess.Popen(cmdlist)
-        pe.wait()
-        if pe.returncode != 0:
-            raise EnvironmentException('Executables created by {0} compiler {1} are not runnable.'.format(self.language, self.name_string()))
-
     @staticmethod
     def _cook_link_args(args):
         """
@@ -176,7 +188,7 @@ __global__ void kernel (void) {
         return cuda_debug_args[is_debug]
 
     def get_werror_args(self):
-        return ['-Werror']
+        return ['-Werror=cross-execution-space-call,deprecated-declarations,reorder']
 
     def get_linker_exelist(self):
         return self.exelist[:]

@@ -77,10 +77,18 @@ class CudaModule(ExtensionModule):
     @staticmethod
     def _break_arch_string(s):
-        s = re.sub('[ \t,;]+', ';', s)
+        s = re.sub('[ \t\r\n,;]+', ';', s)
         s = s.strip(';').split(';')
         return s
 
+    @staticmethod
+    def _detected_cc_from_compiler(c):
+        if isinstance(c, CompilerHolder):
+            c = c.compiler
+        if isinstance(c, CudaCompiler):
+            return c.detected_cc
+        return ''
+
     @staticmethod
     def _version_from_compiler(c):
         if isinstance(c, CompilerHolder):
@@ -97,7 +105,8 @@ class CudaModule(ExtensionModule):
         if len(args) < 1:
             raise argerror
         else:
-            cuda_version = self._version_from_compiler(args[0])
+            compiler = args[0]
+            cuda_version = self._version_from_compiler(compiler)
             if cuda_version == 'unknown':
                 raise argerror
@@ -108,7 +117,8 @@ class CudaModule(ExtensionModule):
             raise InvalidArguments('''The special architectures 'All', 'Common' and 'Auto' must appear alone, as a positional argument!''')
         arch_list = arch_list[0] if len(arch_list) == 1 else arch_list
 
-        detected = flatten([kwargs.get('detected', [])])
+        detected = kwargs.get('detected', self._detected_cc_from_compiler(compiler))
+        detected = flatten([detected])
         detected = [self._break_arch_string(a) for a in detected]
         detected = flatten(detected)
         if not set(detected).isdisjoint({'All', 'Common', 'Auto'}):
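
Because the probe's raw stdout (e.g. `'6.1\n7.5\n'`) is now a legal default for `detected:`, `_break_arch_string()` also has to split on CR/LF, which is what the widened character class above provides. A standalone illustration of the normalization pipeline (re-implemented here for the example rather than imported from the module):

```python
import re
from functools import reduce

def break_arch_string(s):
    # Same normalization as CudaModule._break_arch_string() after this PR:
    # runs of spaces, tabs, CR/LF, commas or semicolons all separate entries.
    s = re.sub('[ \t\r\n,;]+', ';', s)
    return s.strip(';').split(';')

detected = '6.1\n7.5\n'                              # raw detected_cc from the sanity check
detected = [break_arch_string(a) for a in [detected]]
detected = reduce(lambda a, b: a + b, detected, [])  # stand-in for Meson's flatten()
print(detected)                                      # ['6.1', '7.5']
```
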
@@ -148,7 +158,7 @@ class CudaModule(ExtensionModule):
             cuda_limit_gpu_architecture   = '7.0'                                  # noqa: E221
 
         if version_compare(cuda_version, '>=9.0'):
-            cuda_known_gpu_architectures  += ['Volta', 'Volta+Tegra']              # noqa: E221
+            cuda_known_gpu_architectures  += ['Volta', 'Xavier']                   # noqa: E221
             cuda_common_gpu_architectures += ['7.0', '7.0+PTX']                    # noqa: E221
             cuda_all_gpu_architectures    += ['7.0', '7.0+PTX', '7.2', '7.2+PTX']  # noqa: E221
@@ -215,7 +225,7 @@ class CudaModule(ExtensionModule):
             'Pascal':       (['6.0', '6.1'], ['6.1']),
             'Pascal+Tegra': (['6.2'],        []),
             'Volta':        (['7.0'],        ['7.0']),
-            'Volta+Tegra':  (['7.2'],        []),
+            'Xavier':       (['7.2'],        []),
             'Turing':       (['7.5'],        ['7.5']),
         }.get(arch_name, (None, None))
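
Each entry in this table pairs an architecture name with (binary CCs, PTX CCs): the first list is compiled to real device code, the second additionally embeds forward-compatible PTX. Renaming `'Volta+Tegra'` to `'Xavier'` only changes the user-facing name; CC 7.2 still gets no implicit PTX entry. A sketch of the intent in NVCC's `-gencode` notation (an approximation, not the module's exact output formatting):

```python
def gencode_flags(bin_ccs, ptx_ccs):
    # Sketch: expand one (binary CCs, PTX CCs) pair from the table above into
    # NVCC -gencode flags; binary CCs become cubins, PTX CCs embed PTX.
    flags = []
    for cc in bin_ccs:
        num = cc.replace('.', '')
        flags.append('-gencode arch=compute_{0},code=sm_{0}'.format(num))
    for cc in ptx_ccs:
        num = cc.replace('.', '')
        flags.append('-gencode arch=compute_{0},code=compute_{0}'.format(num))
    return flags

print(gencode_flags(['7.0'], ['7.0']))  # Volta: real 7.0 cubin plus 7.0 PTX
print(gencode_flags(['7.2'], []))       # Xavier: real 7.2 cubin only
```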
