Merge pull request #3919 from mesonbuild/cudarebase

Add Cuda support
pull/4819/head
Jussi Pakkanen 6 years ago committed by GitHub
commit 379b42c5b1
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
  1. 7
      docs/markdown/snippets/cuda.md
  2. 2
      mesonbuild/compilers/__init__.py
  3. 22
      mesonbuild/compilers/compilers.py
  4. 202
      mesonbuild/compilers/cuda.py
  5. 44
      mesonbuild/environment.py
  6. 1
      run_project_tests.py
  7. 5
      test cases/cuda/1 simple/meson.build
  8. 30
      test cases/cuda/1 simple/prog.cu
  9. 13
      test cases/cuda/2 split/lib.cu
  10. 7
      test cases/cuda/2 split/main.cpp
  11. 7
      test cases/cuda/2 split/meson.build
  12. 13
      test cases/cuda/2 split/static/lib.cu
  13. 13
      test cases/cuda/2 split/static/libsta.cu
  14. 7
      test cases/cuda/2 split/static/main_static.cpp
  15. 4
      test cases/cuda/2 split/static/meson.build

@ -0,0 +1,7 @@
## Cuda support
Compiling Cuda source code is now supported, though only with the
Ninja backend. This has been tested only on Linux for now.
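Because NVIDIA's Cuda compiler does not produce `.d` dependency files,
dependency tracking of included headers does not work: editing a header
does not trigger a rebuild of the Cuda sources that include it.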

@ -72,6 +72,7 @@ __all__ = [
'JavaCompiler', 'JavaCompiler',
'LLVMDCompiler', 'LLVMDCompiler',
'MonoCompiler', 'MonoCompiler',
'NvidiaCudaCompiler',
'VisualStudioCsCompiler', 'VisualStudioCsCompiler',
'NAGFortranCompiler', 'NAGFortranCompiler',
'ObjCCompiler', 'ObjCCompiler',
@ -153,6 +154,7 @@ from .d import (
GnuDCompiler, GnuDCompiler,
LLVMDCompiler, LLVMDCompiler,
) )
from .cuda import CudaCompiler
from .fortran import ( from .fortran import (
FortranCompiler, FortranCompiler,
G95FortranCompiler, G95FortranCompiler,

@ -37,6 +37,7 @@ lib_suffixes = ('a', 'lib', 'dll', 'dylib', 'so')
lang_suffixes = { lang_suffixes = {
'c': ('c',), 'c': ('c',),
'cpp': ('cpp', 'cc', 'cxx', 'c++', 'hh', 'hpp', 'ipp', 'hxx'), 'cpp': ('cpp', 'cc', 'cxx', 'c++', 'hh', 'hpp', 'ipp', 'hxx'),
'cuda': ('cu',),
# f90, f95, f03, f08 are for free-form fortran ('f90' recommended) # f90, f95, f03, f08 are for free-form fortran ('f90' recommended)
# f, for, ftn, fpp are for fixed-form fortran ('f' or 'for' recommended) # f, for, ftn, fpp are for fixed-form fortran ('f' or 'for' recommended)
'fortran': ('f90', 'f95', 'f03', 'f08', 'f', 'for', 'ftn', 'fpp'), 'fortran': ('f90', 'f95', 'f03', 'f08', 'f', 'for', 'ftn', 'fpp'),
@ -58,7 +59,7 @@ clib_langs = ('objcpp', 'cpp', 'objc', 'c', 'fortran',)
# List of languages that can be linked with C code directly by the linker # List of languages that can be linked with C code directly by the linker
# used in build.py:process_compilers() and build.py:get_dynamic_linker() # used in build.py:process_compilers() and build.py:get_dynamic_linker()
# XXX: Add Rust to this? # XXX: Add Rust to this?
clink_langs = ('d',) + clib_langs clink_langs = ('d', 'cuda') + clib_langs
clink_suffixes = () clink_suffixes = ()
for _l in clink_langs + ('vala',): for _l in clink_langs + ('vala',):
clink_suffixes += lang_suffixes[_l] clink_suffixes += lang_suffixes[_l]
@ -69,6 +70,7 @@ soregex = re.compile(r'.*\.so(\.[0-9]+)?(\.[0-9]+)?(\.[0-9]+)?$')
# Environment variables that each lang uses. # Environment variables that each lang uses.
cflags_mapping = {'c': 'CFLAGS', cflags_mapping = {'c': 'CFLAGS',
'cpp': 'CXXFLAGS', 'cpp': 'CXXFLAGS',
'cuda': 'CUFLAGS',
'objc': 'OBJCFLAGS', 'objc': 'OBJCFLAGS',
'objcpp': 'OBJCXXFLAGS', 'objcpp': 'OBJCXXFLAGS',
'fortran': 'FFLAGS', 'fortran': 'FFLAGS',
@ -143,6 +145,13 @@ armclang_buildtype_args = {'plain': [],
'custom': [], 'custom': [],
} }
# Extra nvcc arguments added for each Meson buildtype. All entries are empty:
# optimisation and debug flags come from cuda_optimization_args and
# cuda_debug_args instead. 'custom' is listed like in the sibling
# *_buildtype_args tables so that looking it up does not raise KeyError.
cuda_buildtype_args = {'plain': [],
                       'debug': [],
                       'debugoptimized': [],
                       'release': [],
                       'minsize': [],
                       'custom': [],
                       }
arm_buildtype_args = {'plain': [], arm_buildtype_args = {'plain': [],
'debug': ['-O0', '--debug'], 'debug': ['-O0', '--debug'],
'debugoptimized': ['-O1', '--debug'], 'debugoptimized': ['-O1', '--debug'],
@ -345,6 +354,17 @@ msvc_optimization_args = {'0': [],
's': ['/O1'], # Implies /Os. 's': ['/O1'], # Implies /Os.
} }
# nvcc arguments for each Meson optimization level.
# nvcc's -O option only takes a numeric level; the armcc-specific
# '-Otime' / '-Ospace' modifiers are not nvcc options and have been dropped.
cuda_optimization_args = {'0': [],
                          'g': ['-O0'],
                          '1': ['-O1'],
                          '2': ['-O2'],
                          '3': ['-O3'],
                          's': ['-O3']
                          }
# nvcc arguments keyed by whether debug information is requested.
cuda_debug_args = {
    True: ['-g'],
    False: [],
}
clike_debug_args = {False: [], clike_debug_args = {False: [],
True: ['-g']} True: ['-g']}

@ -0,0 +1,202 @@
# Copyright 2012-2017 The Meson development team
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
# http://www.apache.org/licenses/LICENSE-2.0
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import subprocess, os.path
from .. import mlog
from ..mesonlib import EnvironmentException, Popen_safe
from .compilers import Compiler, cuda_buildtype_args, cuda_optimization_args, cuda_debug_args
class CudaCompiler(Compiler):
    """Compiler wrapper for NVIDIA's ``nvcc`` CUDA compiler driver.

    nvcc is used both for compiling and for linking (see
    ``get_linker_exelist``), no dependency files are generated (see
    ``get_dependency_gen_args``) and no extra static linker is requested.
    """

    def __init__(self, exelist, version, is_cross, exe_wrapper=None):
        """Store the compiler command line and cross-compilation settings.

        exelist     -- command (list of strings) used to invoke nvcc
        version     -- compiler version as detected by the caller
        is_cross    -- whether this instance is used for cross compilation
        exe_wrapper -- optional command prefix used to run cross-built
                       binaries in ``sanity_check_impl``
        """
        if not hasattr(self, 'language'):
            self.language = 'cuda'
        super().__init__(exelist, version)
        self.is_cross = is_cross
        self.exe_wrapper = exe_wrapper
        self.id = 'nvcc'
        default_warn_args = []
        # nvcc rejects options it does not know, so host-compiler warning
        # flags have to be forwarded with -Xcompiler instead of being passed
        # directly. NOTE(review): assumes a GCC-compatible host compiler.
        self.warn_args = {'1': default_warn_args,
                          '2': default_warn_args + ['-Xcompiler=-Wextra'],
                          '3': default_warn_args + ['-Xcompiler=-Wextra',
                                                    '-Xcompiler=-Wpedantic']}

    def needs_static_linker(self):
        return False

    def get_display_language(self):
        return 'Cuda'

    def get_no_stdinc_args(self):
        return []

    def sanity_check(self, work_dir, environment):
        """Compile (and, for native builds, run) a trivial CUDA program.

        Raises EnvironmentException if compilation fails or if the natively
        built binary exits with a non-zero status.

        NOTE(review): this largely duplicates sanity_check_impl(); consider
        delegating to it once the differing messages/output names are agreed.
        """
        source_name = os.path.join(work_dir, 'sanitycheckcuda.cu')
        binary_name = os.path.join(work_dir, 'sanitycheckcuda')
        # Copy, so that appending below never mutates a list owned by the
        # method that returned it.
        extra_flags = list(self.get_cross_extra_flags(environment, link=False))
        if self.is_cross:
            extra_flags += self.get_compile_only_args()
        code = '''
__global__ void kernel (void) {
}
int main(int argc,char** argv){
return 0;
}
'''
        with open(source_name, 'w') as ofile:
            ofile.write(code)
        pc = subprocess.Popen(self.exelist + extra_flags + [source_name, '-o', binary_name])
        pc.wait()
        if pc.returncode != 0:
            raise EnvironmentException('Cuda compiler %s can not compile programs.' % self.name_string())
        if self.is_cross:
            # Can't check if the binaries run so we have to assume they do
            return
        pe = subprocess.Popen(binary_name)
        pe.wait()
        if pe.returncode != 0:
            raise EnvironmentException('Executables created by Cuda compiler %s are not runnable.' % self.name_string())

    def get_compiler_check_args(self):
        # "+ []" yields a fresh list so callers cannot mutate the parent's.
        return super().get_compiler_check_args() + []

    def has_header_symbol(self, hname, symbol, prefix, env, extra_args=None, dependencies=None):
        """Return True if ``symbol`` is available after including ``hname``.

        Tries the parent implementation first and then falls back to a
        C++-style ``using`` declaration for the symbol.
        """
        if super().has_header_symbol(hname, symbol, prefix, env, extra_args, dependencies):
            return True
        if extra_args is None:
            extra_args = []
        fargs = {'prefix': prefix, 'header': hname, 'symbol': symbol}
        t = '''{prefix}
#include <{header}>
using {symbol};
int main () {{ return 0; }}'''
        return self.compiles(t.format(**fargs), env, extra_args, dependencies)

    def sanity_check_impl(self, work_dir, environment, sname, code):
        """Compile ``code`` as ``sname`` in ``work_dir`` and try to run it.

        For cross builds without an exe wrapper the source is only compiled;
        with a wrapper the result is linked and run through the wrapper.
        Raises EnvironmentException if compiling or running fails.
        """
        mlog.debug('Sanity testing ' + self.get_display_language() + ' compiler:', ' '.join(self.exelist))
        mlog.debug('Is cross compiler: %s.' % str(self.is_cross))

        extra_flags = []
        source_name = os.path.join(work_dir, sname)
        binname = sname.rsplit('.', 1)[0]
        if self.is_cross:
            binname += '_cross'
            if self.exe_wrapper is None:
                # Linking cross built apps is painful. You can't really
                # tell if you should use -nostdlib or not and for example
                # on OSX the compiler binary is the same but you need
                # a ton of compiler flags to differentiate between
                # arm and x86_64. So just compile.
                extra_flags += self.get_cross_extra_flags(environment, link=False)
                extra_flags += self.get_compile_only_args()
            else:
                extra_flags += self.get_cross_extra_flags(environment, link=True)
        # Is a valid executable output for all toolchains and platforms
        binname += '.exe'
        # Write binary check source
        binary_name = os.path.join(work_dir, binname)
        with open(source_name, 'w') as ofile:
            ofile.write(code)
        # Compile sanity check
        cmdlist = self.exelist + extra_flags + [source_name] + self.get_output_args(binary_name)
        pc, stdo, stde = Popen_safe(cmdlist, cwd=work_dir)
        mlog.debug('Sanity check compiler command line:', ' '.join(cmdlist))
        mlog.debug('Sanity check compile stdout:')
        mlog.debug(stdo)
        mlog.debug('-----\nSanity check compile stderr:')
        mlog.debug(stde)
        mlog.debug('-----')
        if pc.returncode != 0:
            raise EnvironmentException('Compiler {0} can not compile programs.'.format(self.name_string()))
        # Run sanity check
        if self.is_cross:
            if self.exe_wrapper is None:
                # Can't check if the binaries run so we have to assume they do
                return
            cmdlist = self.exe_wrapper + [binary_name]
        else:
            cmdlist = [binary_name]
        mlog.debug('Running test binary command: ' + ' '.join(cmdlist))
        pe = subprocess.Popen(cmdlist)
        pe.wait()
        if pe.returncode != 0:
            raise EnvironmentException('Executables created by {0} compiler {1} are not runnable.'.format(self.language, self.name_string()))

    def get_output_args(self, target):
        return ['-o', target]

    def name_string(self):
        return ' '.join(self.exelist)

    def get_dependency_gen_args(self, outtarget, outfile):
        # No dependency-file arguments are emitted for nvcc.
        return []

    def get_compile_only_args(self):
        return ['-c']

    def get_no_optimization_args(self):
        return ['-O0']

    def get_optimization_args(self, optimization_level):
        return cuda_optimization_args[optimization_level]

    def get_debug_args(self, is_debug):
        return cuda_debug_args[is_debug]

    def get_linker_exelist(self):
        # nvcc doubles as the link driver; return a copy of the command.
        return self.exelist[:]

    def get_linker_output_args(self, outputname):
        return ['-o', outputname]

    def get_warn_args(self, level):
        return self.warn_args[level]

    def get_buildtype_args(self, buildtype):
        return cuda_buildtype_args[buildtype]

    def get_include_args(self, path, is_system):
        if path == '':
            path = '.'
        return ['-I' + path]

    def depfile_for_object(self, objfile):
        return objfile + '.' + self.get_depfile_suffix()

    def get_depfile_suffix(self):
        return 'd'

    def get_buildtype_linker_args(self, buildtype):
        return []

    def get_std_exe_link_args(self):
        return []

    def build_rpath_args(self, build_dir, from_dir, rpath_paths, build_rpath, install_rpath):
        return []

    def get_linker_search_args(self, dirname):
        # nvcc takes library search directories as -L<dir>; the MSVC-style
        # '/LIBPATH:' spelling is not an nvcc option.
        return ['-L' + dirname]

    def linker_to_compiler_args(self, args):
        # The linker command is nvcc itself (see get_linker_exelist), so
        # linker arguments need no MSVC-style '/link' prefix.
        return list(args)

    def get_pic_args(self):
        return []

    def compute_parameters_with_absolute_paths(self, parameter_list, build_dir):
        """Return ``parameter_list`` with -I/-L paths resolved against ``build_dir``.

        Parameters other than ``-I<path>`` / ``-L<path>`` are passed through
        unchanged; the input list is not modified.
        """
        resolved = []
        for param in parameter_list:
            if param[:2] in ('-I', '-L') and len(param) > 2:
                param = param[:2] + os.path.normpath(os.path.join(build_dir, param[2:]))
            resolved.append(param)
        return resolved

@ -59,6 +59,7 @@ from .compilers import (
IntelFortranCompiler, IntelFortranCompiler,
JavaCompiler, JavaCompiler,
MonoCompiler, MonoCompiler,
CudaCompiler,
VisualStudioCsCompiler, VisualStudioCsCompiler,
NAGFortranCompiler, NAGFortranCompiler,
Open64FortranCompiler, Open64FortranCompiler,
@ -99,6 +100,17 @@ known_cpu_families = (
'x86_64' 'x86_64'
) )
# Environment variables that each lang uses.
# Keys are language names, not file suffixes: the Cuda entry is therefore
# 'cuda' (the name used in lang_suffixes and in the compilers module's
# cflags_mapping), not 'cu'.
# NOTE(review): this table duplicates cflags_mapping from the compilers
# module; consider importing that one instead of keeping a second copy.
cflags_mapping = {'c': 'CFLAGS',
                  'cpp': 'CXXFLAGS',
                  'cuda': 'CUFLAGS',
                  'objc': 'OBJCFLAGS',
                  'objcpp': 'OBJCXXFLAGS',
                  'fortran': 'FFLAGS',
                  'd': 'DFLAGS',
                  'vala': 'VALAFLAGS'}
def detect_gcovr(version='3.1', log=False): def detect_gcovr(version='3.1', log=False):
gcovr_exe = 'gcovr' gcovr_exe = 'gcovr'
try: try:
@ -410,6 +422,7 @@ class Environment:
self.default_d = ['ldc2', 'ldc', 'gdc', 'dmd'] self.default_d = ['ldc2', 'ldc', 'gdc', 'dmd']
self.default_fortran = ['gfortran', 'g95', 'f95', 'f90', 'f77', 'ifort', 'pgfortran'] self.default_fortran = ['gfortran', 'g95', 'f95', 'f90', 'f77', 'ifort', 'pgfortran']
self.default_java = ['javac'] self.default_java = ['javac']
self.default_cuda = ['nvcc']
self.default_rust = ['rustc'] self.default_rust = ['rustc']
self.default_swift = ['swiftc'] self.default_swift = ['swiftc']
self.default_vala = ['valac'] self.default_vala = ['valac']
@ -417,6 +430,7 @@ class Environment:
self.default_strip = ['strip'] self.default_strip = ['strip']
self.vs_static_linker = ['lib'] self.vs_static_linker = ['lib']
self.clang_cl_static_linker = ['llvm-lib'] self.clang_cl_static_linker = ['llvm-lib']
self.cuda_static_linker = ['nvlink']
self.gcc_static_linker = ['gcc-ar'] self.gcc_static_linker = ['gcc-ar']
self.clang_static_linker = ['llvm-ar'] self.clang_static_linker = ['llvm-ar']
self.default_pkgconfig = ['pkg-config'] self.default_pkgconfig = ['pkg-config']
@ -737,6 +751,25 @@ class Environment:
def detect_cpp_compiler(self, want_cross): def detect_cpp_compiler(self, want_cross):
return self._detect_c_or_cpp_compiler('cpp', want_cross) return self._detect_c_or_cpp_compiler('cpp', want_cross)
def detect_cuda_compiler(self, want_cross):
    """Return a CudaCompiler for the first candidate that can be executed.

    Each candidate from ``self._get_compilers('cuda', want_cross)`` is run
    with ``--version``; the first one that starts is wrapped in a
    CudaCompiler. Candidates may be a single string or a list/tuple of
    command words.

    Raises EnvironmentException if a candidate has an unsupported type or
    if no candidate could be started; in the latter case the message also
    lists the OS errors that were encountered.
    """
    popen_exceptions = {}
    compilers, ccache, is_cross, exe_wrap = self._get_compilers('cuda', want_cross)
    for compiler in compilers:
        if isinstance(compiler, str):
            compiler = [compiler]
        elif isinstance(compiler, (list, tuple)):
            compiler = list(compiler)
        else:
            raise EnvironmentException('Invalid CUDA compiler specification: {!r}'.format(compiler))
        cmd = compiler + ['--version']
        try:
            p, out, err = Popen_safe(cmd)
        except OSError as e:
            # Remember why this candidate failed and try the next one.
            popen_exceptions[' '.join(cmd)] = e
            continue
        version = search_version(out)
        return CudaCompiler(ccache + compiler, version, is_cross, exe_wrap)
    # Candidates may be strings or word lists; flatten both for display.
    tried = ' '.join(c if isinstance(c, str) else ' '.join(c) for c in compilers)
    message = 'Could not find suitable CUDA compiler: "' + tried + '"'
    for command, error in popen_exceptions.items():
        message += '\nRunning "{0}" gave "{1}"'.format(command, error)
    raise EnvironmentException(message)
def detect_fortran_compiler(self, want_cross): def detect_fortran_compiler(self, want_cross):
popen_exceptions = {} popen_exceptions = {}
compilers, ccache, is_cross, exe_wrap = self._get_compilers('fortran', want_cross) compilers, ccache, is_cross, exe_wrap = self._get_compilers('fortran', want_cross)
@ -999,6 +1032,10 @@ class Environment:
comp = self.detect_objc_compiler(False) comp = self.detect_objc_compiler(False)
if need_cross_compiler: if need_cross_compiler:
cross_comp = self.detect_objc_compiler(True) cross_comp = self.detect_objc_compiler(True)
elif lang == 'cuda':
comp = self.detect_cuda_compiler(False)
if need_cross_compiler:
cross_comp = self.detect_cuda_compiler(True)
elif lang == 'objcpp': elif lang == 'objcpp':
comp = self.detect_objcpp_compiler(False) comp = self.detect_objcpp_compiler(False)
if need_cross_compiler: if need_cross_compiler:
@ -1057,7 +1094,12 @@ class Environment:
if linker is not None: if linker is not None:
linkers = [linker] linkers = [linker]
else: else:
if isinstance(compiler, compilers.VisualStudioCCompiler): evar = 'AR'
if isinstance(compiler, compilers.CudaCompiler):
linkers = [self.cuda_static_linker, self.default_static_linker]
elif evar in os.environ:
linkers = [shlex.split(os.environ[evar])]
elif isinstance(compiler, compilers.VisualStudioCCompiler):
linkers = [self.vs_static_linker, self.clang_cl_static_linker] linkers = [self.vs_static_linker, self.clang_cl_static_linker]
elif isinstance(compiler, compilers.GnuCompiler): elif isinstance(compiler, compilers.GnuCompiler):
# Use gcc-ar if available; needed for LTO # Use gcc-ar if available; needed for LTO

@ -542,6 +542,7 @@ def detect_tests_to_run():
('objective c++', 'objcpp', backend not in (Backend.ninja, Backend.xcode) or mesonlib.is_windows() or not have_objcpp_compiler()), ('objective c++', 'objcpp', backend not in (Backend.ninja, Backend.xcode) or mesonlib.is_windows() or not have_objcpp_compiler()),
('fortran', 'fortran', backend is not Backend.ninja or not shutil.which('gfortran')), ('fortran', 'fortran', backend is not Backend.ninja or not shutil.which('gfortran')),
('swift', 'swift', backend not in (Backend.ninja, Backend.xcode) or not shutil.which('swiftc')), ('swift', 'swift', backend not in (Backend.ninja, Backend.xcode) or not shutil.which('swiftc')),
('cuda', 'cuda', backend not in (Backend.ninja, Backend.xcode) or not shutil.which('nvcc')),
('python3', 'python3', backend is not Backend.ninja), ('python3', 'python3', backend is not Backend.ninja),
('python', 'python', backend is not Backend.ninja), ('python', 'python', backend is not Backend.ninja),
('fpga', 'fpga', shutil.which('yosys') is None), ('fpga', 'fpga', shutil.which('yosys') is None),

@ -0,0 +1,5 @@
# Single-source Cuda program, built and registered as a test.
project('simple', 'cuda', version : '1.0.0')

prog = executable('prog', 'prog.cu')
test('cudatest', prog)

@ -0,0 +1,30 @@
#include <iostream>
// Prints the CUDA runtime version and, if at least one device is reported,
// the properties of device 0. Returns 0 in both cases.
int main(int, char **) {
    int device_count = 0;
    std::cout << "CUDA version: " << CUDART_VERSION << "\n";
    cudaGetDeviceCount(&device_count);
    if (device_count == 0) {
        std::cout << "No Cuda hardware found. Exiting.\n";
        return 0;
    }
    std::cout << "This computer has " << device_count << " Cuda device(s).\n";

    cudaDeviceProp info;
    cudaGetDeviceProperties(&info, 0);
    std::cout << "Properties of device 0.\n\n";
    std::cout << " Name: " << info.name << "\n";
    std::cout << " Global memory: " << info.totalGlobalMem << "\n";
    std::cout << " Shared memory: " << info.sharedMemPerBlock << "\n";
    std::cout << " Constant memory: " << info.totalConstMem << "\n";
    std::cout << " Block registers: " << info.regsPerBlock << "\n";
    std::cout << " Warp size: " << info.warpSize << "\n";
    std::cout << " Threads per block: " << info.maxThreadsPerBlock << "\n";
    std::cout << " Max block dimensions: [ "
              << info.maxThreadsDim[0] << ", "
              << info.maxThreadsDim[1] << ", "
              << info.maxThreadsDim[2] << " ]" << "\n";
    std::cout << " Max grid dimensions: [ "
              << info.maxGridSize[0] << ", "
              << info.maxGridSize[1] << ", "
              << info.maxGridSize[2] << " ]" << "\n";
    std::cout << "\n";
    return 0;
}

@ -0,0 +1,13 @@
#include <stdio.h>
#include <iostream>
// Device kernel with an empty body; it only gives do_cuda_stuff() something
// to launch.
__global__ void kernel()
{
}
// Launches the empty kernel with one block of one thread, prints a greeting
// and returns 0.
int do_cuda_stuff()
{
    const int status = 0;
    kernel<<<1, 1>>>();
    printf("Hello, World!\n");
    return status;
}

@ -0,0 +1,7 @@
#include<iostream>
int do_cuda_stuff();
// Entry point: the process exit status is whatever do_cuda_stuff() returns.
int main(int, char **)
{
    const int status = do_cuda_stuff();
    return status;
}

@ -0,0 +1,7 @@
# Executable mixing a C++ translation unit with a Cuda one, plus the
# static-library variant kept in the 'static' subdirectory.
project('simple', 'cuda', 'cpp')

split_prog = executable('prog', 'main.cpp', 'lib.cu')
test('cudatest', split_prog)

subdir('static')

@ -0,0 +1,13 @@
#include <stdio.h>
#include <iostream>
// Device kernel with an empty body; it only gives do_cuda_stuff() something
// to launch.
__global__ void kernel()
{
}
// Launches the empty kernel with one block of one thread, prints a greeting
// and returns 0.
int do_cuda_stuff()
{
    const int status = 0;
    kernel<<<1, 1>>>();
    printf("Hello, World!\n");
    return status;
}

@ -0,0 +1,13 @@
#include <stdio.h>
#include <iostream>
// Device kernel with an empty body; it only gives do_cuda_stuff() something
// to launch.
__global__ void kernel()
{
}
// Launches the empty kernel with one block of one thread, prints a greeting
// and returns 0.
int do_cuda_stuff()
{
    const int status = 0;
    kernel<<<1, 1>>>();
    printf("Hello, World!\n");
    return status;
}

@ -0,0 +1,7 @@
#include<iostream>
int do_cuda_stuff();
// Entry point: the process exit status is whatever do_cuda_stuff() returns.
int main(int, char **)
{
    const int status = do_cuda_stuff();
    return status;
}

@ -0,0 +1,4 @@
# Cuda code compiled into a static library that a C++ executable links to.
clib = static_library('clib', 'lib.cu')
static_exe = executable('staexe', 'main_static.cpp', link_with : clib)
test('static Cuda test', static_exe)
Loading…
Cancel
Save