Merge pull request #3919 from mesonbuild/cudarebase

Add Cuda support
6 years ago · 379b42c5b1
parent f72522e889 43147c2594
commit 379b42c5b1
15 changed files with 375 additions and 2 deletions
--- a/docs/markdown/snippets/cuda.md
+++ b/docs/markdown/snippets/cuda.md
@ -0,0 +1,7 @@
+## Cuda support
+
+Compiling Cuda source code is now supported, though only with the
+Ninja backend. This has been tested only on Linux for now.
+
+Because NVidia's Cuda compiler does not produce `.d` dependency files,
+dependency tracking does not work.
--- a/mesonbuild/compilers/init.py
+++ b/mesonbuild/compilers/init.py
@ -72,6 +72,7 @@ __all__ = [
    'JavaCompiler',
    'LLVMDCompiler',
    'MonoCompiler',
+    'NvidiaCudaCompiler',
    'VisualStudioCsCompiler',
    'NAGFortranCompiler',
    'ObjCCompiler',
@ -153,6 +154,7 @@ from .d import (
    GnuDCompiler,
    LLVMDCompiler,
 )
+from .cuda import CudaCompiler
 from .fortran import (
    FortranCompiler,
    G95FortranCompiler,
--- a/mesonbuild/compilers/compilers.py
+++ b/mesonbuild/compilers/compilers.py
@ -37,6 +37,7 @@ lib_suffixes = ('a', 'lib', 'dll', 'dylib', 'so')
 lang_suffixes = {
    'c': ('c',),
    'cpp': ('cpp', 'cc', 'cxx', 'c++', 'hh', 'hpp', 'ipp', 'hxx'),
+    'cuda': ('cu',),
    # f90, f95, f03, f08 are for free-form fortran ('f90' recommended)
    # f, for, ftn, fpp are for fixed-form fortran ('f' or 'for' recommended)
    'fortran': ('f90', 'f95', 'f03', 'f08', 'f', 'for', 'ftn', 'fpp'),
@ -58,7 +59,7 @@ clib_langs = ('objcpp', 'cpp', 'objc', 'c', 'fortran',)
 # List of languages that can be linked with C code directly by the linker
 # used in build.py:process_compilers() and build.py:get_dynamic_linker()
 # XXX: Add Rust to this?
-clink_langs = ('d',) + clib_langs
+clink_langs = ('d', 'cuda') + clib_langs
 clink_suffixes = ()
 for _l in clink_langs + ('vala',):
    clink_suffixes += lang_suffixes[_l]
@ -69,6 +70,7 @@ soregex = re.compile(r'.*\.so(\.[0-9]+)?(\.[0-9]+)?(\.[0-9]+)?$')
 # Environment variables that each lang uses.
 cflags_mapping = {'c': 'CFLAGS',
                  'cpp': 'CXXFLAGS',
+                  'cuda': 'CUFLAGS',
                  'objc': 'OBJCFLAGS',
                  'objcpp': 'OBJCXXFLAGS',
                  'fortran': 'FFLAGS',
@ -143,6 +145,13 @@ armclang_buildtype_args = {'plain': [],
                           'custom': [],
                           }

+cuda_buildtype_args = {'plain': [],
+                       'debug': [],
+                       'debugoptimized': [],
+                       'release': [],
+                       'minsize': [],
+                      }
+
 arm_buildtype_args = {'plain': [],
                      'debug': ['-O0', '--debug'],
                      'debugoptimized': ['-O1', '--debug'],
@ -345,6 +354,17 @@ msvc_optimization_args = {'0': [],
                          's': ['/O1'], # Implies /Os.
                          }

+cuda_optimization_args = {'0': [],
+                          'g': ['-O0'],
+                          '1': ['-O1'],
+                          '2': ['-O2'],
+                          '3': ['-O3', '-Otime'],
+                          's': ['-O3', '-Ospace']
+                          }
+
+cuda_debug_args = {False: [],
+                   True: ['-g']}
+
 clike_debug_args = {False: [],
                    True: ['-g']}

--- a/mesonbuild/compilers/cuda.py
+++ b/mesonbuild/compilers/cuda.py
@ -0,0 +1,202 @@
+# Copyright 2012-2017 The Meson development team
+
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+
+#     http://www.apache.org/licenses/LICENSE-2.0
+
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import subprocess, os.path
+
+from .. import mlog
+from ..mesonlib import EnvironmentException, Popen_safe
+from .compilers import Compiler, cuda_buildtype_args, cuda_optimization_args, cuda_debug_args
+
+class CudaCompiler(Compiler):
+    def __init__(self, exelist, version, is_cross, exe_wrapper=None):
+        if not hasattr(self, 'language'):
+            self.language = 'cuda'
+        super().__init__(exelist, version)
+        self.is_cross = is_cross
+        self.exe_wrapper = exe_wrapper
+        self.id = 'nvcc'
+        default_warn_args = []
+        self.warn_args = {'1': default_warn_args,
+                          '2': default_warn_args + ['-Wextra'],
+                          '3': default_warn_args + ['-Wextra', '-Wpedantic']}
+
+    def needs_static_linker(self):
+        return False
+
+    def get_display_language(self):
+        return 'Cuda'
+
+    def get_no_stdinc_args(self):
+        return []
+
+    def sanity_check(self, work_dir, environment):
+        source_name = os.path.join(work_dir, 'sanitycheckcuda.cu')
+        binary_name = os.path.join(work_dir, 'sanitycheckcuda')
+        extra_flags = self.get_cross_extra_flags(environment, link=False)
+        if self.is_cross:
+            extra_flags += self.get_compile_only_args()
+
+        code = '''
+__global__ void kernel (void) {
+
+}
+
+        int main(int argc,char** argv){
+            return 0;
+        }
+        '''
+
+        with open(source_name, 'w') as ofile:
+            ofile.write(code)
+        pc = subprocess.Popen(self.exelist + extra_flags + [source_name, '-o', binary_name])
+        pc.wait()
+        if pc.returncode != 0:
+            raise EnvironmentException('Cuda compiler %s can not compile programs.' % self.name_string())
+        if self.is_cross:
+            # Can't check if the binaries run so we have to assume they do
+            return
+        pe = subprocess.Popen(binary_name)
+        pe.wait()
+        if pe.returncode != 0:
+            raise EnvironmentException('Executables created by Cuda compiler %s are not runnable.' % self.name_string())
+
+    def get_compiler_check_args(self):
+        return super().get_compiler_check_args() + []
+
+    def has_header_symbol(self, hname, symbol, prefix, env, extra_args=None, dependencies=None):
+        if super().has_header_symbol(hname, symbol, prefix, env, extra_args, dependencies):
+            return True
+        if extra_args is None:
+            extra_args = []
+        fargs = {'prefix': prefix, 'header': hname, 'symbol': symbol}
+        t = '''{prefix}
+        #include <{header}>
+        using {symbol};
+        int main () {{ return 0; }}'''
+        return self.compiles(t.format(**fargs), env, extra_args, dependencies)
+
+    def sanity_check_impl(self, work_dir, environment, sname, code):
+        mlog.debug('Sanity testing ' + self.get_display_language() + ' compiler:', ' '.join(self.exelist))
+        mlog.debug('Is cross compiler: %s.' % str(self.is_cross))
+
+        extra_flags = []
+        source_name = os.path.join(work_dir, sname)
+        binname = sname.rsplit('.', 1)[0]
+        if self.is_cross:
+            binname += '_cross'
+            if self.exe_wrapper is None:
+                # Linking cross built apps is painful. You can't really
+                # tell if you should use -nostdlib or not and for example
+                # on OSX the compiler binary is the same but you need
+                # a ton of compiler flags to differentiate between
+                # arm and x86_64. So just compile.
+                extra_flags += self.get_cross_extra_flags(environment, link=False)
+                extra_flags += self.get_compile_only_args()
+            else:
+                extra_flags += self.get_cross_extra_flags(environment, link=True)
+        # Is a valid executable output for all toolchains and platforms
+        binname += '.exe'
+        # Write binary check source
+        binary_name = os.path.join(work_dir, binname)
+        with open(source_name, 'w') as ofile:
+            ofile.write(code)
+        # Compile sanity check
+        cmdlist = self.exelist + extra_flags + [source_name] + self.get_output_args(binary_name)
+        pc, stdo, stde = Popen_safe(cmdlist, cwd=work_dir)
+        mlog.debug('Sanity check compiler command line:', ' '.join(cmdlist))
+        mlog.debug('Sanity check compile stdout:')
+        mlog.debug(stdo)
+        mlog.debug('-----\nSanity check compile stderr:')
+        mlog.debug(stde)
+        mlog.debug('-----')
+        if pc.returncode != 0:
+            raise EnvironmentException('Compiler {0} can not compile programs.'.format(self.name_string()))
+        # Run sanity check
+        if self.is_cross:
+            if self.exe_wrapper is None:
+                # Can't check if the binaries run so we have to assume they do
+                return
+            cmdlist = self.exe_wrapper + [binary_name]
+        else:
+            cmdlist = [binary_name]
+        mlog.debug('Running test binary command: ' + ' '.join(cmdlist))
+        pe = subprocess.Popen(cmdlist)
+        pe.wait()
+        if pe.returncode != 0:
+            raise EnvironmentException('Executables created by {0} compiler {1} are not runnable.'.format(self.language, self.name_string()))
+
+    def get_output_args(self, target):
+        return ['-o', target]
+
+    def name_string(self):
+        return ' '.join(self.exelist)
+
+    def get_dependency_gen_args(self, outtarget, outfile):
+        return []
+
+    def get_compile_only_args(self):
+        return ['-c']
+
+    def get_no_optimization_args(self):
+        return ['-O0']
+
+    def get_optimization_args(self, optimization_level):
+        return cuda_optimization_args[optimization_level]
+
+    def get_debug_args(self, is_debug):
+        return cuda_debug_args[is_debug]
+
+    def get_linker_exelist(self):
+        return self.exelist[:]
+
+    def get_linker_output_args(self, outputname):
+        return ['-o', outputname]
+
+    def get_warn_args(self, level):
+        return self.warn_args[level]
+
+    def get_buildtype_args(self, buildtype):
+        return cuda_buildtype_args[buildtype]
+
+    def get_include_args(self, path, is_system):
+        if path == '':
+            path = '.'
+        return ['-I' + path]
+
+    def depfile_for_object(self, objfile):
+        return objfile + '.' + self.get_depfile_suffix()
+
+    def get_depfile_suffix(self):
+        return 'd'
+
+    def get_buildtype_linker_args(self, buildtype):
+        return []
+
+    def get_std_exe_link_args(self):
+        return []
+
+    def build_rpath_args(self, build_dir, from_dir, rpath_paths, build_rpath, install_rpath):
+        return []
+
+    def get_linker_search_args(self, dirname):
+        return ['/LIBPATH:' + dirname]
+
+    def linker_to_compiler_args(self, args):
+        return ['/link'] + args
+
+    def get_pic_args(self):
+        return []
+
+    def compute_parameters_with_absolute_paths(self, parameter_list, build_dir):
+        return []
--- a/mesonbuild/environment.py
+++ b/mesonbuild/environment.py
@ -59,6 +59,7 @@ from .compilers import (
    IntelFortranCompiler,
    JavaCompiler,
    MonoCompiler,
+    CudaCompiler,
    VisualStudioCsCompiler,
    NAGFortranCompiler,
    Open64FortranCompiler,
@ -99,6 +100,17 @@ known_cpu_families = (
    'x86_64'
 )

+# Environment variables that each lang uses.
+cflags_mapping = {'c': 'CFLAGS',
+                  'cpp': 'CXXFLAGS',
+                  'cu': 'CUFLAGS',
+                  'objc': 'OBJCFLAGS',
+                  'objcpp': 'OBJCXXFLAGS',
+                  'fortran': 'FFLAGS',
+                  'd': 'DFLAGS',
+                  'vala': 'VALAFLAGS'}
+
+
 def detect_gcovr(version='3.1', log=False):
    gcovr_exe = 'gcovr'
    try:
@ -410,6 +422,7 @@ class Environment:
        self.default_d = ['ldc2', 'ldc', 'gdc', 'dmd']
        self.default_fortran = ['gfortran', 'g95', 'f95', 'f90', 'f77', 'ifort', 'pgfortran']
        self.default_java = ['javac']
+        self.default_cuda = ['nvcc']
        self.default_rust = ['rustc']
        self.default_swift = ['swiftc']
        self.default_vala = ['valac']
@ -417,6 +430,7 @@ class Environment:
        self.default_strip = ['strip']
        self.vs_static_linker = ['lib']
        self.clang_cl_static_linker = ['llvm-lib']
+        self.cuda_static_linker = ['nvlink']
        self.gcc_static_linker = ['gcc-ar']
        self.clang_static_linker = ['llvm-ar']
        self.default_pkgconfig = ['pkg-config']
@ -737,6 +751,25 @@ class Environment:
    def detect_cpp_compiler(self, want_cross):
        return self._detect_c_or_cpp_compiler('cpp', want_cross)

+    def detect_cuda_compiler(self, want_cross):
+        popen_exceptions = {}
+        compilers, ccache, is_cross, exe_wrap = self._get_compilers('cuda', want_cross)
+        for compiler in compilers:
+            if isinstance(compiler, str):
+                compiler = [compiler]
+            else:
+                raise EnvironmentException()
+            arg = '--version'
+            try:
+                p, out, err = Popen_safe(compiler + [arg])
+            except OSError as e:
+                popen_exceptions[' '.join(compiler + [arg])] = e
+                continue
+            version = search_version(out)
+            cls = CudaCompiler
+            return cls(ccache + compiler, version, is_cross, exe_wrap)
+        raise EnvironmentException('Could not find suitable CUDA compiler: "' + ' '.join(compilers) + '"')
+
    def detect_fortran_compiler(self, want_cross):
        popen_exceptions = {}
        compilers, ccache, is_cross, exe_wrap = self._get_compilers('fortran', want_cross)
@ -999,6 +1032,10 @@ class Environment:
            comp = self.detect_objc_compiler(False)
            if need_cross_compiler:
                cross_comp = self.detect_objc_compiler(True)
+        elif lang == 'cuda':
+            comp = self.detect_cuda_compiler(False)
+            if need_cross_compiler:
+                cross_comp = self.detect_cuda_compiler(True)
        elif lang == 'objcpp':
            comp = self.detect_objcpp_compiler(False)
            if need_cross_compiler:
@ -1057,7 +1094,12 @@ class Environment:
        if linker is not None:
            linkers = [linker]
        else:
-            if isinstance(compiler, compilers.VisualStudioCCompiler):
+            evar = 'AR'
+            if isinstance(compiler, compilers.CudaCompiler):
+                linkers = [self.cuda_static_linker, self.default_static_linker]
+            elif evar in os.environ:
+                linkers = [shlex.split(os.environ[evar])]
+            elif isinstance(compiler, compilers.VisualStudioCCompiler):
                linkers = [self.vs_static_linker, self.clang_cl_static_linker]
            elif isinstance(compiler, compilers.GnuCompiler):
                # Use gcc-ar if available; needed for LTO
--- a/run_project_tests.py
+++ b/run_project_tests.py
@ -542,6 +542,7 @@ def detect_tests_to_run():
        ('objective c++', 'objcpp', backend not in (Backend.ninja, Backend.xcode) or mesonlib.is_windows() or not have_objcpp_compiler()),
        ('fortran', 'fortran', backend is not Backend.ninja or not shutil.which('gfortran')),
        ('swift', 'swift', backend not in (Backend.ninja, Backend.xcode) or not shutil.which('swiftc')),
+        ('cuda', 'cuda', backend not in (Backend.ninja, Backend.xcode) or not shutil.which('nvcc')),
        ('python3', 'python3', backend is not Backend.ninja),
        ('python', 'python', backend is not Backend.ninja),
        ('fpga', 'fpga', shutil.which('yosys') is None),
--- a/simple/meson.build
+++ b/simple/meson.build
@ -0,0 +1,5 @@
+project('simple', 'cuda', version : '1.0.0')
+
+exe = executable('prog', 'prog.cu')
+test('cudatest', exe)
+
--- a/simple/prog.cu
+++ b/simple/prog.cu
@ -0,0 +1,30 @@
+#include <iostream>
+
+int main(int argc, char **argv) {
+    int cuda_devices = 0;
+    std::cout << "CUDA version: " << CUDART_VERSION << "\n";
+    cudaGetDeviceCount(&cuda_devices);
+    if(cuda_devices == 0) {
+        std::cout << "No Cuda hardware found. Exiting.\n";
+        return 0;
+    }
+    std::cout << "This computer has " << cuda_devices << " Cuda device(s).\n";
+    cudaDeviceProp props;
+    cudaGetDeviceProperties(&props, 0);
+    std::cout << "Properties of device 0.\n\n";
+
+    std::cout << "  Name:            " << props.name << "\n";
+    std::cout << "  Global memory:   " << props.totalGlobalMem << "\n";
+    std::cout << "  Shared memory:   " << props.sharedMemPerBlock << "\n";
+    std::cout << "  Constant memory: " << props.totalConstMem << "\n";
+    std::cout << "  Block registers: " << props.regsPerBlock << "\n";
+
+    std::cout << "  Warp size:         " << props.warpSize << "\n";
+    std::cout << "  Threads per block: " << props.maxThreadsPerBlock << "\n";
+    std::cout << "  Max block dimensions: [ " << props.maxThreadsDim[0] << ", " << props.maxThreadsDim[1]  << ", " << props.maxThreadsDim[2] << " ]" << "\n";
+    std::cout << "  Max grid dimensions:  [ " << props.maxGridSize[0] << ", " << props.maxGridSize[1]  << ", " << props.maxGridSize[2] << " ]" << "\n";
+    std::cout << "\n";
+
+    return 0;
+}
+
--- a/split/lib.cu
+++ b/split/lib.cu
@ -0,0 +1,13 @@
+#include <stdio.h>
+#include <iostream>
+
+__global__ void kernel (void){
+}
+
+int do_cuda_stuff() {
+  kernel<<<1,1>>>();
+
+  printf("Hello, World!\n");
+  return 0;
+}
+
--- a/split/main.cpp
+++ b/split/main.cpp
@ -0,0 +1,7 @@
+#include<iostream>
+
+int do_cuda_stuff();
+
+int main(int argc, char **argv) {
+  return do_cuda_stuff();
+}
--- a/split/meson.build
+++ b/split/meson.build
@ -0,0 +1,7 @@
+project('simple', 'cuda', 'cpp')
+
+exe = executable('prog', 'main.cpp', 'lib.cu')
+test('cudatest', exe)
+
+subdir('static')
+
--- a/split/static/lib.cu
+++ b/split/static/lib.cu
@ -0,0 +1,13 @@
+#include <stdio.h>
+#include <iostream>
+
+__global__ void kernel (void){
+}
+
+int do_cuda_stuff() {
+  kernel<<<1,1>>>();
+
+  printf("Hello, World!\n");
+  return 0;
+}
+
--- a/split/static/libsta.cu
+++ b/split/static/libsta.cu
@ -0,0 +1,13 @@
+#include <stdio.h>
+#include <iostream>
+
+__global__ void kernel (void){
+}
+
+int do_cuda_stuff() {
+  kernel<<<1,1>>>();
+
+  printf("Hello, World!\n");
+  return 0;
+}
+
--- a/split/static/main_static.cpp
+++ b/split/static/main_static.cpp
@ -0,0 +1,7 @@
+#include<iostream>
+
+int do_cuda_stuff();
+
+int main(int argc, char **argv) {
+  return do_cuda_stuff();
+}
--- a/split/static/meson.build
+++ b/split/static/meson.build
@ -0,0 +1,4 @@
+l = static_library('clib', 'lib.cu')
+exe = executable('staexe', 'main_static.cpp',
+  link_with : l)
+test('static Cuda test', exe)