parent
b6e5683764
commit
14de8ac5a9
4 changed files with 73 additions and 0 deletions
@ -0,0 +1,5 @@ |
||||
#include "b.h" |
||||
|
||||
/* Device-global array shared between translation units; N comes from b.h. */
__device__ int g[N];

/*
 * Increment the calling thread's slot of g.
 *
 * The slot is selected by threadIdx.x alone, so callers must run with
 * threadIdx.x < N.
 */
__device__ void bar(void)
{
    g[threadIdx.x] += 1;
}
@ -0,0 +1,5 @@ |
||||
/*
 * Shared declarations for the device-linking test.
 * Guarded so the header can safely be included more than once per
 * translation unit.
 */
#ifndef B_H
#define B_H

/* Number of elements in g. */
#define N 8

/* Device-global array, defined in a separate translation unit. */
extern __device__ int g[N];

/* Increments g[threadIdx.x]; defined in a separate translation unit. */
extern __device__ void bar(void);

#endif /* B_H */
@ -0,0 +1,44 @@ |
||||
#include <stdio.h> |
||||
|
||||
#include "b.h" |
||||
|
||||
/*
 * Test kernel exercising device code that lives in another translation unit.
 *
 * Each thread stores its own index into the block-shared array a, waits at a
 * block barrier, copies the mirrored element a[blockDim.x - threadIdx.x - 1]
 * into the global array g, and finally calls the externally defined device
 * function bar().
 *
 * Launch layout: a holds N elements and is indexed directly by threadIdx.x,
 * so a block must not contain more than N threads.
 */
__global__ void foo(void)
{
    __shared__ int a[N];

    const unsigned int tid = threadIdx.x;
    const unsigned int mirrored = blockDim.x - tid - 1;

    a[tid] = tid;

    /* All writes to a[] must land before any thread reads its mirrored slot. */
    __syncthreads();

    g[tid] = a[mirrored];

    bar();
}
||||
|
||||
/*
 * Host driver for the device-linking test.
 *
 * Launches foo with one block of N threads, copies the device array g back
 * to the host and checks the sum of its elements.
 *
 * Expected sum: thread t writes a[N - 1 - t] == N - 1 - t into g[t], and
 * bar() then increments it to N - t, so the total is
 * N + (N - 1) + ... + 1 == N * (N + 1) / 2 (36 for N == 8).
 *
 * Returns 0 when the sum matches and 1 on any CUDA error or mismatch, so the
 * build system's test runner sees a real failure through the exit status.
 */
int main(void)
{
    const int expected = N * (N + 1) / 2;
    int *dg = NULL;
    int hg[N];
    int sum = 0;
    cudaError_t err;

    foo<<<1, N>>>();

    /* A launch does not return a status; configuration errors are reported
     * through cudaGetLastError(). */
    err = cudaGetLastError();
    if (err != cudaSuccess) {
        printf("couldn't launch the kernel (%s)\n", cudaGetErrorString(err));
        return 1;
    }

    err = cudaGetSymbolAddress((void**)&dg, g);
    if (err != cudaSuccess) {
        printf("couldn't get the symbol addr (%s)\n", cudaGetErrorString(err));
        return 1;
    }

    /* Blocking copy on the default stream: it runs after the kernel has
     * finished and also reports errors raised while the kernel executed. */
    err = cudaMemcpy(hg, dg, N * sizeof(int), cudaMemcpyDeviceToHost);
    if (err != cudaSuccess) {
        printf("couldn't memcpy (%s)\n", cudaGetErrorString(err));
        return 1;
    }

    for (int i = 0; i < N; i++) {
        sum += hg[i];
    }

    if (sum != expected) {
        printf("FAILED (%d)\n", sum);
        return 1;
    }

    printf("PASSED\n");
    return 0;
}
@ -0,0 +1,19 @@ |
||||
# example here is inspired by Nvidia's blog post: |
||||
# https://developer.nvidia.com/blog/separate-compilation-linking-cuda-device-code/ |
||||
# code: |
||||
# https://docs.nvidia.com/cuda/cuda-compiler-driver-nvcc/index.html#examples |
||||
|
||||
project('device linking', ['cpp', 'cuda'], version : '1.0.0')

cuda_compiler = meson.get_compiler('cuda')
cuda_module = import('unstable-cuda')

# Ask the CUDA module for nvcc architecture flags ('Auto' mode, with the
# detected list given explicitly as 8.0).
gpu_arch_flags = cuda_module.nvcc_arch_flags(
  cuda_compiler.version(),
  'Auto',
  detected : ['8.0'],
)

message('NVCC version: @0@'.format(cuda_compiler.version()))
message('NVCC flags: @0@'.format(' '.join(gpu_arch_flags)))

# test device linking with -dc (which is equivalent to `--relocatable-device-code true`)
# Both targets are compiled with the same relocatable-device-code arguments.
rdc_args = ['-dc'] + gpu_arch_flags

device_lib = static_library('devicefuncs', ['b.cu'], cuda_args : rdc_args)

app = executable(
  'app',
  'main.cu',
  cuda_args : rdc_args,
  link_with : device_lib,
  link_args : gpu_arch_flags,
)

test('cudatest', app)
Loading…
Reference in new issue