diff --git a/modules/gpu/src/cuda/bf_radius_match.cu b/modules/gpu/src/cuda/bf_radius_match.cu
index 0aa71913b2..c6cba928c1 100644
--- a/modules/gpu/src/cuda/bf_radius_match.cu
+++ b/modules/gpu/src/cuda/bf_radius_match.cu
@@ -56,7 +56,7 @@ namespace cv { namespace gpu { namespace device
         __global__ void matchUnrolled(const DevMem2D_<T> query, int imgIdx, const DevMem2D_<T> train, float maxDistance, const Mask mask,
             PtrStepi bestTrainIdx, PtrStepi bestImgIdx, PtrStepf bestDistance, unsigned int* nMatches, int maxCount)
         {
-            #if __CUDA_ARCH__ >= 110
+            #if defined(__CUDA_ARCH__) && (__CUDA_ARCH__ >= 110)

             extern __shared__ int smem[];

@@ -168,7 +168,7 @@ namespace cv { namespace gpu { namespace device
         __global__ void match(const DevMem2D_<T> query, int imgIdx, const DevMem2D_<T> train, float maxDistance, const Mask mask,
             PtrStepi bestTrainIdx, PtrStepi bestImgIdx, PtrStepf bestDistance, unsigned int* nMatches, int maxCount)
         {
-            #if __CUDA_ARCH__ >= 110
+            #if defined(__CUDA_ARCH__) && (__CUDA_ARCH__ >= 110)

             extern __shared__ int smem[];

diff --git a/modules/gpu/src/cuda/canny.cu b/modules/gpu/src/cuda/canny.cu
index f9dd490bf6..95028c2eaf 100644
--- a/modules/gpu/src/cuda/canny.cu
+++ b/modules/gpu/src/cuda/canny.cu
@@ -261,7 +261,7 @@ namespace cv { namespace gpu { namespace device
         __global__ void edgesHysteresisLocal(PtrStepi map, ushort2* st, int rows, int cols)
         {
-            #if __CUDA_ARCH__ >= 120
+            #if defined (__CUDA_ARCH__) && (__CUDA_ARCH__ >= 120)

             __shared__ int smem[18][18];

@@ -358,7 +358,7 @@ namespace cv { namespace gpu { namespace device
         __global__ void edgesHysteresisGlobal(PtrStepi map, ushort2* st1, ushort2* st2, int rows, int cols, int count)
         {
-            #if __CUDA_ARCH__ >= 120
+            #if defined (__CUDA_ARCH__) && __CUDA_ARCH__ >= 120

             const int stack_size = 512;

diff --git a/modules/gpu/src/cuda/column_filter.cu b/modules/gpu/src/cuda/column_filter.cu
index 624bd3fbc0..fb52d6e989 100644
--- a/modules/gpu/src/cuda/column_filter.cu
+++ b/modules/gpu/src/cuda/column_filter.cu
@@ -64,7 +64,7 @@ namespace cv { namespace gpu { namespace device
         template <int KSIZE, typename T, typename D, typename B>
         __global__ void linearColumnFilter(const DevMem2D_<T> src, PtrStep<D> dst, const int anchor, const B brd)
         {
-            #if __CUDA_ARCH__ >= 200
+            #if defined(__CUDA_ARCH__) && (__CUDA_ARCH__ >= 200)
             const int BLOCK_DIM_X = 16;
             const int BLOCK_DIM_Y = 16;
             const int PATCH_PER_BLOCK = 4;
diff --git a/modules/gpu/src/cuda/fast.cu b/modules/gpu/src/cuda/fast.cu
index 8f904cd985..a511e8f35f 100644
--- a/modules/gpu/src/cuda/fast.cu
+++ b/modules/gpu/src/cuda/fast.cu
@@ -223,7 +223,7 @@ namespace cv { namespace gpu { namespace device
         template <bool calcScore, class Mask>
         __global__ void calcKeypoints(const DevMem2Db img, const Mask mask, short2* kpLoc, const unsigned int maxKeypoints, PtrStepi score, const int threshold)
         {
-            #if __CUDA_ARCH__ >= 110
+            #if defined(__CUDA_ARCH__) && (__CUDA_ARCH__ >= 110)

             const int j = threadIdx.x + blockIdx.x * blockDim.x + 3;
             const int i = threadIdx.y + blockIdx.y * blockDim.y + 3;
@@ -325,7 +325,7 @@ namespace cv { namespace gpu { namespace device
         __global__ void nonmaxSupression(const short2* kpLoc, int count, const DevMem2Di scoreMat, short2* locFinal, float* responseFinal)
         {
-            #if __CUDA_ARCH__ >= 110
+            #if defined(__CUDA_ARCH__) && (__CUDA_ARCH__ >= 110)

             const int kpIdx = threadIdx.x + blockIdx.x * blockDim.x;

diff --git a/modules/gpu/src/cuda/hist.cu b/modules/gpu/src/cuda/hist.cu
index a7f45719ea..0a08f82f41 100644
--- a/modules/gpu/src/cuda/hist.cu
+++ b/modules/gpu/src/cuda/hist.cu
@@ -63,7 +63,7 @@ namespace cv { namespace gpu { namespace device
     #define MERGE_THREADBLOCK_SIZE 256

-    #define USE_SMEM_ATOMICS (__CUDA_ARCH__ >= 120)
+    #define USE_SMEM_ATOMICS (defined (__CUDA_ARCH__) && (__CUDA_ARCH__ >= 120))

     namespace hist
     {
diff --git a/modules/gpu/src/cuda/integral_image.cu b/modules/gpu/src/cuda/integral_image.cu
index ead0ddefec..5cb777dd92 100644
--- a/modules/gpu/src/cuda/integral_image.cu
+++ b/modules/gpu/src/cuda/integral_image.cu
@@ -59,7 +59,7 @@ namespace cv { namespace gpu { namespace device
         __global__ void shfl_integral_horizontal(const PtrStep_<uint4> img, PtrStep_<uint4> integral)
         {
-            #if __CUDA_ARCH__ >= 300
+            #if defined(__CUDA_ARCH__) && (__CUDA_ARCH__ >= 300)
             __shared__ int sums[128];

             const int id = threadIdx.x;
@@ -299,7 +299,7 @@ namespace cv { namespace gpu { namespace device
         // block sums.
         __global__ void shfl_integral_vertical(DevMem2D_<unsigned int> integral)
         {
-            #if __CUDA_ARCH__ >= 300
+            #if defined(__CUDA_ARCH__) && (__CUDA_ARCH__ >= 300)
             __shared__ unsigned int sums[32][9];

             const int tidx = blockIdx.x * blockDim.x + threadIdx.x;
diff --git a/modules/gpu/src/cuda/matrix_reductions.cu b/modules/gpu/src/cuda/matrix_reductions.cu
index a0be65c3cd..d9b1b5cba4 100644
--- a/modules/gpu/src/cuda/matrix_reductions.cu
+++ b/modules/gpu/src/cuda/matrix_reductions.cu
@@ -215,7 +215,7 @@ namespace cv { namespace gpu { namespace device
                 maxval[blockIdx.y * gridDim.x + blockIdx.x] = (T)smaxval[0];
             }

-        #if __CUDA_ARCH__ >= 110
+        #if defined (__CUDA_ARCH__) && (__CUDA_ARCH__ >= 110)
             __shared__ bool is_last;

             if (tid == 0)
@@ -535,7 +535,7 @@ namespace cv { namespace gpu { namespace device

             findMinMaxLocInSmem<nthreads, best_type>(sminval, smaxval, sminloc, smaxloc, tid);

-        #if __CUDA_ARCH__ >= 110
+        #if defined (__CUDA_ARCH__) && (__CUDA_ARCH__ >= 110)
             __shared__ bool is_last;

             if (tid == 0)
@@ -841,7 +841,7 @@ namespace cv { namespace gpu { namespace device

             sumInSmem<nthreads, uint>(scount, tid);

-        #if __CUDA_ARCH__ >= 110
+        #if defined(__CUDA_ARCH__) && (__CUDA_ARCH__ >= 110)
             __shared__ bool is_last;

             if (tid == 0)
@@ -1034,7 +1034,7 @@ namespace cv { namespace gpu { namespace device

             sumInSmem<nthreads, R>(smem, tid);

-        #if __CUDA_ARCH__ >= 110
+        #if defined(__CUDA_ARCH__) && (__CUDA_ARCH__ >= 110)
             __shared__ bool is_last;

             if (tid == 0)
@@ -1115,7 +1115,7 @@ namespace cv { namespace gpu { namespace device
             sumInSmem<nthreads, R>(smem, tid);
             sumInSmem<nthreads, R>(smem + nthreads, tid);

-        #if __CUDA_ARCH__ >= 110
+        #if defined(__CUDA_ARCH__) && (__CUDA_ARCH__ >= 110)
             __shared__ bool is_last;

             if (tid == 0)
@@ -1222,7 +1222,7 @@ namespace cv { namespace gpu { namespace device
             sumInSmem<nthreads, R>(smem + nthreads, tid);
             sumInSmem<nthreads, R>(smem + 2 * nthreads, tid);

-        #if __CUDA_ARCH__ >= 110
+        #if defined(__CUDA_ARCH__) && __CUDA_ARCH__ >= 110
             __shared__ bool is_last;

             if (tid == 0)
@@ -1339,7 +1339,7 @@ namespace cv { namespace gpu { namespace device
             sumInSmem<nthreads, R>(smem + 2 * nthreads, tid);
             sumInSmem<nthreads, R>(smem + 3 * nthreads, tid);

-        #if __CUDA_ARCH__ >= 110
+        #if defined(__CUDA_ARCH__) && (__CUDA_ARCH__ >= 110)
             __shared__ bool is_last;

             if (tid == 0)
@@ -1975,7 +1975,7 @@ namespace cv { namespace gpu { namespace device
             for (int c = 0; c < cn; ++c)
                 myVal[c] = op.startValue();

-        #if __CUDA_ARCH__ >= 200
+        #if defined (__CUDA_ARCH__) && __CUDA_ARCH__ >= 200

             // For cc >= 2.0 prefer L1 cache
             for (int x = threadIdx.x; x < src.cols; x += 256)
diff --git a/modules/gpu/src/cuda/pyrlk.cu b/modules/gpu/src/cuda/pyrlk.cu
index b06d607687..271660515b 100644
--- a/modules/gpu/src/cuda/pyrlk.cu
+++ b/modules/gpu/src/cuda/pyrlk.cu
@@ -82,7 +82,7 @@ namespace cv { namespace gpu { namespace device
         smem3[tid] = val3;
         __syncthreads();

-#if __CUDA_ARCH__ > 110
+#if defined(__CUDA_ARCH__) && (__CUDA_ARCH__ > 110)
         if (tid < 128)
         {
             smem1[tid] = val1 += smem1[tid + 128];
@@ -138,7 +138,7 @@ namespace cv { namespace gpu { namespace device
         smem2[tid] = val2;
         __syncthreads();

-#if __CUDA_ARCH__ > 110
+#if defined(__CUDA_ARCH__) && (__CUDA_ARCH__ > 110)
         if (tid < 128)
         {
             smem1[tid] = val1 += smem1[tid + 128];
@@ -184,7 +184,7 @@ namespace cv { namespace gpu { namespace device
         smem1[tid] = val1;
         __syncthreads();

-#if __CUDA_ARCH__ > 110
+#if defined(__CUDA_ARCH__) && (__CUDA_ARCH__ > 110)
         if (tid < 128)
         {
             smem1[tid] = val1 += smem1[tid + 128];
@@ -271,7 +271,7 @@ namespace cv { namespace gpu { namespace device
     template <int cn, int PATCH_X, int PATCH_Y, bool calcErr>
     __global__ void lkSparse(const float2* prevPts, float2* nextPts, uchar* status, float* err, const int level, const int rows, const int cols)
     {
-#if __CUDA_ARCH__ <= 110
+#if defined(__CUDA_ARCH__) && (__CUDA_ARCH__ <= 110)
         __shared__ float smem1[128];
         __shared__ float smem2[128];
         __shared__ float smem3[128];
diff --git a/modules/gpu/src/cuda/row_filter.cu b/modules/gpu/src/cuda/row_filter.cu
index 8963212a23..d40af71496 100644
--- a/modules/gpu/src/cuda/row_filter.cu
+++ b/modules/gpu/src/cuda/row_filter.cu
@@ -64,7 +64,7 @@ namespace cv { namespace gpu { namespace device
         template <int KSIZE, typename T, typename D, typename B>
         __global__ void linearRowFilter(const DevMem2D_<T> src, PtrStep<D> dst, const int anchor, const B brd)
         {
-            #if __CUDA_ARCH__ >= 200
+            #if defined(__CUDA_ARCH__) && (__CUDA_ARCH__ >= 200)
             const int BLOCK_DIM_X = 32;
             const int BLOCK_DIM_Y = 8;
             const int PATCH_PER_BLOCK = 4;
diff --git a/modules/gpu/src/nvidia/NPP_staging/NPP_staging.cu b/modules/gpu/src/nvidia/NPP_staging/NPP_staging.cu
index f75453930b..0e6aca0889 100644
--- a/modules/gpu/src/nvidia/NPP_staging/NPP_staging.cu
+++ b/modules/gpu/src/nvidia/NPP_staging/NPP_staging.cu
@@ -2070,7 +2070,7 @@ NCVStatus nppiStInterpolateFrames(const NppStInterpolationState *pState)
//==============================================================================


-#if __CUDA_ARCH__ < 200
+#if defined(__CUDA_ARCH__) && (__CUDA_ARCH__ < 200)

// FP32 atomic add
static __forceinline__ __device__ float _atomicAdd(float *addr, float val)
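
Every hunk above applies the same fix. nvcc compiles each .cu translation unit once for the host and once per target device architecture, and __CUDA_ARCH__ is defined only during the device passes. In a bare "#if __CUDA_ARCH__ >= 110", the undefined macro silently evaluates to 0 on the host pass; that happens to give the intended result for ">=" tests, but it trips undefined-macro warnings (e.g. GCC's -Wundef) and inverts the intent of tests like the "#if __CUDA_ARCH__ < 200" in NPP_staging.cu, where the FP32 _atomicAdd fallback would otherwise be compiled on the host pass as well. Adding defined(__CUDA_ARCH__) makes the host-pass behavior explicit. Below is a minimal standalone sketch of the pattern; the file name, kernel name, and build line are illustrative, not part of the patch.

// arch_guard_demo.cu -- illustrative sketch only, not from the patch.
// Build (with a toolkit of this era): nvcc -arch=sm_11 arch_guard_demo.cu
#include <cstdio>

__global__ void countThreads(int* out)
{
#if defined(__CUDA_ARCH__) && (__CUDA_ARCH__ >= 110)
    // Taken only on device passes targeting compute capability 1.1+,
    // where global-memory integer atomics are available.
    atomicAdd(out, 1);
#endif
    // On the host pass (and on an sm_10 device pass) the guard is false,
    // so the body compiles to a no-op instead of an error.
}

int main()
{
    int* d_out = 0;
    cudaMalloc(&d_out, sizeof(int));
    cudaMemset(d_out, 0, sizeof(int));

    countThreads<<<1, 32>>>(d_out);

    int h_out = 0;
    cudaMemcpy(&h_out, d_out, sizeof(int), cudaMemcpyDeviceToHost);
    printf("threads counted: %d\n", h_out); // 32 when the guarded branch was compiled in

    cudaFree(d_out);
    return 0;
}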