|
|
|
@ -54,6 +54,14 @@ namespace cv { namespace gpu { namespace device |
|
|
|
|
return T(); |
|
|
|
|
#endif |
|
|
|
|
} |
|
|
|
|
// Warp shuffle (indexed) for unsigned int values.
//
// val     - value contributed by the calling lane.
// srcLane - lane whose value is read.
// width   - size of the logical sub-warp the shuffle operates on (defaults to warpSize).
//
// The value is routed through the int overload of the intrinsic and cast back.
// When __CUDA_ARCH__ is below 300 the shuffle intrinsic is not used and 0 is returned.
//
// On CUDA 9 and newer toolkits the mask-less __shfl intrinsic is deprecated (and is not
// available when targeting Volta and later), so the *_sync variant is used with a
// full-warp mask. NOTE: the full mask means every lane of the warp is expected to
// reach this call.
__device__ __forceinline__ unsigned int shfl(unsigned int val, int srcLane, int width = warpSize)
{
#if __CUDA_ARCH__ >= 300
#  if __CUDACC_VER_MAJOR__ >= 9
    return (unsigned int) __shfl_sync(0xFFFFFFFFU, (int) val, srcLane, width);
#  else
    // Pre-CUDA 9 toolkits only provide the legacy intrinsic.
    return (unsigned int) __shfl((int) val, srcLane, width);
#  endif
#else
    return 0;
#endif
}
|
|
|
|
__device__ __forceinline__ double shfl(double val, int srcLane, int width = warpSize) |
|
|
|
|
{ |
|
|
|
|
#if __CUDA_ARCH__ >= 300 |
|
|
|
@ -78,6 +86,14 @@ namespace cv { namespace gpu { namespace device |
|
|
|
|
return T(); |
|
|
|
|
#endif |
|
|
|
|
} |
|
|
|
|
// Warp shuffle-down for unsigned int values.
//
// val   - value contributed by the calling lane.
// delta - lane offset passed to the shuffle-down intrinsic.
// width - size of the logical sub-warp the shuffle operates on (defaults to warpSize).
//
// The value is routed through the int overload of the intrinsic and cast back.
// When __CUDA_ARCH__ is below 300 the shuffle intrinsic is not used and 0 is returned.
//
// On CUDA 9 and newer toolkits the mask-less __shfl_down intrinsic is deprecated (and is
// not available when targeting Volta and later), so the *_sync variant is used with a
// full-warp mask. NOTE: the full mask means every lane of the warp is expected to
// reach this call.
__device__ __forceinline__ unsigned int shfl_down(unsigned int val, unsigned int delta, int width = warpSize)
{
#if __CUDA_ARCH__ >= 300
#  if __CUDACC_VER_MAJOR__ >= 9
    return (unsigned int) __shfl_down_sync(0xFFFFFFFFU, (int) val, delta, width);
#  else
    // Pre-CUDA 9 toolkits only provide the legacy intrinsic.
    return (unsigned int) __shfl_down((int) val, delta, width);
#  endif
#else
    return 0;
#endif
}
|
|
|
|
__device__ __forceinline__ double shfl_down(double val, unsigned int delta, int width = warpSize) |
|
|
|
|
{ |
|
|
|
|
#if __CUDA_ARCH__ >= 300 |
|
|
|
@ -92,6 +108,38 @@ namespace cv { namespace gpu { namespace device |
|
|
|
|
return 0.0; |
|
|
|
|
#endif |
|
|
|
|
} |
|
|
|
|
|
|
|
|
|
// Generic warp shuffle-up.
//
// T     - value type; it must be a type accepted by the underlying shuffle intrinsic.
// val   - value contributed by the calling lane.
// delta - lane offset passed to the shuffle-up intrinsic.
// width - size of the logical sub-warp the shuffle operates on (defaults to warpSize).
//
// When __CUDA_ARCH__ is below 300 the shuffle intrinsic is not used and a
// value-initialized T is returned.
//
// On CUDA 9 and newer toolkits the mask-less __shfl_up intrinsic is deprecated (and is
// not available when targeting Volta and later), so the *_sync variant is used with a
// full-warp mask. NOTE: the full mask means every lane of the warp is expected to
// reach this call.
template <typename T>
__device__ __forceinline__ T shfl_up(T val, unsigned int delta, int width = warpSize)
{
#if __CUDA_ARCH__ >= 300
#  if __CUDACC_VER_MAJOR__ >= 9
    return __shfl_up_sync(0xFFFFFFFFU, val, delta, width);
#  else
    // Pre-CUDA 9 toolkits only provide the legacy intrinsic.
    return __shfl_up(val, delta, width);
#  endif
#else
    return T();
#endif
}
|
|
|
|
// Warp shuffle-up for unsigned int values.
//
// val   - value contributed by the calling lane.
// delta - lane offset passed to the shuffle-up intrinsic.
// width - size of the logical sub-warp the shuffle operates on (defaults to warpSize).
//
// The value is routed through the int overload of the intrinsic and cast back.
// When __CUDA_ARCH__ is below 300 the shuffle intrinsic is not used and 0 is returned.
//
// On CUDA 9 and newer toolkits the mask-less __shfl_up intrinsic is deprecated (and is
// not available when targeting Volta and later), so the *_sync variant is used with a
// full-warp mask. NOTE: the full mask means every lane of the warp is expected to
// reach this call.
__device__ __forceinline__ unsigned int shfl_up(unsigned int val, unsigned int delta, int width = warpSize)
{
#if __CUDA_ARCH__ >= 300
#  if __CUDACC_VER_MAJOR__ >= 9
    return (unsigned int) __shfl_up_sync(0xFFFFFFFFU, (int) val, delta, width);
#  else
    // Pre-CUDA 9 toolkits only provide the legacy intrinsic.
    return (unsigned int) __shfl_up((int) val, delta, width);
#  endif
#else
    return 0;
#endif
}
|
|
|
|
// Warp shuffle-up for double values.
//
// val   - value contributed by the calling lane.
// delta - lane offset passed to the shuffle-up intrinsic.
// width - size of the logical sub-warp the shuffle operates on (defaults to warpSize).
//
// The double is split into its low and high 32-bit words, each word is shuffled
// separately with the same delta/width, and the result is reassembled.
// When __CUDA_ARCH__ is below 300 the shuffle intrinsic is not used and 0.0 is returned.
//
// On CUDA 9 and newer toolkits the mask-less __shfl_up intrinsic is deprecated (and is
// not available when targeting Volta and later), so the *_sync variant is used with a
// full-warp mask. NOTE: the full mask means every lane of the warp is expected to
// reach this call.
__device__ __forceinline__ double shfl_up(double val, unsigned int delta, int width = warpSize)
{
#if __CUDA_ARCH__ >= 300
    int lo = __double2loint(val);
    int hi = __double2hiint(val);

#  if __CUDACC_VER_MAJOR__ >= 9
    lo = __shfl_up_sync(0xFFFFFFFFU, lo, delta, width);
    hi = __shfl_up_sync(0xFFFFFFFFU, hi, delta, width);
#  else
    // Pre-CUDA 9 toolkits only provide the legacy intrinsic.
    lo = __shfl_up(lo, delta, width);
    hi = __shfl_up(hi, delta, width);
#  endif

    return __hiloint2double(hi, lo);
#else
    return 0.0;
#endif
}
|
|
|
|
}}} |
|
|
|
|
|
|
|
|
|
#endif // __OPENCV_GPU_WARP_SHUFFLE_HPP__
|
|
|
|
|