From 20f636fcee646d5e7201be16dea0476e23ba9a6c Mon Sep 17 00:00:00 2001 From: Vladislav Vinogradov Date: Tue, 17 Sep 2013 17:43:12 +0400 Subject: [PATCH] fixed cudev compilation for old pre-Fermi archs --- .../opencv2/cudev/grid/detail/histogram.hpp | 2 + .../include/opencv2/cudev/grid/histogram.hpp | 4 + .../include/opencv2/cudev/ptr2d/texture.hpp | 132 +++++++++++------- .../include/opencv2/cudev/util/atomic.hpp | 41 ++++++ .../opencv2/cudev/util/saturate_cast.hpp | 8 ++ 5 files changed, 135 insertions(+), 52 deletions(-) diff --git a/modules/cudev/include/opencv2/cudev/grid/detail/histogram.hpp b/modules/cudev/include/opencv2/cudev/grid/detail/histogram.hpp index 4c2675098d..a27955d5de 100644 --- a/modules/cudev/include/opencv2/cudev/grid/detail/histogram.hpp +++ b/modules/cudev/include/opencv2/cudev/grid/detail/histogram.hpp @@ -56,6 +56,7 @@ namespace grid_histogram_detail template __global__ void histogram(const SrcPtr src, ResType* hist, const MaskPtr mask, const int rows, const int cols) { + #if CV_CUDEV_ARCH >= 120 __shared__ ResType smem[BIN_COUNT]; const int y = blockIdx.x * blockDim.y + threadIdx.y; @@ -86,6 +87,7 @@ namespace grid_histogram_detail if (histVal > 0) atomicAdd(hist + i, histVal); } + #endif } template diff --git a/modules/cudev/include/opencv2/cudev/grid/histogram.hpp b/modules/cudev/include/opencv2/cudev/grid/histogram.hpp index b81b57febc..ecb1a19c85 100644 --- a/modules/cudev/include/opencv2/cudev/grid/histogram.hpp +++ b/modules/cudev/include/opencv2/cudev/grid/histogram.hpp @@ -57,6 +57,8 @@ namespace cv { namespace cudev { template __host__ void gridHistogram_(const SrcPtr& src, GpuMat_& dst, const MaskPtr& mask, Stream& stream = Stream::Null()) { + CV_Assert( deviceSupports(SHARED_ATOMICS) ); + const int rows = getRows(src); const int cols = getCols(src); @@ -75,6 +77,8 @@ __host__ void gridHistogram_(const SrcPtr& src, GpuMat_& dst, const Mas template __host__ void gridHistogram_(const SrcPtr& src, GpuMat_& dst, Stream& stream = Stream::Null()) { + CV_Assert( deviceSupports(SHARED_ATOMICS) ); + const int rows = getRows(src); const int cols = getCols(src); diff --git a/modules/cudev/include/opencv2/cudev/ptr2d/texture.hpp b/modules/cudev/include/opencv2/cudev/ptr2d/texture.hpp index b01a2c7b84..095864fcb0 100644 --- a/modules/cudev/include/opencv2/cudev/ptr2d/texture.hpp +++ b/modules/cudev/include/opencv2/cudev/ptr2d/texture.hpp @@ -52,6 +52,40 @@ #include "gpumat.hpp" #include "traits.hpp" +namespace +{ + template struct CvCudevTextureRef + { + typedef texture TexRef; + + static TexRef ref; + + __host__ static void bind(const cv::cudev::GlobPtrSz& mat, + bool normalizedCoords = false, + cudaTextureFilterMode filterMode = cudaFilterModePoint, + cudaTextureAddressMode addressMode = cudaAddressModeClamp) + { + ref.normalized = normalizedCoords; + ref.filterMode = filterMode; + ref.addressMode[0] = addressMode; + ref.addressMode[1] = addressMode; + ref.addressMode[2] = addressMode; + + cudaChannelFormatDesc desc = cudaCreateChannelDesc(); + + CV_CUDEV_SAFE_CALL( cudaBindTexture2D(0, &ref, mat.data, &desc, mat.cols, mat.rows, mat.step) ); + } + + __host__ static void unbind() + { + CV_CUDEV_SAFE_CALL( cudaUnbindTexture(ref) ); + } + }; + + template + typename CvCudevTextureRef::TexRef CvCudevTextureRef::ref; +} + namespace cv { namespace cudev { template struct TexturePtr @@ -63,79 +97,73 @@ template struct TexturePtr __device__ __forceinline__ T operator ()(float y, float x) const { + #if CV_CUDEV_ARCH < 300 + // Use the texture reference + return tex2D(CvCudevTextureRef::ref, x, y); + #else + // Use the texture object return tex2D(texObj, x, y); + #endif } }; template struct Texture : TexturePtr { int rows, cols; + bool cc30; __host__ explicit Texture(const GlobPtrSz& mat, bool normalizedCoords = false, cudaTextureFilterMode filterMode = cudaFilterModePoint, cudaTextureAddressMode addressMode = cudaAddressModeClamp) { - CV_Assert( deviceSupports(FEATURE_SET_COMPUTE_30) ); - - rows = mat.rows; - cols = mat.cols; - - cudaResourceDesc texRes; - std::memset(&texRes, 0, sizeof(texRes)); - texRes.resType = cudaResourceTypePitch2D; - texRes.res.pitch2D.devPtr = mat.data; - texRes.res.pitch2D.height = mat.rows; - texRes.res.pitch2D.width = mat.cols; - texRes.res.pitch2D.pitchInBytes = mat.step; - texRes.res.pitch2D.desc = cudaCreateChannelDesc(); - - cudaTextureDesc texDescr; - std::memset(&texDescr, 0, sizeof(texDescr)); - texDescr.addressMode[0] = addressMode; - texDescr.addressMode[1] = addressMode; - texDescr.addressMode[2] = addressMode; - texDescr.filterMode = filterMode; - texDescr.readMode = cudaReadModeElementType; - texDescr.normalizedCoords = normalizedCoords; - - CV_CUDEV_SAFE_CALL( cudaCreateTextureObject(&this->texObj, &texRes, &texDescr, 0) ); - } - - __host__ explicit Texture(const GpuMat_& mat, - bool normalizedCoords = false, - cudaTextureFilterMode filterMode = cudaFilterModePoint, - cudaTextureAddressMode addressMode = cudaAddressModeClamp) - { - CV_Assert( deviceSupports(FEATURE_SET_COMPUTE_30) ); + cc30 = deviceSupports(FEATURE_SET_COMPUTE_30); rows = mat.rows; cols = mat.cols; - cudaResourceDesc texRes; - std::memset(&texRes, 0, sizeof(texRes)); - texRes.resType = cudaResourceTypePitch2D; - texRes.res.pitch2D.devPtr = mat.data; - texRes.res.pitch2D.height = mat.rows; - texRes.res.pitch2D.width = mat.cols; - texRes.res.pitch2D.pitchInBytes = mat.step; - texRes.res.pitch2D.desc = cudaCreateChannelDesc(); - - cudaTextureDesc texDescr; - std::memset(&texDescr, 0, sizeof(texDescr)); - texDescr.addressMode[0] = addressMode; - texDescr.addressMode[1] = addressMode; - texDescr.addressMode[2] = addressMode; - texDescr.filterMode = filterMode; - texDescr.readMode = cudaReadModeElementType; - texDescr.normalizedCoords = normalizedCoords; - - CV_CUDEV_SAFE_CALL( cudaCreateTextureObject(&this->texObj, &texRes, &texDescr, 0) ); + if (cc30) + { + // Use the texture object + cudaResourceDesc texRes; + std::memset(&texRes, 0, sizeof(texRes)); + texRes.resType = cudaResourceTypePitch2D; + texRes.res.pitch2D.devPtr = mat.data; + texRes.res.pitch2D.height = mat.rows; + texRes.res.pitch2D.width = mat.cols; + texRes.res.pitch2D.pitchInBytes = mat.step; + texRes.res.pitch2D.desc = cudaCreateChannelDesc(); + + cudaTextureDesc texDescr; + std::memset(&texDescr, 0, sizeof(texDescr)); + texDescr.normalizedCoords = normalizedCoords; + texDescr.filterMode = filterMode; + texDescr.addressMode[0] = addressMode; + texDescr.addressMode[1] = addressMode; + texDescr.addressMode[2] = addressMode; + texDescr.readMode = cudaReadModeElementType; + + CV_CUDEV_SAFE_CALL( cudaCreateTextureObject(&this->texObj, &texRes, &texDescr, 0) ); + } + else + { + // Use the texture reference + CvCudevTextureRef::bind(mat, normalizedCoords, filterMode, addressMode); + } } __host__ ~Texture() { - cudaDestroyTextureObject(this->texObj); + if (cc30) + { + // Use the texture object + cudaDestroyTextureObject(this->texObj); + } + else + { + // Use the texture reference + CvCudevTextureRef::unbind(); + } } }; diff --git a/modules/cudev/include/opencv2/cudev/util/atomic.hpp b/modules/cudev/include/opencv2/cudev/util/atomic.hpp index f650c68ec2..2da110231b 100644 --- a/modules/cudev/include/opencv2/cudev/util/atomic.hpp +++ b/modules/cudev/include/opencv2/cudev/util/atomic.hpp @@ -64,11 +64,23 @@ __device__ __forceinline__ uint atomicAdd(uint* address, uint val) __device__ __forceinline__ float atomicAdd(float* address, float val) { +#if CV_CUDEV_ARCH >= 200 return ::atomicAdd(address, val); +#else + int* address_as_i = (int*) address; + int old = *address_as_i, assumed; + do { + assumed = old; + old = ::atomicCAS(address_as_i, assumed, + __float_as_int(val + __int_as_float(assumed))); + } while (assumed != old); + return __int_as_float(old); +#endif } __device__ static double atomicAdd(double* address, double val) { +#if CV_CUDEV_ARCH >= 130 unsigned long long int* address_as_ull = (unsigned long long int*) address; unsigned long long int old = *address_as_ull, assumed; do { @@ -77,6 +89,11 @@ __device__ static double atomicAdd(double* address, double val) __double_as_longlong(val + __longlong_as_double(assumed))); } while (assumed != old); return __longlong_as_double(old); +#else + (void) address; + (void) val; + return 0.0; +#endif } // atomicMin @@ -93,6 +110,7 @@ __device__ __forceinline__ uint atomicMin(uint* address, uint val) __device__ static float atomicMin(float* address, float val) { +#if CV_CUDEV_ARCH >= 120 int* address_as_i = (int*) address; int old = *address_as_i, assumed; do { @@ -101,10 +119,16 @@ __device__ static float atomicMin(float* address, float val) __float_as_int(::fminf(val, __int_as_float(assumed)))); } while (assumed != old); return __int_as_float(old); +#else + (void) address; + (void) val; + return 0.0f; +#endif } __device__ static double atomicMin(double* address, double val) { +#if CV_CUDEV_ARCH >= 130 unsigned long long int* address_as_ull = (unsigned long long int*) address; unsigned long long int old = *address_as_ull, assumed; do { @@ -113,6 +137,11 @@ __device__ static double atomicMin(double* address, double val) __double_as_longlong(::fmin(val, __longlong_as_double(assumed)))); } while (assumed != old); return __longlong_as_double(old); +#else + (void) address; + (void) val; + return 0.0; +#endif } // atomicMax @@ -129,6 +158,7 @@ __device__ __forceinline__ uint atomicMax(uint* address, uint val) __device__ static float atomicMax(float* address, float val) { +#if CV_CUDEV_ARCH >= 120 int* address_as_i = (int*) address; int old = *address_as_i, assumed; do { @@ -137,10 +167,16 @@ __device__ static float atomicMax(float* address, float val) __float_as_int(::fmaxf(val, __int_as_float(assumed)))); } while (assumed != old); return __int_as_float(old); +#else + (void) address; + (void) val; + return 0.0f; +#endif } __device__ static double atomicMax(double* address, double val) { +#if CV_CUDEV_ARCH >= 130 unsigned long long int* address_as_ull = (unsigned long long int*) address; unsigned long long int old = *address_as_ull, assumed; do { @@ -149,6 +185,11 @@ __device__ static double atomicMax(double* address, double val) __double_as_longlong(::fmax(val, __longlong_as_double(assumed)))); } while (assumed != old); return __longlong_as_double(old); +#else + (void) address; + (void) val; + return 0.0; +#endif } }} diff --git a/modules/cudev/include/opencv2/cudev/util/saturate_cast.hpp b/modules/cudev/include/opencv2/cudev/util/saturate_cast.hpp index faa12e32f2..ff7ce85986 100644 --- a/modules/cudev/include/opencv2/cudev/util/saturate_cast.hpp +++ b/modules/cudev/include/opencv2/cudev/util/saturate_cast.hpp @@ -228,7 +228,11 @@ template <> __device__ __forceinline__ int saturate_cast(float v) } template <> __device__ __forceinline__ int saturate_cast(double v) { +#if CV_CUDEV_ARCH >= 130 return __double2int_rn(v); +#else + return saturate_cast((float) v); +#endif } template <> __device__ __forceinline__ uint saturate_cast(schar v) @@ -256,7 +260,11 @@ template <> __device__ __forceinline__ uint saturate_cast(float v) } template <> __device__ __forceinline__ uint saturate_cast(double v) { +#if CV_CUDEV_ARCH >= 130 return __double2uint_rn(v); +#else + return saturate_cast((float) v); +#endif } }}