From a72da12c801bc0066e80a5fc2436f79903834db0 Mon Sep 17 00:00:00 2001 From: Alexey Spizhevoy Date: Tue, 13 Sep 2011 14:15:18 +0000 Subject: [PATCH] Added fix for 0 found features in stitching matcher --- modules/gpu/src/cuda/element_operations.cu | 95 ++++++++++++++++++++++ modules/gpu/src/element_operations.cpp | 55 ++++++++++++- modules/stitching/src/matchers.cpp | 3 +- 3 files changed, 149 insertions(+), 4 deletions(-) diff --git a/modules/gpu/src/cuda/element_operations.cu b/modules/gpu/src/cuda/element_operations.cu index 9f47dabdaa..71574b7346 100644 --- a/modules/gpu/src/cuda/element_operations.cu +++ b/modules/gpu/src/cuda/element_operations.cu @@ -604,6 +604,78 @@ namespace cv { namespace gpu { namespace device template void pow_caller(const DevMem2D& src, float power, DevMem2D dst, cudaStream_t stream); + ////////////////////////////////////////////////////////////////////////// + // divide + + struct divide_8uc4_32f : binary_function + { + __device__ __forceinline__ uchar4 operator ()(uchar4 a, float b) const + { + return make_uchar4(saturate_cast(a.x / b), saturate_cast(a.y / b), + saturate_cast(a.z / b), saturate_cast(a.w / b)); + } + }; + + template <> struct TransformFunctorTraits : DefaultTransformFunctorTraits + { + enum { smart_block_dim_x = 8 }; + enum { smart_block_dim_y = 8 }; + enum { smart_shift = 8 }; + }; + + void divide_gpu(const DevMem2D_& src1, const DevMem2Df& src2, const DevMem2D_& dst, cudaStream_t stream) + { + transform(static_cast< DevMem2D_ >(src1), src2, static_cast< DevMem2D_ >(dst), divide_8uc4_32f(), stream); + } + + + struct divide_16sc4_32f : binary_function + { + __device__ __forceinline__ short4 operator ()(short4 a, float b) const + { + return make_short4(saturate_cast(a.x / b), saturate_cast(a.y / b), + saturate_cast(a.z / b), saturate_cast(a.w / b)); + } + }; + + template <> struct TransformFunctorTraits : DefaultTransformFunctorTraits + { + enum { smart_block_dim_x = 8 }; + enum { smart_block_dim_y = 8 }; + enum { 
smart_shift = 8 }; + }; + + void divide_gpu(const DevMem2D_& src1, const DevMem2Df& src2, const DevMem2D_& dst, cudaStream_t stream) + { + transform(static_cast< DevMem2D_ >(src1), src2, static_cast< DevMem2D_ >(dst), divide_16sc4_32f(), stream); + } + + + ////////////////////////////////////////////////////////////////////////// + // add + + struct add_16sc4 : binary_function + { + __device__ __forceinline__ short4 operator ()(short4 a, short4 b) const + { + return make_short4(a.x + b.x, a.y + b.y, a.z + b.z, a.w + b.w); + } + }; + + template <> struct TransformFunctorTraits : DefaultTransformFunctorTraits + { + enum { smart_block_dim_x = 8 }; + enum { smart_block_dim_y = 8 }; + enum { smart_shift = 8 }; + }; + + void add_gpu(const DevMem2D_& src1, const DevMem2D_& src2, const DevMem2D_& dst, cudaStream_t stream) + { + transform(static_cast< DevMem2D_ >(src1), static_cast< DevMem2D_ >(src2), + static_cast< DevMem2D_ >(dst), add_16sc4(), stream); + } + + + ////////////////////////////////////////////////////////////////////////// // multiply @@ -634,6 +706,29 @@ namespace cv { namespace gpu { namespace device transform(static_cast< DevMem2D_ >(src1), src2, static_cast< DevMem2D_ >(dst), multiply_8uc4_32f(), stream); } + struct multiply_16sc4_32f : binary_function + { + __device__ __forceinline__ short4 operator ()(short4 a, float b) const + { + return make_short4(saturate_cast(a.x * b), saturate_cast(a.y * b), + saturate_cast(a.z * b), saturate_cast(a.w * b)); + } + }; + + template <> struct TransformFunctorTraits : DefaultTransformFunctorTraits + { + enum { smart_block_dim_x = 8 }; + enum { smart_block_dim_y = 8 }; + enum { smart_shift = 8 }; + }; + + void multiply_gpu(const DevMem2D_& src1, const DevMem2Df& src2, const DevMem2D_& dst, cudaStream_t stream) + { + transform(static_cast< DevMem2D_ >(src1), src2, + static_cast< DevMem2D_ >(dst), multiply_16sc4_32f(), stream); + } + + ////////////////////////////////////////////////////////////////////////// // 
multiply (by scalar) diff --git a/modules/gpu/src/element_operations.cpp b/modules/gpu/src/element_operations.cpp index c6b74257bf..68724fe668 100644 --- a/modules/gpu/src/element_operations.cpp +++ b/modules/gpu/src/element_operations.cpp @@ -174,9 +174,21 @@ namespace }; } +namespace cv { namespace gpu { namespace device +{ + void add_gpu(const DevMem2D_& src1, const DevMem2D_& src2, const DevMem2D_& dst, cudaStream_t stream); +}}} + void cv::gpu::add(const GpuMat& src1, const GpuMat& src2, GpuMat& dst, Stream& stream) { - nppArithmCaller(src1, src2, dst, nppiAdd_8u_C1RSfs, nppiAdd_8u_C4RSfs, nppiAdd_32s_C1R, nppiAdd_32f_C1R, StreamAccessor::getStream(stream)); + if (src1.type() == CV_16SC4 && src2.type() == CV_16SC4) + { + CV_Assert(src1.size() == src2.size()); + dst.create(src1.size(), src1.type()); + device::add_gpu(src1, src2, dst, StreamAccessor::getStream(stream)); + } + else + nppArithmCaller(src1, src2, dst, nppiAdd_8u_C1RSfs, nppiAdd_8u_C4RSfs, nppiAdd_32s_C1R, nppiAdd_32f_C1R, StreamAccessor::getStream(stream)); } namespace cv { namespace gpu { namespace device @@ -200,6 +212,7 @@ void cv::gpu::subtract(const GpuMat& src1, const GpuMat& src2, GpuMat& dst, Stre namespace cv { namespace gpu { namespace device { void multiply_gpu(const DevMem2D_& src1, const DevMem2Df& src2, const DevMem2D_& dst, cudaStream_t stream); + void multiply_gpu(const DevMem2D_& src1, const DevMem2Df& src2, const DevMem2D_& dst, cudaStream_t stream); template void multiplyScalar_gpu(const DevMem2D& src, float scale, const DevMem2D& dst, cudaStream_t stream); @@ -213,7 +226,17 @@ void cv::gpu::multiply(const GpuMat& src1, const GpuMat& src2, GpuMat& dst, Stre dst.create(src1.size(), src1.type()); - device::multiply_gpu(src1, src2, dst, StreamAccessor::getStream(stream)); + device::multiply_gpu(static_cast >(src1), static_cast(src2), + static_cast >(dst), StreamAccessor::getStream(stream)); + } + else if (src1.type() == CV_16SC4 && src2.type() == CV_32FC1) + { + 
CV_Assert(src1.size() == src2.size()); + + dst.create(src1.size(), src1.type()); + + device::multiply_gpu(static_cast >(src1), static_cast(src2), + static_cast >(dst), StreamAccessor::getStream(stream)); } else nppArithmCaller(src1, src2, dst, nppiMul_8u_C1RSfs, nppiMul_8u_C4RSfs, nppiMul_32s_C1R, nppiMul_32f_C1R, StreamAccessor::getStream(stream)); @@ -249,9 +272,35 @@ void cv::gpu::multiply(const GpuMat& src, const Scalar& sc, GpuMat& dst, Stream& } +namespace cv { namespace gpu { namespace device +{ + void divide_gpu(const DevMem2D_& src1, const DevMem2Df& src2, const DevMem2D_& dst, cudaStream_t stream); + void divide_gpu(const DevMem2D_& src1, const DevMem2Df& src2, const DevMem2D_& dst, cudaStream_t stream); +}}} + + void cv::gpu::divide(const GpuMat& src1, const GpuMat& src2, GpuMat& dst, Stream& stream) { - nppArithmCaller(src2, src1, dst, nppiDiv_8u_C1RSfs, nppiDiv_8u_C4RSfs, nppiDiv_32s_C1R, nppiDiv_32f_C1R, StreamAccessor::getStream(stream)); + if (src1.type() == CV_8UC4 && src2.type() == CV_32FC1) + { + CV_Assert(src1.size() == src2.size()); + + dst.create(src1.size(), src1.type()); + + device::divide_gpu(static_cast >(src1), static_cast(src2), + static_cast >(dst), StreamAccessor::getStream(stream)); + } + else if (src1.type() == CV_16SC4 && src2.type() == CV_32FC1) + { + CV_Assert(src1.size() == src2.size()); + + dst.create(src1.size(), src1.type()); + + device::divide_gpu(static_cast >(src1), static_cast(src2), + static_cast >(dst), StreamAccessor::getStream(stream)); + } + else + nppArithmCaller(src2, src1, dst, nppiDiv_8u_C1RSfs, nppiDiv_8u_C4RSfs, nppiDiv_32s_C1R, nppiDiv_32f_C1R, StreamAccessor::getStream(stream)); } void cv::gpu::add(const GpuMat& src, const Scalar& sc, GpuMat& dst, Stream& stream) diff --git a/modules/stitching/src/matchers.cpp b/modules/stitching/src/matchers.cpp index af60b8d68f..187423fea6 100644 --- a/modules/stitching/src/matchers.cpp +++ b/modules/stitching/src/matchers.cpp @@ -347,7 +347,8 @@ void 
FeaturesMatcher::operator ()(const vector &features, vector< vector > near_pairs; for (int i = 0; i < num_images - 1; ++i) for (int j = i + 1; j < num_images; ++j) - near_pairs.push_back(make_pair(i, j)); + if (features[i].keypoints.size() > 0 && features[j].keypoints.size() > 0) + near_pairs.push_back(make_pair(i, j)); pairwise_matches.resize(num_images * num_images); MatchPairsBody body(*this, features, pairwise_matches, near_pairs);