From 48dec9c03afc4c9ab26b536e713d072f4840c007 Mon Sep 17 00:00:00 2001 From: Alexey Spizhevoy Date: Sat, 24 Sep 2011 05:58:29 +0000 Subject: [PATCH] Fixed GPU version of multi-band blending in stitching module --- modules/gpu/src/cuda/element_operations.cu | 19 +++--------- modules/gpu/src/element_operations.cpp | 7 +++-- .../opencv2/stitching/detail/blenders.hpp | 2 +- modules/stitching/src/blenders.cpp | 31 +++++++++++++++++-- 4 files changed, 39 insertions(+), 20 deletions(-) diff --git a/modules/gpu/src/cuda/element_operations.cu b/modules/gpu/src/cuda/element_operations.cu index 7548e901fd..70f3bab8e6 100644 --- a/modules/gpu/src/cuda/element_operations.cu +++ b/modules/gpu/src/cuda/element_operations.cu @@ -654,27 +654,18 @@ namespace cv { namespace gpu { namespace device ////////////////////////////////////////////////////////////////////////// // multiply - struct add_16sc4 : binary_function + template <> struct TransformFunctorTraits< plus > : DefaultTransformFunctorTraits< plus > { - __device__ __forceinline__ short4 operator ()(short4 a, short4 b) const - { - return make_short4(a.x + b.x, a.y + b.y, a.z + b.z, a.w + b.w); - } - }; - - template <> struct TransformFunctorTraits : DefaultTransformFunctorTraits - { - enum { smart_block_dim_x = 8 }; enum { smart_block_dim_y = 8 }; - enum { smart_shift = 8 }; + enum { smart_shift = 4 }; }; - void add_gpu(const DevMem2D_& src1, const DevMem2D_& src2, const DevMem2D_& dst, cudaStream_t stream) + template void add_gpu(const DevMem2D src1, const DevMem2D src2, DevMem2D dst, cudaStream_t stream) { - transform(static_cast< DevMem2D_ >(src1), static_cast< DevMem2D_ >(src2), - static_cast< DevMem2D_ >(dst), add_16sc4(), stream); + transform((DevMem2D_)src1, (DevMem2D_)src2, (DevMem2D_)dst, plus(), stream); } + template void add_gpu(const DevMem2D src1, const DevMem2D src2, DevMem2D dst, cudaStream_t stream); ////////////////////////////////////////////////////////////////////////// // multiply diff --git a/modules/gpu/src/element_operations.cpp b/modules/gpu/src/element_operations.cpp index 4cf857ed4b..1173803c98 100644 --- a/modules/gpu/src/element_operations.cpp +++ b/modules/gpu/src/element_operations.cpp @@ -177,16 +177,17 @@ namespace namespace cv { namespace gpu { namespace device { - void add_gpu(const DevMem2D_& src1, const DevMem2D_& src2, const DevMem2D_& dst, cudaStream_t stream); + template + void add_gpu(const DevMem2D src1, const DevMem2D src2, DevMem2D dst, cudaStream_t stream); }}} void cv::gpu::add(const GpuMat& src1, const GpuMat& src2, GpuMat& dst, Stream& stream) { - if (src1.type() == CV_16SC4 && src2.type() == CV_16SC4) + if (src1.depth() == CV_16S && src2.depth() == CV_16S) { CV_Assert(src1.size() == src2.size()); dst.create(src1.size(), src1.type()); - device::add_gpu(src1, src2, dst, StreamAccessor::getStream(stream)); + device::add_gpu(src1.reshape(1), src2.reshape(1), dst.reshape(1), StreamAccessor::getStream(stream)); } else nppArithmCaller(src1, src2, dst, nppiAdd_8u_C1RSfs, nppiAdd_8u_C4RSfs, nppiAdd_32s_C1R, nppiAdd_32f_C1R, StreamAccessor::getStream(stream)); diff --git a/modules/stitching/include/opencv2/stitching/detail/blenders.hpp b/modules/stitching/include/opencv2/stitching/detail/blenders.hpp index e14744787e..b291f93fd0 100644 --- a/modules/stitching/include/opencv2/stitching/detail/blenders.hpp +++ b/modules/stitching/include/opencv2/stitching/detail/blenders.hpp @@ -114,11 +114,11 @@ void CV_EXPORTS normalizeUsingWeightMap(const Mat& weight, Mat& src); void CV_EXPORTS createWeightMap(const Mat& mask, float sharpness, Mat& weight); void CV_EXPORTS createLaplacePyr(const Mat &img, int num_levels, std::vector& pyr); - void CV_EXPORTS createLaplacePyrGpu(const Mat &img, int num_levels, std::vector& pyr); // Restores source image void CV_EXPORTS restoreImageFromLaplacePyr(std::vector& pyr); +void CV_EXPORTS restoreImageFromLaplacePyrGpu(std::vector& pyr); } // namespace detail } // namespace cv diff --git a/modules/stitching/src/blenders.cpp b/modules/stitching/src/blenders.cpp index ccf2d02f41..957fe8d3d6 100644 --- a/modules/stitching/src/blenders.cpp +++ b/modules/stitching/src/blenders.cpp @@ -289,7 +289,10 @@ void MultiBandBlender::blend(Mat &dst, Mat &dst_mask) for (int i = 0; i <= num_bands_; ++i) normalizeUsingWeightMap(dst_band_weights_[i], dst_pyr_laplace_[i]); - restoreImageFromLaplacePyr(dst_pyr_laplace_); + if (can_use_gpu_) + restoreImageFromLaplacePyrGpu(dst_pyr_laplace_); + else + restoreImageFromLaplacePyr(dst_pyr_laplace_); dst_ = dst_pyr_laplace_[0]; dst_ = dst_(Range(0, dst_roi_final_.height), Range(0, dst_roi_final_.width)); @@ -346,6 +349,7 @@ void createLaplacePyr(const Mat &img, int num_levels, vector &pyr) } } + void createLaplacePyrGpu(const Mat &img, int num_levels, vector &pyr) { #ifndef ANDROID @@ -368,9 +372,10 @@ void createLaplacePyrGpu(const Mat &img, int num_levels, vector &pyr) #endif } + void restoreImageFromLaplacePyr(vector &pyr) { - if (pyr.size() == 0) + if (pyr.empty()) return; Mat tmp; for (size_t i = pyr.size() - 1; i > 0; --i) @@ -380,5 +385,27 @@ void restoreImageFromLaplacePyr(vector &pyr) } } + +void restoreImageFromLaplacePyrGpu(vector &pyr) +{ +#ifndef ANDROID + if (pyr.empty()) + return; + + vector gpu_pyr(pyr.size()); + for (size_t i = 0; i < pyr.size(); ++i) + gpu_pyr[i] = pyr[i]; + + gpu::GpuMat tmp; + for (size_t i = pyr.size() - 1; i > 0; --i) + { + gpu::pyrUp(gpu_pyr[i], tmp); + gpu::add(tmp, gpu_pyr[i - 1], gpu_pyr[i - 1]); + } + + pyr[0] = gpu_pyr[0]; +#endif +} + } // namespace detail } // namespace cv