diff --git a/modules/gpufilters/include/opencv2/gpufilters.hpp b/modules/gpufilters/include/opencv2/gpufilters.hpp index c5a61a06e9..76b5b731d2 100644 --- a/modules/gpufilters/include/opencv2/gpufilters.hpp +++ b/modules/gpufilters/include/opencv2/gpufilters.hpp @@ -254,95 +254,16 @@ CV_EXPORTS Ptr createBoxMinFilter(int srcType, Size ksize, Point anchor = Point(-1, -1), int borderMode = BORDER_DEFAULT, Scalar borderVal = Scalar::all(0)); +//////////////////////////////////////////////////////////////////////////////////////////////////// +// 1D Sum Filter - - - - - -/*! -The Base Class for 1D or Row-wise Filters - -This is the base class for linear or non-linear filters that process 1D data. -In particular, such filters are used for the "horizontal" filtering parts in separable filters. -*/ -class CV_EXPORTS BaseRowFilter_GPU -{ -public: - BaseRowFilter_GPU(int ksize_, int anchor_) : ksize(ksize_), anchor(anchor_) {} - virtual ~BaseRowFilter_GPU() {} - virtual void operator()(const GpuMat& src, GpuMat& dst, Stream& stream = Stream::Null()) = 0; - int ksize, anchor; -}; - -/*! -The Base Class for Column-wise Filters - -This is the base class for linear or non-linear filters that process columns of 2D arrays. -Such filters are used for the "vertical" filtering parts in separable filters. -*/ -class CV_EXPORTS BaseColumnFilter_GPU -{ -public: - BaseColumnFilter_GPU(int ksize_, int anchor_) : ksize(ksize_), anchor(anchor_) {} - virtual ~BaseColumnFilter_GPU() {} - virtual void operator()(const GpuMat& src, GpuMat& dst, Stream& stream = Stream::Null()) = 0; - int ksize, anchor; -}; - -/*! -The Base Class for Non-Separable 2D Filters. - -This is the base class for linear or non-linear 2D filters. -*/ -class CV_EXPORTS BaseFilter_GPU -{ -public: - BaseFilter_GPU(const Size& ksize_, const Point& anchor_) : ksize(ksize_), anchor(anchor_) {} - virtual ~BaseFilter_GPU() {} - virtual void operator()(const GpuMat& src, GpuMat& dst, Stream& stream = Stream::Null()) = 0; - Size ksize; - Point anchor; -}; - -/*! -The Base Class for Filter Engine. - -The class can be used to apply an arbitrary filtering operation to an image. -It contains all the necessary intermediate buffers. -*/ -class CV_EXPORTS FilterEngine_GPU -{ -public: - virtual ~FilterEngine_GPU() {} - - virtual void apply(const GpuMat& src, GpuMat& dst, Rect roi = Rect(0,0,-1,-1), Stream& stream = Stream::Null()) = 0; -}; - - - -//! returns horizontal 1D box filter +//! creates a horizontal 1D box filter //! supports only CV_8UC1 source type and CV_32FC1 sum type -CV_EXPORTS Ptr getRowSumFilter_GPU(int srcType, int sumType, int ksize, int anchor = -1); +CV_EXPORTS Ptr createRowSumFilter(int srcType, int dstType, int ksize, int anchor = -1, int borderMode = BORDER_DEFAULT, Scalar borderVal = Scalar::all(0)); -//! returns vertical 1D box filter +//! creates a vertical 1D box filter //! supports only CV_8UC1 sum type and CV_32FC1 dst type -CV_EXPORTS Ptr getColumnSumFilter_GPU(int sumType, int dstType, int ksize, int anchor = -1); - - - - - - - - - - - - - - - +CV_EXPORTS Ptr createColumnSumFilter(int srcType, int dstType, int ksize, int anchor = -1, int borderMode = BORDER_DEFAULT, Scalar borderVal = Scalar::all(0)); }} // namespace cv { namespace gpu { diff --git a/modules/gpufilters/src/filtering.cpp b/modules/gpufilters/src/filtering.cpp index a8ec83a74c..7f02bdac59 100644 --- a/modules/gpufilters/src/filtering.cpp +++ b/modules/gpufilters/src/filtering.cpp @@ -66,14 +66,8 @@ Ptr cv::gpu::createMorphologyFilter(int, int, InputArray, Point, int) { Ptr cv::gpu::createBoxMaxFilter(int, Size, Point, int, Scalar) { throw_no_cuda(); return Ptr(); } Ptr cv::gpu::createBoxMinFilter(int, Size, Point, int, Scalar) { throw_no_cuda(); return Ptr(); } - - - - -Ptr cv::gpu::getRowSumFilter_GPU(int, int, int, int) { throw_no_cuda(); return Ptr(0); } -Ptr cv::gpu::getColumnSumFilter_GPU(int, int, int, int) { throw_no_cuda(); return Ptr(0); } - - +Ptr cv::gpu::createRowSumFilter(int, int, int, int, int, Scalar) { throw_no_cuda(); return Ptr(); } +Ptr cv::gpu::createColumnSumFilter(int, int, int, int, int, Scalar) { throw_no_cuda(); return Ptr(); } #else @@ -876,145 +870,129 @@ Ptr cv::gpu::createBoxMinFilter(int srcType, Size ksize, Point anchor, i return new NPPRankFilter(RANK_MIN, srcType, ksize, anchor, borderMode, borderVal); } +//////////////////////////////////////////////////////////////////////////////////////////////////// +// 1D Sum Filter +namespace +{ + class NppRowSumFilter : public Filter + { + public: + NppRowSumFilter(int srcType, int dstType, int ksize, int anchor, int borderMode, Scalar borderVal); + void apply(InputArray src, OutputArray dst, Stream& stream = Stream::Null()); + private: + int srcType_, dstType_; + int ksize_; + int anchor_; + int borderMode_; + Scalar borderVal_; + GpuMat srcBorder_; + }; + NppRowSumFilter::NppRowSumFilter(int srcType, int dstType, int ksize, int anchor, int borderMode, Scalar borderVal) : + srcType_(srcType), dstType_(dstType), ksize_(ksize), anchor_(anchor), borderMode_(borderMode), borderVal_(borderVal) + { + CV_Assert( srcType_ == CV_8UC1 ); + CV_Assert( dstType_ == CV_32FC1 ); + normalizeAnchor(anchor_, ksize_); + } + void NppRowSumFilter::apply(InputArray _src, OutputArray _dst, Stream& _stream) + { + GpuMat src = _src.getGpuMat(); + CV_Assert( src.type() == srcType_ ); + gpu::copyMakeBorder(src, srcBorder_, 0, 0, ksize_, ksize_, borderMode_, borderVal_, _stream); + _dst.create(src.size(), dstType_); + GpuMat dst = _dst.getGpuMat(); + GpuMat srcRoi = srcBorder_(Rect(ksize_, 0, src.cols, src.rows)); + cudaStream_t stream = StreamAccessor::getStream(_stream); + NppStreamHandler h(stream); + NppiSize oSizeROI; + oSizeROI.width = src.cols; + oSizeROI.height = src.rows; + nppSafeCall( nppiSumWindowRow_8u32f_C1R(srcRoi.ptr(), static_cast(srcRoi.step), + dst.ptr(), static_cast(dst.step), + oSizeROI, ksize_, anchor_) ); - - - - - - - - - - - - - - - - - - - - - - -namespace -{ - inline void normalizeROI(Rect& roi, const Size& ksize, const Point& anchor, const Size& src_size) - { - if (roi == Rect(0,0,-1,-1)) - roi = Rect(anchor.x, anchor.y, src_size.width - ksize.width, src_size.height - ksize.height); - - CV_Assert(roi.x >= 0 && roi.y >= 0 && roi.width <= src_size.width && roi.height <= src_size.height); - } - - inline void normalizeKernel(const Mat& kernel, GpuMat& gpu_krnl, int type = CV_8U, int* nDivisor = 0, bool reverse = false) - { - int scale = nDivisor && (kernel.depth() == CV_32F || kernel.depth() == CV_64F) ? 256 : 1; - if (nDivisor) *nDivisor = scale; - - Mat temp(kernel.size(), type); - kernel.convertTo(temp, type, scale); - Mat cont_krnl = temp.reshape(1, 1); - - if (reverse) - { - int count = cont_krnl.cols >> 1; - for (int i = 0; i < count; ++i) - { - std::swap(cont_krnl.at(0, i), cont_krnl.at(0, cont_krnl.cols - 1 - i)); - } - } - - gpu_krnl.upload(cont_krnl); + if (stream == 0) + cudaSafeCall( cudaDeviceSynchronize() ); } } -//////////////////////////////////////////////////////////////////////////////////////////////////// -// 1D Sum Filter +Ptr cv::gpu::createRowSumFilter(int srcType, int dstType, int ksize, int anchor, int borderMode, Scalar borderVal) +{ + return new NppRowSumFilter(srcType, dstType, ksize, anchor, borderMode, borderVal); +} namespace { - struct NppRowSumFilter : public BaseRowFilter_GPU + class NppColumnSumFilter : public Filter { - NppRowSumFilter(int ksize_, int anchor_) : BaseRowFilter_GPU(ksize_, anchor_) {} - - virtual void operator()(const GpuMat& src, GpuMat& dst, Stream& s = Stream::Null()) - { - NppiSize sz; - sz.width = src.cols; - sz.height = src.rows; - - cudaStream_t stream = StreamAccessor::getStream(s); + public: + NppColumnSumFilter(int srcType, int dstType, int ksize, int anchor, int borderMode, Scalar borderVal); - NppStreamHandler h(stream); + void apply(InputArray src, OutputArray dst, Stream& stream = Stream::Null()); - nppSafeCall( nppiSumWindowRow_8u32f_C1R(src.ptr(), static_cast(src.step), - dst.ptr(), static_cast(dst.step), sz, ksize, anchor) ); + private: + int srcType_, dstType_; + int ksize_; + int anchor_; + int borderMode_; + Scalar borderVal_; - if (stream == 0) - cudaSafeCall( cudaDeviceSynchronize() ); - } + GpuMat srcBorder_; }; -} - -Ptr cv::gpu::getRowSumFilter_GPU(int srcType, int sumType, int ksize, int anchor) -{ - CV_Assert(srcType == CV_8UC1 && sumType == CV_32FC1); - normalizeAnchor(anchor, ksize); + NppColumnSumFilter::NppColumnSumFilter(int srcType, int dstType, int ksize, int anchor, int borderMode, Scalar borderVal) : + srcType_(srcType), dstType_(dstType), ksize_(ksize), anchor_(anchor), borderMode_(borderMode), borderVal_(borderVal) + { + CV_Assert( srcType_ == CV_8UC1 ); + CV_Assert( dstType_ == CV_32FC1 ); - return Ptr(new NppRowSumFilter(ksize, anchor)); -} + normalizeAnchor(anchor_, ksize_); + } -namespace -{ - struct NppColumnSumFilter : public BaseColumnFilter_GPU + void NppColumnSumFilter::apply(InputArray _src, OutputArray _dst, Stream& _stream) { - NppColumnSumFilter(int ksize_, int anchor_) : BaseColumnFilter_GPU(ksize_, anchor_) {} - - virtual void operator()(const GpuMat& src, GpuMat& dst, Stream& s = Stream::Null()) - { - NppiSize sz; - sz.width = src.cols; - sz.height = src.rows; + GpuMat src = _src.getGpuMat(); + CV_Assert( src.type() == srcType_ ); - cudaStream_t stream = StreamAccessor::getStream(s); + gpu::copyMakeBorder(src, srcBorder_, ksize_, ksize_, 0, 0, borderMode_, borderVal_, _stream); - NppStreamHandler h(stream); + _dst.create(src.size(), dstType_); + GpuMat dst = _dst.getGpuMat(); - nppSafeCall( nppiSumWindowColumn_8u32f_C1R(src.ptr(), static_cast(src.step), - dst.ptr(), static_cast(dst.step), sz, ksize, anchor) ); + GpuMat srcRoi = srcBorder_(Rect(0, ksize_, src.cols, src.rows)); - if (stream == 0) - cudaSafeCall( cudaDeviceSynchronize() ); - } - }; -} + cudaStream_t stream = StreamAccessor::getStream(_stream); + NppStreamHandler h(stream); -Ptr cv::gpu::getColumnSumFilter_GPU(int sumType, int dstType, int ksize, int anchor) -{ - CV_Assert(sumType == CV_8UC1 && dstType == CV_32FC1); + NppiSize oSizeROI; + oSizeROI.width = src.cols; + oSizeROI.height = src.rows; - normalizeAnchor(anchor, ksize); + nppSafeCall( nppiSumWindowColumn_8u32f_C1R(srcRoi.ptr(), static_cast(srcRoi.step), + dst.ptr(), static_cast(dst.step), + oSizeROI, ksize_, anchor_) ); - return Ptr(new NppColumnSumFilter(ksize, anchor)); + if (stream == 0) + cudaSafeCall( cudaDeviceSynchronize() ); + } } - +Ptr cv::gpu::createColumnSumFilter(int srcType, int dstType, int ksize, int anchor, int borderMode, Scalar borderVal) +{ + return new NppColumnSumFilter(srcType, dstType, ksize, anchor, borderMode, borderVal); +} #endif