From 9c3f790edbeb81520ca9de2104924da1b1e3da6a Mon Sep 17 00:00:00 2001 From: filipsladek Date: Tue, 29 Mar 2016 22:30:49 +0200 Subject: [PATCH 1/4] Box filter implemented for CV_32FC1 --- .../include/opencv2/cudafilters.hpp | 2 +- modules/cudafilters/src/filtering.cpp | 37 ++++++++++++++----- 2 files changed, 28 insertions(+), 11 deletions(-) diff --git a/modules/cudafilters/include/opencv2/cudafilters.hpp b/modules/cudafilters/include/opencv2/cudafilters.hpp index 2d52265613..6769f06f1c 100644 --- a/modules/cudafilters/include/opencv2/cudafilters.hpp +++ b/modules/cudafilters/include/opencv2/cudafilters.hpp @@ -89,7 +89,7 @@ public: /** @brief Creates a normalized 2D box filter. -@param srcType Input image type. Only CV_8UC1 and CV_8UC4 are supported for now. +@param srcType Input image type. Only CV_8UC1, CV_8UC4 and CV_32FC1 are supported for now. @param dstType Output image type. Only the same type as src is supported for now. @param ksize Kernel size. @param anchor Anchor point. The default value Point(-1, -1) means that the anchor is at the kernel diff --git a/modules/cudafilters/src/filtering.cpp b/modules/cudafilters/src/filtering.cpp index b90a8c1932..587fbe1b90 100644 --- a/modules/cudafilters/src/filtering.cpp +++ b/modules/cudafilters/src/filtering.cpp @@ -103,13 +103,14 @@ namespace void apply(InputArray src, OutputArray dst, Stream& stream = Stream::Null()); private: - typedef NppStatus (*nppFilterBox_t)(const Npp8u* pSrc, Npp32s nSrcStep, Npp8u* pDst, Npp32s nDstStep, + typedef NppStatus (*nppFilterBox8U_t)(const Npp8u* pSrc, Npp32s nSrcStep, Npp8u* pDst, Npp32s nDstStep, + NppiSize oSizeROI, NppiSize oMaskSize, NppiPoint oAnchor); + typedef NppStatus (*nppFilterBox32F_t)(const Npp32f* pSrc, Npp32s nSrcStep, Npp32f* pDst, Npp32s nDstStep, NppiSize oSizeROI, NppiSize oMaskSize, NppiPoint oAnchor); Size ksize_; Point anchor_; int type_; - nppFilterBox_t func_; int borderMode_; Scalar borderVal_; GpuMat srcBorder_; @@ -118,14 +119,10 @@ namespace 
NPPBoxFilter::NPPBoxFilter(int srcType, int dstType, Size ksize, Point anchor, int borderMode, Scalar borderVal) : ksize_(ksize), anchor_(anchor), type_(srcType), borderMode_(borderMode), borderVal_(borderVal) { - static const nppFilterBox_t funcs[] = {0, nppiFilterBox_8u_C1R, 0, 0, nppiFilterBox_8u_C4R}; - - CV_Assert( srcType == CV_8UC1 || srcType == CV_8UC4 ); + CV_Assert( srcType == CV_8UC1 || srcType == CV_8UC4 || srcType == CV_32FC1); CV_Assert( dstType == srcType ); normalizeAnchor(anchor_, ksize); - - func_ = funcs[CV_MAT_CN(srcType)]; } void NPPBoxFilter::apply(InputArray _src, OutputArray _dst, Stream& _stream) @@ -155,10 +152,30 @@ namespace oAnchor.x = anchor_.x; oAnchor.y = anchor_.y; - nppSafeCall( func_(srcRoi.ptr<Npp8u>(), static_cast<int>(srcRoi.step), - dst.ptr<Npp8u>(), static_cast<int>(dst.step), - oSizeROI, oMaskSize, oAnchor) ); + const int depth = CV_MAT_DEPTH(type_); + const int cn = CV_MAT_CN(type_); + switch (depth) + { + case CV_8U: + { + static const nppFilterBox8U_t funcs8U[] = { 0, nppiFilterBox_8u_C1R, 0, 0, nppiFilterBox_8u_C4R }; + const nppFilterBox8U_t func8U = funcs8U[cn]; + nppSafeCall(func8U(srcRoi.ptr<Npp8u>(), static_cast<int>(srcRoi.step), + dst.ptr<Npp8u>(), static_cast<int>(dst.step), + oSizeROI, oMaskSize, oAnchor)); + } + break; + case CV_32F: + { + static const nppFilterBox32F_t funcs32F[] = { 0, nppiFilterBox_32f_C1R, 0, 0, 0 }; + const nppFilterBox32F_t func32F = funcs32F[cn]; + nppSafeCall(func32F(srcRoi.ptr<Npp32f>(), static_cast<int>(srcRoi.step), + dst.ptr<Npp32f>(), static_cast<int>(dst.step), + oSizeROI, oMaskSize, oAnchor)); + } + break; + } if (stream == 0) cudaSafeCall( cudaDeviceSynchronize() ); } From 493ac1df3731aa58e78a26f47d2af8d6dd16b7ee Mon Sep 17 00:00:00 2001 From: filipsladek Date: Tue, 29 Mar 2016 22:44:03 +0200 Subject: [PATCH 2/4] code style --- modules/cudafilters/src/filtering.cpp | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/modules/cudafilters/src/filtering.cpp b/modules/cudafilters/src/filtering.cpp index 587fbe1b90..fcf54523e1 100644 
--- a/modules/cudafilters/src/filtering.cpp +++ b/modules/cudafilters/src/filtering.cpp @@ -152,11 +152,11 @@ namespace oAnchor.x = anchor_.x; oAnchor.y = anchor_.y; - const int depth = CV_MAT_DEPTH(type_); - const int cn = CV_MAT_CN(type_); + const int depth = CV_MAT_DEPTH(type_); + const int cn = CV_MAT_CN(type_); - switch (depth) - { + switch (depth) + { case CV_8U: { static const nppFilterBox8U_t funcs8U[] = { 0, nppiFilterBox_8u_C1R, 0, 0, nppiFilterBox_8u_C4R }; From f635381662248c2e0305fd5d60103c36ba51e784 Mon Sep 17 00:00:00 2001 From: filipsladek Date: Wed, 30 Mar 2016 10:38:22 +0200 Subject: [PATCH 3/4] add testcase for CV_32FC1 --- modules/cudafilters/test/test_filters.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/modules/cudafilters/test/test_filters.cpp b/modules/cudafilters/test/test_filters.cpp index bac81c30b7..332daf2c0c 100644 --- a/modules/cudafilters/test/test_filters.cpp +++ b/modules/cudafilters/test/test_filters.cpp @@ -113,7 +113,7 @@ CUDA_TEST_P(Blur, Accuracy) INSTANTIATE_TEST_CASE_P(CUDA_Filters, Blur, testing::Combine( ALL_DEVICES, DIFFERENT_SIZES, - testing::Values(MatType(CV_8UC1), MatType(CV_8UC4)), + testing::Values(MatType(CV_8UC1), MatType(CV_8UC4), MatType(CV_32FC1)), testing::Values(KSize(cv::Size(3, 3)), KSize(cv::Size(5, 5)), KSize(cv::Size(7, 7))), testing::Values(Anchor(cv::Point(-1, -1)), Anchor(cv::Point(0, 0)), Anchor(cv::Point(2, 2))), testing::Values(BorderType(cv::BORDER_REFLECT101), BorderType(cv::BORDER_REPLICATE), BorderType(cv::BORDER_CONSTANT), BorderType(cv::BORDER_REFLECT)), From a821fde44e170f32d2d290585110b0a2847c3c95 Mon Sep 17 00:00:00 2001 From: filipsladek Date: Thu, 31 Mar 2016 09:05:02 +0200 Subject: [PATCH 4/4] add performance test for CV_32FC1 --- modules/cudafilters/perf/perf_filters.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/modules/cudafilters/perf/perf_filters.cpp b/modules/cudafilters/perf/perf_filters.cpp index 63abf6df74..cac7e8eba5 100644 --- 
a/modules/cudafilters/perf/perf_filters.cpp +++ b/modules/cudafilters/perf/perf_filters.cpp @@ -53,7 +53,7 @@ DEF_PARAM_TEST(Sz_Type_KernelSz, cv::Size, MatType, int); PERF_TEST_P(Sz_Type_KernelSz, Blur, Combine(CUDA_TYPICAL_MAT_SIZES, - Values(CV_8UC1, CV_8UC4), + Values(CV_8UC1, CV_8UC4, CV_32FC1), Values(3, 5, 7))) { declare.time(20.0);