diff --git a/modules/imgproc/perf/opencl/perf_filters.cpp b/modules/imgproc/perf/opencl/perf_filters.cpp index 25b11caad9..57b928c289 100644 --- a/modules/imgproc/perf/opencl/perf_filters.cpp +++ b/modules/imgproc/perf/opencl/perf_filters.cpp @@ -77,6 +77,31 @@ OCL_PERF_TEST_P(BlurFixture, Blur, SANITY_CHECK(dst, eps); } +///////////// SqrBoxFilter //////////////////////// + +typedef tuple SqrBoxFilterParams; +typedef TestBaseWithParam SqrBoxFilterFixture; + +OCL_PERF_TEST_P(SqrBoxFilterFixture, SqrBoxFilter, + ::testing::Combine(OCL_TEST_SIZES, OCL_PERF_ENUM(CV_8UC1, CV_8UC4, CV_32FC1, CV_32FC4), + OCL_PERF_ENUM(Size(3, 3), Size(20, 3), Size(3, 20), Size(20, 20)))) +{ + const SqrBoxFilterParams params = GetParam(); + const Size srcSize = get<0>(params), ksize = get<2>(params); + const int type = get<1>(params), depth = CV_MAT_DEPTH(type), + ddepth = depth == CV_8U ? CV_32S : CV_32F; + const double eps = ddepth == CV_32S ? 0 : 5e-5; + + checkDeviceMaxMemoryAllocSize(srcSize, CV_MAKE_TYPE(ddepth, CV_MAT_CN(type))); + + UMat src(srcSize, type), dst(srcSize, type); + declare.in(src, WARMUP_RNG).out(dst); + + OCL_TEST_CYCLE() cv::sqrBoxFilter(src, dst, ddepth, ksize, Point(-1, -1), false); + + SANITY_CHECK(dst, eps); +} + ///////////// Laplacian//////////////////////// typedef FilterFixture LaplacianFixture; diff --git a/modules/imgproc/perf/opencl/perf_imgproc.cpp b/modules/imgproc/perf/opencl/perf_imgproc.cpp index 7e5a817da5..ae6112e0d2 100644 --- a/modules/imgproc/perf/opencl/perf_imgproc.cpp +++ b/modules/imgproc/perf/opencl/perf_imgproc.cpp @@ -252,22 +252,6 @@ OCL_PERF_TEST_P(CLAHEFixture, CLAHE, OCL_TEST_SIZES) SANITY_CHECK(dst); } -///////////// SqrBoxFilter //////////////////////// - -typedef TestBaseWithParam SqrBoxFilterFixture; - -OCL_PERF_TEST_P(SqrBoxFilterFixture, SqrBoxFilter, OCL_TEST_SIZES) -{ - const Size srcSize = GetParam(); - - UMat src(srcSize, CV_8UC1), dst(srcSize, CV_32SC1); - declare.in(src, WARMUP_RNG).out(dst); - - OCL_TEST_CYCLE() cv::sqrBoxFilter(src, dst, CV_32S, Size(3, 3)); - - SANITY_CHECK(dst); -} - ///////////// Canny //////////////////////// typedef tuple CannyParams; diff --git a/modules/imgproc/src/opencl/boxFilter.cl b/modules/imgproc/src/opencl/boxFilter.cl index 16a15cfe2b..986fc785c6 100644 --- a/modules/imgproc/src/opencl/boxFilter.cl +++ b/modules/imgproc/src/opencl/boxFilter.cl @@ -104,6 +104,12 @@ #define noconvert +#ifdef SQR +#define PROCESS_ELEM(value) (value * value) +#else +#define PROCESS_ELEM(value) value +#endif + struct RectCoords { int x1, y1, x2, y2; @@ -118,7 +124,9 @@ inline WT readSrcPixel(int2 pos, __global const uchar * srcptr, int src_step, co #endif { int src_index = mad24(pos.y, src_step, pos.x * (int)sizeof(ST)); - return convertToWT(*(__global const ST *)(srcptr + src_index)); + WT value = convertToWT(*(__global const ST *)(srcptr + src_index)); + + return PROCESS_ELEM(value); } else { @@ -136,7 +144,9 @@ inline WT readSrcPixel(int2 pos, __global const uchar * srcptr, int src_step, co srcCoords.x2, srcCoords.y2); int src_index = mad24(selected_row, src_step, selected_col * (int)sizeof(ST)); - return convertToWT(*(__global const ST *)(srcptr + src_index)); + WT value = convertToWT(*(__global const ST *)(srcptr + src_index)); + + return PROCESS_ELEM(value); #endif } } diff --git a/modules/imgproc/src/smooth.cpp b/modules/imgproc/src/smooth.cpp index f6a5ffd107..40687a226c 100644 --- a/modules/imgproc/src/smooth.cpp +++ b/modules/imgproc/src/smooth.cpp @@ -67,15 +67,18 @@ namespace cv Box Filter \****************************************************************************************/ -template struct RowSum : public BaseRowFilter +template +struct RowSum : + public BaseRowFilter { - RowSum( int _ksize, int _anchor ) + RowSum( int _ksize, int _anchor ) : + BaseRowFilter() { ksize = _ksize; anchor = _anchor; } - void operator()(const uchar* src, uchar* dst, int width, int cn) + virtual void operator()(const uchar* src, uchar* dst, int width, int cn) { const T* S = (const T*)src; ST* D = (ST*)dst; @@ -98,9 +101,12 @@ template struct RowSum : public BaseRowFilter }; -template struct ColumnSum : public BaseColumnFilter +template +struct ColumnSum : + public BaseColumnFilter { - ColumnSum( int _ksize, int _anchor, double _scale ) + ColumnSum( int _ksize, int _anchor, double _scale ) : + BaseColumnFilter() { ksize = _ksize; anchor = _anchor; @@ -108,9 +114,9 @@ template struct ColumnSum : public BaseColumnFilter sumCount = 0; } - void reset() { sumCount = 0; } + virtual void reset() { sumCount = 0; } - void operator()(const uchar** src, uchar* dst, int dststep, int count, int width) + virtual void operator()(const uchar** src, uchar* dst, int dststep, int count, int width) { int i; ST* SUM; @@ -198,9 +204,12 @@ template struct ColumnSum : public BaseColumnFilter }; -template<> struct ColumnSum : public BaseColumnFilter +template<> +struct ColumnSum : + public BaseColumnFilter { - ColumnSum( int _ksize, int _anchor, double _scale ) + ColumnSum( int _ksize, int _anchor, double _scale ) : + BaseColumnFilter() { ksize = _ksize; anchor = _anchor; @@ -208,9 +217,9 @@ template<> struct ColumnSum : public BaseColumnFilter sumCount = 0; } - void reset() { sumCount = 0; } + virtual void reset() { sumCount = 0; } - void operator()(const uchar** src, uchar* dst, int dststep, int count, int width) + virtual void operator()(const uchar** src, uchar* dst, int dststep, int count, int width) { int i; int* SUM; @@ -339,9 +348,12 @@ template<> struct ColumnSum : public BaseColumnFilter std::vector sum; }; -template<> struct ColumnSum : public BaseColumnFilter +template<> +struct ColumnSum : + public BaseColumnFilter { - ColumnSum( int _ksize, int _anchor, double _scale ) + ColumnSum( int _ksize, int _anchor, double _scale ) : + BaseColumnFilter() { ksize = _ksize; anchor = _anchor; @@ -349,9 +361,9 @@ template<> struct ColumnSum : public BaseColumnFilter sumCount = 0; } - void reset() { sumCount = 0; } + virtual void reset() { sumCount = 0; } - void operator()(const uchar** src, uchar* dst, int dststep, int count, int width) + virtual void operator()(const uchar** src, uchar* dst, int dststep, int count, int width) { int i; int* SUM; @@ -477,9 +489,12 @@ template<> struct ColumnSum : public BaseColumnFilter }; -template<> struct ColumnSum : public BaseColumnFilter +template<> +struct ColumnSum : + public BaseColumnFilter { - ColumnSum( int _ksize, int _anchor, double _scale ) + ColumnSum( int _ksize, int _anchor, double _scale ) : + BaseColumnFilter() { ksize = _ksize; anchor = _anchor; @@ -487,9 +502,9 @@ template<> struct ColumnSum : public BaseColumnFilter sumCount = 0; } - void reset() { sumCount = 0; } + virtual void reset() { sumCount = 0; } - void operator()(const uchar** src, uchar* dst, int dststep, int count, int width) + virtual void operator()(const uchar** src, uchar* dst, int dststep, int count, int width) { int i; int* SUM; @@ -616,7 +631,7 @@ template<> struct ColumnSum : public BaseColumnFilter #define DIVUP(total, grain) ((total + grain - 1) / (grain)) static bool ocl_boxFilter( InputArray _src, OutputArray _dst, int ddepth, - Size ksize, Point anchor, int borderType, bool normalize ) + Size ksize, Point anchor, int borderType, bool normalize, bool sqr = false ) { int type = _src.type(), sdepth = CV_MAT_DEPTH(type), cn = CV_MAT_CN(type), esz = CV_ELEM_SIZE(type); bool doubleSupport = ocl::Device::getDefault().doubleFPConfig() > 0; @@ -661,7 +676,7 @@ static bool ocl_boxFilter( InputArray _src, OutputArray _dst, int ddepth, ocl::Kernel kernel; for ( ; ; ) { - int BLOCK_SIZE_X = tryWorkItems, BLOCK_SIZE_Y = 8; + int BLOCK_SIZE_X = tryWorkItems, BLOCK_SIZE_Y = std::min(ksize.height * 10, size.height); while (BLOCK_SIZE_X > 32 && BLOCK_SIZE_X >= ksize.width * 2 && BLOCK_SIZE_X > size.width * 2) BLOCK_SIZE_X /= 2; @@ -673,14 +688,14 @@ static bool ocl_boxFilter( InputArray _src, OutputArray _dst, int ddepth, char cvt[2][50]; String opts = format("-D LOCAL_SIZE_X=%d -D BLOCK_SIZE_Y=%d -D ST=%s -D DT=%s -D WT=%s -D convertToDT=%s -D convertToWT=%s " - "-D ANCHOR_X=%d -D ANCHOR_Y=%d -D KERNEL_SIZE_X=%d -D KERNEL_SIZE_Y=%d -D %s%s%s%s", + "-D ANCHOR_X=%d -D ANCHOR_Y=%d -D KERNEL_SIZE_X=%d -D KERNEL_SIZE_Y=%d -D %s%s%s%s%s", BLOCK_SIZE_X, BLOCK_SIZE_Y, ocl::typeToStr(type), ocl::typeToStr(CV_MAKE_TYPE(ddepth, cn)), ocl::typeToStr(CV_MAKE_TYPE(wdepth, cn)), ocl::convertTypeStr(wdepth, ddepth, cn, cvt[0]), ocl::convertTypeStr(sdepth, wdepth, cn, cvt[1]), anchor.x, anchor.y, ksize.width, ksize.height, borderMap[borderType], isolated ? " -D BORDER_ISOLATED" : "", doubleSupport ? " -D DOUBLE_SUPPORT" : "", - normalize ? " -D NORMALIZE" : ""); + normalize ? " -D NORMALIZE" : "", sqr ? " -D SQR" : ""); localsize[0] = BLOCK_SIZE_X; globalsize[0] = DIVUP(size.width, BLOCK_SIZE_X - (ksize.width - 1)) * BLOCK_SIZE_X; @@ -860,15 +875,18 @@ void cv::blur( InputArray src, OutputArray dst, namespace cv { -template struct SqrRowSum : public BaseRowFilter +template +struct SqrRowSum : + public BaseRowFilter { - SqrRowSum( int _ksize, int _anchor ) + SqrRowSum( int _ksize, int _anchor ) : + BaseRowFilter() { ksize = _ksize; anchor = _anchor; } - void operator()(const uchar* src, uchar* dst, int width, int cn) + virtual void operator()(const uchar* src, uchar* dst, int width, int cn) { const T* S = (const T*)src; ST* D = (ST*)dst; @@ -928,26 +946,31 @@ void cv::sqrBoxFilter( InputArray _src, OutputArray _dst, int ddepth, Size ksize, Point anchor, bool normalize, int borderType ) { - Mat src = _src.getMat(); - int sdepth = src.depth(), cn = src.channels(); + int srcType = _src.type(), sdepth = CV_MAT_DEPTH(srcType), cn = CV_MAT_CN(srcType); + Size size = _src.size(); + if( ddepth < 0 ) ddepth = sdepth < CV_32F ? CV_32F : CV_64F; - _dst.create( src.size(), CV_MAKETYPE(ddepth, cn) ); - Mat dst = _dst.getMat(); + if( borderType != BORDER_CONSTANT && normalize ) { - if( src.rows == 1 ) + if( size.height == 1 ) ksize.height = 1; - if( src.cols == 1 ) + if( size.width == 1 ) ksize.width = 1; } - int sumType = CV_64F; + CV_OCL_RUN(_dst.isUMat() && _src.dims() <= 2, + ocl_boxFilter(_src, _dst, ddepth, ksize, anchor, borderType, normalize, true)) + + int sumDepth = CV_64F; if( sdepth == CV_8U ) - sumType = CV_32S; - sumType = CV_MAKETYPE( sumType, cn ); - int srcType = CV_MAKETYPE(sdepth, cn); - int dstType = CV_MAKETYPE(ddepth, cn); + sumDepth = CV_32S; + int sumType = CV_MAKETYPE( sumDepth, cn ), dstType = CV_MAKETYPE(ddepth, cn); + + Mat src = _src.getMat(); + _dst.create( size, dstType ); + Mat dst = _dst.getMat(); Ptr rowFilter = getSqrRowSumFilter(srcType, sumType, ksize.width, anchor.x ); Ptr columnFilter = getColumnSumFilter(sumType, diff --git a/modules/imgproc/test/ocl/test_boxfilter.cpp b/modules/imgproc/test/ocl/test_boxfilter.cpp index 96ad46fb5c..c95657c9e9 100644 --- a/modules/imgproc/test/ocl/test_boxfilter.cpp +++ b/modules/imgproc/test/ocl/test_boxfilter.cpp @@ -49,23 +49,16 @@ namespace cvtest { namespace ocl { -enum -{ - noType = -1 -}; +////////////////////////////////////////// boxFilter /////////////////////////////////////////////////////// -///////////////////////////////////////////////////////////////////////////////////////////////// -// boxFilter -PARAM_TEST_CASE(BoxFilter, MatDepth, Channels, BorderType, bool, bool) +PARAM_TEST_CASE(BoxFilterBase, MatDepth, Channels, BorderType, bool, bool) { static const int kernelMinSize = 2; static const int kernelMaxSize = 10; - int depth, cn; - Size ksize; - Size dsize; + int depth, cn, borderType; + Size ksize, dsize; Point anchor; - int borderType; bool normalize, useRoi; TEST_DECLARE_INPUT_PARAMETER(src) @@ -106,6 +99,8 @@ PARAM_TEST_CASE(BoxFilter, MatDepth, Channels, BorderType, bool, bool) } }; +typedef BoxFilterBase BoxFilter; + OCL_TEST_P(BoxFilter, Mat) { for (int j = 0; j < test_loop_times; j++) @@ -119,6 +114,22 @@ OCL_TEST_P(BoxFilter, Mat) } } +typedef BoxFilterBase SqrBoxFilter; + +OCL_TEST_P(SqrBoxFilter, Mat) +{ + for (int j = 0; j < test_loop_times; j++) + { + random_roi(); + + int ddepth = depth == CV_8U ? CV_32S : CV_64F; + + OCL_OFF(cv::sqrBoxFilter(src_roi, dst_roi, ddepth, ksize, anchor, normalize, borderType)); + OCL_ON(cv::sqrBoxFilter(usrc_roi, udst_roi, ddepth, ksize, anchor, normalize, borderType)); + + Near(depth <= CV_32S ? 1 : 7e-2); + } +} OCL_INSTANTIATE_TEST_CASE_P(ImageProc, BoxFilter, Combine( @@ -133,6 +144,19 @@ OCL_INSTANTIATE_TEST_CASE_P(ImageProc, BoxFilter, ) ); +OCL_INSTANTIATE_TEST_CASE_P(ImageProc, SqrBoxFilter, + Combine( + Values(CV_8U, CV_16U, CV_16S, CV_32F, CV_64F), + Values(1, 2, 4), + Values((BorderType)BORDER_CONSTANT, + (BorderType)BORDER_REPLICATE, + (BorderType)BORDER_REFLECT, + (BorderType)BORDER_REFLECT_101), + Bool(), + Bool() // ROI + ) + ); + } } // namespace cvtest::ocl