From 5022a0fae38d4baf053de727bc1f73fac0c7f084 Mon Sep 17 00:00:00 2001 From: Alexander Karsakov Date: Thu, 22 May 2014 10:47:59 +0400 Subject: [PATCH] Added new border types for pyrDown --- modules/imgproc/src/opencl/pyr_down.cl | 128 +++++++++------------ modules/imgproc/src/pyramids.cpp | 14 +-- modules/imgproc/test/ocl/test_pyramids.cpp | 18 +-- 3 files changed, 73 insertions(+), 87 deletions(-) diff --git a/modules/imgproc/src/opencl/pyr_down.cl b/modules/imgproc/src/opencl/pyr_down.cl index 6ba0cc691d..b8b06b712b 100644 --- a/modules/imgproc/src/opencl/pyr_down.cl +++ b/modules/imgproc/src/opencl/pyr_down.cl @@ -51,6 +51,22 @@ #endif #endif +#if defined BORDER_REPLICATE +// aaaaaa|abcdefgh|hhhhhhh +#define EXTRAPOLATE(x, maxV) clamp(x, 0, maxV-1) +#elif defined BORDER_WRAP +// cdefgh|abcdefgh|abcdefg +#define EXTRAPOLATE(x, maxV) ( (x) + (maxV) ) % (maxV) +#elif defined BORDER_REFLECT +// fedcba|abcdefgh|hgfedcb +#define EXTRAPOLATE(x, maxV) min(((maxV)-1)*2-(x)+1, max((x),-(x)-1) ) +#elif defined BORDER_REFLECT_101 || defined BORDER_REFLECT101 +// gfedcb|abcdefgh|gfedcba +#define EXTRAPOLATE(x, maxV) min(((maxV)-1)*2-(x), max((x),-(x)) ) +#else +#error No extrapolation method +#endif + #if cn != 3 #define loadpix(addr) *(__global const T*)(addr) #define storepix(val, addr) *(__global T*)(addr) = (val) @@ -61,37 +77,9 @@ #define PIXSIZE ((int)sizeof(T1)*3) #endif -#define noconvert - -inline int idx_row_low(int y, int last_row) -{ - return abs(y) % (last_row + 1); -} - -inline int idx_row_high(int y, int last_row) -{ - return abs(last_row - (int)abs(last_row - y)) % (last_row + 1); -} - -inline int idx_row(int y, int last_row) -{ - return idx_row_low(idx_row_high(y, last_row), last_row); -} - -inline int idx_col_low(int x, int last_col) -{ - return abs(x) % (last_col + 1); -} +#define SRC(_x,_y) convertToFT(loadpix(srcData + mad24(_y, src_step, PIXSIZE * _x))) -inline int idx_col_high(int x, int last_col) -{ - return abs(last_col - (int)abs(last_col - x)) % (last_col + 1); -} - -inline int idx_col(int x, int last_col) -{ - return idx_col_low(idx_col_high(x, last_col), last_col); -} +#define noconvert __kernel void pyrDown(__global const uchar * src, int src_step, int src_offset, int src_rows, int src_cols, __global uchar * dst, int dst_step, int dst_offset, int dst_rows, int dst_cols) @@ -99,7 +87,7 @@ __kernel void pyrDown(__global const uchar * src, int src_step, int src_offset, const int x = get_global_id(0); const int y = get_group_id(1); - __local FT smem[256 + 4]; + __local FT smem[LOCAL_SIZE + 4]; __global uchar * dstData = dst + dst_offset; __global const uchar * srcData = src + src_offset; @@ -109,16 +97,14 @@ __kernel void pyrDown(__global const uchar * src, int src_step, int src_offset, FT co3 = 0.0625f; const int src_y = 2*y; - const int last_row = src_rows - 1; - const int last_col = src_cols - 1; if (src_y >= 2 && src_y < src_rows - 2 && x >= 2 && x < src_cols - 2) { - sum = co3 * convertToFT(loadpix(srcData + (src_y - 2) * src_step + x * PIXSIZE)); - sum = sum + co2 * convertToFT(loadpix(srcData + (src_y - 1) * src_step + x * PIXSIZE)); - sum = sum + co1 * convertToFT(loadpix(srcData + (src_y ) * src_step + x * PIXSIZE)); - sum = sum + co2 * convertToFT(loadpix(srcData + (src_y + 1) * src_step + x * PIXSIZE)); - sum = sum + co3 * convertToFT(loadpix(srcData + (src_y + 2) * src_step + x * PIXSIZE)); + sum = co3 * SRC(x, src_y - 2); + sum = sum + co2 * SRC(x, src_y - 1); + sum = sum + co1 * SRC(x, src_y ); + sum = sum + co2 * SRC(x, src_y + 1); + sum = sum + co3 * SRC(x, src_y + 2); smem[2 + get_local_id(0)] = sum; @@ -126,66 +112,62 @@ __kernel void pyrDown(__global const uchar * src, int src_step, int src_offset, { const int left_x = x - 2; - sum = co3 * convertToFT(loadpix(srcData + (src_y - 2) * src_step + left_x * PIXSIZE)); - sum = sum + co2 * convertToFT(loadpix(srcData + (src_y - 1) * src_step + left_x * PIXSIZE)); - sum = sum + co1 * convertToFT(loadpix(srcData + (src_y ) * src_step + left_x * PIXSIZE)); - sum = sum + co2 * convertToFT(loadpix(srcData + (src_y + 1) * src_step + left_x * PIXSIZE)); - sum = sum + co3 * convertToFT(loadpix(srcData + (src_y + 2) * src_step + left_x * PIXSIZE)); + sum = co3 * SRC(left_x, src_y - 2); + sum = sum + co2 * SRC(left_x, src_y - 1); + sum = sum + co1 * SRC(left_x, src_y ); + sum = sum + co2 * SRC(left_x, src_y + 1); + sum = sum + co3 * SRC(left_x, src_y + 2); smem[get_local_id(0)] = sum; } - if (get_local_id(0) > 253) + if (get_local_id(0) > LOCAL_SIZE - 3) { const int right_x = x + 2; - sum = co3 * convertToFT(loadpix(srcData + (src_y - 2) * src_step + right_x * PIXSIZE)); - sum = sum + co2 * convertToFT(loadpix(srcData + (src_y - 1) * src_step + right_x * PIXSIZE)); - sum = sum + co1 * convertToFT(loadpix(srcData + (src_y ) * src_step + right_x * PIXSIZE)); - sum = sum + co2 * convertToFT(loadpix(srcData + (src_y + 1) * src_step + right_x * PIXSIZE)); - sum = sum + co3 * convertToFT(loadpix(srcData + (src_y + 2) * src_step + right_x * PIXSIZE)); + sum = co3 * SRC(right_x, src_y - 2); + sum = sum + co2 * SRC(right_x, src_y - 1); + sum = sum + co1 * SRC(right_x, src_y ); + sum = sum + co2 * SRC(right_x, src_y + 1); + sum = sum + co3 * SRC(right_x, src_y + 2); smem[4 + get_local_id(0)] = sum; } } else { - int col = idx_col(x, last_col); + int col = EXTRAPOLATE(x, src_cols); - sum = co3 * convertToFT(loadpix(srcData + idx_row(src_y - 2, last_row) * src_step + col * PIXSIZE)); - sum = sum + co2 * convertToFT(loadpix(srcData + idx_row(src_y - 1, last_row) * src_step + col * PIXSIZE)); - sum = sum + co1 * convertToFT(loadpix(srcData + idx_row(src_y , last_row) * src_step + col * PIXSIZE)); - sum = sum + co2 * convertToFT(loadpix(srcData + idx_row(src_y + 1, last_row) * src_step + col * PIXSIZE)); - sum = sum + co3 * convertToFT(loadpix(srcData + idx_row(src_y + 2, last_row) * src_step + col * PIXSIZE)); + sum = co3 * SRC(col, EXTRAPOLATE(src_y - 2, src_rows)); + sum = sum + co2 * SRC(col, EXTRAPOLATE(src_y - 1, src_rows)); + sum = sum + co1 * SRC(col, EXTRAPOLATE(src_y , src_rows)); + sum = sum + co2 * SRC(col, EXTRAPOLATE(src_y + 1, src_rows)); + sum = sum + co3 * SRC(col, EXTRAPOLATE(src_y + 2, src_rows)); smem[2 + get_local_id(0)] = sum; if (get_local_id(0) < 2) { - const int left_x = x - 2; + col = EXTRAPOLATE(x - 2, src_cols); - col = idx_col(left_x, last_col); - - sum = co3 * convertToFT(loadpix(srcData + idx_row(src_y - 2, last_row) * src_step + col * PIXSIZE)); - sum = sum + co2 * convertToFT(loadpix(srcData + idx_row(src_y - 1, last_row) * src_step + col * PIXSIZE)); - sum = sum + co1 * convertToFT(loadpix(srcData + idx_row(src_y , last_row) * src_step + col * PIXSIZE)); - sum = sum + co2 * convertToFT(loadpix(srcData + idx_row(src_y + 1, last_row) * src_step + col * PIXSIZE)); - sum = sum + co3 * convertToFT(loadpix(srcData + idx_row(src_y + 2, last_row) * src_step + col * PIXSIZE)); + sum = co3 * SRC(col, EXTRAPOLATE(src_y - 2, src_rows)); + sum = sum + co2 * SRC(col, EXTRAPOLATE(src_y - 1, src_rows)); + sum = sum + co1 * SRC(col, EXTRAPOLATE(src_y , src_rows)); + sum = sum + co2 * SRC(col, EXTRAPOLATE(src_y + 1, src_rows)); + sum = sum + co3 * SRC(col, EXTRAPOLATE(src_y + 2, src_rows)); smem[get_local_id(0)] = sum; } - if (get_local_id(0) > 253) + if (get_local_id(0) > LOCAL_SIZE - 3) { - const int right_x = x + 2; - - col = idx_col(right_x, last_col); + col = EXTRAPOLATE(x + 2, src_cols); - sum = co3 * convertToFT(loadpix(srcData + idx_row(src_y - 2, last_row) * src_step + col * PIXSIZE)); - sum = sum + co2 * convertToFT(loadpix(srcData + idx_row(src_y - 1, last_row) * src_step + col * PIXSIZE)); - sum = sum + co1 * convertToFT(loadpix(srcData + idx_row(src_y , last_row) * src_step + col * PIXSIZE)); - sum = sum + co2 * convertToFT(loadpix(srcData + idx_row(src_y + 1, last_row) * src_step + col * PIXSIZE)); - sum = sum + co3 * convertToFT(loadpix(srcData + idx_row(src_y + 2, last_row) * src_step + col * PIXSIZE)); + sum = co3 * SRC(col, EXTRAPOLATE(src_y - 2, src_rows)); + sum = sum + co2 * SRC(col, EXTRAPOLATE(src_y - 1, src_rows)); + sum = sum + co1 * SRC(col, EXTRAPOLATE(src_y , src_rows)); + sum = sum + co2 * SRC(col, EXTRAPOLATE(src_y + 1, src_rows)); + sum = sum + co3 * SRC(col, EXTRAPOLATE(src_y + 2, src_rows)); smem[4 + get_local_id(0)] = sum; } @@ -193,7 +175,7 @@ __kernel void pyrDown(__global const uchar * src, int src_step, int src_offset, barrier(CLK_LOCAL_MEM_FENCE); - if (get_local_id(0) < 128) + if (get_local_id(0) < LOCAL_SIZE / 2) { const int tid2 = get_local_id(0) * 2; diff --git a/modules/imgproc/src/pyramids.cpp b/modules/imgproc/src/pyramids.cpp index 42464c1a5d..d1ed92d5d9 100644 --- a/modules/imgproc/src/pyramids.cpp +++ b/modules/imgproc/src/pyramids.cpp @@ -407,11 +407,8 @@ static bool ocl_pyrDown( InputArray _src, OutputArray _dst, const Size& _dsz, in { int type = _src.type(), depth = CV_MAT_DEPTH(type), channels = CV_MAT_CN(type); - if (channels > 4 || borderType != BORDER_DEFAULT) - return false; - bool doubleSupport = ocl::Device::getDefault().doubleFPConfig() > 0; - if ((depth == CV_64F) && !(doubleSupport)) + if (channels > 4 || (depth == CV_64F && !doubleSupport)) return false; Size ssize = _src.size(); @@ -425,15 +422,18 @@ static bool ocl_pyrDown( InputArray _src, OutputArray _dst, const Size& _dsz, in UMat dst = _dst.getUMat(); int float_depth = depth == CV_64F ? CV_64F : CV_32F; + const int local_size = 256; + const char * const borderMap[] = { "BORDER_CONSTANT", "BORDER_REPLICATE", "BORDER_REFLECT", "BORDER_WRAP", + "BORDER_REFLECT_101" }; char cvt[2][50]; String buildOptions = format( "-D T=%s -D FT=%s -D convertToT=%s -D convertToFT=%s%s " - "-D T1=%s -D cn=%d", + "-D T1=%s -D cn=%d -D %s -D LOCAL_SIZE=%d", ocl::typeToStr(type), ocl::typeToStr(CV_MAKETYPE(float_depth, channels)), ocl::convertTypeStr(float_depth, depth, channels, cvt[0]), ocl::convertTypeStr(depth, float_depth, channels, cvt[1]), doubleSupport ? " -D DOUBLE_SUPPORT" : "", - ocl::typeToStr(depth), channels + ocl::typeToStr(depth), channels, borderMap[borderType], local_size ); ocl::Kernel k("pyrDown", ocl::imgproc::pyr_down_oclsrc, buildOptions); if (k.empty()) @@ -441,7 +441,7 @@ static bool ocl_pyrDown( InputArray _src, OutputArray _dst, const Size& _dsz, in k.args(ocl::KernelArg::ReadOnly(src), ocl::KernelArg::WriteOnly(dst)); - size_t localThreads[2] = { 256, 1 }; + size_t localThreads[2] = { local_size, 1 }; size_t globalThreads[2] = { src.cols, dst.rows }; return k.run(2, globalThreads, localThreads, false); } diff --git a/modules/imgproc/test/ocl/test_pyramids.cpp b/modules/imgproc/test/ocl/test_pyramids.cpp index 113349b302..a129c7f771 100644 --- a/modules/imgproc/test/ocl/test_pyramids.cpp +++ b/modules/imgproc/test/ocl/test_pyramids.cpp @@ -52,9 +52,9 @@ namespace cvtest { namespace ocl { -PARAM_TEST_CASE(PyrTestBase, MatDepth, Channels, bool) +PARAM_TEST_CASE(PyrTestBase, MatDepth, Channels, BorderType, bool) { - int depth, channels; + int depth, channels, borderType; bool use_roi; TEST_DECLARE_INPUT_PARAMETER(src); @@ -64,7 +64,8 @@ PARAM_TEST_CASE(PyrTestBase, MatDepth, Channels, bool) { depth = GET_PARAM(0); channels = GET_PARAM(1); - use_roi = GET_PARAM(2); + borderType = GET_PARAM(2); + use_roi = GET_PARAM(3); } void generateTestData(Size src_roiSize, Size dst_roiSize) @@ -99,8 +100,8 @@ OCL_TEST_P(PyrDown, Mat) dst_roiSize = dst_roiSize.area() == 0 ? Size((src_roiSize.width + 1) / 2, (src_roiSize.height + 1) / 2) : dst_roiSize; generateTestData(src_roiSize, dst_roiSize); - OCL_OFF(pyrDown(src_roi, dst_roi, dst_roiSize)); - OCL_ON(pyrDown(usrc_roi, udst_roi, dst_roiSize)); + OCL_OFF(pyrDown(src_roi, dst_roi, dst_roiSize, borderType)); + OCL_ON(pyrDown(usrc_roi, udst_roi, dst_roiSize, borderType)); Near(depth == CV_32F ? 1e-4f : 1.0f); } @@ -109,6 +110,8 @@ OCL_TEST_P(PyrDown, Mat) OCL_INSTANTIATE_TEST_CASE_P(ImgprocPyr, PyrDown, Combine( Values(CV_8U, CV_16U, CV_16S, CV_32F, CV_64F), Values(1, 2, 3, 4), + Values((BorderType)BORDER_REPLICATE, + (BorderType)BORDER_REFLECT, (BorderType)BORDER_REFLECT_101), Bool() )); @@ -124,8 +127,8 @@ OCL_TEST_P(PyrUp, Mat) Size dst_roiSize = Size(2 * src_roiSize.width, 2 * src_roiSize.height); generateTestData(src_roiSize, dst_roiSize); - OCL_OFF(pyrUp(src_roi, dst_roi, dst_roiSize)); - OCL_ON(pyrUp(usrc_roi, udst_roi, dst_roiSize)); + OCL_OFF(pyrUp(src_roi, dst_roi, dst_roiSize, borderType)); + OCL_ON(pyrUp(usrc_roi, udst_roi, dst_roiSize, borderType)); Near(depth == CV_32F ? 1e-4f : 1.0f); } @@ -134,6 +137,7 @@ OCL_TEST_P(PyrUp, Mat) OCL_INSTANTIATE_TEST_CASE_P(ImgprocPyr, PyrUp, Combine( Values(CV_8U, CV_16U, CV_16S, CV_32F, CV_64F), Values(1, 2, 3, 4), + Values((BorderType)BORDER_REFLECT_101), Bool() ));