Added new border types for pyrDown

pull/2785/head
Alexander Karsakov 11 years ago
parent 7032a5a46c
commit 5022a0fae3
  1. 128
      modules/imgproc/src/opencl/pyr_down.cl
  2. 14
      modules/imgproc/src/pyramids.cpp
  3. 18
      modules/imgproc/test/ocl/test_pyramids.cpp

@ -51,6 +51,22 @@
#endif
#endif
// EXTRAPOLATE(x, maxV) maps a coordinate x that may fall outside [0, maxV)
// to an index inside the image, using the border mode chosen by whichever
// BORDER_* macro is defined when this file is compiled.
// Every variant may evaluate its arguments more than once, so pass only
// side-effect-free expressions.
#if defined BORDER_REPLICATE
// aaaaaa|abcdefgh|hhhhhhh
#define EXTRAPOLATE(x, maxV) clamp((x), 0, (maxV)-1)
#elif defined BORDER_WRAP
// cdefgh|abcdefgh|abcdefg
#define EXTRAPOLATE(x, maxV) ( ( (x) + (maxV) ) % (maxV) )
#elif defined BORDER_REFLECT
// fedcba|abcdefgh|hgfedcb
// A single reflection is only valid for x in [-maxV, 2*maxV-1]; the outer
// clamp keeps the index in range for coordinates further out.
#define EXTRAPOLATE(x, maxV) clamp(min(((maxV)-1)*2-(x)+1, max((x),-(x)-1) ), 0, (maxV)-1)
#elif defined BORDER_REFLECT_101 || defined BORDER_REFLECT101
// gfedcb|abcdefgh|gfedcba
// A single reflection is only valid for x in [-(maxV-1), 2*maxV-2]; the outer
// clamp keeps the index in range for coordinates further out.
#define EXTRAPOLATE(x, maxV) clamp(min(((maxV)-1)*2-(x), max((x),-(x)) ), 0, (maxV)-1)
#else
#error No extrapolation method
#endif
#if cn != 3
#define loadpix(addr) *(__global const T*)(addr)
#define storepix(val, addr) *(__global T*)(addr) = (val)
@ -61,37 +77,9 @@
#define PIXSIZE ((int)sizeof(T1)*3)
#endif
#define noconvert
// Returns |y| modulo the row count (last_row + 1): a negative row index is
// mirrored to its absolute value before the remainder is taken.
inline int idx_row_low(int y, int last_row)
{
    const int row_count = last_row + 1;
    return abs(y) % row_count;
}
// Mirrors y about last_row (last_row - |last_row - y|), takes the absolute
// value of that, and reduces it modulo the row count (last_row + 1).
inline int idx_row_high(int y, int last_row)
{
    const int row_count = last_row + 1;
    const int dist_to_last = (int)abs(last_row - y);
    return abs(last_row - dist_to_last) % row_count;
}
// Row index mapping: applies idx_row_high first, then feeds its result
// through idx_row_low.
inline int idx_row(int y, int last_row)
{
    const int folded_high = idx_row_high(y, last_row);
    return idx_row_low(folded_high, last_row);
}
// Returns |x| modulo the column count (last_col + 1): a negative column
// index is mirrored to its absolute value before the remainder is taken.
inline int idx_col_low(int x, int last_col)
{
    const int col_count = last_col + 1;
    return abs(x) % col_count;
}
// Loads the source pixel at column _x, row _y (byte offset
// _y * src_step + _x * PIXSIZE from srcData) and passes it through convertToFT.
// Arguments are parenthesized so expressions such as SRC(x + 1, y) index correctly.
#define SRC(_x,_y) convertToFT(loadpix(srcData + mad24((_y), src_step, PIXSIZE * (_x))))
// Mirrors x about last_col (last_col - |last_col - x|), takes the absolute
// value of that, and reduces it modulo the column count (last_col + 1).
inline int idx_col_high(int x, int last_col)
{
    const int col_count = last_col + 1;
    const int dist_to_last = (int)abs(last_col - x);
    return abs(last_col - dist_to_last) % col_count;
}
// Column index mapping: applies idx_col_high first, then feeds its result
// through idx_col_low.
inline int idx_col(int x, int last_col)
{
    const int folded_high = idx_col_high(x, last_col);
    return idx_col_low(folded_high, last_col);
}
#define noconvert
__kernel void pyrDown(__global const uchar * src, int src_step, int src_offset, int src_rows, int src_cols,
__global uchar * dst, int dst_step, int dst_offset, int dst_rows, int dst_cols)
@ -99,7 +87,7 @@ __kernel void pyrDown(__global const uchar * src, int src_step, int src_offset,
const int x = get_global_id(0);
const int y = get_group_id(1);
__local FT smem[256 + 4];
__local FT smem[LOCAL_SIZE + 4];
__global uchar * dstData = dst + dst_offset;
__global const uchar * srcData = src + src_offset;
@ -109,16 +97,14 @@ __kernel void pyrDown(__global const uchar * src, int src_step, int src_offset,
FT co3 = 0.0625f;
const int src_y = 2*y;
const int last_row = src_rows - 1;
const int last_col = src_cols - 1;
if (src_y >= 2 && src_y < src_rows - 2 && x >= 2 && x < src_cols - 2)
{
sum = co3 * convertToFT(loadpix(srcData + (src_y - 2) * src_step + x * PIXSIZE));
sum = sum + co2 * convertToFT(loadpix(srcData + (src_y - 1) * src_step + x * PIXSIZE));
sum = sum + co1 * convertToFT(loadpix(srcData + (src_y ) * src_step + x * PIXSIZE));
sum = sum + co2 * convertToFT(loadpix(srcData + (src_y + 1) * src_step + x * PIXSIZE));
sum = sum + co3 * convertToFT(loadpix(srcData + (src_y + 2) * src_step + x * PIXSIZE));
sum = co3 * SRC(x, src_y - 2);
sum = sum + co2 * SRC(x, src_y - 1);
sum = sum + co1 * SRC(x, src_y );
sum = sum + co2 * SRC(x, src_y + 1);
sum = sum + co3 * SRC(x, src_y + 2);
smem[2 + get_local_id(0)] = sum;
@ -126,66 +112,62 @@ __kernel void pyrDown(__global const uchar * src, int src_step, int src_offset,
{
const int left_x = x - 2;
sum = co3 * convertToFT(loadpix(srcData + (src_y - 2) * src_step + left_x * PIXSIZE));
sum = sum + co2 * convertToFT(loadpix(srcData + (src_y - 1) * src_step + left_x * PIXSIZE));
sum = sum + co1 * convertToFT(loadpix(srcData + (src_y ) * src_step + left_x * PIXSIZE));
sum = sum + co2 * convertToFT(loadpix(srcData + (src_y + 1) * src_step + left_x * PIXSIZE));
sum = sum + co3 * convertToFT(loadpix(srcData + (src_y + 2) * src_step + left_x * PIXSIZE));
sum = co3 * SRC(left_x, src_y - 2);
sum = sum + co2 * SRC(left_x, src_y - 1);
sum = sum + co1 * SRC(left_x, src_y );
sum = sum + co2 * SRC(left_x, src_y + 1);
sum = sum + co3 * SRC(left_x, src_y + 2);
smem[get_local_id(0)] = sum;
}
if (get_local_id(0) > 253)
if (get_local_id(0) > LOCAL_SIZE - 3)
{
const int right_x = x + 2;
sum = co3 * convertToFT(loadpix(srcData + (src_y - 2) * src_step + right_x * PIXSIZE));
sum = sum + co2 * convertToFT(loadpix(srcData + (src_y - 1) * src_step + right_x * PIXSIZE));
sum = sum + co1 * convertToFT(loadpix(srcData + (src_y ) * src_step + right_x * PIXSIZE));
sum = sum + co2 * convertToFT(loadpix(srcData + (src_y + 1) * src_step + right_x * PIXSIZE));
sum = sum + co3 * convertToFT(loadpix(srcData + (src_y + 2) * src_step + right_x * PIXSIZE));
sum = co3 * SRC(right_x, src_y - 2);
sum = sum + co2 * SRC(right_x, src_y - 1);
sum = sum + co1 * SRC(right_x, src_y );
sum = sum + co2 * SRC(right_x, src_y + 1);
sum = sum + co3 * SRC(right_x, src_y + 2);
smem[4 + get_local_id(0)] = sum;
}
}
else
{
int col = idx_col(x, last_col);
int col = EXTRAPOLATE(x, src_cols);
sum = co3 * convertToFT(loadpix(srcData + idx_row(src_y - 2, last_row) * src_step + col * PIXSIZE));
sum = sum + co2 * convertToFT(loadpix(srcData + idx_row(src_y - 1, last_row) * src_step + col * PIXSIZE));
sum = sum + co1 * convertToFT(loadpix(srcData + idx_row(src_y , last_row) * src_step + col * PIXSIZE));
sum = sum + co2 * convertToFT(loadpix(srcData + idx_row(src_y + 1, last_row) * src_step + col * PIXSIZE));
sum = sum + co3 * convertToFT(loadpix(srcData + idx_row(src_y + 2, last_row) * src_step + col * PIXSIZE));
sum = co3 * SRC(col, EXTRAPOLATE(src_y - 2, src_rows));
sum = sum + co2 * SRC(col, EXTRAPOLATE(src_y - 1, src_rows));
sum = sum + co1 * SRC(col, EXTRAPOLATE(src_y , src_rows));
sum = sum + co2 * SRC(col, EXTRAPOLATE(src_y + 1, src_rows));
sum = sum + co3 * SRC(col, EXTRAPOLATE(src_y + 2, src_rows));
smem[2 + get_local_id(0)] = sum;
if (get_local_id(0) < 2)
{
const int left_x = x - 2;
col = EXTRAPOLATE(x - 2, src_cols);
col = idx_col(left_x, last_col);
sum = co3 * convertToFT(loadpix(srcData + idx_row(src_y - 2, last_row) * src_step + col * PIXSIZE));
sum = sum + co2 * convertToFT(loadpix(srcData + idx_row(src_y - 1, last_row) * src_step + col * PIXSIZE));
sum = sum + co1 * convertToFT(loadpix(srcData + idx_row(src_y , last_row) * src_step + col * PIXSIZE));
sum = sum + co2 * convertToFT(loadpix(srcData + idx_row(src_y + 1, last_row) * src_step + col * PIXSIZE));
sum = sum + co3 * convertToFT(loadpix(srcData + idx_row(src_y + 2, last_row) * src_step + col * PIXSIZE));
sum = co3 * SRC(col, EXTRAPOLATE(src_y - 2, src_rows));
sum = sum + co2 * SRC(col, EXTRAPOLATE(src_y - 1, src_rows));
sum = sum + co1 * SRC(col, EXTRAPOLATE(src_y , src_rows));
sum = sum + co2 * SRC(col, EXTRAPOLATE(src_y + 1, src_rows));
sum = sum + co3 * SRC(col, EXTRAPOLATE(src_y + 2, src_rows));
smem[get_local_id(0)] = sum;
}
if (get_local_id(0) > 253)
if (get_local_id(0) > LOCAL_SIZE - 3)
{
const int right_x = x + 2;
col = idx_col(right_x, last_col);
col = EXTRAPOLATE(x + 2, src_cols);
sum = co3 * convertToFT(loadpix(srcData + idx_row(src_y - 2, last_row) * src_step + col * PIXSIZE));
sum = sum + co2 * convertToFT(loadpix(srcData + idx_row(src_y - 1, last_row) * src_step + col * PIXSIZE));
sum = sum + co1 * convertToFT(loadpix(srcData + idx_row(src_y , last_row) * src_step + col * PIXSIZE));
sum = sum + co2 * convertToFT(loadpix(srcData + idx_row(src_y + 1, last_row) * src_step + col * PIXSIZE));
sum = sum + co3 * convertToFT(loadpix(srcData + idx_row(src_y + 2, last_row) * src_step + col * PIXSIZE));
sum = co3 * SRC(col, EXTRAPOLATE(src_y - 2, src_rows));
sum = sum + co2 * SRC(col, EXTRAPOLATE(src_y - 1, src_rows));
sum = sum + co1 * SRC(col, EXTRAPOLATE(src_y , src_rows));
sum = sum + co2 * SRC(col, EXTRAPOLATE(src_y + 1, src_rows));
sum = sum + co3 * SRC(col, EXTRAPOLATE(src_y + 2, src_rows));
smem[4 + get_local_id(0)] = sum;
}
@ -193,7 +175,7 @@ __kernel void pyrDown(__global const uchar * src, int src_step, int src_offset,
barrier(CLK_LOCAL_MEM_FENCE);
if (get_local_id(0) < 128)
if (get_local_id(0) < LOCAL_SIZE / 2)
{
const int tid2 = get_local_id(0) * 2;

@ -407,11 +407,8 @@ static bool ocl_pyrDown( InputArray _src, OutputArray _dst, const Size& _dsz, in
{
int type = _src.type(), depth = CV_MAT_DEPTH(type), channels = CV_MAT_CN(type);
if (channels > 4 || borderType != BORDER_DEFAULT)
return false;
bool doubleSupport = ocl::Device::getDefault().doubleFPConfig() > 0;
if ((depth == CV_64F) && !(doubleSupport))
if (channels > 4 || (depth == CV_64F && !doubleSupport))
return false;
Size ssize = _src.size();
@ -425,15 +422,18 @@ static bool ocl_pyrDown( InputArray _src, OutputArray _dst, const Size& _dsz, in
UMat dst = _dst.getUMat();
int float_depth = depth == CV_64F ? CV_64F : CV_32F;
const int local_size = 256;
const char * const borderMap[] = { "BORDER_CONSTANT", "BORDER_REPLICATE", "BORDER_REFLECT", "BORDER_WRAP",
"BORDER_REFLECT_101" };
char cvt[2][50];
String buildOptions = format(
"-D T=%s -D FT=%s -D convertToT=%s -D convertToFT=%s%s "
"-D T1=%s -D cn=%d",
"-D T1=%s -D cn=%d -D %s -D LOCAL_SIZE=%d",
ocl::typeToStr(type), ocl::typeToStr(CV_MAKETYPE(float_depth, channels)),
ocl::convertTypeStr(float_depth, depth, channels, cvt[0]),
ocl::convertTypeStr(depth, float_depth, channels, cvt[1]),
doubleSupport ? " -D DOUBLE_SUPPORT" : "",
ocl::typeToStr(depth), channels
ocl::typeToStr(depth), channels, borderMap[borderType], local_size
);
ocl::Kernel k("pyrDown", ocl::imgproc::pyr_down_oclsrc, buildOptions);
if (k.empty())
@ -441,7 +441,7 @@ static bool ocl_pyrDown( InputArray _src, OutputArray _dst, const Size& _dsz, in
k.args(ocl::KernelArg::ReadOnly(src), ocl::KernelArg::WriteOnly(dst));
size_t localThreads[2] = { 256, 1 };
size_t localThreads[2] = { local_size, 1 };
size_t globalThreads[2] = { src.cols, dst.rows };
return k.run(2, globalThreads, localThreads, false);
}

@ -52,9 +52,9 @@
namespace cvtest {
namespace ocl {
PARAM_TEST_CASE(PyrTestBase, MatDepth, Channels, bool)
PARAM_TEST_CASE(PyrTestBase, MatDepth, Channels, BorderType, bool)
{
int depth, channels;
int depth, channels, borderType;
bool use_roi;
TEST_DECLARE_INPUT_PARAMETER(src);
@ -64,7 +64,8 @@ PARAM_TEST_CASE(PyrTestBase, MatDepth, Channels, bool)
{
depth = GET_PARAM(0);
channels = GET_PARAM(1);
use_roi = GET_PARAM(2);
borderType = GET_PARAM(2);
use_roi = GET_PARAM(3);
}
void generateTestData(Size src_roiSize, Size dst_roiSize)
@ -99,8 +100,8 @@ OCL_TEST_P(PyrDown, Mat)
dst_roiSize = dst_roiSize.area() == 0 ? Size((src_roiSize.width + 1) / 2, (src_roiSize.height + 1) / 2) : dst_roiSize;
generateTestData(src_roiSize, dst_roiSize);
OCL_OFF(pyrDown(src_roi, dst_roi, dst_roiSize));
OCL_ON(pyrDown(usrc_roi, udst_roi, dst_roiSize));
OCL_OFF(pyrDown(src_roi, dst_roi, dst_roiSize, borderType));
OCL_ON(pyrDown(usrc_roi, udst_roi, dst_roiSize, borderType));
Near(depth == CV_32F ? 1e-4f : 1.0f);
}
@ -109,6 +110,8 @@ OCL_TEST_P(PyrDown, Mat)
// Instantiates the PyrDown tests for the combinations of: matrix depth,
// channel count, border type (REPLICATE, REFLECT, REFLECT_101) and a boolean
// (PyrTestBase reads the fourth parameter into use_roi).
OCL_INSTANTIATE_TEST_CASE_P(ImgprocPyr, PyrDown, Combine(
Values(CV_8U, CV_16U, CV_16S, CV_32F, CV_64F),
Values(1, 2, 3, 4),
Values((BorderType)BORDER_REPLICATE,
(BorderType)BORDER_REFLECT, (BorderType)BORDER_REFLECT_101),
Bool()
));
@ -124,8 +127,8 @@ OCL_TEST_P(PyrUp, Mat)
Size dst_roiSize = Size(2 * src_roiSize.width, 2 * src_roiSize.height);
generateTestData(src_roiSize, dst_roiSize);
OCL_OFF(pyrUp(src_roi, dst_roi, dst_roiSize));
OCL_ON(pyrUp(usrc_roi, udst_roi, dst_roiSize));
OCL_OFF(pyrUp(src_roi, dst_roi, dst_roiSize, borderType));
OCL_ON(pyrUp(usrc_roi, udst_roi, dst_roiSize, borderType));
Near(depth == CV_32F ? 1e-4f : 1.0f);
}
@ -134,6 +137,7 @@ OCL_TEST_P(PyrUp, Mat)
// Instantiates the PyrUp tests for the combinations of: matrix depth,
// channel count, a single border type (REFLECT_101) and a boolean
// (PyrTestBase reads the fourth parameter into use_roi).
OCL_INSTANTIATE_TEST_CASE_P(ImgprocPyr, PyrUp, Combine(
Values(CV_8U, CV_16U, CV_16S, CV_32F, CV_64F),
Values(1, 2, 3, 4),
Values((BorderType)BORDER_REFLECT_101),
Bool()
));

Loading…
Cancel
Save