generalized OpenCL version of cv::sepFilter2D; removed some restrictions and added 3-channels support

pull/2491/head
Ilya Lavrenov 11 years ago
parent b449b0bf71
commit 291458a859
  1. 68
      modules/imgproc/src/filter.cpp
  2. 47
      modules/imgproc/src/opencl/filterSepCol.cl
  3. 45
      modules/imgproc/src/opencl/filterSepRow.cl
  4. 2
      modules/imgproc/test/ocl/test_filters.cpp
  5. 17
      modules/imgproc/test/ocl/test_sepfilter2D.cpp

@ -41,7 +41,6 @@
//M*/ //M*/
#include "precomp.hpp" #include "precomp.hpp"
#define CV_OPENCL_RUN_ASSERT
#include "opencl_kernels.hpp" #include "opencl_kernels.hpp"
#include <sstream> #include <sstream>
@ -3318,11 +3317,16 @@ static bool ocl_filter2D( InputArray _src, OutputArray _dst, int ddepth,
return kernel.run(2, globalsize, localsize, true); return kernel.run(2, globalsize, localsize, true);
} }
static bool ocl_sepRowFilter2D( UMat &src, UMat &buf, Mat &kernelX, int anchor, int borderType) static bool ocl_sepRowFilter2D(const UMat & src, UMat & buf, const Mat & kernelX, int anchor,
int borderType, int ddepth, bool fast8uc1)
{ {
int type = src.type(), cn = CV_MAT_CN(type), sdepth = CV_MAT_DEPTH(type); int type = src.type(), cn = CV_MAT_CN(type), sdepth = CV_MAT_DEPTH(type);
bool doubleSupport = ocl::Device::getDefault().doubleFPConfig() > 0;
Size bufSize = buf.size(); Size bufSize = buf.size();
if (!doubleSupport && (sdepth == CV_64F || ddepth == CV_64F))
return false;
#ifdef ANDROID #ifdef ANDROID
size_t localsize[2] = {16, 10}; size_t localsize[2] = {16, 10};
#else #else
@ -3330,7 +3334,7 @@ static bool ocl_sepRowFilter2D( UMat &src, UMat &buf, Mat &kernelX, int anchor,
#endif #endif
size_t globalsize[2] = {DIVUP(bufSize.width, localsize[0]) * localsize[0], DIVUP(bufSize.height, localsize[1]) * localsize[1]}; size_t globalsize[2] = {DIVUP(bufSize.width, localsize[0]) * localsize[0], DIVUP(bufSize.height, localsize[1]) * localsize[1]};
if (type == CV_8UC1) if (fast8uc1)
globalsize[0] = DIVUP((bufSize.width + 3) >> 2, localsize[0]) * localsize[0]; globalsize[0] = DIVUP((bufSize.width + 3) >> 2, localsize[0]) * localsize[0];
int radiusX = anchor, radiusY = (buf.rows - src.rows) >> 1; int radiusX = anchor, radiusY = (buf.rows - src.rows) >> 1;
@ -3346,20 +3350,21 @@ static bool ocl_sepRowFilter2D( UMat &src, UMat &buf, Mat &kernelX, int anchor,
char cvt[40]; char cvt[40];
cv::String build_options = cv::format("-D RADIUSX=%d -D LSIZE0=%d -D LSIZE1=%d -D CN=%d -D %s -D %s -D %s" cv::String build_options = cv::format("-D RADIUSX=%d -D LSIZE0=%d -D LSIZE1=%d -D CN=%d -D %s -D %s -D %s"
" -D srcT=%s -D dstT=%s -D convertToDstT=%s -D srcT1=%s -D dstT1=%s", " -D srcT=%s -D dstT=%s -D convertToDstT=%s -D srcT1=%s -D dstT1=%s%s",
radiusX, (int)localsize[0], (int)localsize[1], cn, btype, radiusX, (int)localsize[0], (int)localsize[1], cn, btype,
extra_extrapolation ? "EXTRA_EXTRAPOLATION" : "NO_EXTRA_EXTRAPOLATION", extra_extrapolation ? "EXTRA_EXTRAPOLATION" : "NO_EXTRA_EXTRAPOLATION",
isolated ? "BORDER_ISOLATED" : "NO_BORDER_ISOLATED", isolated ? "BORDER_ISOLATED" : "NO_BORDER_ISOLATED",
ocl::typeToStr(type), ocl::typeToStr(CV_32FC(cn)), ocl::typeToStr(type), ocl::typeToStr(CV_32FC(cn)),
ocl::convertTypeStr(sdepth, CV_32F, cn, cvt), ocl::convertTypeStr(sdepth, CV_32F, cn, cvt),
ocl::typeToStr(sdepth), ocl::typeToStr(CV_32F)); ocl::typeToStr(sdepth), ocl::typeToStr(CV_32F),
doubleSupport ? " -D DOUBLE_SUPPORT" : "");
build_options += ocl::kernelToStr(kernelX, CV_32F); build_options += ocl::kernelToStr(kernelX, CV_32F);
Size srcWholeSize; Point srcOffset; Size srcWholeSize; Point srcOffset;
src.locateROI(srcWholeSize, srcOffset); src.locateROI(srcWholeSize, srcOffset);
String kernelName("row_filter"); String kernelName("row_filter");
if (type == CV_8UC1) if (fast8uc1)
kernelName += "_C1_D0"; kernelName += "_C1_D0";
ocl::Kernel k(kernelName.c_str(), cv::ocl::imgproc::filterSepRow_oclsrc, ocl::Kernel k(kernelName.c_str(), cv::ocl::imgproc::filterSepRow_oclsrc,
@ -3367,16 +3372,25 @@ static bool ocl_sepRowFilter2D( UMat &src, UMat &buf, Mat &kernelX, int anchor,
if (k.empty()) if (k.empty())
return false; return false;
if (fast8uc1)
k.args(ocl::KernelArg::PtrReadOnly(src), (int)(src.step / src.elemSize()), srcOffset.x, k.args(ocl::KernelArg::PtrReadOnly(src), (int)(src.step / src.elemSize()), srcOffset.x,
srcOffset.y, src.cols, src.rows, srcWholeSize.width, srcWholeSize.height, srcOffset.y, src.cols, src.rows, srcWholeSize.width, srcWholeSize.height,
ocl::KernelArg::PtrWriteOnly(buf), (int)(buf.step / buf.elemSize()), ocl::KernelArg::PtrWriteOnly(buf), (int)(buf.step / buf.elemSize()),
buf.cols, buf.rows, radiusY); buf.cols, buf.rows, radiusY);
else
k.args(ocl::KernelArg::PtrReadOnly(src), (int)src.step, srcOffset.x,
srcOffset.y, src.cols, src.rows, srcWholeSize.width, srcWholeSize.height,
ocl::KernelArg::PtrWriteOnly(buf), (int)buf.step, buf.cols, buf.rows, radiusY);
return k.run(2, globalsize, localsize, false); return k.run(2, globalsize, localsize, false);
} }
static bool ocl_sepColFilter2D(const UMat &buf, UMat &dst, Mat &kernelY, int anchor) static bool ocl_sepColFilter2D(const UMat & buf, UMat & dst, const Mat & kernelY, int anchor)
{ {
bool doubleSupport = ocl::Device::getDefault().doubleFPConfig() > 0;
if (dst.depth() == CV_64F && !doubleSupport)
return false;
#ifdef ANDROID #ifdef ANDROID
size_t localsize[2] = { 16, 10 }; size_t localsize[2] = { 16, 10 };
#else #else
@ -3388,18 +3402,17 @@ static bool ocl_sepColFilter2D(const UMat &buf, UMat &dst, Mat &kernelY, int anc
Size sz = dst.size(); Size sz = dst.size();
globalsize[1] = DIVUP(sz.height, localsize[1]) * localsize[1]; globalsize[1] = DIVUP(sz.height, localsize[1]) * localsize[1];
if (dtype == CV_8UC2)
globalsize[0] = DIVUP((sz.width + 1) / 2, localsize[0]) * localsize[0];
else
globalsize[0] = DIVUP(sz.width, localsize[0]) * localsize[0]; globalsize[0] = DIVUP(sz.width, localsize[0]) * localsize[0];
char cvt[40]; char cvt[40];
cv::String build_options = cv::format("-D RADIUSY=%d -D LSIZE0=%d -D LSIZE1=%d -D CN=%d" cv::String build_options = cv::format("-D RADIUSY=%d -D LSIZE0=%d -D LSIZE1=%d -D CN=%d"
" -D srcT=%s -D dstT=%s -D convertToDstT=%s", " -D srcT=%s -D dstT=%s -D convertToDstT=%s"
" -D srcT1=%s -D dstT1=%s%s",
anchor, (int)localsize[0], (int)localsize[1], cn, anchor, (int)localsize[0], (int)localsize[1], cn,
ocl::typeToStr(buf.type()), ocl::typeToStr(dtype), ocl::typeToStr(buf.type()), ocl::typeToStr(dtype),
ocl::convertTypeStr(CV_32F, ddepth, cn, cvt)); ocl::convertTypeStr(CV_32F, ddepth, cn, cvt),
ocl::typeToStr(CV_32F), ocl::typeToStr(ddepth),
doubleSupport ? " -D DOUBLE_SUPPORT" : "");
build_options += ocl::kernelToStr(kernelY, CV_32F); build_options += ocl::kernelToStr(kernelY, CV_32F);
ocl::Kernel k("col_filter", cv::ocl::imgproc::filterSepCol_oclsrc, ocl::Kernel k("col_filter", cv::ocl::imgproc::filterSepCol_oclsrc,
@ -3407,13 +3420,13 @@ static bool ocl_sepColFilter2D(const UMat &buf, UMat &dst, Mat &kernelY, int anc
if (k.empty()) if (k.empty())
return false; return false;
k.args(ocl::KernelArg::PtrReadOnly(buf), (int)(buf.step / buf.elemSize()), buf.cols, k.args(ocl::KernelArg::ReadOnly(buf), ocl::KernelArg::WriteOnly(dst));
buf.rows, ocl::KernelArg::PtrWriteOnly(dst), (int)(dst.offset / dst.elemSize()),
(int)(dst.step / dst.elemSize()), dst.cols, dst.rows);
return k.run(2, globalsize, localsize, false); return k.run(2, globalsize, localsize, false);
} }
#if 0
const int optimizedSepFilterLocalSize = 16; const int optimizedSepFilterLocalSize = 16;
static bool ocl_sepFilter2D_SinglePass(InputArray _src, OutputArray _dst, static bool ocl_sepFilter2D_SinglePass(InputArray _src, OutputArray _dst,
@ -3471,18 +3484,19 @@ static bool ocl_sepFilter2D_SinglePass(InputArray _src, OutputArray _dst,
return k.run(2, gt2, lt2, false); return k.run(2, gt2, lt2, false);
} }
#endif
static bool ocl_sepFilter2D( InputArray _src, OutputArray _dst, int ddepth, static bool ocl_sepFilter2D( InputArray _src, OutputArray _dst, int ddepth,
InputArray _kernelX, InputArray _kernelY, Point anchor, InputArray _kernelX, InputArray _kernelY, Point anchor,
double delta, int borderType ) double delta, int borderType )
{ {
Size imgSize = _src.size(); // Size imgSize = _src.size();
if (abs(delta)> FLT_MIN) if (abs(delta)> FLT_MIN)
return false; return false;
int type = _src.type(), cn = CV_MAT_CN(type); int type = _src.type(), cn = CV_MAT_CN(type);
if ( !( (type == CV_8UC1 || type == CV_8UC4 || type == CV_32FC1 || type == CV_32FC4) && if (cn > 4)
(ddepth == CV_32F || ddepth == CV_16S || ddepth == CV_8U || ddepth < 0) ) )
return false; return false;
Mat kernelX = _kernelX.getMat().reshape(1, 1); Mat kernelX = _kernelX.getMat().reshape(1, 1);
@ -3501,9 +3515,6 @@ static bool ocl_sepFilter2D( InputArray _src, OutputArray _dst, int ddepth,
if (ddepth < 0) if (ddepth < 0)
ddepth = sdepth; ddepth = sdepth;
// printf("%d %d\n", imgSize.width, optimizedSepFilterLocalSize + (kernelX.rows >> 1));
// printf("%d %d\n", imgSize.height, optimizedSepFilterLocalSize + (kernelY.rows >> 1));
// CV_OCL_RUN_(kernelY.rows <= 21 && kernelX.rows <= 21 && // CV_OCL_RUN_(kernelY.rows <= 21 && kernelX.rows <= 21 &&
// imgSize.width > optimizedSepFilterLocalSize + (kernelX.rows >> 1) && // imgSize.width > optimizedSepFilterLocalSize + (kernelX.rows >> 1) &&
// imgSize.height > optimizedSepFilterLocalSize + (kernelY.rows >> 1), // imgSize.height > optimizedSepFilterLocalSize + (kernelY.rows >> 1),
@ -3512,20 +3523,19 @@ static bool ocl_sepFilter2D( InputArray _src, OutputArray _dst, int ddepth,
UMat src = _src.getUMat(); UMat src = _src.getUMat();
Size srcWholeSize; Point srcOffset; Size srcWholeSize; Point srcOffset;
src.locateROI(srcWholeSize, srcOffset); src.locateROI(srcWholeSize, srcOffset);
if ( (0 != (srcOffset.x % 4)) ||
(0 != (src.cols % 4)) || bool fast8uc1 = type == CV_8UC1 && srcOffset.x % 4 == 0 &&
(0 != ((src.step / src.elemSize()) % 4)) src.cols % 4 == 0 && src.step % 4 == 0;
)
return false;
Size srcSize = src.size(); Size srcSize = src.size();
Size bufSize(srcSize.width, srcSize.height + kernelY.cols - 1); Size bufSize(srcSize.width, srcSize.height + kernelY.cols - 1);
UMat buf; buf.create(bufSize, CV_MAKETYPE(CV_32F, cn)); UMat buf(bufSize, CV_32FC(cn));
if (!ocl_sepRowFilter2D(src, buf, kernelX, anchor.x, borderType)) if (!ocl_sepRowFilter2D(src, buf, kernelX, anchor.x, borderType, ddepth, fast8uc1))
return false; return false;
_dst.create(srcSize, CV_MAKETYPE(ddepth, cn)); _dst.create(srcSize, CV_MAKETYPE(ddepth, cn));
UMat dst = _dst.getUMat(); UMat dst = _dst.getUMat();
return ocl_sepColFilter2D(buf, dst, kernelY, anchor.y); return ocl_sepColFilter2D(buf, dst, kernelY, anchor.y);
} }

@ -34,29 +34,36 @@
// //
// //
#ifdef DOUBLE_SUPPORT
#ifdef cl_amd_fp64
#pragma OPENCL EXTENSION cl_amd_fp64:enable
#elif defined (cl_khr_fp64)
#pragma OPENCL EXTENSION cl_khr_fp64:enable
#endif
#endif
#define READ_TIMES_COL ((2*(RADIUSY+LSIZE1)-1)/LSIZE1) #define READ_TIMES_COL ((2*(RADIUSY+LSIZE1)-1)/LSIZE1)
#define RADIUS 1 #define RADIUS 1
#define noconvert #define noconvert
/********************************************************************************** #if CN != 3
These kernels are written for separable filters such as Sobel, Scharr, GaussianBlur. #define loadpix(addr) *(__global const srcT *)(addr)
Now(6/29/2011) the kernels only support 8U data type and the anchor of the convovle #define storepix(val, addr) *(__global dstT *)(addr) = val
kernel must be in the center. ROI is not supported either. #define SRCSIZE (int)sizeof(srcT)
Each kernels read 4 elements(not 4 pixels), save them to LDS and read the data needed #define DSTSIZE (int)sizeof(dstT)
from LDS to calculate the result. #else
The length of the convovle kernel supported is only related to the MAX size of LDS, #define loadpix(addr) vload3(0, (__global const srcT1 *)(addr))
which is HW related. #define storepix(val, addr) vstore3(val, 0, (__global dstT1 *)(addr))
Niko #define SRCSIZE (int)sizeof(srcT1)*3
6/29/2011 #define DSTSIZE (int)sizeof(dstT1)*3
The info above maybe obsolete. #endif
***********************************************************************************/
#define DIG(a) a, #define DIG(a) a,
__constant float mat_kernel[] = { COEFF }; __constant float mat_kernel[] = { COEFF };
__kernel void col_filter(__global const srcT * src, int src_step_in_pixel, int src_whole_cols, int src_whole_rows, __kernel void col_filter(__global const uchar * src, int src_step, int src_offset, int src_whole_rows, int src_whole_cols,
__global dstT * dst, int dst_offset_in_pixel, int dst_step_in_pixel, int dst_cols, int dst_rows) __global uchar * dst, int dst_step, int dst_offset, int dst_rows, int dst_cols)
{ {
int x = get_global_id(0); int x = get_global_id(0);
int y = get_global_id(1); int y = get_global_id(1);
@ -64,8 +71,8 @@ __kernel void col_filter(__global const srcT * src, int src_step_in_pixel, int s
int l_x = get_local_id(0); int l_x = get_local_id(0);
int l_y = get_local_id(1); int l_y = get_local_id(1);
int start_addr = mad24(y, src_step_in_pixel, x); int start_addr = mad24(y, src_step, x * SRCSIZE);
int end_addr = mad24(src_whole_rows - 1, src_step_in_pixel, src_whole_cols); int end_addr = mad24(src_whole_rows - 1, src_step, src_whole_cols * SRCSIZE);
srcT sum, temp[READ_TIMES_COL]; srcT sum, temp[READ_TIMES_COL];
__local srcT LDS_DAT[LSIZE1 * READ_TIMES_COL][LSIZE0 + 1]; __local srcT LDS_DAT[LSIZE1 * READ_TIMES_COL][LSIZE0 + 1];
@ -73,9 +80,9 @@ __kernel void col_filter(__global const srcT * src, int src_step_in_pixel, int s
// read pixels from src // read pixels from src
for (int i = 0; i < READ_TIMES_COL; ++i) for (int i = 0; i < READ_TIMES_COL; ++i)
{ {
int current_addr = mad24(i, LSIZE1 * src_step_in_pixel, start_addr); int current_addr = mad24(i, LSIZE1 * src_step, start_addr);
current_addr = current_addr < end_addr ? current_addr : 0; current_addr = current_addr < end_addr ? current_addr : 0;
temp[i] = src[current_addr]; temp[i] = loadpix(src + current_addr);
} }
// save pixels to lds // save pixels to lds
@ -95,7 +102,7 @@ __kernel void col_filter(__global const srcT * src, int src_step_in_pixel, int s
// write the result to dst // write the result to dst
if (x < dst_cols && y < dst_rows) if (x < dst_cols && y < dst_rows)
{ {
start_addr = mad24(y, dst_step_in_pixel, x + dst_offset_in_pixel); start_addr = mad24(y, dst_step, mad24(DSTSIZE, x, dst_offset));
dst[start_addr] = convertToDstT(sum); storepix(convertToDstT(sum), dst + start_addr);
} }
} }

@ -34,6 +34,14 @@
// //
// //
#ifdef DOUBLE_SUPPORT
#ifdef cl_amd_fp64
#pragma OPENCL EXTENSION cl_amd_fp64:enable
#elif defined (cl_khr_fp64)
#pragma OPENCL EXTENSION cl_khr_fp64:enable
#endif
#endif
#define READ_TIMES_ROW ((2*(RADIUSX+LSIZE0)-1)/LSIZE0) //for c4 only #define READ_TIMES_ROW ((2*(RADIUSX+LSIZE0)-1)/LSIZE0) //for c4 only
#define RADIUS 1 #define RADIUS 1
@ -117,16 +125,16 @@
#define noconvert #define noconvert
#if cn != 3 #if CN != 3
#define loadpix(addr) *(__global const srcT *)(addr) #define loadpix(addr) *(__global const srcT *)(addr)
#define storepix(val, addr) *(__global dstT *)(addr) = val #define storepix(val, addr) *(__global dstT *)(addr) = val
#define SRCSIZE ((int)sizeof(srcT)) #define SRCSIZE (int)sizeof(srcT)
#define DSTSIZE ((int)sizeof(dstT)) #define DSTSIZE (int)sizeof(dstT)
#else #else
#define loadpix(addr) vload3(0, (__global const srcT1 *)(addr)) #define loadpix(addr) vload3(0, (__global const srcT1 *)(addr))
#define storepix(val, addr) vstore3(val, 0, (__global dstT1 *)(addr)) #define storepix(val, addr) vstore3(val, 0, (__global dstT1 *)(addr))
#define SRCSIZE ((int)sizeof(srcT1)*3) #define SRCSIZE (int)sizeof(srcT1)*3
#define DSTSIZE ((int)sizeof(dstT1)*3) #define DSTSIZE (int)sizeof(dstT1)*3
#endif #endif
#define DIG(a) a, #define DIG(a) a,
@ -269,32 +277,33 @@ __kernel void row_filter_C1_D0(__global const uchar * src, int src_step_in_pixel
dst[start_addr] = sum.x; dst[start_addr] = sum.x;
} }
__kernel void row_filter(__global const srcT * src, int src_step_in_pixel, int src_offset_x, int src_offset_y, __kernel void row_filter(__global const uchar * src, int src_step, int src_offset_x, int src_offset_y,
int src_cols, int src_rows, int src_whole_cols, int src_whole_rows, int src_cols, int src_rows, int src_whole_cols, int src_whole_rows,
__global dstT * dst, int dst_step_in_pixel, int dst_cols, int dst_rows, __global uchar * dst, int dst_step, int dst_cols, int dst_rows,
int radiusy) int radiusy)
{ {
int x = get_global_id(0); int x = get_global_id(0);
int y = get_global_id(1); int y = get_global_id(1);
int l_x = get_local_id(0); int l_x = get_local_id(0);
int l_y = get_local_id(1); int l_y = get_local_id(1);
int start_x = x + src_offset_x - RADIUSX; int start_x = x + src_offset_x - RADIUSX;
int start_y = y + src_offset_y - radiusy; int start_y = y + src_offset_y - radiusy;
int start_addr = mad24(start_y, src_step_in_pixel, start_x); int start_addr = mad24(start_y, src_step, start_x * SRCSIZE);
dstT sum; dstT sum;
srcT temp[READ_TIMES_ROW]; srcT temp[READ_TIMES_ROW];
__local srcT LDS_DAT[LSIZE1][READ_TIMES_ROW * LSIZE0 + 1]; __local srcT LDS_DAT[LSIZE1][READ_TIMES_ROW * LSIZE0 + 1];
#ifdef BORDER_CONSTANT #ifdef BORDER_CONSTANT
int end_addr = mad24(src_whole_rows - 1, src_step_in_pixel, src_whole_cols); int end_addr = mad24(src_whole_rows - 1, src_step, src_whole_cols * SRCSIZE);
// read pixels from src // read pixels from src
for (int i = 0; i < READ_TIMES_ROW; i++) for (int i = 0; i < READ_TIMES_ROW; i++)
{ {
int current_addr = mad24(i, LSIZE0, start_addr); int current_addr = mad24(i, LSIZE0 * SRCSIZE, start_addr);
current_addr = current_addr < end_addr && current_addr > 0 ? current_addr : 0; current_addr = current_addr < end_addr && current_addr >= 0 ? current_addr : 0;
temp[i] = src[current_addr]; temp[i] = loadpix(src + current_addr);
} }
// judge if read out of boundary // judge if read out of boundary
@ -312,8 +321,7 @@ __kernel void row_filter(__global const srcT * src, int src_step_in_pixel, int s
} }
#endif #endif
#else #else
int index[READ_TIMES_ROW]; int index[READ_TIMES_ROW], s_x, s_y;
int s_x, s_y;
// judge if read out of boundary // judge if read out of boundary
for (int i = 0; i < READ_TIMES_ROW; ++i) for (int i = 0; i < READ_TIMES_ROW; ++i)
@ -328,12 +336,12 @@ __kernel void row_filter(__global const srcT * src, int src_step_in_pixel, int s
EXTRAPOLATE(s_x, 0, src_whole_cols); EXTRAPOLATE(s_x, 0, src_whole_cols);
EXTRAPOLATE(s_y, 0, src_whole_rows); EXTRAPOLATE(s_y, 0, src_whole_rows);
#endif #endif
index[i] = mad24(s_y, src_step_in_pixel, s_x); index[i] = mad24(s_y, src_step, s_x * SRCSIZE);
} }
// read pixels from src // read pixels from src
for (int i = 0; i < READ_TIMES_ROW; ++i) for (int i = 0; i < READ_TIMES_ROW; ++i)
temp[i] = src[index[i]]; temp[i] = loadpix(src + index[i]);
#endif // BORDER_CONSTANT #endif // BORDER_CONSTANT
// save pixels to lds // save pixels to lds
@ -349,10 +357,11 @@ __kernel void row_filter(__global const srcT * src, int src_step_in_pixel, int s
temp[1] = LDS_DAT[l_y][l_x + RADIUSX + i]; temp[1] = LDS_DAT[l_y][l_x + RADIUSX + i];
sum += mad(convertToDstT(temp[0]), mat_kernel[RADIUSX - i], convertToDstT(temp[1]) * mat_kernel[RADIUSX + i]); sum += mad(convertToDstT(temp[0]), mat_kernel[RADIUSX - i], convertToDstT(temp[1]) * mat_kernel[RADIUSX + i]);
} }
// write the result to dst // write the result to dst
if (x < dst_cols && y < dst_rows) if (x < dst_cols && y < dst_rows)
{ {
start_addr = mad24(y, dst_step_in_pixel, x); start_addr = mad24(y, dst_step, x * DSTSIZE);
dst[start_addr] = sum; storepix(sum, dst + start_addr);
} }
} }

@ -312,7 +312,7 @@ OCL_TEST_P(MorphologyEx, Mat)
(int)BORDER_REFLECT|BORDER_ISOLATED, (int)BORDER_WRAP|BORDER_ISOLATED, \ (int)BORDER_REFLECT|BORDER_ISOLATED, (int)BORDER_WRAP|BORDER_ISOLATED, \
(int)BORDER_REFLECT_101|BORDER_ISOLATED*/) // WRAP and ISOLATED are not supported by cv:: version (int)BORDER_REFLECT_101|BORDER_ISOLATED*/) // WRAP and ISOLATED are not supported by cv:: version
#define FILTER_TYPES Values(CV_8UC1, CV_8UC2, CV_8UC4, CV_32FC1, CV_32FC4, CV_64FC1, CV_64FC4) #define FILTER_TYPES Values(CV_8UC1, CV_8UC3, CV_8UC4, CV_32FC1, CV_32FC3, CV_32FC4)
OCL_INSTANTIATE_TEST_CASE_P(Filter, Bilateral, Combine( OCL_INSTANTIATE_TEST_CASE_P(Filter, Bilateral, Combine(
Values((MatType)CV_8UC1), Values((MatType)CV_8UC1),

@ -75,9 +75,9 @@ PARAM_TEST_CASE(SepFilter2D, MatDepth, Channels, BorderType, bool, bool)
void random_roi() void random_roi()
{ {
Size ksize = randomSize(kernelMinSize, kernelMaxSize); Size ksize = randomSize(kernelMinSize, kernelMaxSize);
if (1 != (ksize.width % 2)) if (1 != ksize.width % 2)
ksize.width++; ksize.width++;
if (1 != (ksize.height % 2)) if (1 != ksize.height % 2)
ksize.height++; ksize.height++;
Mat temp = randomMat(Size(ksize.width, 1), CV_MAKE_TYPE(CV_32F, 1), -MAX_VALUE, MAX_VALUE); Mat temp = randomMat(Size(ksize.width, 1), CV_MAKE_TYPE(CV_32F, 1), -MAX_VALUE, MAX_VALUE);
@ -86,24 +86,22 @@ PARAM_TEST_CASE(SepFilter2D, MatDepth, Channels, BorderType, bool, bool)
cv::normalize(temp, kernelY, 1.0, 0.0, NORM_L1); cv::normalize(temp, kernelY, 1.0, 0.0, NORM_L1);
Size roiSize = randomSize(ksize.width + 16, MAX_VALUE, ksize.height + 20, MAX_VALUE); Size roiSize = randomSize(ksize.width + 16, MAX_VALUE, ksize.height + 20, MAX_VALUE);
std::cout << roiSize << std::endl;
int rest = roiSize.width % 4; int rest = roiSize.width % 4;
if (0 != rest) if (rest != 0)
roiSize.width += (4 - rest); roiSize.width += (4 - rest);
Border srcBorder = randomBorder(0, useRoi ? MAX_VALUE : 0); Border srcBorder = randomBorder(0, useRoi ? MAX_VALUE : 0);
rest = srcBorder.lef % 4; rest = srcBorder.lef % 4;
if (0 != rest) if (rest != 0)
srcBorder.lef += (4 - rest); srcBorder.lef += (4 - rest);
rest = srcBorder.rig % 4; rest = srcBorder.rig % 4;
if (0 != rest) if (rest != 0)
srcBorder.rig += (4 - rest); srcBorder.rig += (4 - rest);
randomSubMat(src, src_roi, roiSize, srcBorder, type, -MAX_VALUE, MAX_VALUE); randomSubMat(src, src_roi, roiSize, srcBorder, type, -MAX_VALUE, MAX_VALUE);
Border dstBorder = randomBorder(0, useRoi ? MAX_VALUE : 0); Border dstBorder = randomBorder(0, useRoi ? MAX_VALUE : 0);
randomSubMat(dst, dst_roi, roiSize, dstBorder, type, -MAX_VALUE, MAX_VALUE); randomSubMat(dst, dst_roi, roiSize, dstBorder, type, -MAX_VALUE, MAX_VALUE);
anchor.x = -1; anchor.x = anchor.y = -1;
anchor.y = -1;
UMAT_UPLOAD_INPUT_PARAMETER(src) UMAT_UPLOAD_INPUT_PARAMETER(src)
UMAT_UPLOAD_OUTPUT_PARAMETER(dst) UMAT_UPLOAD_OUTPUT_PARAMETER(dst)
@ -128,11 +126,10 @@ OCL_TEST_P(SepFilter2D, Mat)
} }
} }
OCL_INSTANTIATE_TEST_CASE_P(ImageProc, SepFilter2D, OCL_INSTANTIATE_TEST_CASE_P(ImageProc, SepFilter2D,
Combine( Combine(
Values(CV_8U, CV_32F), Values(CV_8U, CV_32F),
Values(1, 4), OCL_ALL_CHANNELS,
Values( Values(
(BorderType)BORDER_CONSTANT, (BorderType)BORDER_CONSTANT,
(BorderType)BORDER_REPLICATE, (BorderType)BORDER_REPLICATE,

Loading…
Cancel
Save