Merge pull request #1878 from ilya-lavrenov:ocl_resize_area_master

pull/1895/merge
Andrey Pavlenko 11 years ago committed by OpenCV Buildbot
commit daefe6983f
  1. 8
      modules/core/include/opencv2/core/ocl.hpp
  2. 16
      modules/core/src/ocl.cpp
  3. 8
      modules/core/test/test_umat.cpp
  4. 8
      modules/imgproc/src/color.cpp
  5. 142
      modules/imgproc/src/imgwarp.cpp
  6. 99
      modules/imgproc/src/opencl/resize.cl
  7. 15
      modules/imgproc/test/ocl/test_warp.cpp
  8. 2
      modules/ts/include/opencv2/ts/ocl_test.hpp

@ -250,8 +250,12 @@ public:
KernelArg();
static KernelArg Local() { return KernelArg(LOCAL, 0); }
static KernelArg PtrOnly(const UMat & m)
{ return KernelArg(PTR_ONLY, (UMat*)&m); }
static KernelArg PtrWriteOnly(const UMat& m)
{ return KernelArg(PTR_ONLY+WRITE_ONLY, (UMat*)&m); }
static KernelArg PtrReadOnly(const UMat& m)
{ return KernelArg(PTR_ONLY+READ_ONLY, (UMat*)&m); }
static KernelArg PtrReadWrite(const UMat& m)
{ return KernelArg(PTR_ONLY+READ_WRITE, (UMat*)&m); }
static KernelArg ReadWrite(const UMat& m, int wscale=1)
{ return KernelArg(READ_WRITE, (UMat*)&m, wscale); }
static KernelArg ReadWriteNoSize(const UMat& m, int wscale=1)

@ -2197,10 +2197,10 @@ int Kernel::set(int i, const UMat& m)
int Kernel::set(int i, const KernelArg& arg)
{
CV_Assert( i >= 0 );
if( i == 0 )
p->cleanupUMats();
if( !p || !p->handle )
return -1;
if( i == 0 )
p->cleanupUMats();
if( arg.m )
{
int accessFlags = ((arg.flags & KernelArg::READ_ONLY) ? ACCESS_READ : 0) +
@ -2222,7 +2222,7 @@ int Kernel::set(int i, const KernelArg& arg)
{
int cols = u2d.cols*arg.wscale;
clSetKernelArg(p->handle, (cl_uint)i, sizeof(u2d.rows), &u2d.rows);
clSetKernelArg(p->handle, (cl_uint)(i+1), sizeof(u2d.cols), &cols);
clSetKernelArg(p->handle, (cl_uint)(i+1), sizeof(cols), &cols);
i += 2;
}
}
@ -2256,10 +2256,17 @@ bool Kernel::run(int dims, size_t globalsize[], size_t localsize[],
{
if(!p || !p->handle || p->e != 0)
return false;
AutoBuffer<size_t> _globalSize(dims);
size_t * globalSizePtr = (size_t *)_globalSize;
for (int i = 0; i < dims; ++i)
globalSizePtr[i] = localsize == NULL ? globalsize[i] :
((globalsize[i] + localsize[i] - 1) / localsize[i]) * localsize[i];
cl_command_queue qq = getQueue(q);
size_t offset[CV_MAX_DIM] = {0};
cl_int retval = clEnqueueNDRangeKernel(qq, p->handle, (cl_uint)dims,
offset, globalsize, localsize, 0, 0,
offset, globalSizePtr, localsize, 0, 0,
sync ? 0 : &p->e);
if( sync || retval < 0 )
{
@ -2350,6 +2357,7 @@ struct Program::Impl
void** deviceList = deviceListBuf;
for( i = 0; i < n; i++ )
deviceList[i] = ctx.device(i).ptr();
retval = clBuildProgram(handle, n,
(const cl_device_id*)deviceList,
buildflags.c_str(), 0, 0);

@ -107,8 +107,8 @@ bool CV_UMatTest::TestUMat()
ra += Scalar::all(1.f);
{
Mat temp = ura.getMat(ACCESS_RW);
temp += Scalar::all(1.f);
Mat temp = ura.getMat(ACCESS_RW);
temp += Scalar::all(1.f);
}
ra.copyTo(rb);
CHECK_DIFF(ra, rb);
@ -146,8 +146,8 @@ bool CV_UMatTest::TestUMat()
CHECK_DIFF(rc0, rc);
{
UMat tmp = rc0.getUMat(ACCESS_WRITE);
cv::max(ura, urb, tmp);
UMat tmp = rc0.getUMat(ACCESS_WRITE);
cv::max(ura, urb, tmp);
}
CHECK_DIFF(rc0, rc);

@ -2875,7 +2875,7 @@ static bool ocl_cvtColor( InputArray _src, OutputArray _dst, int code, int dcn )
k.create("RGB2XYZ", ocl::imgproc::cvtcolor_oclsrc,
format("-D depth=%d -D scn=%d -D dcn=3 -D bidx=%d", depth, scn, bidx));
k.args(ocl::KernelArg::ReadOnlyNoSize(src), ocl::KernelArg::WriteOnly(dst), ocl::KernelArg::PtrOnly(c));
k.args(ocl::KernelArg::ReadOnlyNoSize(src), ocl::KernelArg::WriteOnly(dst), ocl::KernelArg::PtrReadOnly(c));
return k.run(2, globalsize, 0, false);
}
case COLOR_XYZ2BGR: case COLOR_XYZ2RGB:
@ -2924,7 +2924,7 @@ static bool ocl_cvtColor( InputArray _src, OutputArray _dst, int code, int dcn )
k.create("XYZ2RGB", ocl::imgproc::cvtcolor_oclsrc,
format("-D depth=%d -D scn=3 -D dcn=%d -D bidx=%d", depth, dcn, bidx));
k.args(ocl::KernelArg::ReadOnlyNoSize(src), ocl::KernelArg::WriteOnly(dst), ocl::KernelArg::PtrOnly(c));
k.args(ocl::KernelArg::ReadOnlyNoSize(src), ocl::KernelArg::WriteOnly(dst), ocl::KernelArg::PtrReadOnly(c));
return k.run(2, globalsize, 0, false);
}
case COLOR_BGR2HSV: case COLOR_RGB2HSV: case COLOR_BGR2HSV_FULL: case COLOR_RGB2HSV_FULL:
@ -2980,8 +2980,8 @@ static bool ocl_cvtColor( InputArray _src, OutputArray _dst, int code, int dcn )
depth, hrange, bidx, scn));
k.args(ocl::KernelArg::ReadOnlyNoSize(src), ocl::KernelArg::WriteOnly(dst),
ocl::KernelArg::PtrOnly(sdiv_data), hrange == 256 ? ocl::KernelArg::PtrOnly(hdiv_data256) :
ocl::KernelArg::PtrOnly(hdiv_data180));
ocl::KernelArg::PtrReadOnly(sdiv_data), hrange == 256 ? ocl::KernelArg::PtrReadOnly(hdiv_data256) :
ocl::KernelArg::PtrReadOnly(hdiv_data180));
return k.run(2, globalsize, NULL, false);
}

@ -48,8 +48,6 @@
#include "precomp.hpp"
#include "opencl_kernels.hpp"
#include <iostream>
#include <vector>
#if defined (HAVE_IPP) && (IPP_VERSION_MAJOR >= 7)
static IppStatus sts = ippInit();
@ -1902,18 +1900,72 @@ private:
};
#endif
static void ocl_computeResizeAreaTabs(int ssize, int dsize, double scale, int * const map_tab,
float * const alpha_tab, int * const ofs_tab)
{
int k = 0, dx = 0;
for ( ; dx < dsize; dx++)
{
ofs_tab[dx] = k;
double fsx1 = dx * scale;
double fsx2 = fsx1 + scale;
double cellWidth = std::min(scale, ssize - fsx1);
int sx1 = cvCeil(fsx1), sx2 = cvFloor(fsx2);
sx2 = std::min(sx2, ssize - 1);
sx1 = std::min(sx1, sx2);
if (sx1 - fsx1 > 1e-3)
{
map_tab[k] = sx1 - 1;
alpha_tab[k++] = (float)((sx1 - fsx1) / cellWidth);
}
for (int sx = sx1; sx < sx2; sx++)
{
map_tab[k] = sx;
alpha_tab[k++] = float(1.0 / cellWidth);
}
if (fsx2 - sx2 > 1e-3)
{
map_tab[k] = sx2;
alpha_tab[k++] = (float)(std::min(std::min(fsx2 - sx2, 1.), cellWidth) / cellWidth);
}
}
ofs_tab[dx] = k;
}
static void ocl_computeResizeAreaFastTabs(int * dmap_tab, int * smap_tab, int scale, int dcols, int scol)
{
for (int i = 0; i < dcols; ++i)
dmap_tab[i] = scale * i;
for (int i = 0, size = dcols * scale; i < size; ++i)
smap_tab[i] = std::min(scol - 1, i);
}
static bool ocl_resize( InputArray _src, OutputArray _dst, Size dsize,
double fx, double fy, int interpolation)
{
int type = _src.type(), depth = CV_MAT_DEPTH(type), cn = CV_MAT_CN(type);
if( !(cn <= 4 &&
(interpolation == INTER_NEAREST ||
(interpolation == INTER_LINEAR))) )
double inv_fx = 1. / fx, inv_fy = 1. / fy;
float inv_fxf = (float)inv_fx, inv_fyf = (float)inv_fy;
if( cn == 3 || !(cn <= 4 &&
(interpolation == INTER_NEAREST || interpolation == INTER_LINEAR ||
(interpolation == INTER_AREA && inv_fx >= 1 && inv_fy >= 1) )) )
return false;
UMat src = _src.getUMat();
_dst.create(dsize, type);
UMat dst = _dst.getUMat();
ocl::Kernel k;
size_t globalsize[] = { dst.cols, dst.rows };
if (interpolation == INTER_LINEAR)
{
@ -1929,14 +1981,86 @@ static bool ocl_resize( InputArray _src, OutputArray _dst, Size dsize,
else if (interpolation == INTER_NEAREST)
{
k.create("resizeNN", ocl::imgproc::resize_oclsrc,
format("-D INTER_NEAREST -D PIXTYPE=%s", ocl::memopTypeToStr(type) ));
format("-D INTER_NEAREST -D PIXTYPE=%s -D cn", ocl::memopTypeToStr(type), cn));
}
else if (interpolation == INTER_AREA)
{
int iscale_x = saturate_cast<int>(inv_fx);
int iscale_y = saturate_cast<int>(inv_fy);
bool is_area_fast = std::abs(inv_fx - iscale_x) < DBL_EPSILON &&
std::abs(inv_fy - iscale_y) < DBL_EPSILON;
int wdepth = std::max(depth, is_area_fast ? CV_32S : CV_32F);
int wtype = CV_MAKE_TYPE(wdepth, cn);
char cvt[2][40];
String buildOption = format("-D INTER_AREA -D T=%s -D WTV=%s -D convertToWTV=%s",
ocl::typeToStr(type), ocl::typeToStr(wtype),
ocl::convertTypeStr(depth, wdepth, cn, cvt[0]));
UMat alphaOcl, tabofsOcl, mapOcl;
UMat dmap, smap;
if (is_area_fast)
{
int wdepth2 = std::max(CV_32F, depth), wtype2 = CV_MAKE_TYPE(wdepth2, cn);
buildOption = buildOption + format(" -D convertToT=%s -D WT2V=%s -D convertToWT2V=%s -D INTER_AREA_FAST"
" -D XSCALE=%d -D YSCALE=%d -D SCALE=%f",
ocl::convertTypeStr(wdepth2, depth, cn, cvt[0]),
ocl::typeToStr(wtype2), ocl::convertTypeStr(wdepth, wdepth2, cn, cvt[1]),
iscale_x, iscale_y, 1.0f / (iscale_x * iscale_y));
k.create("resizeAREA_FAST", ocl::imgproc::resize_oclsrc, buildOption);
int smap_tab_size = dst.cols * iscale_x + dst.rows * iscale_y;
AutoBuffer<int> dmap_tab(dst.cols + dst.rows), smap_tab(smap_tab_size);
int * dxmap_tab = dmap_tab, * dymap_tab = dxmap_tab + dst.cols;
int * sxmap_tab = smap_tab, * symap_tab = smap_tab + dst.cols * iscale_y;
ocl_computeResizeAreaFastTabs(dxmap_tab, sxmap_tab, iscale_x, dst.cols, src.cols);
ocl_computeResizeAreaFastTabs(dymap_tab, symap_tab, iscale_y, dst.rows, src.rows);
Mat(1, dst.cols + dst.rows, CV_32SC1, (void *)dmap_tab).copyTo(dmap);
Mat(1, smap_tab_size, CV_32SC1, (void *)smap_tab).copyTo(smap);
}
else
{
buildOption = buildOption + format(" -D convertToT=%s", ocl::convertTypeStr(wdepth, depth, cn, cvt[0]));
k.create("resizeAREA", ocl::imgproc::resize_oclsrc, buildOption);
Size ssize = src.size();
int xytab_size = (ssize.width + ssize.height) << 1;
int tabofs_size = dsize.height + dsize.width + 2;
AutoBuffer<int> _xymap_tab(xytab_size), _xyofs_tab(tabofs_size);
AutoBuffer<float> _xyalpha_tab(xytab_size);
int * xmap_tab = _xymap_tab, * ymap_tab = _xymap_tab + (ssize.width << 1);
float * xalpha_tab = _xyalpha_tab, * yalpha_tab = _xyalpha_tab + (ssize.width << 1);
int * xofs_tab = _xyofs_tab, * yofs_tab = _xyofs_tab + dsize.width + 1;
ocl_computeResizeAreaTabs(ssize.width, dsize.width, inv_fx, xmap_tab, xalpha_tab, xofs_tab);
ocl_computeResizeAreaTabs(ssize.height, dsize.height, inv_fy, ymap_tab, yalpha_tab, yofs_tab);
// loading precomputed arrays to GPU
Mat(1, xytab_size, CV_32FC1, (void *)_xyalpha_tab).copyTo(alphaOcl);
Mat(1, xytab_size, CV_32SC1, (void *)_xymap_tab).copyTo(mapOcl);
Mat(1, tabofs_size, CV_32SC1, (void *)_xyofs_tab).copyTo(tabofsOcl);
}
ocl::KernelArg srcarg = ocl::KernelArg::ReadOnly(src), dstarg = ocl::KernelArg::WriteOnly(dst);
if (is_area_fast)
k.args(srcarg, dstarg, ocl::KernelArg::PtrReadOnly(dmap), ocl::KernelArg::PtrReadOnly(smap));
else
k.args(srcarg, dstarg, inv_fxf, inv_fyf, ocl::KernelArg::PtrReadOnly(tabofsOcl),
ocl::KernelArg::PtrReadOnly(mapOcl), ocl::KernelArg::PtrReadOnly(alphaOcl));
return k.run(2, globalsize, NULL, false);
}
if( k.empty() )
return false;
k.args(ocl::KernelArg::ReadOnly(src), ocl::KernelArg::WriteOnly(dst),
(float)(1./fx), (float)(1./fy));
size_t globalsize[] = { dst.cols, dst.rows };
(float)inv_fx, (float)inv_fy);
return k.run(2, globalsize, 0, false);
}
@ -2069,7 +2193,7 @@ void cv::resize( InputArray _src, OutputArray _dst, Size dsize,
}
if( ocl::useOpenCL() && _dst.kind() == _InputArray::UMAT &&
ocl_resize(_src, _dst, dsize, inv_scale_x, inv_scale_y, interpolation) )
ocl_resize(_src, _dst, dsize, inv_scale_x, inv_scale_y, interpolation))
return;
Mat src = _src.getMat();

@ -43,16 +43,8 @@
//
//M*/
// resize kernel
// Currently, CV_8UC1 CV_8UC4 CV_32FC1 and CV_32FC4are supported.
// We shall support other types later if necessary.
#if defined DOUBLE_SUPPORT
#pragma OPENCL EXTENSION cl_khr_fp64:enable
#define F double
#else
#define F float
#endif
#define INTER_RESIZE_COEF_BITS 11
@ -141,8 +133,8 @@ __kernel void resizeNN(__global const uchar* srcptr, int srcstep, int srcoffset,
if( dx < dstcols && dy < dstrows )
{
F s1 = dx*ifx;
F s2 = dy*ify;
float s1 = dx*ifx;
float s2 = dy*ify;
int sx = min(convert_int_rtz(s1), srccols-1);
int sy = min(convert_int_rtz(s2), srcrows-1);
@ -153,4 +145,91 @@ __kernel void resizeNN(__global const uchar* srcptr, int srcstep, int srcoffset,
}
}
#elif defined INTER_AREA
#define TSIZE ((int)(sizeof(T)))
#ifdef INTER_AREA_FAST
__kernel void resizeAREA_FAST(__global const uchar * src, int src_step, int src_offset, int src_rows, int src_cols,
__global uchar * dst, int dst_step, int dst_offset, int dst_rows, int dst_cols,
__global const int * dmap_tab, __global const int * smap_tab)
{
int dx = get_global_id(0);
int dy = get_global_id(1);
if (dx < dst_cols && dy < dst_rows)
{
int dst_index = mad24(dy, dst_step, dst_offset);
__global const int * xmap_tab = dmap_tab;
__global const int * ymap_tab = dmap_tab + dst_cols;
__global const int * sxmap_tab = smap_tab;
__global const int * symap_tab = smap_tab + XSCALE * dst_cols;
int sx = xmap_tab[dx], sy = ymap_tab[dy];
WTV sum = (WTV)(0);
#pragma unroll
for (int y = 0; y < YSCALE; ++y)
{
int src_index = mad24(symap_tab[y + sy], src_step, src_offset);
#pragma unroll
for (int x = 0; x < XSCALE; ++x)
sum += convertToWTV(((__global const T*)(src + src_index))[sxmap_tab[sx + x]]);
}
((__global T*)(dst + dst_index))[dx] = convertToT(convertToWT2V(sum) * (WT2V)(SCALE));
}
}
#else
__kernel void resizeAREA(__global const uchar * src, int src_step, int src_offset, int src_rows, int src_cols,
__global uchar * dst, int dst_step, int dst_offset, int dst_rows, int dst_cols,
float ifx, float ify, __global const int * ofs_tab,
__global const int * map_tab, __global const float * alpha_tab)
{
int dx = get_global_id(0);
int dy = get_global_id(1);
if (dx < dst_cols && dy < dst_rows)
{
int dst_index = mad24(dy, dst_step, dst_offset);
__global const int * xmap_tab = map_tab;
__global const int * ymap_tab = (__global const int *)(map_tab + (src_cols << 1));
__global const float * xalpha_tab = alpha_tab;
__global const float * yalpha_tab = (__global const float *)(alpha_tab + (src_cols << 1));
__global const int * xofs_tab = ofs_tab;
__global const int * yofs_tab = (__global const int *)(ofs_tab + dst_cols + 1);
int xk0 = xofs_tab[dx], xk1 = xofs_tab[dx + 1];
int yk0 = yofs_tab[dy], yk1 = yofs_tab[dy + 1];
int sy0 = ymap_tab[yk0], sy1 = ymap_tab[yk1 - 1];
int sx0 = xmap_tab[xk0], sx1 = xmap_tab[xk1 - 1];
WTV sum = (WTV)(0), buf;
int src_index = mad24(sy0, src_step, src_offset);
for (int sy = sy0, yk = yk0; sy <= sy1; ++sy, src_index += src_step, ++yk)
{
WTV beta = (WTV)(yalpha_tab[yk]);
buf = (WTV)(0);
for (int sx = sx0, xk = xk0; sx <= sx1; ++sx, ++xk)
{
WTV alpha = (WTV)(xalpha_tab[xk]);
buf += convertToWTV(((__global const T*)(src + src_index))[sx]) * alpha;
}
sum += buf * beta;
}
((__global T*)(dst + dst_index))[dx] = convertToT(sum);
}
}
#endif
#endif

@ -127,13 +127,20 @@ OCL_TEST_P(Resize, Mat)
/////////////////////////////////////////////////////////////////////////////////////
OCL_INSTANTIATE_TEST_CASE_P(ImgprocWarp, Resize, Combine(
Values(CV_8UC1, CV_8UC4, CV_16UC2, CV_32FC1, CV_32FC4),
Values(0.5, 1.5, 2.0),
Values(0.5, 1.5, 2.0),
OCL_INSTANTIATE_TEST_CASE_P(ImgprocWarpResize, Resize, Combine(
Values((MatType)CV_8UC1, CV_8UC4, CV_32FC1, CV_32FC4),
Values(0.7, 0.4, 2.0),
Values(0.3, 0.6, 2.0),
Values((Interpolation)INTER_NEAREST, (Interpolation)INTER_LINEAR),
Bool()));
OCL_INSTANTIATE_TEST_CASE_P(ImgprocWarpResizeArea, Resize, Combine(
Values((MatType)CV_8UC1, CV_8UC4, CV_32FC1, CV_32FC4),
Values(0.7, 0.4, 0.5),
Values(0.3, 0.6, 0.5),
Values((Interpolation)INTER_AREA),
Bool()));
} } // namespace cvtest::ocl
#endif // HAVE_OPENCL

@ -305,7 +305,7 @@ IMPLEMENT_PARAM_CLASS(Channels, int)
#define OCL_ALL_DEPTHS Values(CV_8U, CV_8S, CV_16U, CV_16S, CV_32S, CV_32F)
#define OCL_ALL_CHANNELS Values(1, 2, 3, 4)
CV_ENUM(Interpolation, INTER_NEAREST, INTER_LINEAR, INTER_CUBIC)
CV_ENUM(Interpolation, INTER_NEAREST, INTER_LINEAR, INTER_CUBIC, INTER_AREA)
#define OCL_INSTANTIATE_TEST_CASE_P(prefix, test_case_name, generator) \
INSTANTIATE_TEST_CASE_P(OCL_ ## prefix, test_case_name, generator)

Loading…
Cancel
Save