diff --git a/modules/ocl/perf/perf_imgwarp.cpp b/modules/ocl/perf/perf_imgwarp.cpp index ba5c3383aa..62f3345493 100644 --- a/modules/ocl/perf/perf_imgwarp.cpp +++ b/modules/ocl/perf/perf_imgwarp.cpp @@ -185,6 +185,46 @@ PERF_TEST_P(resizeFixture, resize, OCL_PERF_ELSE } +typedef tuple resizeAreaParams; +typedef TestBaseWithParam resizeAreaFixture; + +PERF_TEST_P(resizeAreaFixture, resize, + ::testing::Combine(OCL_TYPICAL_MAT_SIZES, + OCL_PERF_ENUM(CV_8UC1, CV_8UC4, CV_32FC1, CV_32FC4), + ::testing::Values(0.6, 0.3))) +{ + const resizeAreaParams params = GetParam(); + const Size srcSize = get<0>(params); + const int type = get<1>(params); + double scale = get<2>(params); + const Size dstSize(cvRound(srcSize.width * scale), cvRound(srcSize.height * scale)); + + checkDeviceMaxMemoryAllocSize(srcSize, type); + + Mat src(srcSize, type), dst; + dst.create(dstSize, type); + declare.in(src, WARMUP_RNG).out(dst); + + if (RUN_OCL_IMPL) + { + ocl::oclMat oclSrc(src), oclDst(dstSize, type); + + OCL_TEST_CYCLE() cv::ocl::resize(oclSrc, oclDst, Size(), scale, scale, cv::INTER_AREA); + + oclDst.download(dst); + + SANITY_CHECK(dst, 1 + DBL_EPSILON); + } + else if (RUN_PLAIN_IMPL) + { + TEST_CYCLE() cv::resize(src, dst, Size(), scale, scale, cv::INTER_AREA); + + SANITY_CHECK(dst, 1 + DBL_EPSILON); + } + else + OCL_PERF_ELSE +} + ///////////// remap//////////////////////// CV_ENUM(RemapInterType, INTER_NEAREST, INTER_LINEAR) diff --git a/modules/ocl/src/imgproc.cpp b/modules/ocl/src/imgproc.cpp index 141325b175..d06adb0985 100644 --- a/modules/ocl/src/imgproc.cpp +++ b/modules/ocl/src/imgproc.cpp @@ -280,9 +280,47 @@ namespace cv //////////////////////////////////////////////////////////////////////////////////////////// // resize - static void resize_gpu( const oclMat &src, oclMat &dst, double fx, double fy, int interpolation) + static void computeResizeAreaTabs(int ssize, int dsize, double scale, int * const map_tab, + float * const alpha_tab, int * const ofs_tab) { - float ifx = 1.f / fx, ify = 1.f / fy; + int k = 0, dx = 0; + for ( ; dx < dsize; dx++) + { + ofs_tab[dx] = k; + + double fsx1 = dx * scale; + double fsx2 = fsx1 + scale; + double cellWidth = std::min(scale, ssize - fsx1); + + int sx1 = cvCeil(fsx1), sx2 = cvFloor(fsx2); + + sx2 = std::min(sx2, ssize - 1); + sx1 = std::min(sx1, sx2); + + if (sx1 - fsx1 > 1e-3) + { + map_tab[k] = sx1 - 1; + alpha_tab[k++] = (float)((sx1 - fsx1) / cellWidth); + } + + for (int sx = sx1; sx < sx2; sx++) + { + map_tab[k] = sx; + alpha_tab[k++] = float(1.0 / cellWidth); + } + + if (fsx2 - sx2 > 1e-3) + { + map_tab[k] = sx2; + alpha_tab[k++] = (float)(std::min(std::min(fsx2 - sx2, 1.), cellWidth) / cellWidth); + } + } + ofs_tab[dx] = k; + } + + static void resize_gpu( const oclMat &src, oclMat &dst, double ifx, double ify, int interpolation) + { + float ifxf = (float)ifx, ifyf = (float)ify; int src_step = src.step / src.elemSize(), src_offset = src.offset / src.elemSize(); int dst_step = dst.step / dst.elemSize(), dst_offset = dst.offset / dst.elemSize(); int ocn = interpolation == INTER_LINEAR ? dst.oclchannels() : -1; @@ -291,11 +329,19 @@ namespace cv const char * const interMap[] = { "NN", "LN", "CUBIC", "AREA", "LAN4" }; std::string kernelName = std::string("resize") + interMap[interpolation]; - const char * const typeMap[] = { "uchar", "uchar", "ushort", "ushort", "int", "int", "double" }; + const char * const typeMap[] = { "uchar", "char", "ushort", "short", "int", "float", "double" }; const char * const channelMap[] = { "" , "", "2", "4", "4" }; std::string buildOption = format("-D %s -D T=%s%s", interMap[interpolation], typeMap[dst.depth()], channelMap[dst.oclchannels()]); - //TODO: improve this kernel + int wdepth = std::max(src.depth(), CV_32F); + if (interpolation != INTER_NEAREST) + { + buildOption += format(" -D WT=%s -D WTV=%s%s -D convertToWTV=convert_%s%s -D convertToT=convert_%s%s%s", + typeMap[wdepth], typeMap[wdepth], channelMap[dst.oclchannels()], + typeMap[wdepth], channelMap[dst.oclchannels()], + typeMap[src.depth()], channelMap[dst.oclchannels()], src.depth() <= CV_32S ? "_sat_rte" : ""); + } + size_t blkSizeX = 16, blkSizeY = 16; size_t glbSizeX; if (src.type() == CV_8UC1 && interpolation == INTER_LINEAR) @@ -306,6 +352,28 @@ namespace cv else glbSizeX = dst.cols; + static oclMat alphaOcl, mapOcl, tabofsOcl; + if (interpolation == INTER_AREA) + { + Size ssize = src.size(), dsize = dst.size(); + int xytab_size = (ssize.width + ssize.height) << 1; + int tabofs_size = dsize.height + dsize.width + 2; + + AutoBuffer _xymap_tab(xytab_size), _xyofs_tab(tabofs_size); + AutoBuffer _xyalpha_tab(xytab_size); + int * xmap_tab = _xymap_tab, * ymap_tab = _xymap_tab + (ssize.width << 1); + float * xalpha_tab = _xyalpha_tab, * yalpha_tab = _xyalpha_tab + (ssize.width << 1); + int * xofs_tab = _xyofs_tab, * yofs_tab = _xyofs_tab + dsize.width + 1; + + computeResizeAreaTabs(ssize.width, dsize.width, ifx, xmap_tab, xalpha_tab, xofs_tab); + computeResizeAreaTabs(ssize.height, dsize.height, ify, ymap_tab, yalpha_tab, yofs_tab); + + // loading precomputed arrays to GPU + alphaOcl = oclMat(1, xytab_size, CV_32FC1, (void *)_xyalpha_tab); + mapOcl = oclMat(1, xytab_size, CV_32SC1, (void *)_xymap_tab); + tabofsOcl = oclMat(1, tabofs_size, CV_32SC1, (void *)_xyofs_tab); + } + size_t globalThreads[3] = { glbSizeX, dst.rows, 1 }; size_t localThreads[3] = { blkSizeX, blkSizeY, 1 }; @@ -320,8 +388,24 @@ namespace cv args.push_back( make_pair(sizeof(cl_int), (void *)&src.rows)); args.push_back( make_pair(sizeof(cl_int), (void *)&dst.cols)); args.push_back( make_pair(sizeof(cl_int), (void *)&dst.rows)); - args.push_back( make_pair(sizeof(cl_float), (void *)&ifx)); - args.push_back( make_pair(sizeof(cl_float), (void *)&ify)); + + if (wdepth == CV_64F) + { + args.push_back( make_pair(sizeof(cl_double), (void *)&ifx)); + args.push_back( make_pair(sizeof(cl_double), (void *)&ify)); + } + else + { + args.push_back( make_pair(sizeof(cl_float), (void *)&ifxf)); + args.push_back( make_pair(sizeof(cl_float), (void *)&ifyf)); + } + + if (interpolation == INTER_AREA) + { + args.push_back( make_pair(sizeof(cl_mem), (void *)&tabofsOcl.data)); + args.push_back( make_pair(sizeof(cl_mem), (void *)&mapOcl.data)); + args.push_back( make_pair(sizeof(cl_mem), (void *)&alphaOcl.data)); + } openCLExecuteKernel(src.clCxt, &imgproc_resize, kernelName, globalThreads, localThreads, args, ocn, depth, buildOption.c_str()); @@ -329,9 +413,14 @@ namespace cv void resize(const oclMat &src, oclMat &dst, Size dsize, double fx, double fy, int interpolation) { + if (!src.clCxt->supportsFeature(FEATURE_CL_DOUBLE) && src.depth() == CV_64F) + { + CV_Error(CV_OpenCLDoubleNotSupported, "Selected device does not support double"); + return; + } + CV_Assert(src.type() == CV_8UC1 || src.type() == CV_8UC3 || src.type() == CV_8UC4 || src.type() == CV_32FC1 || src.type() == CV_32FC3 || src.type() == CV_32FC4); - CV_Assert(interpolation == INTER_LINEAR || interpolation == INTER_NEAREST); CV_Assert(dsize.area() > 0 || (fx > 0 && fy > 0)); if (dsize.area() == 0) @@ -345,9 +434,13 @@ namespace cv fy = (double)dsize.height / src.rows; } + double inv_fy = 1 / fy, inv_fx = 1 / fx; + CV_Assert(interpolation == INTER_LINEAR || interpolation == INTER_NEAREST || + (interpolation == INTER_AREA && inv_fx >= 1 && inv_fy >= 1)); + dst.create(dsize, src.type()); - resize_gpu( src, dst, fx, fy, interpolation); + resize_gpu( src, dst, inv_fx, inv_fy, interpolation); } //////////////////////////////////////////////////////////////////////// diff --git a/modules/ocl/src/opencl/imgproc_resize.cl b/modules/ocl/src/opencl/imgproc_resize.cl index 4af9000432..5a69e87862 100644 --- a/modules/ocl/src/opencl/imgproc_resize.cl +++ b/modules/ocl/src/opencl/imgproc_resize.cl @@ -296,7 +296,7 @@ __kernel void resizeLN_C4_D5(__global float4 * dst, __global float4 * src, #elif defined NN __kernel void resizeNN(__global T * dst, __global T * src, - int dst_offset, int src_offset,int dst_step, int src_step, + int dst_offset, int src_offset, int dst_step, int src_step, int src_cols, int src_rows, int dst_cols, int dst_rows, float ifx, float ify) { int dx = get_global_id(0); @@ -315,4 +315,52 @@ __kernel void resizeNN(__global T * dst, __global T * src, } } +#elif AREA + +__kernel void resizeAREA(__global T * dst, __global T * src, + int dst_offset, int src_offset, int dst_step, int src_step, + int src_cols, int src_rows, int dst_cols, int dst_rows, WT ifx, WT ify, + __global const int * ofs_tab, __global const int * map_tab, + __global const float * alpha_tab) +{ + int dx = get_global_id(0); + int dy = get_global_id(1); + + if (dx < dst_cols && dy < dst_rows) + { + int dst_index = mad24(dy, dst_step, dst_offset + dx); + + __global const int * xmap_tab = map_tab; + __global const int * ymap_tab = (__global const int *)(map_tab + (src_cols << 1)); + __global const float * xalpha_tab = alpha_tab; + __global const float * yalpha_tab = (__global const float *)(alpha_tab + (src_cols << 1)); + __global const int * xofs_tab = ofs_tab; + __global const int * yofs_tab = (__global const int *)(ofs_tab + dst_cols + 1); + + int xk0 = xofs_tab[dx], xk1 = xofs_tab[dx + 1]; + int yk0 = yofs_tab[dy], yk1 = yofs_tab[dy + 1]; + + int sy0 = ymap_tab[yk0], sy1 = ymap_tab[yk1 - 1]; + int sx0 = xmap_tab[xk0], sx1 = xmap_tab[xk1 - 1]; + + WTV sum = (WTV)(0), buf; + int src_index = mad24(sy0, src_step, src_offset); + + for (int sy = sy0, yk = yk0; sy <= sy1; ++sy, src_index += src_step, ++yk) + { + WTV beta = (WTV)(yalpha_tab[yk]); + buf = (WTV)(0); + + for (int sx = sx0, xk = xk0; sx <= sx1; ++sx, ++xk) + { + WTV alpha = (WTV)(xalpha_tab[xk]); + buf += convertToWTV(src[src_index + sx]) * alpha; + } + sum += buf * beta; + } + + dst[dst_index] = convertToT(sum); + } +} + #endif diff --git a/modules/ocl/test/test_warp.cpp b/modules/ocl/test/test_warp.cpp index adb3f20cd0..85f33754e9 100644 --- a/modules/ocl/test/test_warp.cpp +++ b/modules/ocl/test/test_warp.cpp @@ -398,10 +398,7 @@ PARAM_TEST_CASE(Resize, MatType, double, double, Interpolation, bool) dstRoiSize.height = cvRound(srcRoiSize.height * fy); if (dstRoiSize.area() == 0) - { - random_roi(); - return; - } + return random_roi(); Border srcBorder = randomBorder(0, useRoi ? MAX_VALUE : 0); randomSubMat(src, src_roi, srcRoiSize, srcBorder, type, -MAX_VALUE, MAX_VALUE); @@ -480,11 +477,18 @@ INSTANTIATE_TEST_CASE_P(ImgprocWarp, Remap_INTER_NEAREST, Combine( (Border)BORDER_REFLECT_101), Bool())); -INSTANTIATE_TEST_CASE_P(ImgprocWarp, Resize, Combine( - Values(CV_8UC1, CV_8UC3, CV_8UC4, CV_32FC1, CV_32FC3, CV_32FC4), - Values(0.5, 1.5, 2.0), - Values(0.5, 1.5, 2.0), +INSTANTIATE_TEST_CASE_P(ImgprocWarpResize, Resize, Combine( + Values((MatType)CV_8UC1, CV_8UC3, CV_8UC4, CV_32FC1, CV_32FC3, CV_32FC4), + Values(0.7, 0.4, 2.0), + Values(0.3, 0.6, 2.0), Values((Interpolation)INTER_NEAREST, (Interpolation)INTER_LINEAR), Bool())); +INSTANTIATE_TEST_CASE_P(ImgprocWarpResizeArea, Resize, Combine( + Values((MatType)CV_8UC1, CV_8UC3, CV_8UC4, CV_32FC1, CV_32FC3, CV_32FC4), + Values(0.7, 0.4, 0.5), + Values(0.3, 0.6, 0.5), + Values((Interpolation)INTER_AREA), + Bool())); + #endif // HAVE_OPENCL diff --git a/modules/ocl/test/utility.hpp b/modules/ocl/test/utility.hpp index d7ae1b906e..bfbe579361 100644 --- a/modules/ocl/test/utility.hpp +++ b/modules/ocl/test/utility.hpp @@ -262,7 +262,7 @@ CV_ENUM(NormCode, NORM_INF, NORM_L1, NORM_L2, NORM_TYPE_MASK, NORM_RELATIVE, NOR CV_ENUM(ReduceOp, CV_REDUCE_SUM, CV_REDUCE_AVG, CV_REDUCE_MAX, CV_REDUCE_MIN) CV_ENUM(MorphOp, MORPH_OPEN, MORPH_CLOSE, MORPH_GRADIENT, MORPH_TOPHAT, MORPH_BLACKHAT) CV_ENUM(ThreshOp, THRESH_BINARY, THRESH_BINARY_INV, THRESH_TRUNC, THRESH_TOZERO, THRESH_TOZERO_INV) -CV_ENUM(Interpolation, INTER_NEAREST, INTER_LINEAR, INTER_CUBIC) +CV_ENUM(Interpolation, INTER_NEAREST, INTER_LINEAR, INTER_CUBIC, INTER_AREA) CV_ENUM(Border, BORDER_REFLECT101, BORDER_REPLICATE, BORDER_CONSTANT, BORDER_REFLECT, BORDER_WRAP) CV_ENUM(TemplateMethod, TM_SQDIFF, TM_SQDIFF_NORMED, TM_CCORR, TM_CCORR_NORMED, TM_CCOEFF, TM_CCOEFF_NORMED)