diff --git a/modules/core/include/opencv2/core/opencl/ocl_defs.hpp b/modules/core/include/opencv2/core/opencl/ocl_defs.hpp index 1c5ca58c18..55abd7cd89 100644 --- a/modules/core/include/opencv2/core/opencl/ocl_defs.hpp +++ b/modules/core/include/opencv2/core/opencl/ocl_defs.hpp @@ -22,6 +22,15 @@ fflush(stdout); \ } \ } +#elif defined CV_OPENCL_RUN_ASSERT +#define CV_OCL_RUN_(condition, func, ...) \ + { \ + if (cv::ocl::useOpenCL() && (condition)) \ + { \ + CV_Assert(func); \ + return; \ + } \ + } #else #define CV_OCL_RUN_(condition, func, ...) \ if (cv::ocl::useOpenCL() && (condition) && func) \ diff --git a/modules/imgproc/src/accum.cpp b/modules/imgproc/src/accum.cpp index 0ed2b3fedd..4d13ce244f 100644 --- a/modules/imgproc/src/accum.cpp +++ b/modules/imgproc/src/accum.cpp @@ -41,6 +41,7 @@ //M*/ #include "precomp.hpp" +#include "opencl_kernels.hpp" namespace cv { @@ -352,15 +353,83 @@ inline int getAccTabIdx(int sdepth, int ddepth) sdepth == CV_64F && ddepth == CV_64F ? 6 : -1; } +#ifdef HAVE_OPENCL + +enum +{ + ACCUMULATE = 0, + ACCUMULATE_SQUARE = 1, + ACCUMULATE_PRODUCT = 2, + ACCUMULATE_WEIGHTED = 3 +}; + +static bool ocl_accumulate( InputArray _src, InputArray _src2, InputOutputArray _dst, double alpha, + InputArray _mask, int op_type ) +{ + CV_Assert(op_type == ACCUMULATE || op_type == ACCUMULATE_SQUARE || + op_type == ACCUMULATE_PRODUCT || op_type == ACCUMULATE_WEIGHTED); + + int stype = _src.type(), cn = CV_MAT_CN(stype); + int sdepth = CV_MAT_DEPTH(stype), ddepth = _dst.depth(); + + bool doubleSupport = ocl::Device::getDefault().doubleFPConfig() > 0, + haveMask = !_mask.empty(); + + if (!doubleSupport && (sdepth == CV_64F || ddepth == CV_64F)) + return false; + + const char * const opMap[4] = { "ACCUMULATE", "ACCUMULATE_SQUARE", "ACCUMULATE_PRODUCT", + "ACCUMULATE_WEIGHTED" }; + + ocl::Kernel k("accumulate", ocl::imgproc::accumulate_oclsrc, + format("-D %s%s -D srcT=%s -D cn=%d -D dstT=%s%s", + opMap[op_type], haveMask ? " -D HAVE_MASK" : "", + ocl::typeToStr(sdepth), cn, ocl::typeToStr(ddepth), + doubleSupport ? " -D DOUBLE_SUPPORT" : "")); + if (k.empty()) + return false; + + UMat src = _src.getUMat(), src2 = _src2.getUMat(), dst = _dst.getUMat(), mask = _mask.getUMat(); + + ocl::KernelArg srcarg = ocl::KernelArg::ReadOnlyNoSize(src), + src2arg = ocl::KernelArg::ReadOnlyNoSize(src2), + dstarg = ocl::KernelArg::ReadWrite(dst), + maskarg = ocl::KernelArg::ReadOnlyNoSize(mask); + + int argidx = k.set(0, srcarg); + if (op_type == ACCUMULATE_PRODUCT) + argidx = k.set(argidx, src2arg); + argidx = k.set(argidx, dstarg); + if (op_type == ACCUMULATE_WEIGHTED) + { + if (ddepth == CV_32F) + argidx = k.set(argidx, (float)alpha); + else + argidx = k.set(argidx, alpha); + } + if (haveMask) + argidx = k.set(argidx, maskarg); + + size_t globalsize[2] = { src.cols, src.rows }; + return k.run(2, globalsize, NULL, false); +} + +#endif + } void cv::accumulate( InputArray _src, InputOutputArray _dst, InputArray _mask ) { - Mat src = _src.getMat(), dst = _dst.getMat(), mask = _mask.getMat(); - int sdepth = src.depth(), ddepth = dst.depth(), cn = src.channels(); + int stype = _src.type(), sdepth = CV_MAT_DEPTH(stype), scn = CV_MAT_CN(stype); + int dtype = _dst.type(), ddepth = CV_MAT_DEPTH(dtype), dcn = CV_MAT_CN(dtype); + + CV_Assert( _src.sameSize(_dst) && dcn == scn ); + CV_Assert( _mask.empty() || (_src.sameSize(_mask) && _mask.type() == CV_8U) ); - CV_Assert( dst.size == src.size && dst.channels() == cn ); - CV_Assert( mask.empty() || (mask.size == src.size && mask.type() == CV_8U) ); + CV_OCL_RUN(_src.dims() <= 2 && _dst.isUMat(), + ocl_accumulate(_src, noArray(), _dst, 0.0, _mask, ACCUMULATE)) + + Mat src = _src.getMat(), dst = _dst.getMat(), mask = _mask.getMat(); int fidx = getAccTabIdx(sdepth, ddepth); AccFunc func = fidx >= 0 ? accTab[fidx] : 0; @@ -372,17 +441,21 @@ void cv::accumulate( InputArray _src, InputOutputArray _dst, InputArray _mask ) int len = (int)it.size; for( size_t i = 0; i < it.nplanes; i++, ++it ) - func(ptrs[0], ptrs[1], ptrs[2], len, cn); + func(ptrs[0], ptrs[1], ptrs[2], len, scn); } - void cv::accumulateSquare( InputArray _src, InputOutputArray _dst, InputArray _mask ) { - Mat src = _src.getMat(), dst = _dst.getMat(), mask = _mask.getMat(); - int sdepth = src.depth(), ddepth = dst.depth(), cn = src.channels(); + int stype = _src.type(), sdepth = CV_MAT_DEPTH(stype), scn = CV_MAT_CN(stype); + int dtype = _dst.type(), ddepth = CV_MAT_DEPTH(dtype), dcn = CV_MAT_CN(dtype); + + CV_Assert( _src.sameSize(_dst) && dcn == scn ); + CV_Assert( _mask.empty() || (_src.sameSize(_mask) && _mask.type() == CV_8U) ); - CV_Assert( dst.size == src.size && dst.channels() == cn ); - CV_Assert( mask.empty() || (mask.size == src.size && mask.type() == CV_8U) ); + CV_OCL_RUN(_src.dims() <= 2 && _dst.isUMat(), + ocl_accumulate(_src, noArray(), _dst, 0.0, _mask, ACCUMULATE_SQUARE)) + + Mat src = _src.getMat(), dst = _dst.getMat(), mask = _mask.getMat(); int fidx = getAccTabIdx(sdepth, ddepth); AccFunc func = fidx >= 0 ? accSqrTab[fidx] : 0; @@ -394,18 +467,23 @@ void cv::accumulateSquare( InputArray _src, InputOutputArray _dst, InputArray _m int len = (int)it.size; for( size_t i = 0; i < it.nplanes; i++, ++it ) - func(ptrs[0], ptrs[1], ptrs[2], len, cn); + func(ptrs[0], ptrs[1], ptrs[2], len, scn); } void cv::accumulateProduct( InputArray _src1, InputArray _src2, InputOutputArray _dst, InputArray _mask ) { - Mat src1 = _src1.getMat(), src2 = _src2.getMat(), dst = _dst.getMat(), mask = _mask.getMat(); - int sdepth = src1.depth(), ddepth = dst.depth(), cn = src1.channels(); + int stype = _src1.type(), sdepth = CV_MAT_DEPTH(stype), scn = CV_MAT_CN(stype); + int dtype = _dst.type(), ddepth = CV_MAT_DEPTH(dtype), dcn = CV_MAT_CN(dtype); + + CV_Assert( _src1.sameSize(_src2) && stype == _src2.type() ); + CV_Assert( _src1.sameSize(_dst) && dcn == scn ); + CV_Assert( _mask.empty() || (_src1.sameSize(_mask) && _mask.type() == CV_8U) ); - CV_Assert( src2.size && src1.size && src2.type() == src1.type() ); - CV_Assert( dst.size == src1.size && dst.channels() == cn ); - CV_Assert( mask.empty() || (mask.size == src1.size && mask.type() == CV_8U) ); + CV_OCL_RUN(_src1.dims() <= 2 && _dst.isUMat(), + ocl_accumulate(_src1, _src2, _dst, 0.0, _mask, ACCUMULATE_PRODUCT)) + + Mat src1 = _src1.getMat(), src2 = _src2.getMat(), dst = _dst.getMat(), mask = _mask.getMat(); int fidx = getAccTabIdx(sdepth, ddepth); AccProdFunc func = fidx >= 0 ? accProdTab[fidx] : 0; @@ -417,18 +495,22 @@ void cv::accumulateProduct( InputArray _src1, InputArray _src2, int len = (int)it.size; for( size_t i = 0; i < it.nplanes; i++, ++it ) - func(ptrs[0], ptrs[1], ptrs[2], ptrs[3], len, cn); + func(ptrs[0], ptrs[1], ptrs[2], ptrs[3], len, scn); } - void cv::accumulateWeighted( InputArray _src, InputOutputArray _dst, double alpha, InputArray _mask ) { - Mat src = _src.getMat(), dst = _dst.getMat(), mask = _mask.getMat(); - int sdepth = src.depth(), ddepth = dst.depth(), cn = src.channels(); + int stype = _src.type(), sdepth = CV_MAT_DEPTH(stype), scn = CV_MAT_CN(stype); + int dtype = _dst.type(), ddepth = CV_MAT_DEPTH(dtype), dcn = CV_MAT_CN(dtype); + + CV_Assert( _src.sameSize(_dst) && dcn == scn ); + CV_Assert( _mask.empty() || (_src.sameSize(_mask) && _mask.type() == CV_8U) ); - CV_Assert( dst.size == src.size && dst.channels() == cn ); - CV_Assert( mask.empty() || (mask.size == src.size && mask.type() == CV_8U) ); + CV_OCL_RUN(_src.dims() <= 2 && _dst.isUMat(), + ocl_accumulate(_src, noArray(), _dst, alpha, _mask, ACCUMULATE_WEIGHTED)) + + Mat src = _src.getMat(), dst = _dst.getMat(), mask = _mask.getMat(); int fidx = getAccTabIdx(sdepth, ddepth); AccWFunc func = fidx >= 0 ? accWTab[fidx] : 0; @@ -440,7 +522,7 @@ void cv::accumulateWeighted( InputArray _src, InputOutputArray _dst, int len = (int)it.size; for( size_t i = 0; i < it.nplanes; i++, ++it ) - func(ptrs[0], ptrs[1], ptrs[2], len, cn, alpha); + func(ptrs[0], ptrs[1], ptrs[2], len, scn, alpha); } diff --git a/modules/imgproc/src/opencl/accumulate.cl b/modules/imgproc/src/opencl/accumulate.cl new file mode 100644 index 0000000000..a60d4d6d9d --- /dev/null +++ b/modules/imgproc/src/opencl/accumulate.cl @@ -0,0 +1,65 @@ +// This file is part of OpenCV project. +// It is subject to the license terms in the LICENSE file found in the top-level directory +// of this distribution and at http://opencv.org/license.html. + +// Copyright (C) 2014, Advanced Micro Devices, Inc., all rights reserved. +// Third party copyrights are property of their respective owners. + +#ifdef DOUBLE_SUPPORT +#ifdef cl_amd_fp64 +#pragma OPENCL EXTENSION cl_amd_fp64:enable +#elif defined (cl_khr_fp64) +#pragma OPENCL EXTENSION cl_khr_fp64:enable +#endif +#endif + +__kernel void accumulate(__global const uchar * srcptr, int src_step, int src_offset, +#ifdef ACCUMULATE_PRODUCT + __global const uchar * src2ptr, int src2_step, int src2_offset, +#endif + __global uchar * dstptr, int dst_step, int dst_offset, int dst_rows, int dst_cols +#ifdef ACCUMULATE_WEIGHTED + , dstT alpha +#endif +#ifdef HAVE_MASK + , __global const uchar * mask, int mask_step, int mask_offset +#endif + ) +{ + int x = get_global_id(0); + int y = get_global_id(1); + + if (x < dst_cols && y < dst_rows) + { + int src_index = mad24(y, src_step, src_offset + x * cn * (int)sizeof(srcT)); +#ifdef HAVE_MASK + int mask_index = mad24(y, mask_step, mask_offset + x); + mask += mask_index; +#endif + int dst_index = mad24(y, dst_step, dst_offset + x * cn * (int)sizeof(dstT)); + + __global const srcT * src = (__global const srcT *)(srcptr + src_index); +#ifdef ACCUMULATE_PRODUCT + int src2_index = mad24(y, src2_step, src2_offset + x * cn * (int)sizeof(srcT)); + __global const srcT * src2 = (__global const srcT *)(src2ptr + src2_index); +#endif + __global dstT * dst = (__global dstT *)(dstptr + dst_index); + + #pragma unroll + for (int c = 0; c < cn; ++c) +#ifdef HAVE_MASK + if (mask[0]) +#endif +#ifdef ACCUMULATE + dst[c] += src[c]; +#elif defined ACCUMULATE_SQUARE + dst[c] += src[c] * src[c]; +#elif defined ACCUMULATE_PRODUCT + dst[c] += src[c] * src2[c]; +#elif defined ACCUMULATE_WEIGHTED + dst[c] = (1 - alpha) * dst[c] + src[c] * alpha; +#else +#error "Unknown accumulation type" +#endif + } +} diff --git a/modules/imgproc/test/ocl/test_accumulate.cpp b/modules/imgproc/test/ocl/test_accumulate.cpp new file mode 100644 index 0000000000..586c34b26a --- /dev/null +++ b/modules/imgproc/test/ocl/test_accumulate.cpp @@ -0,0 +1,240 @@ +/*M/////////////////////////////////////////////////////////////////////////////////////// +// +// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING. +// +// By downloading, copying, installing or using the software you agree to this license. +// If you do not agree to this license, do not download, install, +// copy or use the software. +// +// +// License Agreement +// For Open Source Computer Vision Library +// +// Copyright (C) 2010-2012, Multicoreware, Inc., all rights reserved. +// Copyright (C) 2010-2012, Advanced Micro Devices, Inc., all rights reserved. +// Third party copyrights are property of their respective owners. +// +// @Authors +// Nathan, liujun@multicorewareinc.com +// +// Redistribution and use in source and binary forms, with or without modification, +// are permitted provided that the following conditions are met: +// +// * Redistribution's of source code must retain the above copyright notice, +// this list of conditions and the following disclaimer. +// +// * Redistribution's in binary form must reproduce the above copyright notice, +// this list of conditions and the following disclaimer in the documentation +// and/or other materials provided with the distribution. +// +// * The name of the copyright holders may not be used to endorse or promote products +// derived from this software without specific prior written permission. +// +// This software is provided by the copyright holders and contributors as is and +// any express or implied warranties, including, but not limited to, the implied +// warranties of merchantability and fitness for a particular purpose are disclaimed. +// In no event shall the Intel Corporation or contributors be liable for any direct, +// indirect, incidental, special, exemplary, or consequential damages +// (including, but not limited to, procurement of substitute goods or services; +// loss of use, data, or profits; or business interruption) however caused +// and on any theory of liability, whether in contract, strict liability, +// or tort (including negligence or otherwise) arising in any way out of +// the use of this software, even if advised of the possibility of such damage. +// +//M*/ + +#include "test_precomp.hpp" +#include "cvconfig.h" +#include "opencv2/ts/ocl_test.hpp" + +#ifdef HAVE_OPENCL + +namespace cvtest { +namespace ocl { + +PARAM_TEST_CASE(AccumulateBase, std::pair, Channels, bool) +{ + int sdepth, ddepth, channels; + bool useRoi; + double alpha; + + TEST_DECLARE_INPUT_PARAMETER(src) + TEST_DECLARE_INPUT_PARAMETER(mask) + TEST_DECLARE_INPUT_PARAMETER(src2) + TEST_DECLARE_OUTPUT_PARAMETER(dst) + + virtual void SetUp() + { + const std::pair depths = GET_PARAM(0); + sdepth = depths.first, ddepth = depths.second; + channels = GET_PARAM(1); + useRoi = GET_PARAM(2); + } + + void random_roi() + { + const int stype = CV_MAKE_TYPE(sdepth, channels), + dtype = CV_MAKE_TYPE(ddepth, channels); + + Size roiSize = randomSize(1, 10); + Border srcBorder = randomBorder(0, useRoi ? MAX_VALUE : 0); + randomSubMat(src, src_roi, roiSize, srcBorder, stype, -MAX_VALUE, MAX_VALUE); + + Border maskBorder = randomBorder(0, useRoi ? MAX_VALUE : 0); + randomSubMat(mask, mask_roi, roiSize, maskBorder, CV_8UC1, -MAX_VALUE, MAX_VALUE); + threshold(mask, mask, 80, 255, THRESH_BINARY); + + Border src2Border = randomBorder(0, useRoi ? MAX_VALUE : 0); + randomSubMat(src2, src2_roi, roiSize, src2Border, stype, -MAX_VALUE, MAX_VALUE); + + Border dstBorder = randomBorder(0, useRoi ? MAX_VALUE : 0); + randomSubMat(dst, dst_roi, roiSize, dstBorder, dtype, -MAX_VALUE, MAX_VALUE); + + UMAT_UPLOAD_INPUT_PARAMETER(src) + UMAT_UPLOAD_INPUT_PARAMETER(mask) + UMAT_UPLOAD_INPUT_PARAMETER(src2) + UMAT_UPLOAD_OUTPUT_PARAMETER(dst) + + alpha = randomDouble(-5, 5); + } +}; + +/////////////////////////////////// Accumulate /////////////////////////////////// + +typedef AccumulateBase Accumulate; + +OCL_TEST_P(Accumulate, Mat) +{ + for (int i = 0; i < test_loop_times; ++i) + { + random_roi(); + + OCL_OFF(cv::accumulate(src_roi, dst_roi)); + OCL_ON(cv::accumulate(usrc_roi, udst_roi)); + + OCL_EXPECT_MATS_NEAR(dst, 1e-6); + } +} + +OCL_TEST_P(Accumulate, Mask) +{ + for (int i = 0; i < test_loop_times; ++i) + { + random_roi(); + + OCL_OFF(cv::accumulate(src_roi, dst_roi, mask_roi)); + OCL_ON(cv::accumulate(usrc_roi, udst_roi, umask_roi)); + + OCL_EXPECT_MATS_NEAR(dst, 1e-6); + } +} + +/////////////////////////////////// AccumulateSquare /////////////////////////////////// + +typedef AccumulateBase AccumulateSquare; + +OCL_TEST_P(AccumulateSquare, Mat) +{ + for (int i = 0; i < test_loop_times; ++i) + { + random_roi(); + + OCL_OFF(cv::accumulateSquare(src_roi, dst_roi)); + OCL_ON(cv::accumulateSquare(usrc_roi, udst_roi)); + + OCL_EXPECT_MATS_NEAR(dst, 1e-2); + } +} + +OCL_TEST_P(AccumulateSquare, Mask) +{ + for (int i = 0; i < test_loop_times; ++i) + { + random_roi(); + + OCL_OFF(cv::accumulateSquare(src_roi, dst_roi, mask_roi)); + OCL_ON(cv::accumulateSquare(usrc_roi, udst_roi, umask_roi)); + + OCL_EXPECT_MATS_NEAR(dst, 1e-2); + } +} + +/////////////////////////////////// AccumulateProduct /////////////////////////////////// + +typedef AccumulateBase AccumulateProduct; + +OCL_TEST_P(AccumulateProduct, Mat) +{ + for (int i = 0; i < test_loop_times; ++i) + { + random_roi(); + + OCL_OFF(cv::accumulateProduct(src_roi, src2_roi, dst_roi)); + OCL_ON(cv::accumulateProduct(usrc_roi, usrc2_roi, udst_roi)); + + OCL_EXPECT_MATS_NEAR(dst, 1e-2); + } +} + +OCL_TEST_P(AccumulateProduct, Mask) +{ + for (int i = 0; i < test_loop_times; ++i) + { + random_roi(); + + OCL_OFF(cv::accumulateProduct(src_roi, src2_roi, dst_roi, mask_roi)); + OCL_ON(cv::accumulateProduct(usrc_roi, usrc2_roi, udst_roi, umask_roi)); + + OCL_EXPECT_MATS_NEAR(dst, 1e-2); + } +} + +/////////////////////////////////// AccumulateWeighted /////////////////////////////////// + +typedef AccumulateBase AccumulateWeighted; + +OCL_TEST_P(AccumulateWeighted, Mat) +{ + for (int i = 0; i < test_loop_times; ++i) + { + random_roi(); + + OCL_OFF(cv::accumulateWeighted(src_roi, dst_roi, alpha)); + OCL_ON(cv::accumulateWeighted(usrc_roi, udst_roi, alpha)); + + OCL_EXPECT_MATS_NEAR(dst, 1e-2); + } +} + +OCL_TEST_P(AccumulateWeighted, Mask) +{ + for (int i = 0; i < test_loop_times; ++i) + { + random_roi(); + + OCL_OFF(cv::accumulateWeighted(src_roi, dst_roi, alpha)); + OCL_ON(cv::accumulateWeighted(usrc_roi, udst_roi, alpha)); + + OCL_EXPECT_MATS_NEAR(dst, 1e-2); + } +} + +/////////////////////////////////// Instantiation /////////////////////////////////// + +#define OCL_DEPTH_ALL_COMBINATIONS \ + testing::Values(std::make_pair(CV_8U, CV_32F), \ + std::make_pair(CV_16U, CV_32F), \ + std::make_pair(CV_32F, CV_32F), \ + std::make_pair(CV_8U, CV_64F), \ + std::make_pair(CV_16U, CV_64F), \ + std::make_pair(CV_32F, CV_64F), \ + std::make_pair(CV_64F, CV_64F)) + +OCL_INSTANTIATE_TEST_CASE_P(ImgProc, Accumulate, Combine(OCL_DEPTH_ALL_COMBINATIONS, OCL_ALL_CHANNELS, Bool())); +OCL_INSTANTIATE_TEST_CASE_P(ImgProc, AccumulateSquare, Combine(OCL_DEPTH_ALL_COMBINATIONS, OCL_ALL_CHANNELS, Bool())); +OCL_INSTANTIATE_TEST_CASE_P(ImgProc, AccumulateProduct, Combine(OCL_DEPTH_ALL_COMBINATIONS, OCL_ALL_CHANNELS, Bool())); +OCL_INSTANTIATE_TEST_CASE_P(ImgProc, AccumulateWeighted, Combine(OCL_DEPTH_ALL_COMBINATIONS, OCL_ALL_CHANNELS, Bool())); + +} } // namespace cvtest::ocl + +#endif diff --git a/modules/imgproc/test/ocl/test_blend.cpp b/modules/imgproc/test/ocl/test_blend.cpp index 4cfe486d2f..17c0b13123 100644 --- a/modules/imgproc/test/ocl/test_blend.cpp +++ b/modules/imgproc/test/ocl/test_blend.cpp @@ -75,7 +75,7 @@ PARAM_TEST_CASE(BlendLinear, MatDepth, Channels, bool) const int type = CV_MAKE_TYPE(depth, channels); const double upValue = 256; - Size roiSize = randomSize(1, 20); + Size roiSize = randomSize(1, MAX_VALUE); Border src1Border = randomBorder(0, useRoi ? MAX_VALUE : 0); randomSubMat(src1, src1_roi, roiSize, src1Border, type, -upValue, upValue); @@ -104,8 +104,7 @@ PARAM_TEST_CASE(BlendLinear, MatDepth, Channels, bool) void Near(double eps = 0.0) { - EXPECT_MAT_NEAR(dst, udst, eps); - EXPECT_MAT_NEAR(dst_roi, udst_roi, eps); + OCL_EXPECT_MATS_NEAR(dst, eps) } };