Merge pull request #1979 from pentschev:ocl_arithm_sqrt_master

pull/1954/merge
Andrey Pavlenko 11 years ago committed by OpenCV Buildbot
commit 5299b13434
  1. 12
      modules/ocl/doc/operations_on_matrices.rst
  2. 4
      modules/ocl/include/opencv2/ocl.hpp
  3. 34
      modules/ocl/perf/perf_arithm.cpp
  4. 11
      modules/ocl/src/arithm.cpp
  5. 111
      modules/ocl/src/opencl/arithm_sqrt.cl
  6. 17
      modules/ocl/test/test_arithm.cpp

@ -557,6 +557,18 @@ Returns void
The function ``split`` splits a multi-channel array into separate single-channel arrays. Supports all data types.
ocl::sqrt
------------------
Returns void
.. ocv:function:: void ocl::sqrt(const oclMat &src, oclMat &dst)
:param src: the source array.
:param dst: the destination array; must have the same size and same type as ``src``.
The function ``sqrt`` calculates the square root of each input array element. Supports only ``CV_32F`` and ``CV_64F`` data types.
ocl::subtract
------------------
Returns void

@ -559,6 +559,10 @@ namespace cv
// supports only CV_32FC1, CV_64FC1 type
CV_EXPORTS void log(const oclMat &src, oclMat &dst);
//! computes the square root of each element of src and stores the result in dst
// supports only CV_32F, CV_64F depth
CV_EXPORTS void sqrt(const oclMat &src, oclMat &dst);
//! computes magnitude of each (x(i), y(i)) vector
// supports only CV_32F, CV_64F type
CV_EXPORTS void magnitude(const oclMat &x, const oclMat &y, oclMat &magnitude);

@ -162,6 +162,40 @@ PERF_TEST_P(LogFixture, Log, OCL_TYPICAL_MAT_SIZES)
SANITY_CHECK(dst, eps, ERROR_RELATIVE);
}
///////////// SQRT ///////////////////////
// Fixture for the square-root performance test; its only parameter is the matrix size.
typedef TestBaseWithParam<Size> SqrtFixture;
// Performance test for per-element square root. Depending on the selected
// implementation it times either cv::ocl::sqrt or cv::sqrt on the same input
// and then sanity-checks the resulting dst matrix.
PERF_TEST_P(SqrtFixture, Sqrt, OCL_TYPICAL_MAT_SIZES)
{
// test parameter: the matrix size used for this run
const Size srcSize = GetParam();
// tolerance passed to SANITY_CHECK together with ERROR_RELATIVE
const double eps = 1e-6;
// CV_32F input filled by randu with bounds 0 and 10; dst has the same size and type
Mat src(srcSize, CV_32F), dst(srcSize, src.type());
randu(src, 0, 10);
declare.in(src).out(dst);
// run whichever implementation was selected for this test run
if (RUN_OCL_IMPL)
{
// OpenCL path: wrap src in an oclMat and allocate an oclMat for the result
ocl::oclMat oclSrc(src), oclDst(srcSize, src.type());
OCL_TEST_CYCLE() cv::ocl::sqrt(oclSrc, oclDst);
// download the result into dst so the sanity check below sees it
oclDst.download(dst);
}
else if (RUN_PLAIN_IMPL)
{
// plain path: time cv::sqrt writing directly into dst
TEST_CYCLE() cv::sqrt(src, dst);
}
else
// NOTE(review): OCL_PERF_ELSE is a macro defined elsewhere; it occupies the
// else arm here, so its expansion decides how SANITY_CHECK below is reached.
OCL_PERF_ELSE
SANITY_CHECK(dst, eps, ERROR_RELATIVE);
}
///////////// Add ////////////////////////
typedef Size_MatType AddFixture;

@ -839,7 +839,7 @@ void cv::ocl::LUT(const oclMat &src, const oclMat &lut, oclMat &dst)
//////////////////////////////// exp log /////////////////////////////////////
//////////////////////////////////////////////////////////////////////////////
static void arithmetic_exp_log_run(const oclMat &src, oclMat &dst, String kernelName, const cv::ocl::ProgramEntry* source)
static void arithmetic_exp_log_sqrt_run(const oclMat &src, oclMat &dst, String kernelName, const cv::ocl::ProgramEntry* source)
{
Context *clCxt = src.clCxt;
if (!clCxt->supportsFeature(FEATURE_CL_DOUBLE) && src.depth() == CV_64F)
@ -882,12 +882,17 @@ static void arithmetic_exp_log_run(const oclMat &src, oclMat &dst, String kernel
// Forwards src/dst to the shared exp/log/sqrt runner with the "arithm_exp"
// kernel name and the arithm_exp program entry.
// NOTE(review): both the old helper name (arithmetic_exp_log_run) and the
// renamed helper (arithmetic_exp_log_sqrt_run) appear below; this looks like
// the removed and added lines of the rename shown together -- confirm that
// only the arithmetic_exp_log_sqrt_run call is meant to remain.
void cv::ocl::exp(const oclMat &src, oclMat &dst)
{
arithmetic_exp_log_run(src, dst, "arithm_exp", &arithm_exp);
arithmetic_exp_log_sqrt_run(src, dst, "arithm_exp", &arithm_exp);
}
// Forwards src/dst to the shared exp/log/sqrt runner with the "arithm_log"
// kernel name and the arithm_log program entry.
// NOTE(review): both the old helper name (arithmetic_exp_log_run) and the
// renamed helper (arithmetic_exp_log_sqrt_run) appear below; this looks like
// the removed and added lines of the rename shown together -- confirm that
// only the arithmetic_exp_log_sqrt_run call is meant to remain.
void cv::ocl::log(const oclMat &src, oclMat &dst)
{
arithmetic_exp_log_run(src, dst, "arithm_log", &arithm_log);
arithmetic_exp_log_sqrt_run(src, dst, "arithm_log", &arithm_log);
}
// Per-element square root: forwards src/dst to the shared exp/log/sqrt runner
// with the "arithm_sqrt" kernel name and the arithm_sqrt program entry.
void cv::ocl::sqrt(const oclMat &src, oclMat &dst)
{
arithmetic_exp_log_sqrt_run(src, dst, "arithm_sqrt", &arithm_sqrt);
}
//////////////////////////////////////////////////////////////////////////////

@ -0,0 +1,111 @@
/*M///////////////////////////////////////////////////////////////////////////////////////
//
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
//
// By downloading, copying, installing or using the software you agree to this license.
// If you do not agree to this license, do not download, install,
// copy or use the software.
//
//
// License Agreement
// For Open Source Computer Vision Library
//
// Copyright (C) 2010-2012, Institute Of Software Chinese Academy Of Science, all rights reserved.
// Copyright (C) 2010-2012, Advanced Micro Devices, Inc., all rights reserved.
// Third party copyrights are property of their respective owners.
//
// @Authors
// Peter Andreas Entschev, peter@entschev.com
//
// Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met:
//
// * Redistribution's of source code must retain the above copyright notice,
// this list of conditions and the following disclaimer.
//
// * Redistribution's in binary form must reproduce the above copyright notice,
// this list of conditions and the following disclaimer in the documentation
// and/or other materials provided with the distribution.
//
// * The name of the copyright holders may not be used to endorse or promote products
// derived from this software without specific prior written permission.
//
// This software is provided by the copyright holders and contributors as is and
// any express or implied warranties, including, but not limited to, the implied
// warranties of merchantability and fitness for a particular purpose are disclaimed.
// In no event shall the Intel Corporation or contributors be liable for any direct,
// indirect, incidental, special, exemplary, or consequential damages
// (including, but not limited to, procurement of substitute goods or services;
// loss of use, data, or profits; or business interruption) however caused
// and on any theory of liability, whether in contract, strict liability,
// or tort (including negligence or otherwise) arising in any way out of
// the use of this software, even if advised of the possibility of such damage.
//
//M*/
#ifdef DOUBLE_SUPPORT
#ifdef cl_amd_fp64
#pragma OPENCL EXTENSION cl_amd_fp64:enable
#elif defined (cl_khr_fp64)
#pragma OPENCL EXTENSION cl_khr_fp64:enable
#endif
#endif
//////////////////////////////////////////////////////////////////////////////////////////////////////
/////////////////////////////////////////////SQRT////////////////////////////////////////////////////
///////////////////////////////////////////////////////////////////////////////////////////////////////
// Element-wise square root, one scalar element per work-item.
//
// src, dst     : input and output buffers of srcT elements (srcT is not defined
//                in this file; presumably supplied through the build options).
// cols1, rows  : extent of the processed region, in scalar elements and rows.
// srcOffset1,
// dstOffset1   : index of the first processed element inside each buffer.
// srcStep1,
// dstStep1     : distance between consecutive rows, in srcT elements.
__kernel void arithm_sqrt_C1(__global srcT *src, __global srcT *dst,
                             int cols1, int rows,
                             int srcOffset1, int dstOffset1,
                             int srcStep1, int dstStep1)
{
    const int col = get_global_id(0);
    const int row = get_global_id(1);

    // Work-items that fall outside the cols1 x rows region have nothing to do.
    if (col >= cols1 || row >= rows)
        return;

    const int srcPos = mad24(row, srcStep1, col + srcOffset1);
    const int dstPos = mad24(row, dstStep1, col + dstOffset1);

    dst[dstPos] = sqrt(src[srcPos]);
}
// Element-wise square root, two consecutive scalar elements per work-item.
//
// src, dst     : input and output buffers of srcT elements (srcT is not defined
//                in this file; presumably supplied through the build options).
// cols1, rows  : extent of the processed region, in scalar elements and rows.
// srcOffset1,
// dstOffset1   : index of the first processed element inside each buffer.
// srcStep1,
// dstStep1     : distance between consecutive rows, in srcT elements.
//
// Only elements whose column index is below cols1 are read or written. The
// second element is skipped entirely when it lies past the end of the row, so
// the kernel never touches dst outside the requested region (no out-of-range
// access on the last row and no stale write-back onto an element that another
// work-item may be updating).
__kernel void arithm_sqrt_C2(__global srcT *src, __global srcT *dst,
                             int cols1, int rows,
                             int srcOffset1, int dstOffset1,
                             int srcStep1, int dstStep1)
{
    int x1 = get_global_id(0) << 1; // first of the two columns handled here
    int y = get_global_id(1);

    if (x1 < cols1 && y < rows)
    {
        int srcIdx = mad24(y, srcStep1, x1 + srcOffset1);
        int dstIdx = mad24(y, dstStep1, x1 + dstOffset1);

        dst[dstIdx] = sqrt(src[srcIdx]);

        // The second element exists only when it is still inside the row.
        if (x1 + 1 < cols1)
            dst[dstIdx + 1] = sqrt(src[srcIdx + 1]);
    }
}
// Element-wise square root, up to four consecutive scalar elements per work-item.
//
// src, dst     : input and output buffers of srcT elements (srcT is not defined
//                in this file; presumably supplied through the build options).
// cols1, rows  : extent of the processed region, in scalar elements and rows.
// srcOffset1,
// dstOffset1   : index of the first processed element inside each buffer.
// srcStep1,
// dstStep1     : distance between consecutive rows, in srcT elements.
//
// Only elements whose column index is below cols1 are read or written. Trailing
// elements past the end of the row are skipped entirely, so the kernel never
// touches dst outside the requested region (no out-of-range access on the last
// row and no stale write-back onto elements that another work-item may be
// updating).
__kernel void arithm_sqrt_C4(__global srcT *src, __global srcT *dst,
                             int cols1, int rows,
                             int srcOffset1, int dstOffset1,
                             int srcStep1, int dstStep1)
{
    int x1 = get_global_id(0) << 2; // first of the four columns handled here
    int y = get_global_id(1);

    if (x1 < cols1 && y < rows)
    {
        int srcIdx = mad24(y, srcStep1, x1 + srcOffset1);
        int dstIdx = mad24(y, dstStep1, x1 + dstOffset1);

        // Process at most four elements, stopping at the end of the row.
        for (int i = 0; i < 4 && x1 + i < cols1; ++i)
            dst[dstIdx + i] = sqrt(src[srcIdx + i]);
    }
}

@ -278,6 +278,22 @@ OCL_TEST_P(Log, Mat)
}
}
//////////////////////////////// Sqrt ////////////////////////////////////////////////
typedef ArithmTestBase Sqrt;

// Runs cv::sqrt and cv::ocl::sqrt on the same randomly chosen ROI,
// LOOP_TIMES times, and compares the two results with Near(1).
OCL_TEST_P(Sqrt, Mat)
{
    int iteration = 0;
    while (iteration < LOOP_TIMES)
    {
        // pick a fresh ROI for this round
        random_roi();

        // reference result
        cv::sqrt(src1_roi, dst1_roi);
        // OpenCL result
        cv::ocl::sqrt(gsrc1_roi, gdst1_roi);

        Near(1);
        ++iteration;
    }
}
//////////////////////////////// Add /////////////////////////////////////////////////
typedef ArithmTestBase Add;
@ -1569,6 +1585,7 @@ OCL_TEST_P(Repeat, Mat)
INSTANTIATE_TEST_CASE_P(Arithm, Lut, Combine(Values(CV_8U, CV_8S, CV_16U, CV_16S, CV_32S, CV_32F, CV_64F), Values(1, 2, 3, 4), Bool(), Bool()));
INSTANTIATE_TEST_CASE_P(Arithm, Exp, Combine(testing::Values(CV_32F, CV_64F), Values(1, 2, 3, 4), Bool()));
INSTANTIATE_TEST_CASE_P(Arithm, Log, Combine(testing::Values(CV_32F, CV_64F), Values(1, 2, 3, 4), Bool()));
INSTANTIATE_TEST_CASE_P(Arithm, Sqrt, Combine(testing::Values(CV_32F, CV_64F), Values(1, 2, 3, 4), Bool()));
INSTANTIATE_TEST_CASE_P(Arithm, Add, Combine(Values(CV_8U, CV_8S, CV_16U, CV_16S, CV_32S, CV_32F, CV_64F), Values(1, 2, 3, 4), Bool()));
INSTANTIATE_TEST_CASE_P(Arithm, Sub, Combine(Values(CV_8U, CV_8S, CV_16U, CV_16S, CV_32S, CV_32F, CV_64F), Values(1, 2, 3, 4), Bool()));
INSTANTIATE_TEST_CASE_P(Arithm, Mul, Combine(Values(CV_8U, CV_8S, CV_16U, CV_16S, CV_32S, CV_32F, CV_64F), Values(1, 2, 3, 4), Bool()));

Loading…
Cancel
Save