Merge pull request #1979 from pentschev:ocl_arithm_sqrt_master

11 years ago · 5299b13434
parent b0a9220de8 5bc9f15dda
commit 5299b13434
6 changed files with 186 additions and 3 deletions
--- a/modules/ocl/doc/operations_on_matrices.rst
+++ b/modules/ocl/doc/operations_on_matrices.rst
@ -557,6 +557,18 @@ Returns void

 The functions ``split`` split a multi-channel array into separate single-channel arrays. Supports all data types.

+ocl::sqrt
+------------------
+Returns void
+
+.. ocv:function:: void ocl::sqrt(const oclMat &src, oclMat &dst)
+
+    :param src: the source array.
+
+    :param dst: the dst array; must have the same size and same type as ``src``.
+
+The function ``sqrt`` calculates the square root of each input array element. Supports only ``CV_32FC1`` and ``CV_64F`` data types.
+
 ocl::subtract
 ------------------
 Returns void
--- a/modules/ocl/include/opencv2/ocl.hpp
+++ b/modules/ocl/include/opencv2/ocl.hpp
@ -559,6 +559,10 @@ namespace cv
        // supports only CV_32FC1, CV_64FC1 type
        CV_EXPORTS void log(const oclMat &src, oclMat &dst);

+        //! computes square root of each matrix element
+        // supports only CV_32FC1, CV_64FC1 type
+        CV_EXPORTS void sqrt(const oclMat &src, oclMat &dst);
+
        //! computes magnitude of each (x(i), y(i)) vector
        // supports only CV_32F, CV_64F type
        CV_EXPORTS void magnitude(const oclMat &x, const oclMat &y, oclMat &magnitude);
--- a/modules/ocl/perf/perf_arithm.cpp
+++ b/modules/ocl/perf/perf_arithm.cpp
@ -162,6 +162,40 @@ PERF_TEST_P(LogFixture, Log, OCL_TYPICAL_MAT_SIZES)
    SANITY_CHECK(dst, eps, ERROR_RELATIVE);
 }

+///////////// SQRT ///////////////////////
+
+typedef TestBaseWithParam<Size> SqrtFixture;
+
+PERF_TEST_P(SqrtFixture, Sqrt, OCL_TYPICAL_MAT_SIZES)
+{
+    // tolerance handed to the final sanity check, and the matrix size under test
+    const double eps = 1e-6;
+    const Size srcSize = GetParam();
+
+    // CV_32F input filled by randu with bounds 0 and 10; dst has the same size and type
+    Mat src(srcSize, CV_32F);
+    Mat dst(srcSize, src.type());
+    randu(src, 0, 10);
+    declare.in(src).out(dst);
+
+    if (RUN_OCL_IMPL)
+    {
+        // OpenCL path: run the timed cycle on device matrices, then download
+        // the result into dst so the sanity check below can inspect it
+        ocl::oclMat oclSrc(src);
+        ocl::oclMat oclDst(srcSize, src.type());
+
+        OCL_TEST_CYCLE() cv::ocl::sqrt(oclSrc, oclDst);
+
+        oclDst.download(dst);
+    }
+    else if (RUN_PLAIN_IMPL)
+    {
+        // plain path: time cv::sqrt on the host matrices
+        TEST_CYCLE() cv::sqrt(src, dst);
+    }
+    else
+        OCL_PERF_ELSE
+
+    SANITY_CHECK(dst, eps, ERROR_RELATIVE);
+}
+
 ///////////// Add ////////////////////////

 typedef Size_MatType AddFixture;
--- a/modules/ocl/src/arithm.cpp
+++ b/modules/ocl/src/arithm.cpp
@ -839,7 +839,7 @@ void cv::ocl::LUT(const oclMat &src, const oclMat &lut, oclMat &dst)
 //////////////////////////////// exp log /////////////////////////////////////
 //////////////////////////////////////////////////////////////////////////////

-static void arithmetic_exp_log_run(const oclMat &src, oclMat &dst, String kernelName, const cv::ocl::ProgramEntry* source)
+static void arithmetic_exp_log_sqrt_run(const oclMat &src, oclMat &dst, String kernelName, const cv::ocl::ProgramEntry* source)
 {
    Context  *clCxt = src.clCxt;
    if (!clCxt->supportsFeature(FEATURE_CL_DOUBLE) && src.depth() == CV_64F)
@ -882,12 +882,17 @@ static void arithmetic_exp_log_run(const oclMat &src, oclMat &dst, String kernel

 void cv::ocl::exp(const oclMat &src, oclMat &dst)
 {
-    arithmetic_exp_log_run(src, dst, "arithm_exp", &arithm_exp);
+    arithmetic_exp_log_sqrt_run(src, dst, "arithm_exp", &arithm_exp);
 }

 void cv::ocl::log(const oclMat &src, oclMat &dst)
 {
-    arithmetic_exp_log_run(src, dst, "arithm_log", &arithm_log);
+    arithmetic_exp_log_sqrt_run(src, dst, "arithm_log", &arithm_log);
+}
+
+void cv::ocl::sqrt(const oclMat &src, oclMat &dst)
+{
+    // Delegates to the runner shared with exp() and log(), selecting the
+    // "arithm_sqrt" kernel from the arithm_sqrt program entry.
+    const cv::ocl::ProgramEntry* const program = &arithm_sqrt;
+    arithmetic_exp_log_sqrt_run(src, dst, "arithm_sqrt", program);
 }

 //////////////////////////////////////////////////////////////////////////////
--- a/modules/ocl/src/opencl/arithm_sqrt.cl
+++ b/modules/ocl/src/opencl/arithm_sqrt.cl
@ -0,0 +1,111 @@
+/*M///////////////////////////////////////////////////////////////////////////////////////
+//
+//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
+//
+//  By downloading, copying, installing or using the software you agree to this license.
+//  If you do not agree to this license, do not download, install,
+//  copy or use the software.
+//
+//
+//                           License Agreement
+//                For Open Source Computer Vision Library
+//
+// Copyright (C) 2010-2012, Institute Of Software Chinese Academy Of Science, all rights reserved.
+// Copyright (C) 2010-2012, Advanced Micro Devices, Inc., all rights reserved.
+// Third party copyrights are property of their respective owners.
+//
+// @Authors
+//    Peter Andreas Entschev, peter@entschev.com
+//
+// Redistribution and use in source and binary forms, with or without modification,
+// are permitted provided that the following conditions are met:
+//
+//   * Redistribution's of source code must retain the above copyright notice,
+//     this list of conditions and the following disclaimer.
+//
+//   * Redistribution's in binary form must reproduce the above copyright notice,
+//     this list of conditions and the following disclaimer in the documentation
+//     and/or other materials provided with the distribution.
+//
+//   * The name of the copyright holders may not be used to endorse or promote products
+//     derived from this software without specific prior written permission.
+//
+// This software is provided by the copyright holders and contributors as is and
+// any express or implied warranties, including, but not limited to, the implied
+// warranties of merchantability and fitness for a particular purpose are disclaimed.
+// In no event shall the Intel Corporation or contributors be liable for any direct,
+// indirect, incidental, special, exemplary, or consequential damages
+// (including, but not limited to, procurement of substitute goods or services;
+// loss of use, data, or profits; or business interruption) however caused
+// and on any theory of liability, whether in contract, strict liability,
+// or tort (including negligence or otherwise) arising in any way out of
+// the use of this software, even if advised of the possibility of such damage.
+//
+//M*/
+
+#ifdef DOUBLE_SUPPORT
+#ifdef cl_amd_fp64
+#pragma OPENCL EXTENSION cl_amd_fp64:enable
+#elif defined (cl_khr_fp64)
+#pragma OPENCL EXTENSION cl_khr_fp64:enable
+#endif
+#endif
+
+//////////////////////////////////////////////////////////////////////////////////////////////////////
+/////////////////////////////////////////////SQRT////////////////////////////////////////////////////
+///////////////////////////////////////////////////////////////////////////////////////////////////////
+
+// Element-wise square root, one element per work-item.
+// Work-item (x, y) reads src[y * srcStep1 + x + srcOffset1] and writes the
+// square root to dst[y * dstStep1 + x + dstOffset1]; work-items outside the
+// cols1 x rows region do nothing.
+__kernel void arithm_sqrt_C1(__global srcT *src, __global srcT *dst,
+    int cols1, int rows,
+    int srcOffset1, int dstOffset1,
+    int srcStep1, int dstStep1)
+{
+    const int col = get_global_id(0);
+    const int row = get_global_id(1);
+
+    // Nothing to do for work-items that fall outside the processed region.
+    if (col >= cols1 || row >= rows)
+        return;
+
+    const int srcPos = mad24(row, srcStep1, col + srcOffset1);
+    const int dstPos = mad24(row, dstStep1, col + dstOffset1);
+
+    dst[dstPos] = sqrt(src[srcPos]);
+}
+
+// Element-wise square root, two consecutive elements per work-item.
+// Work-item (gx, y) handles elements x1 = 2 * gx and x1 + 1 of row y, reading
+// from src[y * srcStep1 + x + srcOffset1] and writing to
+// dst[y * dstStep1 + x + dstOffset1].
+__kernel void arithm_sqrt_C2(__global srcT *src, __global srcT *dst,
+    int cols1, int rows,
+    int srcOffset1, int dstOffset1,
+    int srcStep1, int dstStep1)
+{
+    int x1 = get_global_id(0) << 1;
+    int y = get_global_id(1);
+
+    if(x1 < cols1 && y < rows)
+    {
+        int srcIdx = mad24(y, srcStep1, x1 + srcOffset1);
+        int dstIdx = mad24(y, dstStep1, x1 + dstOffset1);
+
+        dst[dstIdx] = sqrt(src[srcIdx]);
+
+        // Only touch the second element when it lies inside the row. The
+        // previous form read dst[dstIdx + 1] and stored it back when it did
+        // not, i.e. it accessed memory past the processed row end.
+        if(x1 + 1 < cols1)
+            dst[dstIdx + 1] = sqrt(src[srcIdx + 1]);
+    }
+}
+
+// Element-wise square root, up to four consecutive elements per work-item.
+// Work-item (gx, y) handles elements x1 = 4 * gx .. x1 + 3 of row y, reading
+// from src[y * srcStep1 + x + srcOffset1] and writing to
+// dst[y * dstStep1 + x + dstOffset1].
+__kernel void arithm_sqrt_C4(__global srcT *src, __global srcT *dst,
+    int cols1, int rows,
+    int srcOffset1, int dstOffset1,
+    int srcStep1, int dstStep1)
+{
+    int x1 = get_global_id(0) << 2;
+    int y = get_global_id(1);
+
+    if(x1 < cols1 && y < rows)
+    {
+        int srcIdx = mad24(y, srcStep1, x1 + srcOffset1);
+        int dstIdx = mad24(y, dstStep1, x1 + dstOffset1);
+
+        // Number of elements of this group that lie inside the row (1..4).
+        // Elements past the row end are left untouched; the previous form
+        // read them from dst and stored them back, accessing memory beyond
+        // the processed row.
+        int count = min(4, cols1 - x1);
+
+        for (int i = 0; i < count; ++i)
+            dst[dstIdx + i] = sqrt(src[srcIdx + i]);
+    }
+}
--- a/modules/ocl/test/test_arithm.cpp
+++ b/modules/ocl/test/test_arithm.cpp
@ -278,6 +278,22 @@ OCL_TEST_P(Log, Mat)
    }
 }

+//////////////////////////////// Sqrt ////////////////////////////////////////////////
+
+typedef ArithmTestBase Sqrt;
+
+OCL_TEST_P(Sqrt, Mat)
+{
+    // Repeat LOOP_TIMES: call random_roi(), run cv::sqrt and cv::ocl::sqrt on
+    // their respective ROI inputs, then check the outcome with Near(1).
+    int iteration = 0;
+    while (iteration < LOOP_TIMES)
+    {
+        random_roi();
+
+        cv::sqrt(src1_roi, dst1_roi);          // host implementation
+        cv::ocl::sqrt(gsrc1_roi, gdst1_roi);   // OpenCL implementation
+
+        Near(1);
+        ++iteration;
+    }
+}
+
 //////////////////////////////// Add /////////////////////////////////////////////////

 typedef ArithmTestBase Add;
@ -1569,6 +1585,7 @@ OCL_TEST_P(Repeat, Mat)
 INSTANTIATE_TEST_CASE_P(Arithm, Lut, Combine(Values(CV_8U, CV_8S, CV_16U, CV_16S, CV_32S, CV_32F, CV_64F), Values(1, 2, 3, 4), Bool(), Bool()));
 INSTANTIATE_TEST_CASE_P(Arithm, Exp, Combine(testing::Values(CV_32F, CV_64F), Values(1, 2, 3, 4), Bool()));
 INSTANTIATE_TEST_CASE_P(Arithm, Log, Combine(testing::Values(CV_32F, CV_64F), Values(1, 2, 3, 4), Bool()));
+INSTANTIATE_TEST_CASE_P(Arithm, Sqrt, Combine(testing::Values(CV_32F, CV_64F), Values(1, 2, 3, 4), Bool()));
 INSTANTIATE_TEST_CASE_P(Arithm, Add, Combine(Values(CV_8U, CV_8S, CV_16U, CV_16S, CV_32S, CV_32F, CV_64F), Values(1, 2, 3, 4), Bool()));
 INSTANTIATE_TEST_CASE_P(Arithm, Sub, Combine(Values(CV_8U, CV_8S, CV_16U, CV_16S, CV_32S, CV_32F, CV_64F), Values(1, 2, 3, 4), Bool()));
 INSTANTIATE_TEST_CASE_P(Arithm, Mul, Combine(Values(CV_8U, CV_8S, CV_16U, CV_16S, CV_32S, CV_32F, CV_64F), Values(1, 2, 3, 4), Bool()));