Overhaul bioinspired opencl implementation

This patch updates the bioinspired OpenCL code to make it work with the latest OpenCV. Major changes include: 1. data structure updates, e.g. oclMat to UMat; 2. OpenCL kernel optimizations to make it run faster; 3. accuracy and perf test updates. Signed-off-by: Li Peng <peng.li@intel.com>
8 years ago · 966b2f55b9
parent 86342522b0
commit 966b2f55b9
10 changed files with 881 additions and 996 deletions
--- a/modules/bioinspired/include/opencv2/bioinspired/retina.hpp
+++ b/modules/bioinspired/include/opencv2/bioinspired/retina.hpp
@ -447,11 +447,6 @@ the log scale that is applied
 */
 CV_EXPORTS_W Ptr<Retina> createRetina(Size inputSize, const bool colorMode, int colorSamplingMethod=RETINA_COLOR_BAYER, const bool useRetinaLogSampling=false, const float reductionFactor=1.0f, const float samplingStrenght=10.0f);
 #ifdef HAVE_OPENCV_OCL
 Ptr<Retina> createRetina_OCL(Size inputSize);
 Ptr<Retina> createRetina_OCL(Size inputSize, const bool colorMode, int colorSamplingMethod=RETINA_COLOR_BAYER, const bool useRetinaLogSampling=false, const float reductionFactor=1.0f, const float samplingStrenght=10.0f);
 #endif
 //! @}
 //! @}
--- a/modules/bioinspired/perf/opencl/perf_retina.cpp
+++ b/modules/bioinspired/perf/opencl/perf_retina.cpp
@ -1,126 +0,0 @@
 /*M///////////////////////////////////////////////////////////////////////////////////////
 //
 //  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
 //
 //  By downloading, copying, installing or using the software you agree to this license.
 //  If you do not agree to this license, do not download, install,
 //  copy or use the software.
 //
 //
 //                           License Agreement
 //                For Open Source Computer Vision Library
 //
 // Copyright (C) 2010-2012, Multicoreware, Inc., all rights reserved.
 // Copyright (C) 2010-2012, Advanced Micro Devices, Inc., all rights reserved.
 // Third party copyrights are property of their respective owners.
 //
 // @Authors
 //    Fangfang Bai, fangfang@multicorewareinc.com
 //    Jin Ma,       jin@multicorewareinc.com
 //
 // Redistribution and use in source and binary forms, with or without modification,
 // are permitted provided that the following conditions are met:
 //
 //   * Redistribution's of source code must retain the above copyright notice,
 //     this list of conditions and the following disclaimer.
 //
 //   * Redistribution's in binary form must reproduce the above copyright notice,
 //     this list of conditions and the following disclaimer in the documentation
 //     and/or other materials provided with the distribution.
 //
 //   * The name of the copyright holders may not be used to endorse or promote products
 //     derived from this software without specific prior written permission.
 //
 // This software is provided by the copyright holders and contributors as is and
 // any express or implied warranties, including, but not limited to, the implied
 // warranties of merchantability and fitness for a particular purpose are disclaimed.
 // In no event shall the Intel Corporation or contributors be liable for any direct,
 // indirect, incidental, special, exemplary, or consequential damages
 // (including, but not limited to, procurement of substitute goods or services;
 // loss of use, data, or profits; or business interruption) however caused
 // and on any theory of liability, whether in contract, strict liability,
 // or tort (including negligence or otherwise) arising in any way out of
 // the use of this software, even if advised of the possibility of such damage.
 //
 //M*/
 #include "perf_precomp.hpp"
 #include "opencv2/imgproc.hpp"
 #include "opencv2/highgui.hpp"
 #include "opencv2/core/ocl.hpp"
 #ifdef HAVE_OPENCV_OCL
 #include "opencv2/ocl.hpp"
 using namespace std::tr1;
 using namespace cv;
 using namespace perf;
 namespace cvtest {
 namespace ocl {
 ///////////////////////// Retina ////////////////////////
 typedef tuple<bool, int, double, double> RetinaParams;
 typedef TestBaseWithParam<RetinaParams> RetinaFixture;
 #define OCL_TEST_CYCLE() for(; startTimer(), next(); cv::ocl::finish(), stopTimer())
 // Performance test for the Retina filter, parameterized over
 // (colorMode, colorSamplingMethod, reductionFactor, samplingStrength).
 // Depending on getSelectedImpl() it times either the "plain" retina or the
 // one created by createRetina_OCL; any other implementation name fails the test.
 PERF_TEST_P(RetinaFixture, Retina,
            ::testing::Combine(testing::Bool(), testing::Values((int)cv::bioinspired::RETINA_COLOR_BAYER),
                               testing::Values(1.0, 0.5), testing::Values(10.0, 5.0)))
 {
    // Skip (rather than fail) when no OpenCL runtime is available.
    if (!cv::ocl::haveOpenCL())
        throw TestBase::PerfSkipTestException();
    RetinaParams params = GetParam();
    // Log sampling is always disabled in this test.
    bool colorMode = get<0>(params), useLogSampling = false;
    int colorSamplingMethod = get<1>(params);
    double reductionFactor = get<2>(params), samplingStrength = get<3>(params);
    // colorMode is also passed as the imread flag argument.
    Mat input = cv::imread(cvtest::TS::ptr()->get_data_path() + "shared/lena.png", colorMode);
    ASSERT_FALSE(input.empty());
    Mat gold_parvo, gold_magno;
    if (getSelectedImpl() == "plain")
    {
        Ptr<bioinspired::Retina> gold_retina = bioinspired::createRetina(
            input.size(), colorMode, colorSamplingMethod,
            useLogSampling, reductionFactor, samplingStrength);
        // Each timed iteration runs the filter and fetches both outputs.
        TEST_CYCLE()
        {
            gold_retina->run(input);
            gold_retina->getParvo(gold_parvo);
            gold_retina->getMagno(gold_magno);
        }
    }
    else if (getSelectedImpl() == "ocl")
    {
        // The input is wrapped in an oclMat before the timed loop starts.
        cv::ocl::oclMat ocl_input(input), ocl_parvo, ocl_magno;
        Ptr<cv::bioinspired::Retina> ocl_retina = cv::bioinspired::createRetina_OCL(
            input.size(), colorMode, colorSamplingMethod, useLogSampling,
            reductionFactor, samplingStrength);
        // OCL_TEST_CYCLE calls cv::ocl::finish() before stopTimer() on every iteration.
        OCL_TEST_CYCLE()
        {
            ocl_retina->run(ocl_input);
            ocl_retina->getParvo(ocl_parvo);
            ocl_retina->getMagno(ocl_magno);
        }
    }
    else
        CV_TEST_FAIL_NO_IMPL();
    // The parvo/magno outputs are not compared against each other or against reference data here.
    SANITY_CHECK_NOTHING();
 }
 } } // namespace cvtest::ocl
 #endif // HAVE_OPENCV_OCL
--- a/modules/bioinspired/perf/opencl/perf_retina.ocl.cpp
+++ b/modules/bioinspired/perf/opencl/perf_retina.ocl.cpp
@ -0,0 +1,47 @@
 #include "../perf_precomp.hpp"
 #include "opencv2/ts/ocl_perf.hpp"
 using namespace std::tr1;
 using namespace cv;
 using namespace perf;
 namespace cvtest {
 namespace ocl {
 ///////////////////////// Retina ////////////////////////
 typedef tuple<bool, int, double, double> RetinaParams;
 typedef TestBaseWithParam<RetinaParams> RetinaFixture;
 // Performance test for the Retina filter, parameterized over
 // (colorMode, colorSamplingMethod, reductionFactor, samplingStrength).
 // The retina is created through the regular createRetina() factory and its
 // parvo/magno outputs are requested into UMat objects.
 OCL_PERF_TEST_P(RetinaFixture, Retina,
                ::testing::Combine(testing::Bool(), testing::Values((int)cv::bioinspired::RETINA_COLOR_BAYER),
                                   testing::Values(1.0, 0.5), testing::Values(10.0, 5.0)))
 {
    RetinaParams params = GetParam();
    // Log sampling is always disabled in this test.
    bool colorMode = get<0>(params), useLogSampling = false;
    int colorSamplingMethod = get<1>(params);
    // The tuple stores doubles; createRetina() takes float parameters, so narrow explicitly.
    float reductionFactor = static_cast<float>(get<2>(params));
    float samplingStrength = static_cast<float>(get<3>(params));
    // colorMode is also passed as the imread flag argument.
    Mat input = imread(getDataPath("cv/shared/lena.png"), colorMode);
    ASSERT_FALSE(input.empty());
    UMat ocl_parvo, ocl_magno;
    {
        Ptr<cv::bioinspired::Retina> retina = cv::bioinspired::createRetina(
            input.size(), colorMode, colorSamplingMethod, useLogSampling,
            reductionFactor, samplingStrength);
        // Each timed iteration runs the filter on the Mat input and fetches both outputs.
        OCL_TEST_CYCLE()
        {
            retina->run(input);
            retina->getParvo(ocl_parvo);
            retina->getMagno(ocl_magno);
        }
    }
    // The outputs are not compared against reference data here.
    SANITY_CHECK_NOTHING();
 }
 } } // namespace cvtest::ocl
--- a/modules/bioinspired/perf/perf_main.cpp
+++ b/modules/bioinspired/perf/perf_main.cpp
@ -42,12 +42,4 @@
 #include "perf_precomp.hpp"
-static const char * impls[] =
+CV_PERF_TEST_MAIN(bioinspired)
 {
 #ifdef HAVE_OPENCV_OCL
    "ocl",
 #endif
    "plain"
 };
 CV_PERF_TEST_MAIN_WITH_IMPLS(ocl, impls, ::perf::TestBase::setModulePerformanceStrategy(::perf::PERF_STRATEGY_SIMPLE))
--- a/modules/bioinspired/src/opencl/retina_kernel.cl
+++ b/modules/bioinspired/src/opencl/retina_kernel.cl
@ -75,72 +75,35 @@ kernel void horizontalCausalFilter_addInput(
        output + mad24(gid, elements_per_row, out_offset / 4);
    float res;
-    float4 in_v4, out_v4, res_v4 = (float4)(0);
+    float4 in_v4, out_v4, sum_v4, res_v4 = (float4)(0);
    //vectorize to increase throughput
    for(int i = 0; i < cols / 4; ++i, iptr += 4, optr += 4)
    {
        in_v4  = vload4(0, iptr);
-        out_v4 = vload4(0, optr);
+        out_v4 = vload4(0, optr) * _tau;
        sum_v4 = in_v4 + out_v4;
-        res_v4.x = in_v4.x + _tau * out_v4.x + _a * res_v4.w;
+        res_v4.x = sum_v4.x + _a * res_v4.w;
-        res_v4.y = in_v4.y + _tau * out_v4.y + _a * res_v4.x;
+        res_v4.y = sum_v4.y + _a * res_v4.x;
-        res_v4.z = in_v4.z + _tau * out_v4.z + _a * res_v4.y;
+        res_v4.z = sum_v4.z + _a * res_v4.y;
-        res_v4.w = in_v4.w + _tau * out_v4.w + _a * res_v4.z;
+        res_v4.w = sum_v4.w + _a * res_v4.z;
        vstore4(res_v4, 0, optr);
    }
    res = res_v4.w;
    // process the remaining (cols % 4) elements one at a time
    for(int i = 0; i < cols % 4;  ++i, ++iptr, ++optr)
    {
        res = *iptr + _tau * *optr + _a * res;
        *optr = res;
    }
 }
-//_horizontalAnticausalFilter
+    optr = output + mad24(gid + 1, elements_per_row, -4 + out_offset / 4);
-kernel void horizontalAnticausalFilter(
+    res_v4 = (float4)(0);
-    global float * output,
+    for(int i = 0; i < elements_per_row / 4; ++i, optr -= 4)
    const int cols,
    const int rows,
    const int elements_per_row,
    const int out_offset,
    const float _a
 )
 {
    int gid = get_global_id(0);
    if(gid >= rows)
    {
        return;
    }
    global float * optr = output +
                          mad24(gid + 1, elements_per_row, - 1 + out_offset / 4);
    float4 result_v4 = (float4)(0), out_v4;
    float result = 0;
    // we assume elements_per_row is a multiple of WIDTH_MULTIPLE
    for(int i = 0; i < WIDTH_MULTIPLE; ++ i, -- optr)
    {
        if(i >= elements_per_row - cols)
        {
            result = *optr + _a * result;
        }
        *optr = result;
    }
    result_v4.x = result;
    optr -= 3;
    for(int i = WIDTH_MULTIPLE / 4; i < elements_per_row / 4; ++i, optr -= 4)
    {
        // shift left, `offset` is type `size_t` so it cannot be negative
        out_v4 = vload4(0, optr);
-        result_v4.w = out_v4.w + _a * result_v4.x;
+        res_v4.w = out_v4.w + _a * res_v4.x;
-        result_v4.z = out_v4.z + _a * result_v4.w;
+        res_v4.z = out_v4.z + _a * res_v4.w;
-        result_v4.y = out_v4.y + _a * result_v4.z;
+        res_v4.y = out_v4.y + _a * res_v4.z;
-        result_v4.x = out_v4.x + _a * result_v4.y;
+        res_v4.x = out_v4.x + _a * res_v4.y;
-        vstore4(result_v4, 0, optr);
+        vstore4(res_v4, 0, optr);
    }
 }
@ -151,26 +114,37 @@ kernel void verticalCausalFilter(
    const int rows,
    const int elements_per_row,
    const int out_offset,
-    const float _a
+    const float _a,
    const float _gain
 )
 {
-    int gid = get_global_id(0);
+    int gid = get_global_id(0) * 2;
    if(gid >= cols)
    {
        return;
    }
    global float * optr = output + gid + out_offset / 4;
-    float result = 0;
+    float2 input;
    float2 result = (float2)0;
    for(int i = 0; i < rows; ++i, optr += elements_per_row)
    {
-        result = *optr + _a * result;
+        input = vload2(0, optr);
-        *optr = result;
+        result = input + _a * result;
        vstore2(result, 0, optr);
    }
    optr = output + (rows - 1) * elements_per_row + gid + out_offset / 4;
    result = (float2)0;
    for(int i = 0; i < rows; ++i, optr -= elements_per_row)
    {
        input = vload2(0, optr);
        result = input + _a * result;
        vstore2(_gain * result, 0, optr);
    }
 }
-//_verticalCausalFilter
+kernel void verticalCausalFilter_multichannel(
 kernel void verticalAnticausalFilter_multGain(
    global float * output,
    const int cols,
    const int rows,
@ -180,74 +154,69 @@ kernel void verticalAnticausalFilter_multGain(
    const float _gain
 )
 {
-    int gid = get_global_id(0);
+    int gid = get_global_id(0) * 2;
    if(gid >= cols)
    {
        return;
    }
-    global float * optr = output + (rows - 1) * elements_per_row + gid + out_offset / 4;
+    global float * optr[3];
-    float result = 0;
+    float2 input[3];
-    for(int i = 0; i < rows; ++i, optr -= elements_per_row)
+    float2 result[3] = { (float2)0, (float2)0, (float2)0 };
    {
        result = *optr + _a * result;
        *optr = _gain * result;
    }
 }
 //
 // end of _spatiotemporalLPfilter
 /////////////////////////////////////////////////////////////////////
-//////////////// horizontalAnticausalFilter_Irregular ////////////////
+    optr[0] = output + gid + out_offset / 4;
-kernel void horizontalAnticausalFilter_Irregular(
+    optr[1] = output + gid + out_offset / 4 + rows * elements_per_row;
-    global float * output,
+    optr[2] = output + gid + out_offset / 4 + 2 * rows * elements_per_row;
-    global float * buffer,
+
-    const int cols,
+    for(int i = 0; i < rows; ++i)
    const int rows,
    const int elements_per_row,
    const int out_offset,
    const int buffer_offset
 )
 {
    int gid = get_global_id(0);
    if(gid >= rows)
    {
-        return;
+        input[0] = vload2(0, optr[0]);
-    }
+        input[1] = vload2(0, optr[1]);
        input[2] = vload2(0, optr[2]);
-    global float * optr =
+        result[0] = input[0] + _a * result[0];
-        output + mad24(rows - gid, elements_per_row, -1 + out_offset / 4);
+        result[1] = input[1] + _a * result[1];
-    global float * bptr =
+        result[2] = input[2] + _a * result[2];
        buffer + mad24(rows - gid, elements_per_row, -1 + buffer_offset / 4);
-    float4 buf_v4, out_v4, res_v4 = (float4)(0);
+        vstore2(result[0], 0, optr[0]);
-    float result = 0;
+        vstore2(result[1], 0, optr[1]);
-    // we assume elements_per_row is multple of WIDTH_MULTIPLE
+        vstore2(result[2], 0, optr[2]);
-    for(int i = 0; i < WIDTH_MULTIPLE; ++ i, -- optr, -- bptr)
+
-    {
+        optr[0] += elements_per_row;
-        if(i >= elements_per_row - cols)
+        optr[1] += elements_per_row;
-        {
+        optr[2] += elements_per_row;
            result = *optr + *bptr * result;
        }
        *optr = result;
    }
-    res_v4.x = result;
+
-    optr -= 3;
+    optr[0] = output + (rows - 1) * elements_per_row + gid + out_offset / 4;
-    bptr -= 3;
+    optr[1] = output + (rows - 1) * elements_per_row + gid + out_offset / 4 + rows * elements_per_row;
-    for(int i = WIDTH_MULTIPLE / 4; i < elements_per_row / 4; ++i, optr -= 4, bptr -= 4)
+    optr[2] = output + (rows - 1) * elements_per_row + gid + out_offset / 4 + 2 * rows * elements_per_row;
    result[0] = result[1] = result[2] = (float2)0;
    for(int i = 0; i < rows; ++i)
    {
-        buf_v4 = vload4(0, bptr);
+        input[0] = vload2(0, optr[0]);
-        out_v4 = vload4(0, optr);
+        input[1] = vload2(0, optr[1]);
        input[2] = vload2(0, optr[2]);
-        res_v4.w = out_v4.w + buf_v4.w * res_v4.x;
+        result[0] = input[0] + _a * result[0];
-        res_v4.z = out_v4.z + buf_v4.z * res_v4.w;
+        result[1] = input[1] + _a * result[1];
-        res_v4.y = out_v4.y + buf_v4.y * res_v4.z;
+        result[2] = input[2] + _a * result[2];
        res_v4.x = out_v4.x + buf_v4.x * res_v4.y;
-        vstore4(res_v4, 0, optr);
+        vstore2(_gain * result[0], 0, optr[0]);
        vstore2(_gain * result[1], 0, optr[1]);
        vstore2(_gain * result[2], 0, optr[2]);
        optr[0] -= elements_per_row;
        optr[1] -= elements_per_row;
        optr[2] -= elements_per_row;
    }
 }
 //
 // end of _spatiotemporalLPfilter
 /////////////////////////////////////////////////////////////////////
 //////////////// verticalCausalFilter_Irregular ////////////////
 //////////////// verticalCausalFilter_Irregular ////////////////
 kernel void verticalCausalFilter_Irregular(
    global float * output,
@ -256,22 +225,61 @@ kernel void verticalCausalFilter_Irregular(
    const int rows,
    const int elements_per_row,
    const int out_offset,
-    const int buffer_offset
+    const int buffer_offset,
    const float gain
 )
 {
-    int gid = get_global_id(0);
+    int gid = get_global_id(0) * 2;
    if(gid >= cols)
    {
        return;
    }
-    global float * optr = output + gid + out_offset / 4;
+    global float * optr[3];
    global float * bptr = buffer + gid + buffer_offset / 4;
-    float result = 0;
+    float2 result[3] = { (float2)0, (float2)0, (float2)0 };
-    for(int i = 0; i < rows; ++i, optr += elements_per_row, bptr += elements_per_row)
+    float2 grad, input[3];
-    {
+    optr[0] = output + gid + out_offset / 4;
-        result = *optr + *bptr * result;
+    optr[1] = output + gid + out_offset / 4 + rows * elements_per_row;
-        *optr = result;
+    optr[2] = output + gid + out_offset / 4 + 2 * rows * elements_per_row;
    for(int i = 0; i < rows; ++i, bptr += elements_per_row)
    {
        input[0] = vload2(0, optr[0]);
        input[1] = vload2(0, optr[1]);
        input[2] = vload2(0, optr[2]);
        grad = vload2(0, bptr);
        result[0] = input[0] + grad * result[0];
        result[1] = input[1] + grad * result[1];
        result[2] = input[2] + grad * result[2];
        vstore2(result[0], 0, optr[0]);
        vstore2(result[1], 0, optr[1]);
        vstore2(result[2], 0, optr[2]);
        optr[0] += elements_per_row;
        optr[1] += elements_per_row;
        optr[2] += elements_per_row;
    }
    int start_idx = mad24(rows - 1, elements_per_row, gid);
    optr[0] = output + start_idx + out_offset / 4;
    optr[1] = output + start_idx + out_offset / 4 + rows * elements_per_row;
    optr[2] = output + start_idx + out_offset / 4 + 2 * rows * elements_per_row;
    bptr = buffer + start_idx + buffer_offset / 4;
    result[0] = result[1] = result[2] = (float2)0;
    for(int i = 0; i < rows; ++i, bptr -= elements_per_row)
    {
        input[0] = vload2(0, optr[0]);
        input[1] = vload2(0, optr[1]);
        input[2] = vload2(0, optr[2]);
        grad = vload2(0, bptr);
        result[0] = input[0] + grad * result[0];
        result[1] = input[1] + grad * result[1];
        result[2] = input[2] + grad * result[2];
        vstore2(gain * result[0], 0, optr[0]);
        vstore2(gain * result[1], 0, optr[1]);
        vstore2(gain * result[2], 0, optr[2]);
        optr[0] -= elements_per_row;
        optr[1] -= elements_per_row;
        optr[2] -= elements_per_row;
    }
 }
@ -314,41 +322,22 @@ kernel void adaptiveHorizontalCausalFilter_addInput(
        vstore4(res_v4, 0, optr);
    }
    for(int i = 0; i < cols % 4; ++i, ++iptr, ++gptr, ++optr)
    {
        res_v4.w = *iptr + *gptr * res_v4.w;
        *optr = res_v4.w;
    }
 }
-//////////////// _adaptiveVerticalAnticausalFilter_multGain ////////////////
+    optr = output + mad24(gid + 1, elements_per_row, -4 + out_offset / 4);
-kernel void adaptiveVerticalAnticausalFilter_multGain(
+    gptr = gradient + mad24(gid + 1, elements_per_row, -4 + grad_offset / 4);
-    global const float * gradient,
+    res_v4 = (float4)(0);
    global float * output,
    const int cols,
    const int rows,
    const int elements_per_row,
    const int grad_offset,
    const int out_offset,
    const float gain
 )
 {
    int gid = get_global_id(0);
    if(gid >= cols)
    {
        return;
    }
-    int start_idx = mad24(rows - 1, elements_per_row, gid);
+    for(int i = 0; i < cols / 4; ++i, gptr -= 4, optr -= 4)
    {
        grad_v4 = vload4(0, gptr);
        out_v4 = vload4(0, optr);
-    global const float * gptr = gradient + start_idx + grad_offset / 4;
+        res_v4.w = out_v4.w + grad_v4.w * res_v4.x;
-    global float * optr = output + start_idx + out_offset / 4;
+        res_v4.z = out_v4.z + grad_v4.z * res_v4.w;
        res_v4.y = out_v4.y + grad_v4.y * res_v4.z;
        res_v4.x = out_v4.x + grad_v4.x * res_v4.y;
-    float result = 0;
+        vstore4(res_v4, 0, optr);
    for(int i = 0; i < rows; ++i, gptr -= elements_per_row, optr -= elements_per_row)
    {
        result = *optr + *gptr * result;
        *optr = gain * result;
    }
 }
@ -367,17 +356,18 @@ kernel void localLuminanceAdaptation(
    const float _maxInputValue
 )
 {
-    int gidx = get_global_id(0), gidy = get_global_id(1);
+    int gidx = get_global_id(0) * 4, gidy = get_global_id(1);
    if(gidx >= cols || gidy >= rows)
    {
        return;
    }
    int offset = mad24(gidy, elements_per_row, gidx);
-
+    float4 luma_vec = vload4(0, luma + offset);
-    float X0 = luma[offset] * _localLuminanceFactor + _localLuminanceAddon;
+    float4 X0 = luma_vec * _localLuminanceFactor + _localLuminanceAddon;
-    float input_val = input[offset];
+    float4 input_val = vload4(0, input + offset);
    // output of the following line may be different between GPU and CPU
-    output[offset] = (_maxInputValue + X0) * input_val / (input_val + X0 + 0.00000000001f);
+    float4 out_vec = (_maxInputValue + X0) * input_val / (input_val + X0 + 0.00000000001f);
    vstore4(out_vec, 0, output + offset);
 }
 // end of basicretinafilter
 //------------------------------------------------------
@ -403,7 +393,7 @@ kernel void amacrineCellsComputing(
    const float coeff
 )
 {
-    int gidx = get_global_id(0), gidy = get_global_id(1);
+    int gidx = get_global_id(0) * 4, gidy = get_global_id(1);
    if(gidx >= cols || gidy >= rows)
    {
        return;
@ -417,13 +407,16 @@ kernel void amacrineCellsComputing(
    out_on      += offset;
    out_off     += offset;
-    float magnoXonPixelResult = coeff * (*out_on + *opl_on - *prev_in_on);
+    float4 val_opl_on = vload4(0, opl_on);
-    *out_on = fmax(magnoXonPixelResult, 0);
+    float4 val_opl_off = vload4(0, opl_off);
    float magnoXoffPixelResult = coeff * (*out_off + *opl_off - *prev_in_off);
    *out_off = fmax(magnoXoffPixelResult, 0);
-    *prev_in_on = *opl_on;
+    float4 magnoXonPixelResult = coeff * (vload4(0, out_on) + val_opl_on - vload4(0, prev_in_on));
-    *prev_in_off = *opl_off;
+    vstore4(fmax(magnoXonPixelResult, 0), 0, out_on);
    float4 magnoXoffPixelResult = coeff * (vload4(0, out_off) + val_opl_off - vload4(0, prev_in_off));
    vstore4(fmax(magnoXoffPixelResult, 0), 0, out_off);
    vstore4(val_opl_on, 0, prev_in_on);
    vstore4(val_opl_off, 0, prev_in_off);
 }
 /////////////////////////////////////////////////////////
@ -457,11 +450,7 @@ kernel void OPL_OnOffWaysComputing(
    parvo_off += offset;
    float4 diff = *photo_out - *horiz_out;
-    float4 isPositive;// = convert_float4(diff > (float4)(0.0f, 0.0f, 0.0f, 0.0f));
+    float4 isPositive = convert_float4(abs(diff > (float4)0.0f));
    isPositive.x = diff.x > 0.0f;
    isPositive.y = diff.y > 0.0f;
    isPositive.z = diff.z > 0.0f;
    isPositive.w = diff.w > 0.0f;
    float4 res_on  = isPositive * diff;
    float4 res_off = (isPositive - (float4)(1.0f)) * diff;
@ -491,14 +480,19 @@ kernel void runColorMultiplexingBayer(
    const int elements_per_row
 )
 {
-    int gidx = get_global_id(0), gidy = get_global_id(1);
+    int gidx = get_global_id(0) * 4, gidy = get_global_id(1);
    if(gidx >= cols || gidy >= rows)
    {
        return;
    }
    int offset = mad24(gidy, elements_per_row, gidx);
-    output[offset] = input[bayerSampleOffset(elements_per_row, rows, gidx, gidy)];
+    float4 val;
    val.x = input[bayerSampleOffset(elements_per_row, rows, gidx + 0, gidy)];
    val.y = input[bayerSampleOffset(elements_per_row, rows, gidx + 1, gidy)];
    val.z = input[bayerSampleOffset(elements_per_row, rows, gidx + 2, gidy)];
    val.w = input[bayerSampleOffset(elements_per_row, rows, gidx + 3, gidy)];
    vstore4(val, 0, output + offset);
 }
 kernel void runColorDemultiplexingBayer(
@ -509,14 +503,18 @@ kernel void runColorDemultiplexingBayer(
    const int elements_per_row
 )
 {
-    int gidx = get_global_id(0), gidy = get_global_id(1);
+    int gidx = get_global_id(0) * 4, gidy = get_global_id(1);
    if(gidx >= cols || gidy >= rows)
    {
        return;
    }
    int offset = mad24(gidy, elements_per_row, gidx);
-    output[bayerSampleOffset(elements_per_row, rows, gidx, gidy)] = input[offset];
+    float4 val = vload4(0, input + offset);
    output[bayerSampleOffset(elements_per_row, rows, gidx + 0, gidy)] = val.x;
    output[bayerSampleOffset(elements_per_row, rows, gidx + 1, gidy)] = val.y;
    output[bayerSampleOffset(elements_per_row, rows, gidx + 2, gidy)] = val.z;
    output[bayerSampleOffset(elements_per_row, rows, gidx + 3, gidy)] = val.w;
 }
 kernel void demultiplexAssign(
@ -550,16 +548,16 @@ kernel void normalizeGrayOutputCentredSigmoide(
 )
 {
-    int gidx = get_global_id(0), gidy = get_global_id(1);
+    int gidx = get_global_id(0) * 4, gidy = get_global_id(1);
    if(gidx >= cols || gidy >= rows)
    {
        return;
    }
    int offset = mad24(gidy, elements_per_row, gidx);
-    float input_val = input[offset];
+    float4 input_val = vload4(0, input + offset);
-    output[offset] = meanval +
+    input_val =  meanval + (meanval + X0) * (input_val - meanval) / (fabs(input_val - meanval) + X0);
-                     (meanval + X0) * (input_val - meanval) / (fabs(input_val - meanval) + X0);
+    vstore4(input_val, 0, output + offset);
 }
 //// normalize by photoreceptors density
@ -575,7 +573,7 @@ kernel void normalizePhotoDensity(
    const float pG
 )
 {
-    const int gidx = get_global_id(0), gidy = get_global_id(1);
+    const int gidx = get_global_id(0) * 4, gidy = get_global_id(1);
    if(gidx >= cols || gidy >= rows)
    {
        return;
@ -583,16 +581,19 @@ kernel void normalizePhotoDensity(
    const int offset = mad24(gidy, elements_per_row, gidx);
    int index = offset;
-    float Cr = chroma[index] * colorDensity[index];
+    float4 Cr = vload4(0, chroma + index) * vload4(0, colorDensity + index);
    index += elements_per_row * rows;
-    float Cg = chroma[index] * colorDensity[index];
+    float4 Cg = vload4(0, chroma + index) * vload4(0, colorDensity + index);
    index += elements_per_row * rows;
-    float Cb = chroma[index] * colorDensity[index];
+    float4 Cb = vload4(0, chroma + index) * vload4(0, colorDensity + index);
-    const float luma_res = (Cr + Cg + Cb) * pG;
+    const float4 luma_res = (Cr + Cg + Cb) * pG;
-    luma[offset] = luma_res;
+    vstore4(luma_res, 0, luma + offset);
-    demultiplex[bayerSampleOffset(elements_per_row, rows, gidx, gidy)] =
+    float4 res_v4 = vload4(0, multiplex + offset) - luma_res;
-        multiplex[offset] - luma_res;
+    demultiplex[bayerSampleOffset(elements_per_row, rows, gidx + 0, gidy)] = res_v4.x;
    demultiplex[bayerSampleOffset(elements_per_row, rows, gidx + 1, gidy)] = res_v4.y;
    demultiplex[bayerSampleOffset(elements_per_row, rows, gidx + 2, gidy)] = res_v4.z;
    demultiplex[bayerSampleOffset(elements_per_row, rows, gidx + 3, gidy)] = res_v4.w;
 }
@ -629,7 +630,8 @@ kernel void computeGradient(
    const float horiz_grad = 0.5f * h_grad + 0.25f * (h_grad_p + h_grad_n);
    const float verti_grad = 0.5f * v_grad + 0.25f * (v_grad_p + v_grad_n);
-    const bool is_vertical_greater = horiz_grad < verti_grad;
+    const bool is_vertical_greater = (horiz_grad < verti_grad) &&
                                     ((verti_grad - horiz_grad) > 1e-5);
    gradient[offset + elements_per_row * rows] = is_vertical_greater ? 0.06f : 0.57f;
    gradient[offset                          ] = is_vertical_greater ? 0.57f : 0.06f;
@ -647,7 +649,7 @@ kernel void substractResidual(
    const float pB
 )
 {
-    const int gidx = get_global_id(0), gidy = get_global_id(1);
+    const int gidx = get_global_id(0) * 4, gidy = get_global_id(1);
    if(gidx >= cols || gidy >= rows)
    {
        return;
@ -658,12 +660,15 @@ kernel void substractResidual(
        mad24(gidy + rows, elements_per_row, gidx),
        mad24(gidy + 2 * rows, elements_per_row, gidx)
    };
-    float vals[3] = {input[indices[0]], input[indices[1]], input[indices[2]]};
+    float4 vals[3];
-    float residu = pR * vals[0] + pG * vals[1] + pB * vals[2];
+    vals[0] = vload4(0, input + indices[0]);
    vals[1] = vload4(0, input + indices[1]);
    vals[2] = vload4(0, input + indices[2]);
-    input[indices[0]] = vals[0] - residu;
+    float4 residu = pR * vals[0] + pG * vals[1] + pB * vals[2];
-    input[indices[1]] = vals[1] - residu;
+    vstore4(vals[0] - residu, 0, input + indices[0]);
-    input[indices[2]] = vals[2] - residu;
+    vstore4(vals[1] - residu, 0, input + indices[1]);
    vstore4(vals[2] - residu, 0, input + indices[2]);
 }
 ///// clipRGBOutput_0_maxInputValue /////
@ -675,15 +680,15 @@ kernel void clipRGBOutput_0_maxInputValue(
    const float maxVal
 )
 {
-    const int gidx = get_global_id(0), gidy = get_global_id(1);
+    const int gidx = get_global_id(0) * 4, gidy = get_global_id(1);
    if(gidx >= cols || gidy >= rows)
    {
        return;
    }
    const int offset = mad24(gidy, elements_per_row, gidx);
-    float val = input[offset];
+    float4 val = vload4(0, input + offset);
    val = clamp(val, 0.0f, maxVal);
-    input[offset] = val;
+    vstore4(val, 0, input + offset);
 }
 //// normalizeGrayOutputNearZeroCentreredSigmoide ////
@ -697,15 +702,16 @@ kernel void normalizeGrayOutputNearZeroCentreredSigmoide(
    const float X0cube
 )
 {
-    const int gidx = get_global_id(0), gidy = get_global_id(1);
+    const int gidx = get_global_id(0) * 4, gidy = get_global_id(1);
    if(gidx >= cols || gidy >= rows)
    {
        return;
    }
    const int offset = mad24(gidy, elements_per_row, gidx);
-    float currentCubeLuminance = input[offset];
+    float4 currentCubeLuminance = vload4(0, input + offset);
    currentCubeLuminance = currentCubeLuminance * currentCubeLuminance * currentCubeLuminance;
-    output[offset] = currentCubeLuminance * X0cube / (X0cube + currentCubeLuminance);
+    float4 val = currentCubeLuminance * X0cube / (X0cube + currentCubeLuminance);
    vstore4(val, 0, output + offset);
 }
 //// centerReductImageLuminance ////
@ -718,15 +724,16 @@ kernel void centerReductImageLuminance(
    const float std_dev
 )
 {
-    const int gidx = get_global_id(0), gidy = get_global_id(1);
+    const int gidx = get_global_id(0) * 4, gidy = get_global_id(1);
    if(gidx >= cols || gidy >= rows)
    {
        return;
    }
    const int offset = mad24(gidy, elements_per_row, gidx);
-    float val = input[offset];
+    float4 val = vload4(0, input + offset);
-    input[offset] = (val - mean) / std_dev;
+    val = (val - mean) / std_dev;
    vstore4(val, 0, input + offset);
 }
 //// inverseValue ////
@ -737,13 +744,15 @@ kernel void inverseValue(
    const int elements_per_row
 )
 {
-    const int gidx = get_global_id(0), gidy = get_global_id(1);
+    const int gidx = get_global_id(0) * 4, gidy = get_global_id(1);
    if(gidx >= cols || gidy >= rows)
    {
        return;
    }
    const int offset = mad24(gidy, elements_per_row, gidx);
-    input[offset] = 1.f / input[offset];
+    float4 val = vload4(0, input + offset);
    val = 1.f / val;
    vstore4(val, 0, input + offset);
 }
 #define CV_PI 3.1415926535897932384626433832795
--- a/modules/bioinspired/src/precomp.hpp
+++ b/modules/bioinspired/src/precomp.hpp
@ -48,13 +48,10 @@
 #include "opencv2/core/utility.hpp"
 #include "opencv2/core/private.hpp"
 #include "opencv2/core/ocl.hpp"
 #include "opencv2/core/opencl/ocl_defs.hpp"
 #include <valarray>
 #ifdef HAVE_OPENCV_OCL
    #include "opencv2/ocl/private/util.hpp"
 #endif
 namespace cv
 {
--- a/modules/bioinspired/src/retina.cpp
+++ b/modules/bioinspired/src/retina.cpp
@ -70,6 +70,7 @@
 */
 #include "precomp.hpp"
 #include "retinafilter.hpp"
 #include "retina_ocl.hpp"
 #include <cstdio>
 #include <sstream>
 #include <valarray>
@ -292,11 +293,25 @@ private:
    bool _convertCvMat2ValarrayBuffer(InputArray inputMatToConvert, std::valarray<float> &outputValarrayMatrix);
 #ifdef HAVE_OPENCL
    ocl::RetinaOCLImpl* _ocl_retina;
    bool ocl_run(InputArray inputImage);
    bool ocl_getParvo(OutputArray retinaOutput_parvo);
    bool ocl_getMagno(OutputArray retinaOutput_magno);
    bool ocl_getParvoRAW(OutputArray retinaOutput_parvo);
    bool ocl_getMagnoRAW(OutputArray retinaOutput_magno);
 #endif
 };
 // smart pointers allocation :
-Ptr<Retina> createRetina(Size inputSize){ return makePtr<RetinaImpl>(inputSize); }
+Ptr<Retina> createRetina(Size inputSize)
-Ptr<Retina> createRetina(Size inputSize, const bool colorMode, int colorSamplingMethod, const bool useRetinaLogSampling, const float reductionFactor, const float samplingStrenght){
+{
    return makePtr<RetinaImpl>(inputSize);
 }
 // Factory for a fully parameterised retina: every argument is forwarded unchanged
 // to the RetinaImpl constructor and the result is handed back through the
 // Retina interface pointer.
 Ptr<Retina> createRetina(Size inputSize, const bool colorMode, int colorSamplingMethod, const bool useRetinaLogSampling, const float reductionFactor, const float samplingStrenght)
 {
    Ptr<RetinaImpl> retina = makePtr<RetinaImpl>(inputSize, colorMode, colorSamplingMethod,
                                                 useRetinaLogSampling, reductionFactor, samplingStrenght);
    return retina;
 }
@ -306,18 +321,34 @@ RetinaImpl::RetinaImpl(const cv::Size inputSz)
 {
    _retinaFilter = 0;
    _init(inputSz, true, RETINA_COLOR_BAYER, false);
 #ifdef HAVE_OPENCL
    _ocl_retina = 0;
    if (inputSz.width % 4 == 0)
        _ocl_retina = new ocl::RetinaOCLImpl(inputSz);
 #endif
 }
 // Full constructor: initialises the CPU retina through _init() and, in builds with
 // OpenCL support, optionally creates an OpenCL-backed implementation as well.
 RetinaImpl::RetinaImpl(const cv::Size inputSz, const bool colorMode, int colorSamplingMethod, const bool useRetinaLogSampling, const float reductionFactor, const float samplingStrenght)
 {
    _retinaFilter = 0;
    _init(inputSz, colorMode, colorSamplingMethod, useRetinaLogSampling, reductionFactor, samplingStrenght);
 #ifdef HAVE_OPENCL
    // The OpenCL implementation is only instantiated when the input width is a
    // multiple of 4; otherwise the pointer stays null.
    // NOTE(review): presumably because the OpenCL kernels read/write 4 floats per
    // work-item (vload4/vstore4) - confirm against the kernel launch code.
    const bool widthIsMultipleOf4 = (inputSz.width % 4) == 0;
    _ocl_retina = widthIsMultipleOf4
        ? new ocl::RetinaOCLImpl(inputSz, colorMode, colorSamplingMethod,
                                 useRetinaLogSampling, reductionFactor, samplingStrenght)
        : 0;
 #endif
 }
 // Destructor: frees the CPU filter and, in OpenCL builds, the OpenCL retina.
 RetinaImpl::~RetinaImpl()
 {
    // delete on a null pointer is a no-op, so no explicit null test is required
    delete _retinaFilter;
 #ifdef HAVE_OPENCL
    delete _ocl_retina;
 #endif
 }
 /**
@ -529,8 +560,18 @@ void RetinaImpl::setupIPLMagnoChannel(const bool normaliseOutput, const float pa
    _retinaParameters.IplMagno.localAdaptintegration_k = localAdaptintegration_k;
 }
 #ifdef HAVE_OPENCL
 // OpenCL branch of run(): hands the input frame to the OpenCL retina.
 // Always returns true; the pointer is not checked here, callers test
 // _ocl_retina != 0 before invoking it.
 bool RetinaImpl::ocl_run(InputArray inputMatToConvert)
 {
    ocl::RetinaOCLImpl &oclRetina = *_ocl_retina;
    oclRetina.run(inputMatToConvert);
    return true;
 }
 #endif
 void RetinaImpl::run(InputArray inputMatToConvert)
 {
    CV_OCL_RUN((_ocl_retina != 0), ocl_run(inputMatToConvert));
    // first convert input image to the compatible format : std::valarray<float>
    const bool colorMode = _convertCvMat2ValarrayBuffer(inputMatToConvert.getMat(), _inputBuffer);
    // process the retina
@ -559,8 +600,18 @@ void RetinaImpl::applyFastToneMapping(InputArray inputImage, OutputArray outputT
 }
 #ifdef HAVE_OPENCL
 // OpenCL branch of getParvo(): asks the OpenCL retina to fill the parvo output.
 // Always returns true; the pointer is not checked here, callers test
 // _ocl_retina != 0 before invoking it.
 bool RetinaImpl::ocl_getParvo(OutputArray retinaOutput_parvo)
 {
    ocl::RetinaOCLImpl &oclRetina = *_ocl_retina;
    oclRetina.getParvo(retinaOutput_parvo);
    return true;
 }
 #endif
 void RetinaImpl::getParvo(OutputArray retinaOutput_parvo)
 {
    CV_OCL_RUN((_ocl_retina != 0) && retinaOutput_parvo.isUMat(), ocl_getParvo(retinaOutput_parvo));
    if (_retinaFilter->getColorMode())
    {
        // reallocate output buffer (if necessary)
@ -572,24 +623,57 @@ void RetinaImpl::getParvo(OutputArray retinaOutput_parvo)
    }
    //retinaOutput_parvo/=255.0;
 }
 #ifdef HAVE_OPENCL
 // OpenCL branch of getMagno(): asks the OpenCL retina to fill the magno output.
 // Always returns true; the pointer is not checked here, callers test
 // _ocl_retina != 0 before invoking it.
 bool RetinaImpl::ocl_getMagno(OutputArray retinaOutput_magno)
 {
    ocl::RetinaOCLImpl &oclRetina = *_ocl_retina;
    oclRetina.getMagno(retinaOutput_magno);
    return true;
 }
 #endif
 // Writes the magno channel (the filter's "moving contours" buffer) into
 // retinaOutput_magno.
 // @param retinaOutput_magno destination array; a UMat destination is eligible
 //        for the OpenCL path, anything else is served from the CPU filter.
 void RetinaImpl::getMagno(OutputArray retinaOutput_magno)
 {
    // OpenCL path: requested only when an OpenCL retina exists AND the caller passed a UMat.
    // CV_OCL_RUN is expected to return from this function when that branch is taken
    // (see OpenCV's core/private.hpp) - the code below is then the CPU fallback.
    // NOTE(review): run() dispatches to OpenCL on (_ocl_retina != 0) alone, without an
    // isUMat() test, so a Mat destination here may read CPU buffers that the last run()
    // never updated - confirm the intended pairing of run()/getMagno() argument types.
    CV_OCL_RUN((_ocl_retina != 0) && retinaOutput_magno.isUMat(), ocl_getMagno(retinaOutput_magno));
    // reallocate output buffer (if necessary)
    _convertValarrayBuffer2cvMat(_retinaFilter->getMovingContours(), _retinaFilter->getOutputNBrows(), _retinaFilter->getOutputNBcolumns(), false, retinaOutput_magno);
    //retinaOutput_magno/=255.0;
 }
 #ifdef HAVE_OPENCL
 // OpenCL branch of getMagnoRAW(OutputArray): forwards the request to the OpenCL retina.
 // Always returns true; the pointer is not checked here, callers test
 // _ocl_retina != 0 before invoking it.
 bool RetinaImpl::ocl_getMagnoRAW(OutputArray magnoOutputBufferCopy)
 {
    ocl::RetinaOCLImpl &oclRetina = *_ocl_retina;
    oclRetina.getMagnoRAW(magnoOutputBufferCopy);
    return true;
 }
 #endif
 // original API level data accessors : copy buffers if size matches, reallocate if required
 // Copies the raw magno channel into magnoOutputBufferCopy.
 // A UMat destination is offered to the OpenCL retina first (when one exists);
 // otherwise the buffer returned by the argument-less getMagnoRAW() is copied.
 void RetinaImpl::getMagnoRAW(OutputArray magnoOutputBufferCopy)
 {
    // the condition must stay inside the macro: _ocl_retina only exists in OpenCL builds
    CV_OCL_RUN((_ocl_retina != 0) && magnoOutputBufferCopy.isUMat(), ocl_getMagnoRAW(magnoOutputBufferCopy));
    // header onto the magno channel, then copy its data to the caller's array
    const cv::Mat rawMagno(getMagnoRAW());
    rawMagno.copyTo(magnoOutputBufferCopy);
 }
 #ifdef HAVE_OPENCL
 // OpenCL branch of getParvoRAW(OutputArray): forwards the request to the OpenCL retina.
 // Always returns true; the pointer is not checked here, callers test
 // _ocl_retina != 0 before invoking it.
 bool RetinaImpl::ocl_getParvoRAW(OutputArray parvoOutputBufferCopy)
 {
    ocl::RetinaOCLImpl &oclRetina = *_ocl_retina;
    oclRetina.getParvoRAW(parvoOutputBufferCopy);
    return true;
 }
 #endif
 void RetinaImpl::getParvoRAW(OutputArray parvoOutputBufferCopy){
    CV_OCL_RUN((_ocl_retina != 0) && parvoOutputBufferCopy.isUMat(), ocl_getParvoRAW(parvoOutputBufferCopy));
    // get parvo channel header
-    const cv::Mat parvoChannel=cv::Mat(getMagnoRAW());
+    const cv::Mat parvoChannel=cv::Mat(getParvoRAW());
    // copy data
    parvoChannel.copyTo(parvoOutputBufferCopy);
 }
@ -649,7 +733,7 @@ void RetinaImpl::_convertValarrayBuffer2cvMat(const std::valarray<float> &grayMa
            for (unsigned int j=0;j<nbColumns;++j)
            {
                cv::Point2d pixel(j,i);
-                outMat.at<unsigned char>(pixel)=(unsigned char)*(valarrayPTR++);
+                outMat.at<unsigned char>(pixel)=(unsigned char)cvRound(*(valarrayPTR++));
            }
        }
    }
@ -665,9 +749,9 @@ void RetinaImpl::_convertValarrayBuffer2cvMat(const std::valarray<float> &grayMa
            {
                cv::Point2d pixel(j,i);
                cv::Vec3b pixelValues;
-                pixelValues[2]=(unsigned char)*(valarrayPTR);
+                pixelValues[2]=(unsigned char)cvRound(*(valarrayPTR));
-                pixelValues[1]=(unsigned char)*(valarrayPTR+nbPixels);
+                pixelValues[1]=(unsigned char)cvRound(*(valarrayPTR+nbPixels));
-                pixelValues[0]=(unsigned char)*(valarrayPTR+doubleNBpixels);
+                pixelValues[0]=(unsigned char)cvRound(*(valarrayPTR+doubleNBpixels));
                outMat.at<cv::Vec3b>(pixel)=pixelValues;
            }
@ -729,7 +813,15 @@ bool RetinaImpl::_convertCvMat2ValarrayBuffer(InputArray inputMat, std::valarray
    return imageNumberOfChannels>1; // return bool : false for gray level image processing, true for color mode
 }
-void RetinaImpl::clearBuffers() { _retinaFilter->clearAllBuffers(); }
+void RetinaImpl::clearBuffers()
 {
 #ifdef HAVE_OPENCL
    if (_ocl_retina != 0)
        _ocl_retina->clearBuffers();
 #endif
    _retinaFilter->clearAllBuffers();
 }
 void RetinaImpl::activateMovingContoursProcessing(const bool activate) { _retinaFilter->activateMovingContoursProcessing(activate); }
--- a/modules/bioinspired/src/retina_ocl.cpp
+++ b/modules/bioinspired/src/retina_ocl.cpp
--- a/modules/bioinspired/src/retina_ocl.hpp
+++ b/modules/bioinspired/src/retina_ocl.hpp
@ -47,8 +47,9 @@
 #define __OCL_RETINA_HPP__
 #include "precomp.hpp"
 #include "opencv2/bioinspired/retina.hpp"
-#ifdef HAVE_OPENCV_OCL
+#ifdef HAVE_OPENCL
 // please refer to c++ headers for API comments
 namespace cv
@ -57,10 +58,10 @@ namespace bioinspired
 {
 namespace ocl
 {
-void normalizeGrayOutputCentredSigmoide(const float meanValue, const float sensitivity, cv::ocl::oclMat &in, cv::ocl::oclMat &out, const float maxValue = 255.f);
+void normalizeGrayOutputCentredSigmoide(const float meanValue, const float sensitivity, UMat &in, UMat &out, const float maxValue = 255.f);
-void normalizeGrayOutput_0_maxOutputValue(cv::ocl::oclMat &inputOutputBuffer, const float maxOutputValue = 255.0);
+void normalizeGrayOutput_0_maxOutputValue(UMat &inputOutputBuffer, const float maxOutputValue = 255.0);
-void normalizeGrayOutputNearZeroCentreredSigmoide(cv::ocl::oclMat &inputPicture, cv::ocl::oclMat &outputBuffer, const float sensitivity = 40, const float maxOutputValue = 255.0f);
+void normalizeGrayOutputNearZeroCentreredSigmoide(UMat &inputPicture, UMat &outputBuffer, const float sensitivity = 40, const float maxOutputValue = 255.0f);
-void centerReductImageLuminance(cv::ocl::oclMat &inputOutputBuffer);
+void centerReductImageLuminance(UMat &inputOutputBuffer);
 class BasicRetinaFilter
 {
@ -81,13 +82,13 @@ public:
        clearSecondaryBuffer();
    }
    void  resize(const unsigned int NBrows, const unsigned int NBcolumns);
-    const cv::ocl::oclMat &runFilter_LPfilter(const cv::ocl::oclMat &inputFrame, const unsigned int filterIndex = 0);
+    const UMat &runFilter_LPfilter(const UMat &inputFrame, const unsigned int filterIndex = 0);
-    void  runFilter_LPfilter(const cv::ocl::oclMat &inputFrame, cv::ocl::oclMat &outputFrame, const unsigned int filterIndex = 0);
+    void  runFilter_LPfilter(const UMat &inputFrame, UMat &outputFrame, const unsigned int filterIndex = 0);
-    void  runFilter_LPfilter_Autonomous(cv::ocl::oclMat &inputOutputFrame, const unsigned int filterIndex = 0);
+    void  runFilter_LPfilter_Autonomous(UMat &inputOutputFrame, const unsigned int filterIndex = 0);
-    const cv::ocl::oclMat &runFilter_LocalAdapdation(const cv::ocl::oclMat &inputOutputFrame, const cv::ocl::oclMat &localLuminance);
+    const UMat &runFilter_LocalAdapdation(const UMat &inputOutputFrame, const UMat &localLuminance);
-    void  runFilter_LocalAdapdation(const cv::ocl::oclMat &inputFrame, const cv::ocl::oclMat &localLuminance, cv::ocl::oclMat &outputFrame);
+    void  runFilter_LocalAdapdation(const UMat &inputFrame, const UMat &localLuminance, UMat &outputFrame);
-    const cv::ocl::oclMat &runFilter_LocalAdapdation_autonomous(const cv::ocl::oclMat &inputFrame);
+    const UMat &runFilter_LocalAdapdation_autonomous(const UMat &inputFrame);
-    void  runFilter_LocalAdapdation_autonomous(const cv::ocl::oclMat &inputFrame, cv::ocl::oclMat &outputFrame);
+    void  runFilter_LocalAdapdation_autonomous(const UMat &inputFrame, UMat &outputFrame);
    void  setLPfilterParameters(const float beta, const float tau, const float k, const unsigned int filterIndex = 0);
    inline void setV0CompressionParameter(const float v0, const float maxInputValue, const float)
    {
@ -122,7 +123,7 @@ public:
    {
        return _v0 / _maxInputValue;
    }
-    inline const cv::ocl::oclMat &getOutput() const
+    inline const UMat &getOutput() const
    {
        return _filterOutput;
    }
@ -166,8 +167,8 @@ protected:
    unsigned int _halfNBrows;
    unsigned int _halfNBcolumns;
-    cv::ocl::oclMat _filterOutput;
+    UMat _filterOutput;
-    cv::ocl::oclMat _localBuffer;
+    UMat _localBuffer;
    std::valarray <float>_filteringCoeficientsTable;
    float _v0;
@ -180,19 +181,19 @@ protected:
    float _tau;
    float _gain;
-    void _spatiotemporalLPfilter(const cv::ocl::oclMat &inputFrame, cv::ocl::oclMat &LPfilterOutput, const unsigned int coefTableOffset = 0);
+    void _spatiotemporalLPfilter(const UMat &inputFrame, UMat &LPfilterOutput, const unsigned int coefTableOffset = 0);
-    float _squaringSpatiotemporalLPfilter(const cv::ocl::oclMat &inputFrame, cv::ocl::oclMat &outputFrame, const unsigned int filterIndex = 0);
+    void _spatiotemporalLPfilter_h(const UMat &inputFrame, UMat &LPfilterOutput, const unsigned int coefTableOffset = 0);
-    void _spatiotemporalLPfilter_Irregular(const cv::ocl::oclMat &inputFrame, cv::ocl::oclMat &outputFrame, const unsigned int filterIndex = 0);
+    void _spatiotemporalLPfilter_v(UMat &LPfilterOutput, const unsigned int multichannel = 0);
-    void _localSquaringSpatioTemporalLPfilter(const cv::ocl::oclMat &inputFrame, cv::ocl::oclMat &LPfilterOutput, const unsigned int *integrationAreas, const unsigned int filterIndex = 0);
+    float _squaringSpatiotemporalLPfilter(const UMat &inputFrame, UMat &outputFrame, const unsigned int filterIndex = 0);
-    void _localLuminanceAdaptation(const cv::ocl::oclMat &inputFrame, const cv::ocl::oclMat &localLuminance, cv::ocl::oclMat &outputFrame, const bool updateLuminanceMean = true);
+    void _spatiotemporalLPfilter_Irregular(const UMat &inputFrame, UMat &outputFrame, const unsigned int filterIndex = 0);
-    void _localLuminanceAdaptation(cv::ocl::oclMat &inputOutputFrame, const cv::ocl::oclMat &localLuminance);
+    void _localSquaringSpatioTemporalLPfilter(const UMat &inputFrame, UMat &LPfilterOutput, const unsigned int *integrationAreas, const unsigned int filterIndex = 0);
-    void _localLuminanceAdaptationPosNegValues(const cv::ocl::oclMat &inputFrame, const cv::ocl::oclMat &localLuminance, float *outputFrame);
+    void _localLuminanceAdaptation(const UMat &inputFrame, const UMat &localLuminance, UMat &outputFrame, const bool updateLuminanceMean = true);
-    void _horizontalCausalFilter_addInput(const cv::ocl::oclMat &inputFrame, cv::ocl::oclMat &outputFrame);
+    void _localLuminanceAdaptation(UMat &inputOutputFrame, const UMat &localLuminance);
-    void _horizontalAnticausalFilter(cv::ocl::oclMat &outputFrame);
+    void _localLuminanceAdaptationPosNegValues(const UMat &inputFrame, const UMat &localLuminance, float *outputFrame);
-    void _verticalCausalFilter(cv::ocl::oclMat &outputFrame);
+    void _horizontalCausalFilter_addInput(const UMat &inputFrame, UMat &outputFrame);
-    void _horizontalAnticausalFilter_Irregular(cv::ocl::oclMat &outputFrame, const cv::ocl::oclMat &spatialConstantBuffer);
+    void _verticalCausalFilter(UMat &outputFrame);
-    void _verticalCausalFilter_Irregular(cv::ocl::oclMat &outputFrame, const cv::ocl::oclMat &spatialConstantBuffer);
+    void _verticalCausalFilter_multichannel(UMat &outputFrame);
-    void _verticalAnticausalFilter_multGain(cv::ocl::oclMat &outputFrame);
+    void _verticalCausalFilter_Irregular(UMat &outputFrame, const UMat &spatialConstantBuffer);
 };
 class MagnoRetinaFilter: public BasicRetinaFilter
@ -204,17 +205,17 @@ public:
    void resize(const unsigned int NBrows, const unsigned int NBcolumns);
    void setCoefficientsTable(const float parasolCells_beta, const float parasolCells_tau, const float parasolCells_k, const float amacrinCellsTemporalCutFrequency, const float localAdaptIntegration_tau, const float localAdaptIntegration_k);
-    const cv::ocl::oclMat &runFilter(const cv::ocl::oclMat &OPL_ON, const cv::ocl::oclMat &OPL_OFF);
+    const UMat &runFilter(const UMat &OPL_ON, const UMat &OPL_OFF);
-    inline const cv::ocl::oclMat &getMagnoON() const
+    inline const UMat &getMagnoON() const
    {
        return _magnoXOutputON;
    }
-    inline const cv::ocl::oclMat &getMagnoOFF() const
+    inline const UMat &getMagnoOFF() const
    {
        return _magnoXOutputOFF;
    }
-    inline const cv::ocl::oclMat &getMagnoYsaturated() const
+    inline const UMat &getMagnoYsaturated() const
    {
        return _magnoYsaturated;
    }
@ -227,19 +228,19 @@ public:
        return this->_filteringCoeficientsTable[2];
    }
 private:
-    cv::ocl::oclMat _previousInput_ON;
+    UMat _previousInput_ON;
-    cv::ocl::oclMat _previousInput_OFF;
+    UMat _previousInput_OFF;
-    cv::ocl::oclMat _amacrinCellsTempOutput_ON;
+    UMat _amacrinCellsTempOutput_ON;
-    cv::ocl::oclMat _amacrinCellsTempOutput_OFF;
+    UMat _amacrinCellsTempOutput_OFF;
-    cv::ocl::oclMat _magnoXOutputON;
+    UMat _magnoXOutputON;
-    cv::ocl::oclMat _magnoXOutputOFF;
+    UMat _magnoXOutputOFF;
-    cv::ocl::oclMat _localProcessBufferON;
+    UMat _localProcessBufferON;
-    cv::ocl::oclMat _localProcessBufferOFF;
+    UMat _localProcessBufferOFF;
-    cv::ocl::oclMat _magnoYOutput;
+    UMat _magnoYOutput;
-    cv::ocl::oclMat _magnoYsaturated;
+    UMat _magnoYsaturated;
    float _temporalCoefficient;
-    void _amacrineCellsComputing(const cv::ocl::oclMat &OPL_ON,  const cv::ocl::oclMat &OPL_OFF);
+    void _amacrineCellsComputing(const UMat &OPL_ON,  const UMat &OPL_OFF);
 };
 class ParvoRetinaFilter: public BasicRetinaFilter
@ -255,34 +256,34 @@ public:
    {
        BasicRetinaFilter::setLPfilterParameters(0, tau, k, 2);
    }
-    const cv::ocl::oclMat &runFilter(const cv::ocl::oclMat &inputFrame, const bool useParvoOutput = true);
+    const UMat &runFilter(const UMat &inputFrame, const bool useParvoOutput = true);
-    inline const cv::ocl::oclMat &getPhotoreceptorsLPfilteringOutput() const
+    inline const UMat &getPhotoreceptorsLPfilteringOutput() const
    {
        return _photoreceptorsOutput;
    }
-    inline const cv::ocl::oclMat &getHorizontalCellsOutput() const
+    inline const UMat &getHorizontalCellsOutput() const
    {
        return _horizontalCellsOutput;
    }
-    inline const cv::ocl::oclMat &getParvoON() const
+    inline const UMat &getParvoON() const
    {
        return _parvocellularOutputON;
    }
-    inline const cv::ocl::oclMat &getParvoOFF() const
+    inline const UMat &getParvoOFF() const
    {
        return _parvocellularOutputOFF;
    }
-    inline const cv::ocl::oclMat &getBipolarCellsON() const
+    inline const UMat &getBipolarCellsON() const
    {
        return _bipolarCellsOutputON;
    }
-    inline const cv::ocl::oclMat &getBipolarCellsOFF() const
+    inline const UMat &getBipolarCellsOFF() const
    {
        return _bipolarCellsOutputOFF;
    }
@ -297,15 +298,15 @@ public:
        return this->_filteringCoeficientsTable[5];
    }
 private:
-    cv::ocl::oclMat _photoreceptorsOutput;
+    UMat _photoreceptorsOutput;
-    cv::ocl::oclMat _horizontalCellsOutput;
+    UMat _horizontalCellsOutput;
-    cv::ocl::oclMat _parvocellularOutputON;
+    UMat _parvocellularOutputON;
-    cv::ocl::oclMat _parvocellularOutputOFF;
+    UMat _parvocellularOutputOFF;
-    cv::ocl::oclMat _bipolarCellsOutputON;
+    UMat _bipolarCellsOutputON;
-    cv::ocl::oclMat _bipolarCellsOutputOFF;
+    UMat _bipolarCellsOutputOFF;
-    cv::ocl::oclMat _localAdaptationOFF;
+    UMat _localAdaptationOFF;
-    cv::ocl::oclMat _localAdaptationON;
+    UMat _localAdaptationON;
-    cv::ocl::oclMat _parvocellularOutputONminusOFF;
+    UMat _parvocellularOutputONminusOFF;
    void _OPL_OnOffWaysComputing();
 };
 class RetinaColor: public BasicRetinaFilter
@ -316,12 +317,12 @@ public:
    void clearAllBuffers();
    void resize(const unsigned int NBrows, const unsigned int NBcolumns);
-    inline void runColorMultiplexing(const cv::ocl::oclMat &inputRGBFrame)
+    inline void runColorMultiplexing(const UMat &inputRGBFrame)
    {
        runColorMultiplexing(inputRGBFrame, _multiplexedFrame);
    }
-    void runColorMultiplexing(const cv::ocl::oclMat &demultiplexedInputFrame, cv::ocl::oclMat &multiplexedFrame);
+    void runColorMultiplexing(const UMat &demultiplexedInputFrame, UMat &multiplexedFrame);
-    void runColorDemultiplexing(const cv::ocl::oclMat &multiplexedColorFrame, const bool adaptiveFiltering = false, const float maxInputValue = 255.0);
+    void runColorDemultiplexing(const UMat &multiplexedColorFrame, const bool adaptiveFiltering = false, const float maxInputValue = 255.0);
    void setColorSaturation(const bool saturateColors = true, const float colorSaturationValue = 4.0)
    {
@ -334,29 +335,29 @@ public:
        setLPfilterParameters(beta, tau, k);
    }
-    bool applyKrauskopfLMS2Acr1cr2Transform(cv::ocl::oclMat &result);
+    bool applyKrauskopfLMS2Acr1cr2Transform(UMat &result);
-    bool applyLMS2LabTransform(cv::ocl::oclMat &result);
+    bool applyLMS2LabTransform(UMat &result);
-    inline const cv::ocl::oclMat &getMultiplexedFrame() const
+    inline const UMat &getMultiplexedFrame() const
    {
        return _multiplexedFrame;
    }
-    inline const cv::ocl::oclMat &getDemultiplexedColorFrame() const
+    inline const UMat &getDemultiplexedColorFrame() const
    {
        return _demultiplexedColorFrame;
    }
-    inline const cv::ocl::oclMat &getLuminance() const
+    inline const UMat &getLuminance() const
    {
        return _luminance;
    }
-    inline const cv::ocl::oclMat &getChrominance() const
+    inline const UMat &getChrominance() const
    {
        return _chrominance;
    }
-    void clipRGBOutput_0_maxInputValue(cv::ocl::oclMat &inputOutputBuffer, const float maxOutputValue = 255.0);
+    void clipRGBOutput_0_maxInputValue(UMat &inputOutputBuffer, const float maxOutputValue = 255.0);
    void normalizeRGBOutput_0_maxOutputValue(const float maxOutputValue = 255.0);
-    inline void setDemultiplexedColorFrame(const cv::ocl::oclMat &demultiplexedImage)
+    inline void setDemultiplexedColorFrame(const UMat &demultiplexedImage)
    {
        _demultiplexedColorFrame = demultiplexedImage;
    }
@ -372,26 +373,26 @@ protected:
    int _samplingMethod;
    bool _saturateColors;
    float _colorSaturationValue;
-    cv::ocl::oclMat _luminance;
+    UMat _luminance;
-    cv::ocl::oclMat _multiplexedFrame;
+    UMat _multiplexedFrame;
-    cv::ocl::oclMat _RGBmosaic;
+    UMat _RGBmosaic;
-    cv::ocl::oclMat _tempMultiplexedFrame;
+    UMat _tempMultiplexedFrame;
-    cv::ocl::oclMat _demultiplexedTempBuffer;
+    UMat _demultiplexedTempBuffer;
-    cv::ocl::oclMat _demultiplexedColorFrame;
+    UMat _demultiplexedColorFrame;
-    cv::ocl::oclMat _chrominance;
+    UMat _chrominance;
-    cv::ocl::oclMat _colorLocalDensity;
+    UMat _colorLocalDensity;
-    cv::ocl::oclMat _imageGradient;
+    UMat _imageGradient;
    float _pR, _pG, _pB;
    bool _objectInit;
    void _initColorSampling();
-    void _adaptiveSpatialLPfilter(const cv::ocl::oclMat &inputFrame, const cv::ocl::oclMat &gradient, cv::ocl::oclMat &outputFrame);
+    void _adaptiveSpatialLPfilter_h(const UMat &inputFrame, const UMat &gradient, UMat &outputFrame);
-    void _adaptiveHorizontalCausalFilter_addInput(const cv::ocl::oclMat &inputFrame, const cv::ocl::oclMat &gradient, cv::ocl::oclMat &outputFrame);
+    void _adaptiveSpatialLPfilter_v(const UMat &gradient, UMat &outputFrame);
-    void _adaptiveVerticalAnticausalFilter_multGain(const cv::ocl::oclMat &gradient, cv::ocl::oclMat &outputFrame);
+    void _adaptiveHorizontalCausalFilter_addInput(const UMat &inputFrame, const UMat &gradient, UMat &outputFrame);
-    void _computeGradient(const cv::ocl::oclMat &luminance, cv::ocl::oclMat &gradient);
+    void _computeGradient(const UMat &luminance, UMat &gradient);
    void _normalizeOutputs_0_maxOutputValue(void);
-    void _applyImageColorSpaceConversion(const cv::ocl::oclMat &inputFrame, cv::ocl::oclMat &outputFrame, const float *transformTable);
+    void _applyImageColorSpaceConversion(const UMat &inputFrame, UMat &outputFrame, const float *transformTable);
 };
 class RetinaFilter
 {
@ -401,8 +402,8 @@ public:
    void clearAllBuffers();
    void resize(const unsigned int NBrows, const unsigned int NBcolumns);
-    bool checkInput(const cv::ocl::oclMat &input, const bool colorMode);
+    bool checkInput(const UMat &input, const bool colorMode);
-    bool runFilter(const cv::ocl::oclMat &imageInput, const bool useAdaptiveFiltering = true, const bool processRetinaParvoMagnoMapping = false, const bool useColorMode = false, const bool inputIsColorMultiplexed = false);
+    bool runFilter(const UMat &imageInput, const bool useAdaptiveFiltering = true, const bool processRetinaParvoMagnoMapping = false, const bool useColorMode = false, const bool inputIsColorMultiplexed = false);
    void setGlobalParameters(const float OPLspatialResponse1 = 0.7, const float OPLtemporalresponse1 = 1, const float OPLassymetryGain = 0, const float OPLspatialResponse2 = 5, const float OPLtemporalresponse2 = 1, const float LPfilterSpatialResponse = 5, const float LPfilterGain = 0, const float LPfilterTemporalresponse = 0, const float MovingContoursExtractorCoefficient = 5, const bool normalizeParvoOutput_0_maxOutputValue = false, const bool normalizeMagnoOutput_0_maxOutputValue = false, const float maxOutputValue = 255.0, const float maxInputValue = 255.0, const float meanValue = 128.0);
@ -467,16 +468,16 @@ public:
    {
        _colorEngine.setColorSaturation(saturateColors, colorSaturationValue);
    }
-    inline const cv::ocl::oclMat &getLocalAdaptation() const
+    inline const UMat &getLocalAdaptation() const
    {
        return _photoreceptorsPrefilter.getOutput();
    }
-    inline const cv::ocl::oclMat &getPhotoreceptors() const
+    inline const UMat &getPhotoreceptors() const
    {
        return _ParvoRetinaFilter.getPhotoreceptorsLPfilteringOutput();
    }
-    inline const cv::ocl::oclMat &getHorizontalCells() const
+    inline const UMat &getHorizontalCells() const
    {
        return _ParvoRetinaFilter.getHorizontalCellsOutput();
    }
@ -484,20 +485,20 @@ public:
    {
        return _useParvoOutput;
    }
-    bool getParvoFoveaResponse(cv::ocl::oclMat &parvoFovealResponse);
+    bool getParvoFoveaResponse(UMat &parvoFovealResponse);
    inline void activateContoursProcessing(const bool useParvoOutput)
    {
        _useParvoOutput = useParvoOutput;
    }
-    const cv::ocl::oclMat &getContours();
+    const UMat &getContours();
-    inline const cv::ocl::oclMat &getContoursON() const
+    inline const UMat &getContoursON() const
    {
        return _ParvoRetinaFilter.getParvoON();
    }
-    inline const cv::ocl::oclMat &getContoursOFF() const
+    inline const UMat &getContoursOFF() const
    {
        return _ParvoRetinaFilter.getParvoOFF();
    }
@ -512,41 +513,41 @@ public:
        _useMagnoOutput = useMagnoOutput;
    }
-    inline const cv::ocl::oclMat &getMovingContours() const
+    inline const UMat &getMovingContours() const
    {
        return _MagnoRetinaFilter.getOutput();
    }
-    inline const cv::ocl::oclMat &getMovingContoursSaturated() const
+    inline const UMat &getMovingContoursSaturated() const
    {
        return _MagnoRetinaFilter.getMagnoYsaturated();
    }
-    inline const cv::ocl::oclMat &getMovingContoursON() const
+    inline const UMat &getMovingContoursON() const
    {
        return _MagnoRetinaFilter.getMagnoON();
    }
-    inline const cv::ocl::oclMat &getMovingContoursOFF() const
+    inline const UMat &getMovingContoursOFF() const
    {
        return _MagnoRetinaFilter.getMagnoOFF();
    }
-    inline const cv::ocl::oclMat &getRetinaParvoMagnoMappedOutput() const
+    inline const UMat &getRetinaParvoMagnoMappedOutput() const
    {
        return _retinaParvoMagnoMappedFrame;
    }
-    inline const cv::ocl::oclMat &getParvoContoursChannel() const
+    inline const UMat &getParvoContoursChannel() const
    {
        return _colorEngine.getLuminance();
    }
-    inline const cv::ocl::oclMat &getParvoChrominance() const
+    inline const UMat &getParvoChrominance() const
    {
        return _colorEngine.getChrominance();
    }
-    inline const cv::ocl::oclMat &getColorOutput() const
+    inline const UMat &getColorOutput() const
    {
        return _colorEngine.getDemultiplexedColorFrame();
    }
@ -609,7 +610,7 @@ private:
    unsigned int _ellapsedFramesSinceLastReset;
    unsigned int _globalTemporalConstant;
-    cv::ocl::oclMat _retinaParvoMagnoMappedFrame;
+    UMat _retinaParvoMagnoMappedFrame;
    BasicRetinaFilter _photoreceptorsPrefilter;
    ParvoRetinaFilter _ParvoRetinaFilter;
    MagnoRetinaFilter _MagnoRetinaFilter;
@ -623,12 +624,60 @@ private:
    void _setInitPeriodCount();
    void _processRetinaParvoMagnoMapping();
-    void _runGrayToneMapping(const cv::ocl::oclMat &grayImageInput, cv::ocl::oclMat &grayImageOutput , const float PhotoreceptorsCompression = 0.6, const float ganglionCellsCompression = 0.6);
+    void _runGrayToneMapping(const UMat &grayImageInput, UMat &grayImageOutput , const float PhotoreceptorsCompression = 0.6, const float ganglionCellsCompression = 0.6);
 };
 // Retina implementation whose buffers are UMat objects.
 // It derives from the public Retina interface; RetinaImpl (retina.cpp) creates an
 // instance of this class and forwards run()/getParvo()/getMagno()/... to it.
 class RetinaOCLImpl : public Retina
 {
 public:
    // Constructors. Note that the size parameter is named "getInputSize", the same
    // identifier as the accessor method declared below.
    RetinaOCLImpl(Size getInputSize);
    // NOTE(review): reductionFactor / samplingStrenght are double here while
    // createRetina() and RetinaImpl pass float - the values are widened implicitly.
    RetinaOCLImpl(Size getInputSize, const bool colorMode, int colorSamplingMethod = RETINA_COLOR_BAYER, const bool useRetinaLogSampling = false, const double reductionFactor = 1.0, const double samplingStrenght = 10.0);
    virtual ~RetinaOCLImpl();
    // Size accessors.
    Size getInputSize();
    Size getOutputSize();
    // Parameter setup: from a parameter file name, an opened FileStorage, or a
    // RetinaParameters value.
    void setup(String retinaParameterFile = "", const bool applyDefaultSetupOnFailure = true);
    void setup(cv::FileStorage &fs, const bool applyDefaultSetupOnFailure = true);
    void setup(RetinaParameters newParameters);
    RetinaParameters getParameters();
    const String printSetup();
    // Parameter serialisation, to a file name or to an opened FileStorage.
    virtual void write(String fs) const;
    virtual void write(FileStorage& fs) const;
    // Per-channel tuning of the parvo (OPL/IPL) and magno (IPL) channels.
    void setupOPLandIPLParvoChannel(const bool colorMode = true, const bool normaliseOutput = true, const float photoreceptorsLocalAdaptationSensitivity = 0.7, const float photoreceptorsTemporalConstant = 0.5, const float photoreceptorsSpatialConstant = 0.53, const float horizontalCellsGain = 0, const float HcellsTemporalConstant = 1, const float HcellsSpatialConstant = 7, const float ganglionCellsSensitivity = 0.7);
    void setupIPLMagnoChannel(const bool normaliseOutput = true, const float parasolCells_beta = 0, const float parasolCells_tau = 0, const float parasolCells_k = 7, const float amacrinCellsTemporalCutFrequency = 1.2, const float V0CompressionParameter = 0.95, const float localAdaptintegration_tau = 0, const float localAdaptintegration_k = 7);
    // Processing entry point and output accessors.
    void run(InputArray inputImage);
    void getParvo(OutputArray retinaOutput_parvo);
    void getMagno(OutputArray retinaOutput_magno);
    void setColorSaturation(const bool saturateColors = true, const float colorSaturationValue = 4.0);
    void clearBuffers();
    void activateMovingContoursProcessing(const bool activate);
    void activateContoursProcessing(const bool activate);
    // unimplemented interfaces:
    // (declared so the class matches the Retina interface; parameter names are
    //  commented out in the declarations)
    void applyFastToneMapping(InputArray /*inputImage*/, OutputArray /*outputToneMappedImage*/);
    void getParvoRAW(OutputArray /*retinaOutput_parvo*/);
    void getMagnoRAW(OutputArray /*retinaOutput_magno*/);
    const Mat getMagnoRAW() const;
    const Mat getParvoRAW() const;
 protected:
    // Current parameter set.
    RetinaParameters _retinaParameters;
    // UMat input buffer.
    UMat _inputBuffer;
    // Raw pointer to the filter pipeline.
    // NOTE(review): presumably allocated in _init() and released in the destructor - confirm
    // in retina_ocl.cpp; the class declares no copy constructor/assignment.
    RetinaFilter* _retinaFilter;
    // Layout conversion helpers between the two UMat layouts named in their identifiers.
    bool convertToColorPlanes(const UMat& input, UMat &output);
    void convertToInterleaved(const UMat& input, bool colorMode, UMat &output);
    // Initialisation helper taking the same arguments as the full constructor.
    void _init(const Size getInputSize, const bool colorMode, int colorSamplingMethod = RETINA_COLOR_BAYER, const bool useRetinaLogSampling = false, const double reductionFactor = 1.0, const double samplingStrenght = 10.0);
 };
 }  /* namespace ocl */
 }  /* namespace bioinspired */
 }  /* namespace cv */
-#endif  /* HAVE_OPENCV_OCL */
+#endif  /* HAVE_OPENCL */
 #endif  /* __OCL_RETINA_HPP__ */
--- a/modules/bioinspired/test/test_retina_ocl.cpp
+++ b/modules/bioinspired/test/test_retina_ocl.cpp
@ -44,87 +44,39 @@
 //M*/
 #include "test_precomp.hpp"
-#include "opencv2/opencv_modules.hpp"
+#include "opencv2/ts/ocl_test.hpp"
 #include "opencv2/bioinspired.hpp"
 #include "opencv2/imgproc.hpp"
 #include "opencv2/highgui.hpp"
-#include "opencv2/core/ocl.hpp" // cv::ocl::haveOpenCL
+#ifdef HAVE_OPENCL
 #if defined(HAVE_OPENCV_OCL)
 #include "opencv2/ocl.hpp"
 #define RETINA_ITERATIONS 5
-static double checkNear(const cv::Mat &m1, const cv::Mat &m2)
+namespace cvtest {
-{
+namespace ocl {
    return cv::norm(m1, m2, cv::NORM_INF);
 }
 // Declares a test fixture struct derived from testing::TestWithParam over a std::tr1::tuple
 // of the listed parameter types.
 #define PARAM_TEST_CASE(name, ...) struct name : testing::TestWithParam< std::tr1::tuple< __VA_ARGS__ > >
 // Fetches the k-th element of the current test parameter tuple.
 #define GET_PARAM(k) std::tr1::get< k >(GetParam())
 // File-local flags used by the fixture's SetUp(): oclInit marks that the one-time OpenCL probe
 // has run, oclAvailable records whether it succeeded. Both are ints initialised from `false`.
 static int oclInit = false;
 static int oclAvailable = false;
 // Parameterised fixture for the retina OpenCL accuracy test.
 // Tuple layout: (colorMode, colorSamplingMethod, useLogSampling, reductionFactor, samplingStrength).
 // In this diff the last two members change from double to float while the tuple element types
 // stay double, which is why SetUp() gains explicit static_cast<float> conversions.
 PARAM_TEST_CASE(Retina_OCL, bool, int, bool, double, double)
 {
    bool colorMode;
    int colorSamplingMethod;
    bool useLogSampling;
-    double reductionFactor;
+    float reductionFactor;
-    double samplingStrength;
+    float samplingStrength;
    // Copies the tuple elements into the members above, then performs a one-time OpenCL probe.
    virtual void SetUp()
    {
        colorMode           = GET_PARAM(0);
        colorSamplingMethod = GET_PARAM(1);
        useLogSampling      = GET_PARAM(2);
-        reductionFactor     = GET_PARAM(3);
+        reductionFactor     = static_cast<float>(GET_PARAM(3));
-        samplingStrength    = GET_PARAM(4);
+        samplingStrength    = static_cast<float>(GET_PARAM(4));
        // Probe runs only on the first SetUp() call; oclInit is set afterwards regardless of outcome.
        if (!oclInit)
        {
            if (cv::ocl::haveOpenCL())
            {
                // oclAvailable is set only if querying the device info does not throw.
                try
                {
                    const cv::ocl::DeviceInfo& dev = cv::ocl::Context::getContext()->getDeviceInfo();
                    std::cout << "Device name:" << dev.deviceName << std::endl;
                    oclAvailable = true;
                }
                // Any exception is swallowed; oclAvailable keeps its previous value.
                catch (...)
                {
                    std::cout << "Device name: N/A" << std::endl;
                }
            }
            oclInit = true;
        }
    }
 };
-TEST_P(Retina_OCL, Accuracy)
+OCL_TEST_P(Retina_OCL, Accuracy)
 {
    // Accuracy test: compares retina parvo/magno outputs from a reference run against an
    // OpenCL run over RETINA_ITERATIONS iterations.
    // NOTE(review): this listing interleaves removed ('-') and added ('+') lines, and some
    // unmarked lines appear to belong to one side only (e.g. `ocl_retina` is declared only on a
    // '-' line yet is still used on an unmarked line below). Read unmarked lines with care.

    // Early exit when the file-local oclAvailable flag is not set.
    if (!oclAvailable)
    {
        std::cout << "SKIP test" << std::endl;
        return;
    }
    using namespace cv;
    // The bool colorMode is passed as imread's second argument.
    Mat input = imread(cvtest::TS::ptr()->get_data_path() + "shared/lena.png", colorMode);
    CV_Assert(!input.empty());
    ocl::oclMat ocl_input(input);
    // Removed side: a dedicated OpenCL retina from createRetina_OCL().
    // Added side: a single retina from the regular createRetina() factory.
-    Ptr<bioinspired::Retina> ocl_retina = bioinspired::createRetina_OCL(
+    Ptr<bioinspired::Retina> retina = bioinspired::createRetina(
        input.size(),
        colorMode,
        colorSamplingMethod,
        useLogSampling,
        reductionFactor,
        samplingStrength);
    Ptr<bioinspired::Retina> gold_retina = bioinspired::createRetina(
        input.size(),
        colorMode,
        colorSamplingMethod,
@ -134,31 +86,35 @@ TEST_P(Retina_OCL, Accuracy)
    // (hunk break above: the lines between the two hunks are not shown in this listing)
    Mat gold_parvo;
    Mat gold_magno;
-    ocl::oclMat ocl_parvo;
+    UMat ocl_parvo;
-    ocl::oclMat ocl_magno;
+    UMat ocl_magno;
    for(int i = 0; i < RETINA_ITERATIONS; i ++)
    {
        // Added side, per iteration: run/getParvo/getMagno/clearBuffers wrapped in OCL_OFF fill
        // gold_*, then the same sequence wrapped in OCL_ON fills ocl_*. OCL_OFF/OCL_ON presumably
        // disable/enable the OpenCL path (macros from opencv2/ts/ocl_test.hpp -- TODO confirm).
-        ocl_retina->run(ocl_input);
+        OCL_OFF(retina->run(input));
-        gold_retina->run(input);
+        OCL_OFF(retina->getParvo(gold_parvo));
        OCL_OFF(retina->getMagno(gold_magno));
        OCL_OFF(retina->clearBuffers());
-        gold_retina->getParvo(gold_parvo);
+        OCL_ON(retina->run(input));
-        gold_retina->getMagno(gold_magno);
+        OCL_ON(retina->getParvo(ocl_parvo));
        OCL_ON(retina->getMagno(ocl_magno));
        OCL_ON(retina->clearBuffers());
        // Tolerance: the added line uses a flat 1; the removed line used 2 when colorMode is true.
-        ocl_retina->getParvo(ocl_parvo);
+        int eps = 1;
        ocl_retina->getMagno(ocl_magno);
-        int eps = colorMode ? 2 : 1;
+        EXPECT_MAT_NEAR(gold_parvo, ocl_parvo, eps);
-
+        EXPECT_MAT_NEAR(gold_magno, ocl_magno, eps);
        EXPECT_LE(checkNear(gold_parvo, (Mat)ocl_parvo), eps);
        EXPECT_LE(checkNear(gold_magno, (Mat)ocl_magno), eps);
    }
 }
-INSTANTIATE_TEST_CASE_P(Contrib, Retina_OCL, testing::Combine(
+OCL_INSTANTIATE_TEST_CASE_P(Contrib, Retina_OCL, testing::Combine(
                            // Generators are listed in the fixture's tuple order (GET_PARAM(0)..GET_PARAM(4)).
                            testing::Bool(),                                            // colorMode: both false and true
                            testing::Values((int)cv::bioinspired::RETINA_COLOR_BAYER),  // colorSamplingMethod: Bayer only
                            testing::Values(false/*,true*/),                            // useLogSampling: only false (true is commented out)
                            testing::Values(1.0, 0.5),                                  // reductionFactor
                            testing::Values(10.0, 5.0)));                               // samplingStrength
-#endif
+
 } } // namespace cvtest::ocl
 #endif // HAVE_OPENCL