diff --git a/modules/bioinspired/include/opencv2/bioinspired/retina.hpp b/modules/bioinspired/include/opencv2/bioinspired/retina.hpp index e531614ac..583599ca2 100644 --- a/modules/bioinspired/include/opencv2/bioinspired/retina.hpp +++ b/modules/bioinspired/include/opencv2/bioinspired/retina.hpp @@ -447,11 +447,6 @@ the log scale that is applied */ CV_EXPORTS_W Ptr createRetina(Size inputSize, const bool colorMode, int colorSamplingMethod=RETINA_COLOR_BAYER, const bool useRetinaLogSampling=false, const float reductionFactor=1.0f, const float samplingStrenght=10.0f); -#ifdef HAVE_OPENCV_OCL -Ptr createRetina_OCL(Size inputSize); -Ptr createRetina_OCL(Size inputSize, const bool colorMode, int colorSamplingMethod=RETINA_COLOR_BAYER, const bool useRetinaLogSampling=false, const float reductionFactor=1.0f, const float samplingStrenght=10.0f); -#endif - //! @} //! @} diff --git a/modules/bioinspired/perf/opencl/perf_retina.cpp b/modules/bioinspired/perf/opencl/perf_retina.cpp deleted file mode 100644 index 05c0cfaa8..000000000 --- a/modules/bioinspired/perf/opencl/perf_retina.cpp +++ /dev/null @@ -1,126 +0,0 @@ -/*M/////////////////////////////////////////////////////////////////////////////////////// -// -// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING. -// -// By downloading, copying, installing or using the software you agree to this license. -// If you do not agree to this license, do not download, install, -// copy or use the software. -// -// -// License Agreement -// For Open Source Computer Vision Library -// -// Copyright (C) 2010-2012, Multicoreware, Inc., all rights reserved. -// Copyright (C) 2010-2012, Advanced Micro Devices, Inc., all rights reserved. -// Third party copyrights are property of their respective owners. -// -// @Authors -// Fangfang Bai, fangfang@multicorewareinc.com -// Jin Ma, jin@multicorewareinc.com -// -// Redistribution and use in source and binary forms, with or without modification, -// are permitted provided that the following conditions are met: -// -// * Redistribution's of source code must retain the above copyright notice, -// this list of conditions and the following disclaimer. -// -// * Redistribution's in binary form must reproduce the above copyright notice, -// this list of conditions and the following disclaimer in the documentation -// and/or other materials provided with the distribution. -// -// * The name of the copyright holders may not be used to endorse or promote products -// derived from this software without specific prior written permission. -// -// This software is provided by the copyright holders and contributors as is and -// any express or implied warranties, including, but not limited to, the implied -// warranties of merchantability and fitness for a particular purpose are disclaimed. -// In no event shall the Intel Corporation or contributors be liable for any direct, -// indirect, incidental, special, exemplary, or consequential damages -// (including, but not limited to, procurement of substitute goods or services; -// loss of use, data, or profits; or business interruption) however caused -// and on any theory of liability, whether in contract, strict liability, -// or tort (including negligence or otherwise) arising in any way out of -// the use of this software, even if advised of the possibility of such damage. -// -//M*/ - -#include "perf_precomp.hpp" - -#include "opencv2/imgproc.hpp" -#include "opencv2/highgui.hpp" -#include "opencv2/core/ocl.hpp" - -#ifdef HAVE_OPENCV_OCL - -#include "opencv2/ocl.hpp" - -using namespace std::tr1; -using namespace cv; -using namespace perf; - -namespace cvtest { -namespace ocl { - -///////////////////////// Retina //////////////////////// - -typedef tuple RetinaParams; -typedef TestBaseWithParam RetinaFixture; - -#define OCL_TEST_CYCLE() for(; startTimer(), next(); cv::ocl::finish(), stopTimer()) - -PERF_TEST_P(RetinaFixture, Retina, - ::testing::Combine(testing::Bool(), testing::Values((int)cv::bioinspired::RETINA_COLOR_BAYER), - testing::Values(1.0, 0.5), testing::Values(10.0, 5.0))) -{ - if (!cv::ocl::haveOpenCL()) - throw TestBase::PerfSkipTestException(); - - RetinaParams params = GetParam(); - bool colorMode = get<0>(params), useLogSampling = false; - int colorSamplingMethod = get<1>(params); - double reductionFactor = get<2>(params), samplingStrength = get<3>(params); - - Mat input = cv::imread(cvtest::TS::ptr()->get_data_path() + "shared/lena.png", colorMode); - ASSERT_FALSE(input.empty()); - - Mat gold_parvo, gold_magno; - - if (getSelectedImpl() == "plain") - { - Ptr gold_retina = bioinspired::createRetina( - input.size(), colorMode, colorSamplingMethod, - useLogSampling, reductionFactor, samplingStrength); - - TEST_CYCLE() - { - gold_retina->run(input); - - gold_retina->getParvo(gold_parvo); - gold_retina->getMagno(gold_magno); - } - } - else if (getSelectedImpl() == "ocl") - { - cv::ocl::oclMat ocl_input(input), ocl_parvo, ocl_magno; - - Ptr ocl_retina = cv::bioinspired::createRetina_OCL( - input.size(), colorMode, colorSamplingMethod, useLogSampling, - reductionFactor, samplingStrength); - - OCL_TEST_CYCLE() - { - ocl_retina->run(ocl_input); - - ocl_retina->getParvo(ocl_parvo); - ocl_retina->getMagno(ocl_magno); - } - } - else - CV_TEST_FAIL_NO_IMPL(); - - SANITY_CHECK_NOTHING(); -} - -} } // namespace cvtest::ocl - -#endif // HAVE_OPENCV_OCL diff --git a/modules/bioinspired/perf/opencl/perf_retina.ocl.cpp b/modules/bioinspired/perf/opencl/perf_retina.ocl.cpp new file mode 100644 index 000000000..2fe2cc52c --- /dev/null +++ b/modules/bioinspired/perf/opencl/perf_retina.ocl.cpp @@ -0,0 +1,47 @@ +#include "../perf_precomp.hpp" +#include "opencv2/ts/ocl_perf.hpp" + +using namespace std::tr1; +using namespace cv; +using namespace perf; + +namespace cvtest { +namespace ocl { + +///////////////////////// Retina //////////////////////// + +typedef tuple RetinaParams; +typedef TestBaseWithParam RetinaFixture; + +OCL_PERF_TEST_P(RetinaFixture, Retina, + ::testing::Combine(testing::Bool(), testing::Values((int)cv::bioinspired::RETINA_COLOR_BAYER), + testing::Values(1.0, 0.5), testing::Values(10.0, 5.0))) +{ + RetinaParams params = GetParam(); + bool colorMode = get<0>(params), useLogSampling = false; + int colorSamplingMethod = get<1>(params); + float reductionFactor = static_cast(get<2>(params)); + float samplingStrength = static_cast(get<3>(params)); + + Mat input = imread(getDataPath("cv/shared/lena.png"), colorMode); + ASSERT_FALSE(input.empty()); + + UMat ocl_parvo, ocl_magno; + + { + Ptr retina = cv::bioinspired::createRetina( + input.size(), colorMode, colorSamplingMethod, useLogSampling, + reductionFactor, samplingStrength); + + OCL_TEST_CYCLE() + { + retina->run(input); + retina->getParvo(ocl_parvo); + retina->getMagno(ocl_magno); + } + } + + SANITY_CHECK_NOTHING(); +} + +} } // namespace cvtest::ocl diff --git a/modules/bioinspired/perf/perf_main.cpp b/modules/bioinspired/perf/perf_main.cpp index 10ffdb22b..8a9ed8e6e 100644 --- a/modules/bioinspired/perf/perf_main.cpp +++ b/modules/bioinspired/perf/perf_main.cpp @@ -42,12 +42,4 @@ #include "perf_precomp.hpp" -static const char * impls[] = -{ -#ifdef HAVE_OPENCV_OCL - "ocl", -#endif - "plain" -}; - -CV_PERF_TEST_MAIN_WITH_IMPLS(ocl, impls, ::perf::TestBase::setModulePerformanceStrategy(::perf::PERF_STRATEGY_SIMPLE)) +CV_PERF_TEST_MAIN(bioinspired) diff --git a/modules/bioinspired/src/opencl/retina_kernel.cl b/modules/bioinspired/src/opencl/retina_kernel.cl index 02b7a8320..226e1e398 100644 --- a/modules/bioinspired/src/opencl/retina_kernel.cl +++ b/modules/bioinspired/src/opencl/retina_kernel.cl @@ -75,72 +75,35 @@ kernel void horizontalCausalFilter_addInput( output + mad24(gid, elements_per_row, out_offset / 4); float res; - float4 in_v4, out_v4, res_v4 = (float4)(0); + float4 in_v4, out_v4, sum_v4, res_v4 = (float4)(0); //vectorize to increase throughput for(int i = 0; i < cols / 4; ++i, iptr += 4, optr += 4) { in_v4 = vload4(0, iptr); - out_v4 = vload4(0, optr); + out_v4 = vload4(0, optr) * _tau; + sum_v4 = in_v4 + out_v4; - res_v4.x = in_v4.x + _tau * out_v4.x + _a * res_v4.w; - res_v4.y = in_v4.y + _tau * out_v4.y + _a * res_v4.x; - res_v4.z = in_v4.z + _tau * out_v4.z + _a * res_v4.y; - res_v4.w = in_v4.w + _tau * out_v4.w + _a * res_v4.z; + res_v4.x = sum_v4.x + _a * res_v4.w; + res_v4.y = sum_v4.y + _a * res_v4.x; + res_v4.z = sum_v4.z + _a * res_v4.y; + res_v4.w = sum_v4.w + _a * res_v4.z; vstore4(res_v4, 0, optr); } - res = res_v4.w; - // there may be left some - for(int i = 0; i < cols % 4; ++i, ++iptr, ++optr) - { - res = *iptr + _tau * *optr + _a * res; - *optr = res; - } -} - -//_horizontalAnticausalFilter -kernel void horizontalAnticausalFilter( - global float * output, - const int cols, - const int rows, - const int elements_per_row, - const int out_offset, - const float _a -) -{ - int gid = get_global_id(0); - if(gid >= rows) - { - return; - } - global float * optr = output + - mad24(gid + 1, elements_per_row, - 1 + out_offset / 4); - - float4 result_v4 = (float4)(0), out_v4; - float result = 0; - // we assume elements_per_row is multple of WIDTH_MULTIPLE - for(int i = 0; i < WIDTH_MULTIPLE; ++ i, -- optr) - { - if(i >= elements_per_row - cols) - { - result = *optr + _a * result; - } - *optr = result; - } - result_v4.x = result; - optr -= 3; - for(int i = WIDTH_MULTIPLE / 4; i < elements_per_row / 4; ++i, optr -= 4) + optr = output + mad24(gid + 1, elements_per_row, -4 + out_offset / 4); + res_v4 = (float4)(0); + for(int i = 0; i < elements_per_row / 4; ++i, optr -= 4) { // shift left, `offset` is type `size_t` so it cannot be negative out_v4 = vload4(0, optr); - result_v4.w = out_v4.w + _a * result_v4.x; - result_v4.z = out_v4.z + _a * result_v4.w; - result_v4.y = out_v4.y + _a * result_v4.z; - result_v4.x = out_v4.x + _a * result_v4.y; + res_v4.w = out_v4.w + _a * res_v4.x; + res_v4.z = out_v4.z + _a * res_v4.w; + res_v4.y = out_v4.y + _a * res_v4.z; + res_v4.x = out_v4.x + _a * res_v4.y; - vstore4(result_v4, 0, optr); + vstore4(res_v4, 0, optr); } } @@ -151,26 +114,37 @@ kernel void verticalCausalFilter( const int rows, const int elements_per_row, const int out_offset, - const float _a + const float _a, + const float _gain ) { - int gid = get_global_id(0); + int gid = get_global_id(0) * 2; if(gid >= cols) { return; } global float * optr = output + gid + out_offset / 4; - float result = 0; + float2 input; + float2 result = (float2)0; for(int i = 0; i < rows; ++i, optr += elements_per_row) { - result = *optr + _a * result; - *optr = result; + input = vload2(0, optr); + result = input + _a * result; + vstore2(result, 0, optr); + } + + optr = output + (rows - 1) * elements_per_row + gid + out_offset / 4; + result = (float2)0; + for(int i = 0; i < rows; ++i, optr -= elements_per_row) + { + input = vload2(0, optr); + result = input + _a * result; + vstore2(_gain * result, 0, optr); } } -//_verticalCausalFilter -kernel void verticalAnticausalFilter_multGain( +kernel void verticalCausalFilter_multichannel( global float * output, const int cols, const int rows, @@ -180,74 +154,69 @@ kernel void verticalAnticausalFilter_multGain( const float _gain ) { - int gid = get_global_id(0); + int gid = get_global_id(0) * 2; if(gid >= cols) { return; } - global float * optr = output + (rows - 1) * elements_per_row + gid + out_offset / 4; - float result = 0; - for(int i = 0; i < rows; ++i, optr -= elements_per_row) - { - result = *optr + _a * result; - *optr = _gain * result; - } -} -// -// end of _spatiotemporalLPfilter -///////////////////////////////////////////////////////////////////// + global float * optr[3]; + float2 input[3]; + float2 result[3] = { (float2)0, (float2)0, (float2)0 }; -//////////////// horizontalAnticausalFilter_Irregular //////////////// -kernel void horizontalAnticausalFilter_Irregular( - global float * output, - global float * buffer, - const int cols, - const int rows, - const int elements_per_row, - const int out_offset, - const int buffer_offset -) -{ - int gid = get_global_id(0); - if(gid >= rows) - { - return; - } + optr[0] = output + gid + out_offset / 4; + optr[1] = output + gid + out_offset / 4 + rows * elements_per_row; + optr[2] = output + gid + out_offset / 4 + 2 * rows * elements_per_row; - global float * optr = - output + mad24(rows - gid, elements_per_row, -1 + out_offset / 4); - global float * bptr = - buffer + mad24(rows - gid, elements_per_row, -1 + buffer_offset / 4); - - float4 buf_v4, out_v4, res_v4 = (float4)(0); - float result = 0; - // we assume elements_per_row is multple of WIDTH_MULTIPLE - for(int i = 0; i < WIDTH_MULTIPLE; ++ i, -- optr, -- bptr) + for(int i = 0; i < rows; ++i) { - if(i >= elements_per_row - cols) - { - result = *optr + *bptr * result; - } - *optr = result; + input[0] = vload2(0, optr[0]); + input[1] = vload2(0, optr[1]); + input[2] = vload2(0, optr[2]); + + result[0] = input[0] + _a * result[0]; + result[1] = input[1] + _a * result[1]; + result[2] = input[2] + _a * result[2]; + + vstore2(result[0], 0, optr[0]); + vstore2(result[1], 0, optr[1]); + vstore2(result[2], 0, optr[2]); + + optr[0] += elements_per_row; + optr[1] += elements_per_row; + optr[2] += elements_per_row; } - res_v4.x = result; - optr -= 3; - bptr -= 3; - for(int i = WIDTH_MULTIPLE / 4; i < elements_per_row / 4; ++i, optr -= 4, bptr -= 4) + + optr[0] = output + (rows - 1) * elements_per_row + gid + out_offset / 4; + optr[1] = output + (rows - 1) * elements_per_row + gid + out_offset / 4 + rows * elements_per_row; + optr[2] = output + (rows - 1) * elements_per_row + gid + out_offset / 4 + 2 * rows * elements_per_row; + result[0] = result[1] = result[2] = (float2)0; + + for(int i = 0; i < rows; ++i) { - buf_v4 = vload4(0, bptr); - out_v4 = vload4(0, optr); + input[0] = vload2(0, optr[0]); + input[1] = vload2(0, optr[1]); + input[2] = vload2(0, optr[2]); - res_v4.w = out_v4.w + buf_v4.w * res_v4.x; - res_v4.z = out_v4.z + buf_v4.z * res_v4.w; - res_v4.y = out_v4.y + buf_v4.y * res_v4.z; - res_v4.x = out_v4.x + buf_v4.x * res_v4.y; + result[0] = input[0] + _a * result[0]; + result[1] = input[1] + _a * result[1]; + result[2] = input[2] + _a * result[2]; - vstore4(res_v4, 0, optr); + vstore2(_gain * result[0], 0, optr[0]); + vstore2(_gain * result[1], 0, optr[1]); + vstore2(_gain * result[2], 0, optr[2]); + + optr[0] -= elements_per_row; + optr[1] -= elements_per_row; + optr[2] -= elements_per_row; } } +// +// end of _spatiotemporalLPfilter +///////////////////////////////////////////////////////////////////// + +//////////////// verticalCausalFilter_Irregular //////////////// //////////////// verticalCausalFilter_Irregular //////////////// kernel void verticalCausalFilter_Irregular( global float * output, @@ -256,22 +225,61 @@ kernel void verticalCausalFilter_Irregular( const int rows, const int elements_per_row, const int out_offset, - const int buffer_offset + const int buffer_offset, + const float gain ) { - int gid = get_global_id(0); + int gid = get_global_id(0) * 2; if(gid >= cols) { return; } - global float * optr = output + gid + out_offset / 4; + global float * optr[3]; global float * bptr = buffer + gid + buffer_offset / 4; - float result = 0; - for(int i = 0; i < rows; ++i, optr += elements_per_row, bptr += elements_per_row) - { - result = *optr + *bptr * result; - *optr = result; + float2 result[3] = { (float2)0, (float2)0, (float2)0 }; + float2 grad, input[3]; + optr[0] = output + gid + out_offset / 4; + optr[1] = output + gid + out_offset / 4 + rows * elements_per_row; + optr[2] = output + gid + out_offset / 4 + 2 * rows * elements_per_row; + for(int i = 0; i < rows; ++i, bptr += elements_per_row) + { + input[0] = vload2(0, optr[0]); + input[1] = vload2(0, optr[1]); + input[2] = vload2(0, optr[2]); + grad = vload2(0, bptr); + result[0] = input[0] + grad * result[0]; + result[1] = input[1] + grad * result[1]; + result[2] = input[2] + grad * result[2]; + vstore2(result[0], 0, optr[0]); + vstore2(result[1], 0, optr[1]); + vstore2(result[2], 0, optr[2]); + optr[0] += elements_per_row; + optr[1] += elements_per_row; + optr[2] += elements_per_row; + } + + int start_idx = mad24(rows - 1, elements_per_row, gid); + optr[0] = output + start_idx + out_offset / 4; + optr[1] = output + start_idx + out_offset / 4 + rows * elements_per_row; + optr[2] = output + start_idx + out_offset / 4 + 2 * rows * elements_per_row; + bptr = buffer + start_idx + buffer_offset / 4; + result[0] = result[1] = result[2] = (float2)0; + for(int i = 0; i < rows; ++i, bptr -= elements_per_row) + { + input[0] = vload2(0, optr[0]); + input[1] = vload2(0, optr[1]); + input[2] = vload2(0, optr[2]); + grad = vload2(0, bptr); + result[0] = input[0] + grad * result[0]; + result[1] = input[1] + grad * result[1]; + result[2] = input[2] + grad * result[2]; + vstore2(gain * result[0], 0, optr[0]); + vstore2(gain * result[1], 0, optr[1]); + vstore2(gain * result[2], 0, optr[2]); + optr[0] -= elements_per_row; + optr[1] -= elements_per_row; + optr[2] -= elements_per_row; } } @@ -314,41 +322,22 @@ kernel void adaptiveHorizontalCausalFilter_addInput( vstore4(res_v4, 0, optr); } - for(int i = 0; i < cols % 4; ++i, ++iptr, ++gptr, ++optr) - { - res_v4.w = *iptr + *gptr * res_v4.w; - *optr = res_v4.w; - } -} -//////////////// _adaptiveVerticalAnticausalFilter_multGain //////////////// -kernel void adaptiveVerticalAnticausalFilter_multGain( - global const float * gradient, - global float * output, - const int cols, - const int rows, - const int elements_per_row, - const int grad_offset, - const int out_offset, - const float gain -) -{ - int gid = get_global_id(0); - if(gid >= cols) - { - return; - } + optr = output + mad24(gid + 1, elements_per_row, -4 + out_offset / 4); + gptr = gradient + mad24(gid + 1, elements_per_row, -4 + grad_offset / 4); + res_v4 = (float4)(0); - int start_idx = mad24(rows - 1, elements_per_row, gid); + for(int i = 0; i < cols / 4; ++i, gptr -= 4, optr -= 4) + { + grad_v4 = vload4(0, gptr); + out_v4 = vload4(0, optr); - global const float * gptr = gradient + start_idx + grad_offset / 4; - global float * optr = output + start_idx + out_offset / 4; + res_v4.w = out_v4.w + grad_v4.w * res_v4.x; + res_v4.z = out_v4.z + grad_v4.z * res_v4.w; + res_v4.y = out_v4.y + grad_v4.y * res_v4.z; + res_v4.x = out_v4.x + grad_v4.x * res_v4.y; - float result = 0; - for(int i = 0; i < rows; ++i, gptr -= elements_per_row, optr -= elements_per_row) - { - result = *optr + *gptr * result; - *optr = gain * result; + vstore4(res_v4, 0, optr); } } @@ -367,17 +356,18 @@ kernel void localLuminanceAdaptation( const float _maxInputValue ) { - int gidx = get_global_id(0), gidy = get_global_id(1); + int gidx = get_global_id(0) * 4, gidy = get_global_id(1); if(gidx >= cols || gidy >= rows) { return; } int offset = mad24(gidy, elements_per_row, gidx); - - float X0 = luma[offset] * _localLuminanceFactor + _localLuminanceAddon; - float input_val = input[offset]; + float4 luma_vec = vload4(0, luma + offset); + float4 X0 = luma_vec * _localLuminanceFactor + _localLuminanceAddon; + float4 input_val = vload4(0, input + offset); // output of the following line may be different between GPU and CPU - output[offset] = (_maxInputValue + X0) * input_val / (input_val + X0 + 0.00000000001f); + float4 out_vec = (_maxInputValue + X0) * input_val / (input_val + X0 + 0.00000000001f); + vstore4(out_vec, 0, output + offset); } // end of basicretinafilter //------------------------------------------------------ @@ -403,7 +393,7 @@ kernel void amacrineCellsComputing( const float coeff ) { - int gidx = get_global_id(0), gidy = get_global_id(1); + int gidx = get_global_id(0) * 4, gidy = get_global_id(1); if(gidx >= cols || gidy >= rows) { return; @@ -417,13 +407,16 @@ kernel void amacrineCellsComputing( out_on += offset; out_off += offset; - float magnoXonPixelResult = coeff * (*out_on + *opl_on - *prev_in_on); - *out_on = fmax(magnoXonPixelResult, 0); - float magnoXoffPixelResult = coeff * (*out_off + *opl_off - *prev_in_off); - *out_off = fmax(magnoXoffPixelResult, 0); + float4 val_opl_on = vload4(0, opl_on); + float4 val_opl_off = vload4(0, opl_off); - *prev_in_on = *opl_on; - *prev_in_off = *opl_off; + float4 magnoXonPixelResult = coeff * (vload4(0, out_on) + val_opl_on - vload4(0, prev_in_on)); + vstore4(fmax(magnoXonPixelResult, 0), 0, out_on); + float4 magnoXoffPixelResult = coeff * (vload4(0, out_off) + val_opl_off - vload4(0, prev_in_off)); + vstore4(fmax(magnoXoffPixelResult, 0), 0, out_off); + + vstore4(val_opl_on, 0, prev_in_on); + vstore4(val_opl_off, 0, prev_in_off); } ///////////////////////////////////////////////////////// @@ -457,11 +450,7 @@ kernel void OPL_OnOffWaysComputing( parvo_off += offset; float4 diff = *photo_out - *horiz_out; - float4 isPositive;// = convert_float4(diff > (float4)(0.0f, 0.0f, 0.0f, 0.0f)); - isPositive.x = diff.x > 0.0f; - isPositive.y = diff.y > 0.0f; - isPositive.z = diff.z > 0.0f; - isPositive.w = diff.w > 0.0f; + float4 isPositive = convert_float4(abs(diff > (float4)0.0f)); float4 res_on = isPositive * diff; float4 res_off = (isPositive - (float4)(1.0f)) * diff; @@ -491,14 +480,19 @@ kernel void runColorMultiplexingBayer( const int elements_per_row ) { - int gidx = get_global_id(0), gidy = get_global_id(1); + int gidx = get_global_id(0) * 4, gidy = get_global_id(1); if(gidx >= cols || gidy >= rows) { return; } int offset = mad24(gidy, elements_per_row, gidx); - output[offset] = input[bayerSampleOffset(elements_per_row, rows, gidx, gidy)]; + float4 val; + val.x = input[bayerSampleOffset(elements_per_row, rows, gidx + 0, gidy)]; + val.y = input[bayerSampleOffset(elements_per_row, rows, gidx + 1, gidy)]; + val.z = input[bayerSampleOffset(elements_per_row, rows, gidx + 2, gidy)]; + val.w = input[bayerSampleOffset(elements_per_row, rows, gidx + 3, gidy)]; + vstore4(val, 0, output + offset); } kernel void runColorDemultiplexingBayer( @@ -509,14 +503,18 @@ kernel void runColorDemultiplexingBayer( const int elements_per_row ) { - int gidx = get_global_id(0), gidy = get_global_id(1); + int gidx = get_global_id(0) * 4, gidy = get_global_id(1); if(gidx >= cols || gidy >= rows) { return; } int offset = mad24(gidy, elements_per_row, gidx); - output[bayerSampleOffset(elements_per_row, rows, gidx, gidy)] = input[offset]; + float4 val = vload4(0, input + offset); + output[bayerSampleOffset(elements_per_row, rows, gidx + 0, gidy)] = val.x; + output[bayerSampleOffset(elements_per_row, rows, gidx + 1, gidy)] = val.y; + output[bayerSampleOffset(elements_per_row, rows, gidx + 2, gidy)] = val.z; + output[bayerSampleOffset(elements_per_row, rows, gidx + 3, gidy)] = val.w; } kernel void demultiplexAssign( @@ -550,16 +548,16 @@ kernel void normalizeGrayOutputCentredSigmoide( ) { - int gidx = get_global_id(0), gidy = get_global_id(1); + int gidx = get_global_id(0) * 4, gidy = get_global_id(1); if(gidx >= cols || gidy >= rows) { return; } int offset = mad24(gidy, elements_per_row, gidx); - float input_val = input[offset]; - output[offset] = meanval + - (meanval + X0) * (input_val - meanval) / (fabs(input_val - meanval) + X0); + float4 input_val = vload4(0, input + offset); + input_val = meanval + (meanval + X0) * (input_val - meanval) / (fabs(input_val - meanval) + X0); + vstore4(input_val, 0, output + offset); } //// normalize by photoreceptors density @@ -575,7 +573,7 @@ kernel void normalizePhotoDensity( const float pG ) { - const int gidx = get_global_id(0), gidy = get_global_id(1); + const int gidx = get_global_id(0) * 4, gidy = get_global_id(1); if(gidx >= cols || gidy >= rows) { return; @@ -583,16 +581,19 @@ kernel void normalizePhotoDensity( const int offset = mad24(gidy, elements_per_row, gidx); int index = offset; - float Cr = chroma[index] * colorDensity[index]; + float4 Cr = vload4(0, chroma + index) * vload4(0, colorDensity + index); index += elements_per_row * rows; - float Cg = chroma[index] * colorDensity[index]; + float4 Cg = vload4(0, chroma + index) * vload4(0, colorDensity + index); index += elements_per_row * rows; - float Cb = chroma[index] * colorDensity[index]; - - const float luma_res = (Cr + Cg + Cb) * pG; - luma[offset] = luma_res; - demultiplex[bayerSampleOffset(elements_per_row, rows, gidx, gidy)] = - multiplex[offset] - luma_res; + float4 Cb = vload4(0, chroma + index) * vload4(0, colorDensity + index); + + const float4 luma_res = (Cr + Cg + Cb) * pG; + vstore4(luma_res, 0, luma + offset); + float4 res_v4 = vload4(0, multiplex + offset) - luma_res; + demultiplex[bayerSampleOffset(elements_per_row, rows, gidx + 0, gidy)] = res_v4.x; + demultiplex[bayerSampleOffset(elements_per_row, rows, gidx + 1, gidy)] = res_v4.y; + demultiplex[bayerSampleOffset(elements_per_row, rows, gidx + 2, gidy)] = res_v4.z; + demultiplex[bayerSampleOffset(elements_per_row, rows, gidx + 3, gidy)] = res_v4.w; } @@ -629,7 +630,8 @@ kernel void computeGradient( const float horiz_grad = 0.5f * h_grad + 0.25f * (h_grad_p + h_grad_n); const float verti_grad = 0.5f * v_grad + 0.25f * (v_grad_p + v_grad_n); - const bool is_vertical_greater = horiz_grad < verti_grad; + const bool is_vertical_greater = (horiz_grad < verti_grad) && + ((verti_grad - horiz_grad) > 1e-5); gradient[offset + elements_per_row * rows] = is_vertical_greater ? 0.06f : 0.57f; gradient[offset ] = is_vertical_greater ? 0.57f : 0.06f; @@ -647,7 +649,7 @@ kernel void substractResidual( const float pB ) { - const int gidx = get_global_id(0), gidy = get_global_id(1); + const int gidx = get_global_id(0) * 4, gidy = get_global_id(1); if(gidx >= cols || gidy >= rows) { return; @@ -658,12 +660,15 @@ kernel void substractResidual( mad24(gidy + rows, elements_per_row, gidx), mad24(gidy + 2 * rows, elements_per_row, gidx) }; - float vals[3] = {input[indices[0]], input[indices[1]], input[indices[2]]}; - float residu = pR * vals[0] + pG * vals[1] + pB * vals[2]; - - input[indices[0]] = vals[0] - residu; - input[indices[1]] = vals[1] - residu; - input[indices[2]] = vals[2] - residu; + float4 vals[3]; + vals[0] = vload4(0, input + indices[0]); + vals[1] = vload4(0, input + indices[1]); + vals[2] = vload4(0, input + indices[2]); + + float4 residu = pR * vals[0] + pG * vals[1] + pB * vals[2]; + vstore4(vals[0] - residu, 0, input + indices[0]); + vstore4(vals[1] - residu, 0, input + indices[1]); + vstore4(vals[2] - residu, 0, input + indices[2]); } ///// clipRGBOutput_0_maxInputValue ///// @@ -675,15 +680,15 @@ kernel void clipRGBOutput_0_maxInputValue( const float maxVal ) { - const int gidx = get_global_id(0), gidy = get_global_id(1); + const int gidx = get_global_id(0) * 4, gidy = get_global_id(1); if(gidx >= cols || gidy >= rows) { return; } const int offset = mad24(gidy, elements_per_row, gidx); - float val = input[offset]; + float4 val = vload4(0, input + offset); val = clamp(val, 0.0f, maxVal); - input[offset] = val; + vstore4(val, 0, input + offset); } //// normalizeGrayOutputNearZeroCentreredSigmoide //// @@ -697,15 +702,16 @@ kernel void normalizeGrayOutputNearZeroCentreredSigmoide( const float X0cube ) { - const int gidx = get_global_id(0), gidy = get_global_id(1); + const int gidx = get_global_id(0) * 4, gidy = get_global_id(1); if(gidx >= cols || gidy >= rows) { return; } const int offset = mad24(gidy, elements_per_row, gidx); - float currentCubeLuminance = input[offset]; + float4 currentCubeLuminance = vload4(0, input + offset); currentCubeLuminance = currentCubeLuminance * currentCubeLuminance * currentCubeLuminance; - output[offset] = currentCubeLuminance * X0cube / (X0cube + currentCubeLuminance); + float4 val = currentCubeLuminance * X0cube / (X0cube + currentCubeLuminance); + vstore4(val, 0, output + offset); } //// centerReductImageLuminance //// @@ -718,15 +724,16 @@ kernel void centerReductImageLuminance( const float std_dev ) { - const int gidx = get_global_id(0), gidy = get_global_id(1); + const int gidx = get_global_id(0) * 4, gidy = get_global_id(1); if(gidx >= cols || gidy >= rows) { return; } const int offset = mad24(gidy, elements_per_row, gidx); - float val = input[offset]; - input[offset] = (val - mean) / std_dev; + float4 val = vload4(0, input + offset); + val = (val - mean) / std_dev; + vstore4(val, 0, input + offset); } //// inverseValue //// @@ -737,13 +744,15 @@ kernel void inverseValue( const int elements_per_row ) { - const int gidx = get_global_id(0), gidy = get_global_id(1); + const int gidx = get_global_id(0) * 4, gidy = get_global_id(1); if(gidx >= cols || gidy >= rows) { return; } const int offset = mad24(gidy, elements_per_row, gidx); - input[offset] = 1.f / input[offset]; + float4 val = vload4(0, input + offset); + val = 1.f / val; + vstore4(val, 0, input + offset); } #define CV_PI 3.1415926535897932384626433832795 diff --git a/modules/bioinspired/src/precomp.hpp b/modules/bioinspired/src/precomp.hpp index 61aeb5409..d2e19bdef 100644 --- a/modules/bioinspired/src/precomp.hpp +++ b/modules/bioinspired/src/precomp.hpp @@ -48,13 +48,10 @@ #include "opencv2/core/utility.hpp" #include "opencv2/core/private.hpp" #include "opencv2/core/ocl.hpp" +#include "opencv2/core/opencl/ocl_defs.hpp" #include -#ifdef HAVE_OPENCV_OCL - #include "opencv2/ocl/private/util.hpp" -#endif - namespace cv { diff --git a/modules/bioinspired/src/retina.cpp b/modules/bioinspired/src/retina.cpp index 1e518f81b..deb390659 100644 --- a/modules/bioinspired/src/retina.cpp +++ b/modules/bioinspired/src/retina.cpp @@ -70,6 +70,7 @@ */ #include "precomp.hpp" #include "retinafilter.hpp" +#include "retina_ocl.hpp" #include #include #include @@ -292,11 +293,25 @@ private: bool _convertCvMat2ValarrayBuffer(InputArray inputMatToConvert, std::valarray &outputValarrayMatrix); +#ifdef HAVE_OPENCL + ocl::RetinaOCLImpl* _ocl_retina; + + bool ocl_run(InputArray inputImage); + bool ocl_getParvo(OutputArray retinaOutput_parvo); + bool ocl_getMagno(OutputArray retinaOutput_magno); + bool ocl_getParvoRAW(OutputArray retinaOutput_parvo); + bool ocl_getMagnoRAW(OutputArray retinaOutput_magno); +#endif }; // smart pointers allocation : -Ptr createRetina(Size inputSize){ return makePtr(inputSize); } -Ptr createRetina(Size inputSize, const bool colorMode, int colorSamplingMethod, const bool useRetinaLogSampling, const float reductionFactor, const float samplingStrenght){ +Ptr createRetina(Size inputSize) +{ + return makePtr(inputSize); +} + +Ptr createRetina(Size inputSize, const bool colorMode, int colorSamplingMethod, const bool useRetinaLogSampling, const float reductionFactor, const float samplingStrenght) +{ return makePtr(inputSize, colorMode, colorSamplingMethod, useRetinaLogSampling, reductionFactor, samplingStrenght); } @@ -306,18 +321,34 @@ RetinaImpl::RetinaImpl(const cv::Size inputSz) { _retinaFilter = 0; _init(inputSz, true, RETINA_COLOR_BAYER, false); +#ifdef HAVE_OPENCL + _ocl_retina = 0; + if (inputSz.width % 4 == 0) + _ocl_retina = new ocl::RetinaOCLImpl(inputSz); +#endif } RetinaImpl::RetinaImpl(const cv::Size inputSz, const bool colorMode, int colorSamplingMethod, const bool useRetinaLogSampling, const float reductionFactor, const float samplingStrenght) { _retinaFilter = 0; _init(inputSz, colorMode, colorSamplingMethod, useRetinaLogSampling, reductionFactor, samplingStrenght); +#ifdef HAVE_OPENCL + _ocl_retina = 0; + if (inputSz.width % 4 == 0) + _ocl_retina = new ocl::RetinaOCLImpl(inputSz, colorMode, colorSamplingMethod, + useRetinaLogSampling, reductionFactor, samplingStrenght); +#endif } RetinaImpl::~RetinaImpl() { if (_retinaFilter) delete _retinaFilter; + +#ifdef HAVE_OPENCL + if (_ocl_retina) + delete _ocl_retina; +#endif } /** @@ -529,8 +560,18 @@ void RetinaImpl::setupIPLMagnoChannel(const bool normaliseOutput, const float pa _retinaParameters.IplMagno.localAdaptintegration_k = localAdaptintegration_k; } +#ifdef HAVE_OPENCL +bool RetinaImpl::ocl_run(InputArray inputMatToConvert) +{ + _ocl_retina->run(inputMatToConvert); + return true; +} +#endif + void RetinaImpl::run(InputArray inputMatToConvert) { + CV_OCL_RUN((_ocl_retina != 0), ocl_run(inputMatToConvert)); + // first convert input image to the compatible format : std::valarray const bool colorMode = _convertCvMat2ValarrayBuffer(inputMatToConvert.getMat(), _inputBuffer); // process the retina @@ -559,8 +600,18 @@ void RetinaImpl::applyFastToneMapping(InputArray inputImage, OutputArray outputT } +#ifdef HAVE_OPENCL +bool RetinaImpl::ocl_getParvo(OutputArray retinaOutput_parvo) +{ + _ocl_retina->getParvo(retinaOutput_parvo); + return true; +} +#endif + void RetinaImpl::getParvo(OutputArray retinaOutput_parvo) { + CV_OCL_RUN((_ocl_retina != 0) && retinaOutput_parvo.isUMat(), ocl_getParvo(retinaOutput_parvo)); + if (_retinaFilter->getColorMode()) { // reallocate output buffer (if necessary) @@ -572,24 +623,57 @@ void RetinaImpl::getParvo(OutputArray retinaOutput_parvo) } //retinaOutput_parvo/=255.0; } + +#ifdef HAVE_OPENCL +bool RetinaImpl::ocl_getMagno(OutputArray retinaOutput_magno) +{ + _ocl_retina->getMagno(retinaOutput_magno); + return true; +} +#endif + void RetinaImpl::getMagno(OutputArray retinaOutput_magno) { + CV_OCL_RUN((_ocl_retina != 0) && retinaOutput_magno.isUMat(), ocl_getMagno(retinaOutput_magno)); + // reallocate output buffer (if necessary) _convertValarrayBuffer2cvMat(_retinaFilter->getMovingContours(), _retinaFilter->getOutputNBrows(), _retinaFilter->getOutputNBcolumns(), false, retinaOutput_magno); //retinaOutput_magno/=255.0; } +#ifdef HAVE_OPENCL +bool RetinaImpl::ocl_getMagnoRAW(OutputArray magnoOutputBufferCopy) +{ + _ocl_retina->getMagnoRAW(magnoOutputBufferCopy); + return true; +} +#endif + // original API level data accessors : copy buffers if size matches, reallocate if required void RetinaImpl::getMagnoRAW(OutputArray magnoOutputBufferCopy){ + + CV_OCL_RUN((_ocl_retina != 0) && magnoOutputBufferCopy.isUMat(), ocl_getMagnoRAW(magnoOutputBufferCopy)); + // get magno channel header const cv::Mat magnoChannel=cv::Mat(getMagnoRAW()); // copy data magnoChannel.copyTo(magnoOutputBufferCopy); } +#ifdef HAVE_OPENCL +bool RetinaImpl::ocl_getParvoRAW(OutputArray parvoOutputBufferCopy) +{ + _ocl_retina->getParvoRAW(parvoOutputBufferCopy); + return true; +} +#endif + void RetinaImpl::getParvoRAW(OutputArray parvoOutputBufferCopy){ + + CV_OCL_RUN((_ocl_retina != 0) && parvoOutputBufferCopy.isUMat(), ocl_getParvoRAW(parvoOutputBufferCopy)); + // get parvo channel header - const cv::Mat parvoChannel=cv::Mat(getMagnoRAW()); + const cv::Mat parvoChannel=cv::Mat(getParvoRAW()); // copy data parvoChannel.copyTo(parvoOutputBufferCopy); } @@ -649,7 +733,7 @@ void RetinaImpl::_convertValarrayBuffer2cvMat(const std::valarray &grayMa for (unsigned int j=0;j(pixel)=(unsigned char)*(valarrayPTR++); + outMat.at(pixel)=(unsigned char)cvRound(*(valarrayPTR++)); } } } @@ -665,9 +749,9 @@ void RetinaImpl::_convertValarrayBuffer2cvMat(const std::valarray &grayMa { cv::Point2d pixel(j,i); cv::Vec3b pixelValues; - pixelValues[2]=(unsigned char)*(valarrayPTR); - pixelValues[1]=(unsigned char)*(valarrayPTR+nbPixels); - pixelValues[0]=(unsigned char)*(valarrayPTR+doubleNBpixels); + pixelValues[2]=(unsigned char)cvRound(*(valarrayPTR)); + pixelValues[1]=(unsigned char)cvRound(*(valarrayPTR+nbPixels)); + pixelValues[0]=(unsigned char)cvRound(*(valarrayPTR+doubleNBpixels)); outMat.at(pixel)=pixelValues; } @@ -729,7 +813,15 @@ bool RetinaImpl::_convertCvMat2ValarrayBuffer(InputArray inputMat, std::valarray return imageNumberOfChannels>1; // return bool : false for gray level image processing, true for color mode } -void RetinaImpl::clearBuffers() { _retinaFilter->clearAllBuffers(); } +void RetinaImpl::clearBuffers() +{ +#ifdef HAVE_OPENCL + if (_ocl_retina != 0) + _ocl_retina->clearBuffers(); +#endif + + _retinaFilter->clearAllBuffers(); +} void RetinaImpl::activateMovingContoursProcessing(const bool activate) { _retinaFilter->activateMovingContoursProcessing(activate); } diff --git a/modules/bioinspired/src/retina_ocl.cpp b/modules/bioinspired/src/retina_ocl.cpp index 364a6ea09..41a26c72c 100644 --- a/modules/bioinspired/src/retina_ocl.cpp +++ b/modules/bioinspired/src/retina_ocl.cpp @@ -48,70 +48,34 @@ #include #include -#ifdef HAVE_OPENCV_OCL +#ifdef HAVE_OPENCL -#include "opencl_kernels.hpp" +#include "opencl_kernels_bioinspired.hpp" #define NOT_IMPLEMENTED CV_Error(cv::Error::StsNotImplemented, "Not implemented") -namespace cv +namespace { -static ocl::ProgramEntry retina_kernel = ocl::bioinspired::retina_kernel; + template + inline int sizeOfArray(const T(&)[N]) + { + return (int)N; + } + inline void ensureSizeIsEnough(int rows, int cols, int type, cv::UMat &m) + { + m.create(rows, cols, type, m.usageFlags); + } +} + +namespace cv +{ namespace bioinspired { namespace ocl { using namespace cv::ocl; -class RetinaOCLImpl : public Retina -{ -public: - RetinaOCLImpl(Size getInputSize); - RetinaOCLImpl(Size getInputSize, const bool colorMode, int colorSamplingMethod = RETINA_COLOR_BAYER, const bool useRetinaLogSampling = false, const double reductionFactor = 1.0, const double samplingStrenght = 10.0); - virtual ~RetinaOCLImpl(); - - Size getInputSize(); - Size getOutputSize(); - - void setup(String retinaParameterFile = "", const bool applyDefaultSetupOnFailure = true); - void setup(cv::FileStorage &fs, const bool applyDefaultSetupOnFailure = true); - void setup(RetinaParameters newParameters); - - RetinaOCLImpl::RetinaParameters getParameters(); - - const String printSetup(); - virtual void write( String fs ) const; - virtual void write( FileStorage& fs ) const; - - void setupOPLandIPLParvoChannel(const bool colorMode = true, const bool normaliseOutput = true, const float photoreceptorsLocalAdaptationSensitivity = 0.7, const float photoreceptorsTemporalConstant = 0.5, const float photoreceptorsSpatialConstant = 0.53, const float horizontalCellsGain = 0, const float HcellsTemporalConstant = 1, const float HcellsSpatialConstant = 7, const float ganglionCellsSensitivity = 0.7); - void setupIPLMagnoChannel(const bool normaliseOutput = true, const float parasolCells_beta = 0, const float parasolCells_tau = 0, const float parasolCells_k = 7, const float amacrinCellsTemporalCutFrequency = 1.2, const float V0CompressionParameter = 0.95, const float localAdaptintegration_tau = 0, const float localAdaptintegration_k = 7); - - void run(InputArray inputImage); - void getParvo(OutputArray retinaOutput_parvo); - void getMagno(OutputArray retinaOutput_magno); - - void setColorSaturation(const bool saturateColors = true, const float colorSaturationValue = 4.0); - void clearBuffers(); - void activateMovingContoursProcessing(const bool activate); - void activateContoursProcessing(const bool activate); - - // unimplemented interfaces: - void applyFastToneMapping(InputArray /*inputImage*/, OutputArray /*outputToneMappedImage*/) { NOT_IMPLEMENTED; } - void getParvoRAW(OutputArray /*retinaOutput_parvo*/) { NOT_IMPLEMENTED; } - void getMagnoRAW(OutputArray /*retinaOutput_magno*/) { NOT_IMPLEMENTED; } - const Mat getMagnoRAW() const { NOT_IMPLEMENTED; return Mat(); } - const Mat getParvoRAW() const { NOT_IMPLEMENTED; return Mat(); } - -protected: - RetinaParameters _retinaParameters; - cv::ocl::oclMat _inputBuffer; - RetinaFilter* _retinaFilter; - bool convertToColorPlanes(const cv::ocl::oclMat& input, cv::ocl::oclMat &output); - void convertToInterleaved(const cv::ocl::oclMat& input, bool colorMode, cv::ocl::oclMat &output); - void _init(const Size getInputSize, const bool colorMode, int colorSamplingMethod = RETINA_COLOR_BAYER, const bool useRetinaLogSampling = false, const double reductionFactor = 1.0, const double samplingStrenght = 10.0); -}; - RetinaOCLImpl::RetinaOCLImpl(const cv::Size inputSz) { _retinaFilter = 0; @@ -133,7 +97,7 @@ RetinaOCLImpl::~RetinaOCLImpl() } /** -* retreive retina input buffer size +* retrieve retina input buffer size */ Size RetinaOCLImpl::getInputSize() { @@ -141,7 +105,7 @@ Size RetinaOCLImpl::getInputSize() } /** -* retreive retina output buffer size +* retrieve retina output buffer size */ Size RetinaOCLImpl::getOutputSize() { @@ -154,7 +118,7 @@ void RetinaOCLImpl::setColorSaturation(const bool saturateColors, const float co _retinaFilter->setColorSaturation(saturateColors, colorSaturationValue); } -struct RetinaOCLImpl::RetinaParameters RetinaOCLImpl::getParameters() +struct RetinaParameters RetinaOCLImpl::getParameters() { return _retinaParameters; } @@ -170,7 +134,7 @@ void RetinaOCLImpl::setup(String retinaParameterFile, const bool applyDefaultSet } catch(Exception &e) { - std::cout << "RetinaOCLImpl::setup: wrong/unappropriate xml parameter file : error report :`n=>" << e.what() << std::endl; + std::cout << "RetinaOCLImpl::setup: wrong/inappropriate xml parameter file : error report :`n=>" << e.what() << std::endl; if (applyDefaultSetupOnFailure) { std::cout << "RetinaOCLImpl::setup: resetting retina with default parameters" << std::endl; @@ -191,7 +155,7 @@ void RetinaOCLImpl::setup(cv::FileStorage &fs, const bool applyDefaultSetupOnFai // read parameters file if it exists or apply default setup if asked for if (!fs.isOpened()) { - std::cout << "RetinaOCLImpl::setup: provided parameters file could not be open... skeeping configuration" << std::endl; + std::cout << "RetinaOCLImpl::setup: provided parameters file could not be open... skipping configuration" << std::endl; return; // implicit else case : retinaParameterFile could be open (it exists at least) } @@ -230,15 +194,15 @@ void RetinaOCLImpl::setup(cv::FileStorage &fs, const bool applyDefaultSetupOnFai setupOPLandIPLParvoChannel(); setupIPLMagnoChannel(); } - std::cout << "RetinaOCLImpl::setup: wrong/unappropriate xml parameter file : error report :`n=>" << e.what() << std::endl; + std::cout << "RetinaOCLImpl::setup: wrong/inappropriate xml parameter file : error report :`n=>" << e.what() << std::endl; std::cout << "=> keeping current parameters" << std::endl; } } -void RetinaOCLImpl::setup(cv::bioinspired::Retina::RetinaParameters newConfiguration) +void RetinaOCLImpl::setup(cv::bioinspired::RetinaParameters newConfiguration) { // simply copy structures - memcpy(&_retinaParameters, &newConfiguration, sizeof(cv::bioinspired::Retina::RetinaParameters)); + memcpy(&_retinaParameters, &newConfiguration, sizeof(cv::bioinspired::RetinaParameters)); // apply setup setupOPLandIPLParvoChannel(_retinaParameters.OPLandIplParvo.colorMode, _retinaParameters.OPLandIplParvo.normaliseOutput, _retinaParameters.OPLandIplParvo.photoreceptorsLocalAdaptationSensitivity, _retinaParameters.OPLandIplParvo.photoreceptorsTemporalConstant, _retinaParameters.OPLandIplParvo.photoreceptorsSpatialConstant, _retinaParameters.OPLandIplParvo.horizontalCellsGain, _retinaParameters.OPLandIplParvo.hcellsTemporalConstant, _retinaParameters.OPLandIplParvo.hcellsSpatialConstant, _retinaParameters.OPLandIplParvo.ganglionCellsSensitivity); setupIPLMagnoChannel(_retinaParameters.IplMagno.normaliseOutput, _retinaParameters.IplMagno.parasolCells_beta, _retinaParameters.IplMagno.parasolCells_tau, _retinaParameters.IplMagno.parasolCells_k, _retinaParameters.IplMagno.amacrinCellsTemporalCutFrequency, _retinaParameters.IplMagno.V0CompressionParameter, _retinaParameters.IplMagno.localAdaptintegration_tau, _retinaParameters.IplMagno.localAdaptintegration_k); @@ -321,7 +285,7 @@ void RetinaOCLImpl::setupOPLandIPLParvoChannel(const bool colorMode, const bool _retinaFilter->setParvoGanglionCellsLocalAdaptationSensitivity(ganglionCellsSensitivity); _retinaFilter->activateNormalizeParvoOutput_0_maxOutputValue(normaliseOutput); - // update parameters struture + // update parameters structure _retinaParameters.OPLandIplParvo.colorMode = colorMode; _retinaParameters.OPLandIplParvo.normaliseOutput = normaliseOutput; @@ -340,7 +304,7 @@ void RetinaOCLImpl::setupIPLMagnoChannel(const bool normaliseOutput, const float _retinaFilter->setMagnoCoefficientsTable(parasolCells_beta, parasolCells_tau, parasolCells_k, amacrinCellsTemporalCutFrequency, V0CompressionParameter, localAdaptintegration_tau, localAdaptintegration_k); _retinaFilter->activateNormalizeMagnoOutput_0_maxOutputValue(normaliseOutput); - // update parameters struture + // update parameters structure _retinaParameters.IplMagno.normaliseOutput = normaliseOutput; _retinaParameters.IplMagno.parasolCells_beta = parasolCells_beta; _retinaParameters.IplMagno.parasolCells_tau = parasolCells_tau; @@ -353,7 +317,7 @@ void RetinaOCLImpl::setupIPLMagnoChannel(const bool normaliseOutput, const float void RetinaOCLImpl::run(InputArray input) { - oclMat &inputMatToConvert = getOclMatRef(input); + UMat inputMatToConvert = input.getUMat(); bool colorMode = convertToColorPlanes(inputMatToConvert, _inputBuffer); // first convert input image to the compatible format : std::valarray // process the retina @@ -365,7 +329,7 @@ void RetinaOCLImpl::run(InputArray input) void RetinaOCLImpl::getParvo(OutputArray output) { - oclMat &retinaOutput_parvo = getOclMatRef(output); + UMat &retinaOutput_parvo = output.getUMatRef(); if (_retinaFilter->getColorMode()) { // reallocate output buffer (if necessary) @@ -380,12 +344,12 @@ void RetinaOCLImpl::getParvo(OutputArray output) } void RetinaOCLImpl::getMagno(OutputArray output) { - oclMat &retinaOutput_magno = getOclMatRef(output); + UMat &retinaOutput_magno = output.getUMatRef(); // reallocate output buffer (if necessary) convertToInterleaved(_retinaFilter->getMovingContours(), false, retinaOutput_magno); //retinaOutput_magno/=255.0; } -// private method called by constructirs +// private method called by constructors void RetinaOCLImpl::_init(const cv::Size inputSz, const bool colorMode, int colorSamplingMethod, const bool useRetinaLogSampling, const double reductionFactor, const double samplingStrenght) { // basic error check @@ -408,22 +372,22 @@ void RetinaOCLImpl::_init(const cv::Size inputSz, const bool colorMode, int colo _retinaFilter->clearAllBuffers(); } -bool RetinaOCLImpl::convertToColorPlanes(const oclMat& input, oclMat &output) +bool RetinaOCLImpl::convertToColorPlanes(const UMat& input, UMat &output) { - oclMat convert_input; + UMat convert_input; input.convertTo(convert_input, CV_32F); if(convert_input.channels() == 3 || convert_input.channels() == 4) { - ocl::ensureSizeIsEnough(int(_retinaFilter->getInputNBrows() * 4), - int(_retinaFilter->getInputNBcolumns()), CV_32FC1, output); - oclMat channel_splits[4] = - { - output(Rect(Point(0, _retinaFilter->getInputNBrows() * 2), getInputSize())), - output(Rect(Point(0, _retinaFilter->getInputNBrows()), getInputSize())), - output(Rect(Point(0, 0), getInputSize())), - output(Rect(Point(0, _retinaFilter->getInputNBrows() * 3), getInputSize())) - }; - ocl::split(convert_input, channel_splits); + ensureSizeIsEnough(int(_retinaFilter->getInputNBrows() * 4), + int(_retinaFilter->getInputNBcolumns()), CV_32FC1, output); + std::vector channel_splits; + channel_splits.reserve(4); + channel_splits.push_back(output(Rect(Point(0, _retinaFilter->getInputNBrows() * 2), getInputSize()))); + channel_splits.push_back(output(Rect(Point(0, _retinaFilter->getInputNBrows()), getInputSize()))); + channel_splits.push_back(output(Rect(Point(0, 0), getInputSize()))); + channel_splits.push_back(output(Rect(Point(0, _retinaFilter->getInputNBrows() * 3), getInputSize()))); + + cv::split(convert_input, channel_splits); return true; } else if(convert_input.channels() == 1) @@ -437,13 +401,13 @@ bool RetinaOCLImpl::convertToColorPlanes(const oclMat& input, oclMat &output) return false; } } -void RetinaOCLImpl::convertToInterleaved(const oclMat& input, bool colorMode, oclMat &output) +void RetinaOCLImpl::convertToInterleaved(const UMat& input, bool colorMode, UMat &output) { input.convertTo(output, CV_8U); if(colorMode) { int numOfSplits = input.rows / getInputSize().height; - std::vector channel_splits(numOfSplits); + std::vector channel_splits(numOfSplits); for(int i = 0; i < static_cast(channel_splits.size()); i ++) { channel_splits[i] = @@ -472,6 +436,29 @@ void RetinaOCLImpl::activateContoursProcessing(const bool activate) _retinaFilter->activateContoursProcessing(activate); } +void RetinaOCLImpl::getParvoRAW(OutputArray retinaOutput_parvo) +{ + UMat raw_parvo; + + if (_retinaFilter->getColorMode()) + raw_parvo = _retinaFilter->getColorOutput(); + else + raw_parvo = _retinaFilter->getContours(); + + raw_parvo.copyTo(retinaOutput_parvo); +} + +void RetinaOCLImpl::getMagnoRAW(OutputArray retinaOutput_magno) +{ + UMat raw_magno = _retinaFilter->getMovingContours(); + raw_magno.copyTo(retinaOutput_magno); +} + +// unimplemented interfaces: +void RetinaOCLImpl::applyFastToneMapping(InputArray /*inputImage*/, OutputArray /*outputToneMappedImage*/) { NOT_IMPLEMENTED; } +const Mat RetinaOCLImpl::getMagnoRAW() const { NOT_IMPLEMENTED; return Mat(); } +const Mat RetinaOCLImpl::getParvoRAW() const { NOT_IMPLEMENTED; return Mat(); } + /////////////////////////////////////// ///////// BasicRetinaFilter /////////// /////////////////////////////////////// @@ -534,72 +521,73 @@ void BasicRetinaFilter::setLPfilterParameters(const float beta, const float tau, _filteringCoeficientsTable[1 + tableOffset] = (1.0f - a) * (1.0f - a) * (1.0f - a) * (1.0f - a) / (1.0f + _beta); _filteringCoeficientsTable[2 + tableOffset] = tau; } -const oclMat &BasicRetinaFilter::runFilter_LocalAdapdation(const oclMat &inputFrame, const oclMat &localLuminance) +const UMat &BasicRetinaFilter::runFilter_LocalAdapdation(const UMat &inputFrame, const UMat &localLuminance) { _localLuminanceAdaptation(inputFrame, localLuminance, _filterOutput); return _filterOutput; } -void BasicRetinaFilter::runFilter_LocalAdapdation(const oclMat &inputFrame, const oclMat &localLuminance, oclMat &outputFrame) +void BasicRetinaFilter::runFilter_LocalAdapdation(const UMat &inputFrame, const UMat &localLuminance, UMat &outputFrame) { _localLuminanceAdaptation(inputFrame, localLuminance, outputFrame); } -const oclMat &BasicRetinaFilter::runFilter_LocalAdapdation_autonomous(const oclMat &inputFrame) +const UMat &BasicRetinaFilter::runFilter_LocalAdapdation_autonomous(const UMat &inputFrame) { _spatiotemporalLPfilter(inputFrame, _filterOutput); _localLuminanceAdaptation(inputFrame, _filterOutput, _filterOutput); return _filterOutput; } -void BasicRetinaFilter::runFilter_LocalAdapdation_autonomous(const oclMat &inputFrame, oclMat &outputFrame) +void BasicRetinaFilter::runFilter_LocalAdapdation_autonomous(const UMat &inputFrame, UMat &outputFrame) { _spatiotemporalLPfilter(inputFrame, _filterOutput); _localLuminanceAdaptation(inputFrame, _filterOutput, outputFrame); } -void BasicRetinaFilter::_localLuminanceAdaptation(oclMat &inputOutputFrame, const oclMat &localLuminance) +void BasicRetinaFilter::_localLuminanceAdaptation(UMat &inputOutputFrame, const UMat &localLuminance) { _localLuminanceAdaptation(inputOutputFrame, localLuminance, inputOutputFrame, false); } -void BasicRetinaFilter::_localLuminanceAdaptation(const oclMat &inputFrame, const oclMat &localLuminance, oclMat &outputFrame, const bool updateLuminanceMean) +void BasicRetinaFilter::_localLuminanceAdaptation(const UMat &inputFrame, const UMat &localLuminance, UMat &outputFrame, const bool updateLuminanceMean) { if (updateLuminanceMean) { - float meanLuminance = saturate_cast(ocl::sum(inputFrame)[0]) / getNBpixels(); + float meanLuminance = saturate_cast(cv::sum(inputFrame)[0]) / getNBpixels(); updateCompressionParameter(meanLuminance); } int elements_per_row = static_cast(inputFrame.step / inputFrame.elemSize()); - Context * ctx = Context::getContext(); - std::vector > args; - size_t globalSize[] = {_NBcols, _NBrows, 1}; - size_t localSize[] = {16, 16, 1}; + size_t globalSize[] = {(size_t)_NBcols / 4, (size_t)_NBrows}; + size_t localSize[] = {16, 16}; - args.push_back(std::make_pair(sizeof(cl_mem), &localLuminance.data)); - args.push_back(std::make_pair(sizeof(cl_mem), &inputFrame.data)); - args.push_back(std::make_pair(sizeof(cl_mem), &outputFrame.data)); - args.push_back(std::make_pair(sizeof(cl_int), &_NBcols)); - args.push_back(std::make_pair(sizeof(cl_int), &_NBrows)); - args.push_back(std::make_pair(sizeof(cl_int), &elements_per_row)); - args.push_back(std::make_pair(sizeof(cl_float), &_localLuminanceAddon)); - args.push_back(std::make_pair(sizeof(cl_float), &_localLuminanceFactor)); - args.push_back(std::make_pair(sizeof(cl_float), &_maxInputValue)); - openCLExecuteKernel(ctx, &retina_kernel, "localLuminanceAdaptation", globalSize, localSize, args, -1, -1); + Kernel kernel("localLuminanceAdaptation", ocl::bioinspired::retina_kernel_oclsrc); + kernel.args(ocl::KernelArg::PtrReadOnly(localLuminance), + ocl::KernelArg::PtrReadOnly(inputFrame), + ocl::KernelArg::PtrWriteOnly(outputFrame), + (int)_NBcols, (int)_NBrows, (int)elements_per_row, + (float)_localLuminanceAddon, (float)_localLuminanceFactor, (float)_maxInputValue); + kernel.run(sizeOfArray(globalSize), globalSize, localSize, false); } -const oclMat &BasicRetinaFilter::runFilter_LPfilter(const oclMat &inputFrame, const unsigned int filterIndex) +const UMat &BasicRetinaFilter::runFilter_LPfilter(const UMat &inputFrame, const unsigned int filterIndex) { _spatiotemporalLPfilter(inputFrame, _filterOutput, filterIndex); return _filterOutput; } -void BasicRetinaFilter::runFilter_LPfilter(const oclMat &inputFrame, oclMat &outputFrame, const unsigned int filterIndex) +void BasicRetinaFilter::runFilter_LPfilter(const UMat &inputFrame, UMat &outputFrame, const unsigned int filterIndex) { _spatiotemporalLPfilter(inputFrame, outputFrame, filterIndex); } -void BasicRetinaFilter::_spatiotemporalLPfilter(const oclMat &inputFrame, oclMat &LPfilterOutput, const unsigned int filterIndex) +void BasicRetinaFilter::_spatiotemporalLPfilter(const UMat &inputFrame, UMat &LPfilterOutput, const unsigned int filterIndex) +{ + _spatiotemporalLPfilter_h(inputFrame, LPfilterOutput, filterIndex); + _spatiotemporalLPfilter_v(LPfilterOutput, 0); +} + +void BasicRetinaFilter::_spatiotemporalLPfilter_h(const UMat &inputFrame, UMat &LPfilterOutput, const unsigned int filterIndex) { unsigned int coefTableOffset = filterIndex * 3; @@ -608,137 +596,88 @@ void BasicRetinaFilter::_spatiotemporalLPfilter(const oclMat &inputFrame, oclMat _tau = _filteringCoeficientsTable[2 + coefTableOffset]; _horizontalCausalFilter_addInput(inputFrame, LPfilterOutput); - _horizontalAnticausalFilter(LPfilterOutput); - _verticalCausalFilter(LPfilterOutput); - _verticalAnticausalFilter_multGain(LPfilterOutput); } -void BasicRetinaFilter::_horizontalCausalFilter_addInput(const oclMat &inputFrame, oclMat &outputFrame) +void BasicRetinaFilter::_spatiotemporalLPfilter_v(UMat &LPfilterOutput, const unsigned int multichannel) { - int elements_per_row = static_cast(inputFrame.step / inputFrame.elemSize()); - - Context * ctx = Context::getContext(); - std::vector > args; - size_t globalSize[] = {_NBrows, 1, 1}; - size_t localSize[] = {256, 1, 1}; - - args.push_back(std::make_pair(sizeof(cl_mem), &inputFrame.data)); - args.push_back(std::make_pair(sizeof(cl_mem), &outputFrame.data)); - args.push_back(std::make_pair(sizeof(cl_int), &_NBcols)); - args.push_back(std::make_pair(sizeof(cl_int), &_NBrows)); - args.push_back(std::make_pair(sizeof(cl_int), &elements_per_row)); - args.push_back(std::make_pair(sizeof(cl_int), &inputFrame.offset)); - args.push_back(std::make_pair(sizeof(cl_int), &inputFrame.offset)); - args.push_back(std::make_pair(sizeof(cl_float), &_tau)); - args.push_back(std::make_pair(sizeof(cl_float), &_a)); - openCLExecuteKernel(ctx, &retina_kernel, "horizontalCausalFilter_addInput", globalSize, localSize, args, -1, -1); -} - -void BasicRetinaFilter::_horizontalAnticausalFilter(oclMat &outputFrame) -{ - int elements_per_row = static_cast(outputFrame.step / outputFrame.elemSize()); - - Context * ctx = Context::getContext(); - std::vector > args; - size_t globalSize[] = {_NBrows, 1, 1}; - size_t localSize[] = {256, 1, 1}; - - args.push_back(std::make_pair(sizeof(cl_mem), &outputFrame.data)); - args.push_back(std::make_pair(sizeof(cl_int), &_NBcols)); - args.push_back(std::make_pair(sizeof(cl_int), &_NBrows)); - args.push_back(std::make_pair(sizeof(cl_int), &elements_per_row)); - args.push_back(std::make_pair(sizeof(cl_int), &outputFrame.offset)); - args.push_back(std::make_pair(sizeof(cl_float), &_a)); - openCLExecuteKernel(ctx, &retina_kernel, "horizontalAnticausalFilter", globalSize, localSize, args, -1, -1); + if (multichannel == 0) + _verticalCausalFilter(LPfilterOutput); + else + _verticalCausalFilter_multichannel(LPfilterOutput); } -void BasicRetinaFilter::_verticalCausalFilter(oclMat &outputFrame) +void BasicRetinaFilter::_horizontalCausalFilter_addInput(const UMat &inputFrame, UMat &outputFrame) { - int elements_per_row = static_cast(outputFrame.step / outputFrame.elemSize()); + int elements_per_row = static_cast(inputFrame.step / inputFrame.elemSize()); - Context * ctx = Context::getContext(); - std::vector > args; - size_t globalSize[] = {_NBcols, 1, 1}; - size_t localSize[] = {256, 1, 1}; + size_t globalSize[] = {(size_t)_NBrows}; + size_t localSize[] = { 256 }; - args.push_back(std::make_pair(sizeof(cl_mem), &outputFrame.data)); - args.push_back(std::make_pair(sizeof(cl_int), &_NBcols)); - args.push_back(std::make_pair(sizeof(cl_int), &_NBrows)); - args.push_back(std::make_pair(sizeof(cl_int), &elements_per_row)); - args.push_back(std::make_pair(sizeof(cl_int), &outputFrame.offset)); - args.push_back(std::make_pair(sizeof(cl_float), &_a)); - openCLExecuteKernel(ctx, &retina_kernel, "verticalCausalFilter", globalSize, localSize, args, -1, -1); + Kernel kernel("horizontalCausalFilter_addInput", ocl::bioinspired::retina_kernel_oclsrc); + kernel.args(ocl::KernelArg::PtrReadOnly(inputFrame), + ocl::KernelArg::PtrWriteOnly(outputFrame), + (int)_NBcols, (int)_NBrows, (int)elements_per_row, + (int)inputFrame.offset, (int)inputFrame.offset, + (float)_tau, (float)_a); + kernel.run(sizeOfArray(globalSize), globalSize, localSize, false); } -void BasicRetinaFilter::_verticalAnticausalFilter_multGain(oclMat &outputFrame) +void BasicRetinaFilter::_verticalCausalFilter(UMat &outputFrame) { int elements_per_row = static_cast(outputFrame.step / outputFrame.elemSize()); - Context * ctx = Context::getContext(); - std::vector > args; - size_t globalSize[] = {_NBcols, 1, 1}; - size_t localSize[] = {256, 1, 1}; + size_t globalSize[] = {(size_t)_NBcols / 2}; + size_t localSize[] = { 256 }; - args.push_back(std::make_pair(sizeof(cl_mem), &outputFrame.data)); - args.push_back(std::make_pair(sizeof(cl_int), &_NBcols)); - args.push_back(std::make_pair(sizeof(cl_int), &_NBrows)); - args.push_back(std::make_pair(sizeof(cl_int), &elements_per_row)); - args.push_back(std::make_pair(sizeof(cl_int), &outputFrame.offset)); - args.push_back(std::make_pair(sizeof(cl_float), &_a)); - args.push_back(std::make_pair(sizeof(cl_float), &_gain)); - openCLExecuteKernel(ctx, &retina_kernel, "verticalAnticausalFilter_multGain", globalSize, localSize, args, -1, -1); + Kernel kernel("verticalCausalFilter", ocl::bioinspired::retina_kernel_oclsrc); + kernel.args(ocl::KernelArg::PtrReadWrite(outputFrame), + (int)_NBcols, (int)_NBrows, (int)elements_per_row, + (int)outputFrame.offset, (float)_a, (float)_gain); + kernel.run(sizeOfArray(globalSize), globalSize, localSize, false); } -void BasicRetinaFilter::_horizontalAnticausalFilter_Irregular(oclMat &outputFrame, const oclMat &spatialConstantBuffer) +void BasicRetinaFilter::_verticalCausalFilter_multichannel(UMat &outputFrame) { int elements_per_row = static_cast(outputFrame.step / outputFrame.elemSize()); - Context * ctx = Context::getContext(); - std::vector > args; - size_t globalSize[] = {outputFrame.rows, 1, 1}; - size_t localSize[] = {256, 1, 1}; + size_t globalSize[] = {(size_t)_NBcols / 2}; + size_t localSize[] = { 256 }; - args.push_back(std::make_pair(sizeof(cl_mem), &outputFrame.data)); - args.push_back(std::make_pair(sizeof(cl_mem), &spatialConstantBuffer.data)); - args.push_back(std::make_pair(sizeof(cl_int), &outputFrame.cols)); - args.push_back(std::make_pair(sizeof(cl_int), &outputFrame.rows)); - args.push_back(std::make_pair(sizeof(cl_int), &elements_per_row)); - args.push_back(std::make_pair(sizeof(cl_int), &outputFrame.offset)); - args.push_back(std::make_pair(sizeof(cl_int), &spatialConstantBuffer.offset)); - openCLExecuteKernel(ctx, &retina_kernel, "horizontalAnticausalFilter_Irregular", globalSize, localSize, args, -1, -1); + Kernel kernel("verticalCausalFilter_multichannel", ocl::bioinspired::retina_kernel_oclsrc); + kernel.args(ocl::KernelArg::PtrReadWrite(outputFrame), + (int)_NBcols, (int)_NBrows, (int)elements_per_row, + (int)outputFrame.offset, (float)_a, (float)_gain); + kernel.run(sizeOfArray(globalSize), globalSize, localSize, false); } // vertical anticausal filter -void BasicRetinaFilter::_verticalCausalFilter_Irregular(oclMat &outputFrame, const oclMat &spatialConstantBuffer) +void BasicRetinaFilter::_verticalCausalFilter_Irregular(UMat &outputFrame, const UMat &spatialConstantBuffer) { int elements_per_row = static_cast(outputFrame.step / outputFrame.elemSize()); - Context * ctx = Context::getContext(); - std::vector > args; - size_t globalSize[] = {outputFrame.cols, 1, 1}; - size_t localSize[] = {256, 1, 1}; + size_t globalSize[] = {(size_t)outputFrame.cols / 2}; + size_t localSize[] = { 256 }; - args.push_back(std::make_pair(sizeof(cl_mem), &outputFrame.data)); - args.push_back(std::make_pair(sizeof(cl_mem), &spatialConstantBuffer.data)); - args.push_back(std::make_pair(sizeof(cl_int), &outputFrame.cols)); - args.push_back(std::make_pair(sizeof(cl_int), &outputFrame.rows)); - args.push_back(std::make_pair(sizeof(cl_int), &elements_per_row)); - args.push_back(std::make_pair(sizeof(cl_int), &outputFrame.offset)); - args.push_back(std::make_pair(sizeof(cl_int), &spatialConstantBuffer.offset)); - openCLExecuteKernel(ctx, &retina_kernel, "verticalCausalFilter_Irregular", globalSize, localSize, args, -1, -1); + Kernel kernel("verticalCausalFilter_Irregular", ocl::bioinspired::retina_kernel_oclsrc); + kernel.args(ocl::KernelArg::PtrReadWrite(outputFrame), + ocl::KernelArg::PtrReadWrite(spatialConstantBuffer), + (int)outputFrame.cols, (int)(outputFrame.rows / 3), + (int)elements_per_row, (int)outputFrame.offset, + (int)spatialConstantBuffer.offset, (float)_gain); + kernel.run(sizeOfArray(globalSize), globalSize, localSize, false); } -void normalizeGrayOutput_0_maxOutputValue(oclMat &inputOutputBuffer, const float maxOutputValue) +void normalizeGrayOutput_0_maxOutputValue(UMat &inputOutputBuffer, const float maxOutputValue) { double min_val, max_val; - ocl::minMax(inputOutputBuffer, &min_val, &max_val); + cv::minMaxLoc(inputOutputBuffer, &min_val, &max_val); float factor = maxOutputValue / static_cast(max_val - min_val); float offset = - static_cast(min_val) * factor; - ocl::multiply(factor, inputOutputBuffer, inputOutputBuffer); - ocl::add(inputOutputBuffer, offset, inputOutputBuffer); + cv::multiply(factor, inputOutputBuffer, inputOutputBuffer); + cv::add(inputOutputBuffer, offset, inputOutputBuffer); } -void normalizeGrayOutputCentredSigmoide(const float meanValue, const float sensitivity, oclMat &in, oclMat &out, const float maxValue) +void normalizeGrayOutputCentredSigmoide(const float meanValue, const float sensitivity, UMat &in, UMat &out, const float maxValue) { if (sensitivity == 1.0f) { @@ -749,63 +688,54 @@ void normalizeGrayOutputCentredSigmoide(const float meanValue, const float sensi float X0 = maxValue / (sensitivity - 1.0f); - Context * ctx = Context::getContext(); - std::vector > args; - size_t globalSize[] = {in.cols, out.rows, 1}; - size_t localSize[] = {16, 16, 1}; + size_t globalSize[] = {(size_t)in.cols / 4, (size_t)out.rows}; + size_t localSize[] = {16, 16}; int elements_per_row = static_cast(out.step / out.elemSize()); - args.push_back(std::make_pair(sizeof(cl_mem), &in.data)); - args.push_back(std::make_pair(sizeof(cl_mem), &out.data)); - args.push_back(std::make_pair(sizeof(cl_int), &in.cols)); - args.push_back(std::make_pair(sizeof(cl_int), &in.rows)); - args.push_back(std::make_pair(sizeof(cl_int), &elements_per_row)); - args.push_back(std::make_pair(sizeof(cl_float), &meanValue)); - args.push_back(std::make_pair(sizeof(cl_float), &X0)); - openCLExecuteKernel(ctx, &retina_kernel, "normalizeGrayOutputCentredSigmoide", globalSize, localSize, args, -1, -1); + Kernel kernel("normalizeGrayOutputCentredSigmoide", ocl::bioinspired::retina_kernel_oclsrc); + kernel.args(ocl::KernelArg::PtrReadOnly(in), + ocl::KernelArg::PtrWriteOnly(out), + (int)in.cols, (int)in.rows, (int)elements_per_row, + (float)meanValue, (float)X0); + kernel.run(sizeOfArray(globalSize), globalSize, localSize, false); } -void normalizeGrayOutputNearZeroCentreredSigmoide(oclMat &inputPicture, oclMat &outputBuffer, const float sensitivity, const float maxOutputValue) +void normalizeGrayOutputNearZeroCentreredSigmoide(UMat &inputPicture, UMat &outputBuffer, const float sensitivity, const float maxOutputValue) { float X0cube = sensitivity * sensitivity * sensitivity; - Context * ctx = Context::getContext(); - std::vector > args; - size_t globalSize[] = {inputPicture.cols, inputPicture.rows, 1}; - size_t localSize[] = {16, 16, 1}; + size_t globalSize[] = {(size_t)inputPicture.cols, (size_t)inputPicture.rows}; + size_t localSize[] = { 16, 16 }; int elements_per_row = static_cast(inputPicture.step / inputPicture.elemSize()); - args.push_back(std::make_pair(sizeof(cl_mem), &inputPicture.data)); - args.push_back(std::make_pair(sizeof(cl_mem), &outputBuffer.data)); - args.push_back(std::make_pair(sizeof(cl_int), &inputPicture.cols)); - args.push_back(std::make_pair(sizeof(cl_int), &inputPicture.rows)); - args.push_back(std::make_pair(sizeof(cl_int), &elements_per_row)); - args.push_back(std::make_pair(sizeof(cl_float), &maxOutputValue)); - args.push_back(std::make_pair(sizeof(cl_float), &X0cube)); - openCLExecuteKernel(ctx, &retina_kernel, "normalizeGrayOutputNearZeroCentreredSigmoide", globalSize, localSize, args, -1, -1); + + Kernel kernel("normalizeGrayOutputNearZeroCentreredSigmoide", ocl::bioinspired::retina_kernel_oclsrc); + kernel.args(ocl::KernelArg::PtrReadOnly(inputPicture), + ocl::KernelArg::PtrWriteOnly(outputBuffer), + (int)inputPicture.cols, (int)inputPicture.rows, (int)elements_per_row, + (float)maxOutputValue, (float)X0cube); + kernel.run(sizeOfArray(globalSize), globalSize, localSize, false); } -void centerReductImageLuminance(oclMat &inputoutput) +void centerReductImageLuminance(UMat &inputoutput) { Scalar mean, stddev; - cv::meanStdDev((Mat)inputoutput, mean, stddev); + cv::meanStdDev(inputoutput.getMat(ACCESS_READ), mean, stddev); - Context * ctx = Context::getContext(); - std::vector > args; - size_t globalSize[] = {inputoutput.cols, inputoutput.rows, 1}; - size_t localSize[] = {16, 16, 1}; + Context ctx = Context::getDefault(); + size_t globalSize[] = {(size_t)inputoutput.cols / 4, (size_t)inputoutput.rows}; + size_t localSize[] = {16, 16}; float f_mean = static_cast(mean[0]); float f_stddev = static_cast(stddev[0]); int elements_per_row = static_cast(inputoutput.step / inputoutput.elemSize()); - args.push_back(std::make_pair(sizeof(cl_mem), &inputoutput.data)); - args.push_back(std::make_pair(sizeof(cl_int), &inputoutput.cols)); - args.push_back(std::make_pair(sizeof(cl_int), &inputoutput.rows)); - args.push_back(std::make_pair(sizeof(cl_int), &elements_per_row)); - args.push_back(std::make_pair(sizeof(cl_float), &f_mean)); - args.push_back(std::make_pair(sizeof(cl_float), &f_stddev)); - openCLExecuteKernel(ctx, &retina_kernel, "centerReductImageLuminance", globalSize, localSize, args, -1, -1); + + Kernel kernel("centerReductImageLuminance", ocl::bioinspired::retina_kernel_oclsrc); + kernel.args(ocl::KernelArg::PtrReadWrite(inputoutput), + (int)inputoutput.cols, (int)inputoutput.rows, (int)elements_per_row, + (float)f_mean, (float)f_stddev); + kernel.run(sizeOfArray(globalSize), globalSize, localSize, false); } /////////////////////////////////////// @@ -873,7 +803,7 @@ void ParvoRetinaFilter::setOPLandParvoFiltersParameters(const float beta1, const setLPfilterParameters(0, tau1, k1, 2); } -const oclMat &ParvoRetinaFilter::runFilter(const oclMat &inputFrame, const bool useParvoOutput) +const UMat &ParvoRetinaFilter::runFilter(const UMat &inputFrame, const bool useParvoOutput) { _spatiotemporalLPfilter(inputFrame, _photoreceptorsOutput); _spatiotemporalLPfilter(_photoreceptorsOutput, _horizontalCellsOutput, 1); @@ -886,7 +816,7 @@ const oclMat &ParvoRetinaFilter::runFilter(const oclMat &inputFrame, const bool _localLuminanceAdaptation(_parvocellularOutputON, _localAdaptationON); _spatiotemporalLPfilter(_bipolarCellsOutputOFF, _localAdaptationOFF, 2); _localLuminanceAdaptation(_parvocellularOutputOFF, _localAdaptationOFF); - ocl::subtract(_parvocellularOutputON, _parvocellularOutputOFF, _parvocellularOutputONminusOFF); + cv::subtract(_parvocellularOutputON, _parvocellularOutputOFF, _parvocellularOutputONminusOFF); } return _parvocellularOutputONminusOFF; @@ -895,21 +825,18 @@ void ParvoRetinaFilter::_OPL_OnOffWaysComputing() { int elements_per_row = static_cast(_photoreceptorsOutput.step / _photoreceptorsOutput.elemSize()); - Context * ctx = Context::getContext(); - std::vector > args; - size_t globalSize[] = {(_photoreceptorsOutput.cols + 3) / 4, _photoreceptorsOutput.rows, 1}; - size_t localSize[] = {16, 16, 1}; + size_t globalSize[] = {((size_t)_photoreceptorsOutput.cols + 3) / 4, (size_t)_photoreceptorsOutput.rows}; + size_t localSize[] = { 16, 16 }; - args.push_back(std::make_pair(sizeof(cl_mem), &_photoreceptorsOutput.data)); - args.push_back(std::make_pair(sizeof(cl_mem), &_horizontalCellsOutput.data)); - args.push_back(std::make_pair(sizeof(cl_mem), &_bipolarCellsOutputON.data)); - args.push_back(std::make_pair(sizeof(cl_mem), &_bipolarCellsOutputOFF.data)); - args.push_back(std::make_pair(sizeof(cl_mem), &_parvocellularOutputON.data)); - args.push_back(std::make_pair(sizeof(cl_mem), &_parvocellularOutputOFF.data)); - args.push_back(std::make_pair(sizeof(cl_int), &_photoreceptorsOutput.cols)); - args.push_back(std::make_pair(sizeof(cl_int), &_photoreceptorsOutput.rows)); - args.push_back(std::make_pair(sizeof(cl_int), &elements_per_row)); - openCLExecuteKernel(ctx, &retina_kernel, "OPL_OnOffWaysComputing", globalSize, localSize, args, -1, -1); + Kernel kernel("OPL_OnOffWaysComputing", ocl::bioinspired::retina_kernel_oclsrc); + kernel.args(ocl::KernelArg::PtrReadOnly(_photoreceptorsOutput), + ocl::KernelArg::PtrReadOnly(_horizontalCellsOutput), + ocl::KernelArg::PtrWriteOnly(_bipolarCellsOutputON), + ocl::KernelArg::PtrWriteOnly(_bipolarCellsOutputOFF), + ocl::KernelArg::PtrWriteOnly(_parvocellularOutputON), + ocl::KernelArg::PtrWriteOnly(_parvocellularOutputOFF), + (int)_photoreceptorsOutput.cols, (int)_photoreceptorsOutput.rows, (int)elements_per_row); + kernel.run(sizeOfArray(globalSize), globalSize, localSize, false); } /////////////////////////////////////// @@ -978,31 +905,28 @@ void MagnoRetinaFilter::setCoefficientsTable(const float parasolCells_beta, cons } void MagnoRetinaFilter::_amacrineCellsComputing( - const oclMat &OPL_ON, - const oclMat &OPL_OFF + const UMat &OPL_ON, + const UMat &OPL_OFF ) { int elements_per_row = static_cast(OPL_ON.step / OPL_ON.elemSize()); - Context * ctx = Context::getContext(); - std::vector > args; - size_t globalSize[] = {OPL_ON.cols, OPL_ON.rows, 1}; - size_t localSize[] = {16, 16, 1}; + size_t globalSize[] = {(size_t)OPL_ON.cols / 4, (size_t)OPL_ON.rows}; + size_t localSize[] = { 16, 16 }; - args.push_back(std::make_pair(sizeof(cl_mem), &OPL_ON.data)); - args.push_back(std::make_pair(sizeof(cl_mem), &OPL_OFF.data)); - args.push_back(std::make_pair(sizeof(cl_mem), &_previousInput_ON.data)); - args.push_back(std::make_pair(sizeof(cl_mem), &_previousInput_OFF.data)); - args.push_back(std::make_pair(sizeof(cl_mem), &_amacrinCellsTempOutput_ON.data)); - args.push_back(std::make_pair(sizeof(cl_mem), &_amacrinCellsTempOutput_OFF.data)); - args.push_back(std::make_pair(sizeof(cl_int), &OPL_ON.cols)); - args.push_back(std::make_pair(sizeof(cl_int), &OPL_ON.rows)); - args.push_back(std::make_pair(sizeof(cl_int), &elements_per_row)); - args.push_back(std::make_pair(sizeof(cl_float), &_temporalCoefficient)); - openCLExecuteKernel(ctx, &retina_kernel, "amacrineCellsComputing", globalSize, localSize, args, -1, -1); + Kernel kernel("amacrineCellsComputing", ocl::bioinspired::retina_kernel_oclsrc); + kernel.args(ocl::KernelArg::PtrReadOnly(OPL_ON), + ocl::KernelArg::PtrReadOnly(OPL_OFF), + ocl::KernelArg::PtrReadWrite(_previousInput_ON), + ocl::KernelArg::PtrReadWrite(_previousInput_OFF), + ocl::KernelArg::PtrReadWrite(_amacrinCellsTempOutput_ON), + ocl::KernelArg::PtrReadWrite(_amacrinCellsTempOutput_OFF), + (int)OPL_ON.cols, (int)OPL_ON.rows, (int)elements_per_row, + (float)_temporalCoefficient); + kernel.run(sizeOfArray(globalSize), globalSize, localSize, false); } -const oclMat &MagnoRetinaFilter::runFilter(const oclMat &OPL_ON, const oclMat &OPL_OFF) +const UMat &MagnoRetinaFilter::runFilter(const UMat &OPL_ON, const UMat &OPL_OFF) { // Compute the high pass temporal filter _amacrineCellsComputing(OPL_ON, OPL_OFF); @@ -1018,7 +942,7 @@ const oclMat &MagnoRetinaFilter::runFilter(const oclMat &OPL_ON, const oclMat &O _spatiotemporalLPfilter(_magnoXOutputOFF, _localProcessBufferOFF, 1); _localLuminanceAdaptation(_magnoXOutputOFF, _localProcessBufferOFF); - _magnoYOutput = _magnoXOutputON + _magnoXOutputOFF; + add(_magnoXOutputON, _magnoXOutputOFF, _magnoYOutput); return _magnoYOutput; } @@ -1029,7 +953,7 @@ const oclMat &MagnoRetinaFilter::runFilter(const oclMat &OPL_ON, const oclMat &O // define an array of ROI headers of input x #define MAKE_OCLMAT_SLICES(x, n) \ - oclMat x##_slices[n];\ + UMat x##_slices[n];\ for(int _SLICE_INDEX_ = 0; _SLICE_INDEX_ < n; _SLICE_INDEX_ ++)\ {\ x##_slices[_SLICE_INDEX_] = x(getROI(_SLICE_INDEX_));\ @@ -1107,20 +1031,17 @@ void RetinaColor::resize(const unsigned int NBrows, const unsigned int NBcolumns clearAllBuffers(); } -static void inverseValue(oclMat &input) +static void inverseValue(UMat &input) { int elements_per_row = static_cast(input.step / input.elemSize()); - Context * ctx = Context::getContext(); - std::vector > args; - size_t globalSize[] = {input.cols, input.rows, 1}; - size_t localSize[] = {16, 16, 1}; + size_t globalSize[] = {(size_t)input.cols / 4, (size_t)input.rows}; + size_t localSize[] = { 16, 16 }; - args.push_back(std::make_pair(sizeof(cl_mem), &input.data)); - args.push_back(std::make_pair(sizeof(cl_int), &input.cols)); - args.push_back(std::make_pair(sizeof(cl_int), &input.rows)); - args.push_back(std::make_pair(sizeof(cl_int), &elements_per_row)); - openCLExecuteKernel(ctx, &retina_kernel, "inverseValue", globalSize, localSize, args, -1, -1); + Kernel kernel("inverseValue", ocl::bioinspired::retina_kernel_oclsrc); + kernel.args(ocl::KernelArg::PtrReadWrite(input), + (int)input.cols, (int)input.rows, (int)elements_per_row); + kernel.run(sizeOfArray(globalSize), globalSize, localSize, false); } void RetinaColor::_initColorSampling() @@ -1129,76 +1050,69 @@ void RetinaColor::_initColorSampling() _pR = _pB = 0.25; _pG = 0.5; // filling the mosaic buffer: - _RGBmosaic = 0; - Mat tmp_mat(_NBrows * 3, _NBcols, CV_32FC1); + Mat tmp_mat(_NBrows * 3, _NBcols, CV_32FC1, Scalar(0)); float * tmp_mat_ptr = tmp_mat.ptr(); - tmp_mat.setTo(0); for (unsigned int index = 0 ; index < getNBpixels(); ++index) { tmp_mat_ptr[bayerSampleOffset(index)] = 1.0; } - _RGBmosaic.upload(tmp_mat); + tmp_mat.copyTo(_RGBmosaic); // computing photoreceptors local density MAKE_OCLMAT_SLICES(_RGBmosaic, 3); MAKE_OCLMAT_SLICES(_colorLocalDensity, 3); _colorLocalDensity.setTo(0); - _spatiotemporalLPfilter(_RGBmosaic_slices[0], _colorLocalDensity_slices[0]); - _spatiotemporalLPfilter(_RGBmosaic_slices[1], _colorLocalDensity_slices[1]); - _spatiotemporalLPfilter(_RGBmosaic_slices[2], _colorLocalDensity_slices[2]); + _spatiotemporalLPfilter_h(_RGBmosaic_slices[0], _colorLocalDensity_slices[0]); + _spatiotemporalLPfilter_h(_RGBmosaic_slices[1], _colorLocalDensity_slices[1]); + _spatiotemporalLPfilter_h(_RGBmosaic_slices[2], _colorLocalDensity_slices[2]); + _spatiotemporalLPfilter_v(_colorLocalDensity, 1); - //_colorLocalDensity = oclMat(_colorLocalDensity.size(), _colorLocalDensity.type(), 1.f) / _colorLocalDensity; + //_colorLocalDensity = UMat(_colorLocalDensity.size(), _colorLocalDensity.type(), 1.f) / _colorLocalDensity; inverseValue(_colorLocalDensity); _objectInit = true; } -static void demultiplex(const oclMat &input, oclMat &ouput) +static void demultiplex(const UMat &input, UMat &ouput) { int elements_per_row = static_cast(input.step / input.elemSize()); - Context * ctx = Context::getContext(); - std::vector > args; - size_t globalSize[] = {input.cols, input.rows, 1}; - size_t localSize[] = {16, 16, 1}; + size_t globalSize[] = {(size_t)input.cols / 4, (size_t)input.rows}; + size_t localSize[] = { 16, 16 }; - args.push_back(std::make_pair(sizeof(cl_mem), &input.data)); - args.push_back(std::make_pair(sizeof(cl_mem), &ouput.data)); - args.push_back(std::make_pair(sizeof(cl_int), &input.cols)); - args.push_back(std::make_pair(sizeof(cl_int), &input.rows)); - args.push_back(std::make_pair(sizeof(cl_int), &elements_per_row)); - openCLExecuteKernel(ctx, &retina_kernel, "runColorDemultiplexingBayer", globalSize, localSize, args, -1, -1); + Kernel kernel("runColorDemultiplexingBayer", ocl::bioinspired::retina_kernel_oclsrc); + kernel.args(ocl::KernelArg::PtrReadOnly(input), + ocl::KernelArg::PtrWriteOnly(ouput), + (int)input.cols, (int)input.rows, (int)elements_per_row); + kernel.run(sizeOfArray(globalSize), globalSize, localSize, false); } static void normalizePhotoDensity( - const oclMat &chroma, - const oclMat &colorDensity, - const oclMat &multiplex, - oclMat &ocl_luma, - oclMat &demultiplex, + const UMat &chroma, + const UMat &colorDensity, + const UMat &multiplex, + UMat &ocl_luma, + UMat &demultiplex, const float pG ) { int elements_per_row = static_cast(ocl_luma.step / ocl_luma.elemSize()); - Context * ctx = Context::getContext(); - std::vector > args; - size_t globalSize[] = {ocl_luma.cols, ocl_luma.rows, 1}; - size_t localSize[] = {16, 16, 1}; + size_t globalSize[] = {(size_t)ocl_luma.cols / 4, (size_t)ocl_luma.rows}; + size_t localSize[] = { 16, 16 }; - args.push_back(std::make_pair(sizeof(cl_mem), &chroma.data)); - args.push_back(std::make_pair(sizeof(cl_mem), &colorDensity.data)); - args.push_back(std::make_pair(sizeof(cl_mem), &multiplex.data)); - args.push_back(std::make_pair(sizeof(cl_mem), &ocl_luma.data)); - args.push_back(std::make_pair(sizeof(cl_mem), &demultiplex.data)); - args.push_back(std::make_pair(sizeof(cl_int), &ocl_luma.cols)); - args.push_back(std::make_pair(sizeof(cl_int), &ocl_luma.rows)); - args.push_back(std::make_pair(sizeof(cl_int), &elements_per_row)); - args.push_back(std::make_pair(sizeof(cl_float), &pG)); - openCLExecuteKernel(ctx, &retina_kernel, "normalizePhotoDensity", globalSize, localSize, args, -1, -1); + Kernel kernel("normalizePhotoDensity", ocl::bioinspired::retina_kernel_oclsrc); + kernel.args(ocl::KernelArg::PtrReadOnly(chroma), + ocl::KernelArg::PtrReadOnly(colorDensity), + ocl::KernelArg::PtrReadOnly(multiplex), + ocl::KernelArg::PtrWriteOnly(ocl_luma), + ocl::KernelArg::PtrWriteOnly(demultiplex), + (int)ocl_luma.cols, (int)ocl_luma.rows, (int)elements_per_row, + (float)pG); + kernel.run(sizeOfArray(globalSize), globalSize, localSize, false); } static void substractResidual( - oclMat &colorDemultiplex, + UMat &colorDemultiplex, float pR, float pG, float pB @@ -1206,43 +1120,35 @@ static void substractResidual( { int elements_per_row = static_cast(colorDemultiplex.step / colorDemultiplex.elemSize()); - Context * ctx = Context::getContext(); - std::vector > args; int rows = colorDemultiplex.rows / 3, cols = colorDemultiplex.cols; - size_t globalSize[] = {cols, rows, 1}; - size_t localSize[] = {16, 16, 1}; + size_t globalSize[] = {(size_t)cols / 4, (size_t)rows}; + size_t localSize[] = { 16, 16 }; - args.push_back(std::make_pair(sizeof(cl_mem), &colorDemultiplex.data)); - args.push_back(std::make_pair(sizeof(cl_int), &cols)); - args.push_back(std::make_pair(sizeof(cl_int), &rows)); - args.push_back(std::make_pair(sizeof(cl_int), &elements_per_row)); - args.push_back(std::make_pair(sizeof(cl_float), &pR)); - args.push_back(std::make_pair(sizeof(cl_float), &pG)); - args.push_back(std::make_pair(sizeof(cl_float), &pB)); - openCLExecuteKernel(ctx, &retina_kernel, "substractResidual", globalSize, localSize, args, -1, -1); + Kernel kernel("substractResidual", ocl::bioinspired::retina_kernel_oclsrc); + kernel.args(ocl::KernelArg::PtrReadWrite(colorDemultiplex), + (int)cols, (int)rows, (int)elements_per_row, + (float)pR, (float)pG, (float)pB); + kernel.run(sizeOfArray(globalSize), globalSize, localSize, false); } -static void demultiplexAssign(const oclMat& input, const oclMat& output) +static void demultiplexAssign(const UMat& input, const UMat& output) { // only supports bayer int elements_per_row = static_cast(input.step / input.elemSize()); - Context * ctx = Context::getContext(); - std::vector > args; int rows = input.rows / 3, cols = input.cols; - size_t globalSize[] = {cols, rows, 1}; - size_t localSize[] = {16, 16, 1}; + size_t globalSize[] = {(size_t)cols, (size_t)rows}; + size_t localSize[] = { 16, 16 }; - args.push_back(std::make_pair(sizeof(cl_mem), &input.data)); - args.push_back(std::make_pair(sizeof(cl_mem), &output.data)); - args.push_back(std::make_pair(sizeof(cl_int), &cols)); - args.push_back(std::make_pair(sizeof(cl_int), &rows)); - args.push_back(std::make_pair(sizeof(cl_int), &elements_per_row)); - openCLExecuteKernel(ctx, &retina_kernel, "demultiplexAssign", globalSize, localSize, args, -1, -1); + Kernel kernel("demultiplexAssign", ocl::bioinspired::retina_kernel_oclsrc); + kernel.args(ocl::KernelArg::PtrReadOnly(input), + ocl::KernelArg::PtrWriteOnly(output), + (int)cols, (int)rows, (int)elements_per_row); + kernel.run(sizeOfArray(globalSize), globalSize, localSize, false); } void RetinaColor::runColorDemultiplexing( - const oclMat &ocl_multiplexed_input, + const UMat &ocl_multiplexed_input, const bool adaptiveFiltering, const float maxInputValue ) @@ -1262,9 +1168,10 @@ void RetinaColor::runColorDemultiplexing( CV_Assert(adaptiveFiltering == false); } - _spatiotemporalLPfilter(_demultiplexedTempBuffer_slices[0], _chrominance_slices[0]); - _spatiotemporalLPfilter(_demultiplexedTempBuffer_slices[1], _chrominance_slices[1]); - _spatiotemporalLPfilter(_demultiplexedTempBuffer_slices[2], _chrominance_slices[2]); + _spatiotemporalLPfilter_h(_demultiplexedTempBuffer_slices[0], _chrominance_slices[0]); + _spatiotemporalLPfilter_h(_demultiplexedTempBuffer_slices[1], _chrominance_slices[1]); + _spatiotemporalLPfilter_h(_demultiplexedTempBuffer_slices[2], _chrominance_slices[2]); + _spatiotemporalLPfilter_v(_chrominance, 1); if (!adaptiveFiltering)// compute the gradient on the luminance { @@ -1277,27 +1184,39 @@ void RetinaColor::runColorDemultiplexing( // compute the gradient of the luminance _computeGradient(_luminance, _imageGradient); - _adaptiveSpatialLPfilter(_RGBmosaic_slices[0], _imageGradient, _chrominance_slices[0]); - _adaptiveSpatialLPfilter(_RGBmosaic_slices[1], _imageGradient, _chrominance_slices[1]); - _adaptiveSpatialLPfilter(_RGBmosaic_slices[2], _imageGradient, _chrominance_slices[2]); + _adaptiveSpatialLPfilter_h(_RGBmosaic_slices[0], _imageGradient, _chrominance_slices[0]); + _adaptiveSpatialLPfilter_h(_RGBmosaic_slices[1], _imageGradient, _chrominance_slices[1]); + _adaptiveSpatialLPfilter_h(_RGBmosaic_slices[2], _imageGradient, _chrominance_slices[2]); + _adaptiveSpatialLPfilter_v(_imageGradient, _chrominance); - _adaptiveSpatialLPfilter(_demultiplexedTempBuffer_slices[0], _imageGradient, _demultiplexedColorFrame_slices[0]); - _adaptiveSpatialLPfilter(_demultiplexedTempBuffer_slices[1], _imageGradient, _demultiplexedColorFrame_slices[1]); - _adaptiveSpatialLPfilter(_demultiplexedTempBuffer_slices[2], _imageGradient, _demultiplexedColorFrame_slices[2]); + _adaptiveSpatialLPfilter_h(_demultiplexedTempBuffer_slices[0], _imageGradient, _demultiplexedColorFrame_slices[0]); + _adaptiveSpatialLPfilter_h(_demultiplexedTempBuffer_slices[1], _imageGradient, _demultiplexedColorFrame_slices[1]); + _adaptiveSpatialLPfilter_h(_demultiplexedTempBuffer_slices[2], _imageGradient, _demultiplexedColorFrame_slices[2]); + _adaptiveSpatialLPfilter_v(_imageGradient, _demultiplexedColorFrame); - _demultiplexedColorFrame /= _chrominance; // per element division + divide(_demultiplexedColorFrame, _chrominance, _demultiplexedColorFrame); substractResidual(_demultiplexedColorFrame, _pR, _pG, _pB); runColorMultiplexing(_demultiplexedColorFrame, _tempMultiplexedFrame); _demultiplexedTempBuffer.setTo(0); - _luminance = ocl_multiplexed_input - _tempMultiplexedFrame; + subtract(ocl_multiplexed_input, _tempMultiplexedFrame, _luminance); demultiplexAssign(_demultiplexedColorFrame, _demultiplexedTempBuffer); - for(int i = 0; i < 3; i ++) - { - _spatiotemporalLPfilter(_demultiplexedTempBuffer_slices[i], _demultiplexedTempBuffer_slices[i]); - _demultiplexedColorFrame_slices[i] = _demultiplexedTempBuffer_slices[i] * _colorLocalDensity_slices[i] + _luminance; - } + _spatiotemporalLPfilter_h(_demultiplexedTempBuffer_slices[0], _demultiplexedTempBuffer_slices[0]); + _spatiotemporalLPfilter_h(_demultiplexedTempBuffer_slices[1], _demultiplexedTempBuffer_slices[1]); + _spatiotemporalLPfilter_h(_demultiplexedTempBuffer_slices[2], _demultiplexedTempBuffer_slices[2]); + _spatiotemporalLPfilter_v(_demultiplexedTempBuffer, 1); + + multiply(_demultiplexedTempBuffer, _colorLocalDensity, _demultiplexedColorFrame); + + std::vector m; + UMat _luminance_concat; + + m.push_back(_luminance); + m.push_back(_luminance); + m.push_back(_luminance); + vconcat(m, _luminance_concat); + add(_demultiplexedColorFrame, _luminance_concat, _demultiplexedColorFrame); } // eliminate saturated colors by simple clipping values to the input range clipRGBOutput_0_maxInputValue(_demultiplexedColorFrame, maxInputValue); @@ -1307,44 +1226,38 @@ void RetinaColor::runColorDemultiplexing( ocl::normalizeGrayOutputCentredSigmoide(128, maxInputValue, _demultiplexedColorFrame, _demultiplexedColorFrame); } } -void RetinaColor::runColorMultiplexing(const oclMat &demultiplexedInputFrame, oclMat &multiplexedFrame) +void RetinaColor::runColorMultiplexing(const UMat &demultiplexedInputFrame, UMat &multiplexedFrame) { int elements_per_row = static_cast(multiplexedFrame.step / multiplexedFrame.elemSize()); - Context * ctx = Context::getContext(); - std::vector > args; - size_t globalSize[] = {multiplexedFrame.cols, multiplexedFrame.rows, 1}; - size_t localSize[] = {16, 16, 1}; + size_t globalSize[] = {(size_t)multiplexedFrame.cols / 4, (size_t)multiplexedFrame.rows}; + size_t localSize[] = { 16, 16 }; - args.push_back(std::make_pair(sizeof(cl_mem), &demultiplexedInputFrame.data)); - args.push_back(std::make_pair(sizeof(cl_mem), &multiplexedFrame.data)); - args.push_back(std::make_pair(sizeof(cl_int), &multiplexedFrame.cols)); - args.push_back(std::make_pair(sizeof(cl_int), &multiplexedFrame.rows)); - args.push_back(std::make_pair(sizeof(cl_int), &elements_per_row)); - openCLExecuteKernel(ctx, &retina_kernel, "runColorMultiplexingBayer", globalSize, localSize, args, -1, -1); + Kernel kernel("runColorMultiplexingBayer", ocl::bioinspired::retina_kernel_oclsrc); + kernel.args(ocl::KernelArg::PtrReadOnly(demultiplexedInputFrame), + ocl::KernelArg::PtrWriteOnly(multiplexedFrame), + (int)multiplexedFrame.cols, (int)multiplexedFrame.rows, (int)elements_per_row); + kernel.run(sizeOfArray(globalSize), globalSize, localSize, false); } -void RetinaColor::clipRGBOutput_0_maxInputValue(oclMat &inputOutputBuffer, const float maxInputValue) +void RetinaColor::clipRGBOutput_0_maxInputValue(UMat &inputOutputBuffer, const float maxInputValue) { // the kernel is equivalent to: //ocl::threshold(inputOutputBuffer, inputOutputBuffer, maxInputValue, maxInputValue, THRESH_TRUNC); //ocl::threshold(inputOutputBuffer, inputOutputBuffer, 0, 0, THRESH_TOZERO); int elements_per_row = static_cast(inputOutputBuffer.step / inputOutputBuffer.elemSize()); - Context * ctx = Context::getContext(); - std::vector > args; - size_t globalSize[] = {_NBcols, inputOutputBuffer.rows, 1}; - size_t localSize[] = {16, 16, 1}; + size_t globalSize[] = {(size_t)_NBcols / 4, (size_t)inputOutputBuffer.rows}; + size_t localSize[] = { 16, 16 }; - args.push_back(std::make_pair(sizeof(cl_mem), &inputOutputBuffer.data)); - args.push_back(std::make_pair(sizeof(cl_int), &_NBcols)); - args.push_back(std::make_pair(sizeof(cl_int), &inputOutputBuffer.rows)); - args.push_back(std::make_pair(sizeof(cl_int), &elements_per_row)); - args.push_back(std::make_pair(sizeof(cl_float), &maxInputValue)); - openCLExecuteKernel(ctx, &retina_kernel, "clipRGBOutput_0_maxInputValue", globalSize, localSize, args, -1, -1); + Kernel kernel("clipRGBOutput_0_maxInputValue", ocl::bioinspired::retina_kernel_oclsrc); + kernel.args(ocl::KernelArg::PtrReadWrite(inputOutputBuffer), + (int)_NBcols, (int)inputOutputBuffer.rows, (int)elements_per_row, + (float)maxInputValue); + kernel.run(sizeOfArray(globalSize), globalSize, localSize, false); } -void RetinaColor::_adaptiveSpatialLPfilter(const oclMat &inputFrame, const oclMat &gradient, oclMat &outputFrame) +void RetinaColor::_adaptiveSpatialLPfilter_h(const UMat &inputFrame, const UMat &gradient, UMat &outputFrame) { /**********/ _gain = (1 - 0.57f) * (1 - 0.57f) * (1 - 0.06f) * (1 - 0.06f); @@ -1352,69 +1265,41 @@ void RetinaColor::_adaptiveSpatialLPfilter(const oclMat &inputFrame, const oclMa // launch the serie of 1D directional filters in order to compute the 2D low pass filter // -> horizontal filters work with the first layer of imageGradient _adaptiveHorizontalCausalFilter_addInput(inputFrame, gradient, outputFrame); - _horizontalAnticausalFilter_Irregular(outputFrame, gradient); - // -> horizontal filters work with the second layer of imageGradient - _verticalCausalFilter_Irregular(outputFrame, gradient(getROI(1))); - _adaptiveVerticalAnticausalFilter_multGain(gradient, outputFrame); } -void RetinaColor::_adaptiveHorizontalCausalFilter_addInput(const oclMat &inputFrame, const oclMat &gradient, oclMat &outputFrame) +void RetinaColor::_adaptiveSpatialLPfilter_v(const UMat &gradient, UMat &outputFrame) { - int elements_per_row = static_cast(inputFrame.step / inputFrame.elemSize()); - - Context * ctx = Context::getContext(); - std::vector > args; - size_t globalSize[] = {_NBrows, 1, 1}; - size_t localSize[] = {256, 1, 1}; - - args.push_back(std::make_pair(sizeof(cl_mem), &inputFrame.data)); - args.push_back(std::make_pair(sizeof(cl_mem), &gradient.data)); - args.push_back(std::make_pair(sizeof(cl_mem), &outputFrame.data)); - args.push_back(std::make_pair(sizeof(cl_int), &_NBcols)); - args.push_back(std::make_pair(sizeof(cl_int), &_NBrows)); - args.push_back(std::make_pair(sizeof(cl_int), &elements_per_row)); - args.push_back(std::make_pair(sizeof(cl_int), &inputFrame.offset)); - args.push_back(std::make_pair(sizeof(cl_int), &gradient.offset)); - args.push_back(std::make_pair(sizeof(cl_int), &outputFrame.offset)); - openCLExecuteKernel(ctx, &retina_kernel, "adaptiveHorizontalCausalFilter_addInput", globalSize, localSize, args, -1, -1); + _verticalCausalFilter_Irregular(outputFrame, gradient(getROI(1))); } -void RetinaColor::_adaptiveVerticalAnticausalFilter_multGain(const oclMat &gradient, oclMat &outputFrame) +void RetinaColor::_adaptiveHorizontalCausalFilter_addInput(const UMat &inputFrame, const UMat &gradient, UMat &outputFrame) { - int elements_per_row = static_cast(outputFrame.step / outputFrame.elemSize()); - - Context * ctx = Context::getContext(); - std::vector > args; - size_t globalSize[] = {_NBcols, 1, 1}; - size_t localSize[] = {256, 1, 1}; + int elements_per_row = static_cast(inputFrame.step / inputFrame.elemSize()); - int gradOffset = gradient.offset + static_cast(gradient.step * _NBrows); + size_t globalSize[] = {(size_t)_NBrows}; + size_t localSize[] = { 256 }; - args.push_back(std::make_pair(sizeof(cl_mem), &gradient.data)); - args.push_back(std::make_pair(sizeof(cl_mem), &outputFrame.data)); - args.push_back(std::make_pair(sizeof(cl_int), &_NBcols)); - args.push_back(std::make_pair(sizeof(cl_int), &_NBrows)); - args.push_back(std::make_pair(sizeof(cl_int), &elements_per_row)); - args.push_back(std::make_pair(sizeof(cl_int), &gradOffset)); - args.push_back(std::make_pair(sizeof(cl_int), &outputFrame.offset)); - args.push_back(std::make_pair(sizeof(cl_float), &_gain)); - openCLExecuteKernel(ctx, &retina_kernel, "adaptiveVerticalAnticausalFilter_multGain", globalSize, localSize, args, -1, -1); + Kernel kernel("adaptiveHorizontalCausalFilter_addInput", ocl::bioinspired::retina_kernel_oclsrc); + kernel.args(ocl::KernelArg::PtrReadOnly(inputFrame), + ocl::KernelArg::PtrReadOnly(gradient), + ocl::KernelArg::PtrWriteOnly(outputFrame), + (int)_NBcols, (int)_NBrows, (int)elements_per_row, (int)inputFrame.offset, + (int)gradient.offset, (int)outputFrame.offset); + kernel.run(sizeOfArray(globalSize), globalSize, localSize, false); } -void RetinaColor::_computeGradient(const oclMat &luminance, oclMat &gradient) + +void RetinaColor::_computeGradient(const UMat &luminance, UMat &gradient) { int elements_per_row = static_cast(luminance.step / luminance.elemSize()); - Context * ctx = Context::getContext(); - std::vector > args; - size_t globalSize[] = {_NBcols, _NBrows, 1}; - size_t localSize[] = {16, 16, 1}; + size_t globalSize[] = {(size_t)_NBcols, (size_t)_NBrows}; + size_t localSize[] = { 16, 16 }; - args.push_back(std::make_pair(sizeof(cl_mem), &luminance.data)); - args.push_back(std::make_pair(sizeof(cl_mem), &gradient.data)); - args.push_back(std::make_pair(sizeof(cl_int), &_NBcols)); - args.push_back(std::make_pair(sizeof(cl_int), &_NBrows)); - args.push_back(std::make_pair(sizeof(cl_int), &elements_per_row)); - openCLExecuteKernel(ctx, &retina_kernel, "computeGradient", globalSize, localSize, args, -1, -1); + Kernel kernel("computeGradient", ocl::bioinspired::retina_kernel_oclsrc); + kernel.args(ocl::KernelArg::PtrReadOnly(luminance), + ocl::KernelArg::PtrWriteOnly(gradient), + (int)_NBcols, (int)_NBrows, (int)elements_per_row); + kernel.run(sizeOfArray(globalSize), globalSize, localSize, false); } /////////////////////////////////////// @@ -1500,7 +1385,7 @@ void RetinaFilter::setGlobalParameters(const float OPLspatialResponse1, const fl _setInitPeriodCount(); } -bool RetinaFilter::checkInput(const oclMat &input, const bool) +bool RetinaFilter::checkInput(const UMat &input, const bool) { BasicRetinaFilter *inputTarget = &_photoreceptorsPrefilter; @@ -1518,7 +1403,7 @@ bool RetinaFilter::checkInput(const oclMat &input, const bool) } // main function that runs the filter for a given input frame -bool RetinaFilter::runFilter(const oclMat &imageInput, const bool useAdaptiveFiltering, const bool processRetinaParvoMagnoMapping, const bool useColorMode, const bool inputIsColorMultiplexed) +bool RetinaFilter::runFilter(const UMat &imageInput, const bool useAdaptiveFiltering, const bool processRetinaParvoMagnoMapping, const bool useColorMode, const bool inputIsColorMultiplexed) { // preliminary check bool processSuccess = true; @@ -1537,8 +1422,8 @@ bool RetinaFilter::runFilter(const oclMat &imageInput, const bool useAdaptiveFil _useColorMode = useColorMode; - oclMat selectedPhotoreceptorsLocalAdaptationInput = imageInput; - oclMat selectedPhotoreceptorsColorInput = imageInput; + UMat selectedPhotoreceptorsLocalAdaptationInput = imageInput; + UMat selectedPhotoreceptorsColorInput = imageInput; //********** Following is input data specific photoreceptors processing if (useColorMode && (!inputIsColorMultiplexed)) // not multiplexed color input case @@ -1592,7 +1477,7 @@ bool RetinaFilter::runFilter(const oclMat &imageInput, const bool useAdaptiveFil return processSuccess; } -const oclMat &RetinaFilter::getContours() +const UMat &RetinaFilter::getContours() { if (_useColorMode) { @@ -1605,8 +1490,8 @@ const oclMat &RetinaFilter::getContours() } void RetinaFilter::_processRetinaParvoMagnoMapping() { - oclMat parvo = _ParvoRetinaFilter.getOutput(); - oclMat magno = _MagnoRetinaFilter.getOutput(); + UMat parvo = _ParvoRetinaFilter.getOutput(); + UMat magno = _MagnoRetinaFilter.getOutput(); int halfRows = parvo.rows / 2; int halfCols = parvo.cols / 2; @@ -1614,30 +1499,19 @@ void RetinaFilter::_processRetinaParvoMagnoMapping() int elements_per_row = static_cast(parvo.step / parvo.elemSize()); - Context * ctx = Context::getContext(); - std::vector > args; - size_t globalSize[] = {parvo.cols, parvo.rows, 1}; - size_t localSize[] = {16, 16, 1}; - - args.push_back(std::make_pair(sizeof(cl_mem), &parvo.data)); - args.push_back(std::make_pair(sizeof(cl_mem), &magno.data)); - args.push_back(std::make_pair(sizeof(cl_int), &parvo.cols)); - args.push_back(std::make_pair(sizeof(cl_int), &parvo.rows)); - args.push_back(std::make_pair(sizeof(cl_int), &halfCols)); - args.push_back(std::make_pair(sizeof(cl_int), &halfRows)); - args.push_back(std::make_pair(sizeof(cl_int), &elements_per_row)); - args.push_back(std::make_pair(sizeof(cl_float), &minDistance)); - openCLExecuteKernel(ctx, &retina_kernel, "processRetinaParvoMagnoMapping", globalSize, localSize, args, -1, -1); -} -} /* namespace ocl */ + size_t globalSize[] = {(size_t)parvo.cols, (size_t)parvo.rows}; + size_t localSize[] = { 16, 16 }; -Ptr createRetina_OCL(Size getInputSize){ return makePtr(getInputSize); } -Ptr createRetina_OCL(Size getInputSize, const bool colorMode, int colorSamplingMethod, const bool useRetinaLogSampling, const double reductionFactor, const double samplingStrenght) -{ - return makePtr(getInputSize, colorMode, colorSamplingMethod, useRetinaLogSampling, reductionFactor, samplingStrenght); + Kernel kernel("processRetinaParvoMagnoMapping", ocl::bioinspired::retina_kernel_oclsrc); + kernel.args(ocl::KernelArg::PtrReadOnly(parvo), + ocl::KernelArg::PtrReadOnly(magno), + (int)parvo.cols, (int)parvo.rows, (int)halfCols, + (int)halfRows, (int)elements_per_row, (float)minDistance); + kernel.run(sizeOfArray(globalSize), globalSize, localSize, false); } +} /* namespace ocl */ } /* namespace bioinspired */ } /* namespace cv */ -#endif /* #ifdef HAVE_OPENCV_OCL */ +#endif /* #ifdef HAVE_OPENCL */ diff --git a/modules/bioinspired/src/retina_ocl.hpp b/modules/bioinspired/src/retina_ocl.hpp index 11da48b93..777e67001 100644 --- a/modules/bioinspired/src/retina_ocl.hpp +++ b/modules/bioinspired/src/retina_ocl.hpp @@ -47,8 +47,9 @@ #define __OCL_RETINA_HPP__ #include "precomp.hpp" +#include "opencv2/bioinspired/retina.hpp" -#ifdef HAVE_OPENCV_OCL +#ifdef HAVE_OPENCL // please refer to c++ headers for API comments namespace cv @@ -57,10 +58,10 @@ namespace bioinspired { namespace ocl { -void normalizeGrayOutputCentredSigmoide(const float meanValue, const float sensitivity, cv::ocl::oclMat &in, cv::ocl::oclMat &out, const float maxValue = 255.f); -void normalizeGrayOutput_0_maxOutputValue(cv::ocl::oclMat &inputOutputBuffer, const float maxOutputValue = 255.0); -void normalizeGrayOutputNearZeroCentreredSigmoide(cv::ocl::oclMat &inputPicture, cv::ocl::oclMat &outputBuffer, const float sensitivity = 40, const float maxOutputValue = 255.0f); -void centerReductImageLuminance(cv::ocl::oclMat &inputOutputBuffer); +void normalizeGrayOutputCentredSigmoide(const float meanValue, const float sensitivity, UMat &in, UMat &out, const float maxValue = 255.f); +void normalizeGrayOutput_0_maxOutputValue(UMat &inputOutputBuffer, const float maxOutputValue = 255.0); +void normalizeGrayOutputNearZeroCentreredSigmoide(UMat &inputPicture, UMat &outputBuffer, const float sensitivity = 40, const float maxOutputValue = 255.0f); +void centerReductImageLuminance(UMat &inputOutputBuffer); class BasicRetinaFilter { @@ -81,13 +82,13 @@ public: clearSecondaryBuffer(); } void resize(const unsigned int NBrows, const unsigned int NBcolumns); - const cv::ocl::oclMat &runFilter_LPfilter(const cv::ocl::oclMat &inputFrame, const unsigned int filterIndex = 0); - void runFilter_LPfilter(const cv::ocl::oclMat &inputFrame, cv::ocl::oclMat &outputFrame, const unsigned int filterIndex = 0); - void runFilter_LPfilter_Autonomous(cv::ocl::oclMat &inputOutputFrame, const unsigned int filterIndex = 0); - const cv::ocl::oclMat &runFilter_LocalAdapdation(const cv::ocl::oclMat &inputOutputFrame, const cv::ocl::oclMat &localLuminance); - void runFilter_LocalAdapdation(const cv::ocl::oclMat &inputFrame, const cv::ocl::oclMat &localLuminance, cv::ocl::oclMat &outputFrame); - const cv::ocl::oclMat &runFilter_LocalAdapdation_autonomous(const cv::ocl::oclMat &inputFrame); - void runFilter_LocalAdapdation_autonomous(const cv::ocl::oclMat &inputFrame, cv::ocl::oclMat &outputFrame); + const UMat &runFilter_LPfilter(const UMat &inputFrame, const unsigned int filterIndex = 0); + void runFilter_LPfilter(const UMat &inputFrame, UMat &outputFrame, const unsigned int filterIndex = 0); + void runFilter_LPfilter_Autonomous(UMat &inputOutputFrame, const unsigned int filterIndex = 0); + const UMat &runFilter_LocalAdapdation(const UMat &inputOutputFrame, const UMat &localLuminance); + void runFilter_LocalAdapdation(const UMat &inputFrame, const UMat &localLuminance, UMat &outputFrame); + const UMat &runFilter_LocalAdapdation_autonomous(const UMat &inputFrame); + void runFilter_LocalAdapdation_autonomous(const UMat &inputFrame, UMat &outputFrame); void setLPfilterParameters(const float beta, const float tau, const float k, const unsigned int filterIndex = 0); inline void setV0CompressionParameter(const float v0, const float maxInputValue, const float) { @@ -122,7 +123,7 @@ public: { return _v0 / _maxInputValue; } - inline const cv::ocl::oclMat &getOutput() const + inline const UMat &getOutput() const { return _filterOutput; } @@ -166,8 +167,8 @@ protected: unsigned int _halfNBrows; unsigned int _halfNBcolumns; - cv::ocl::oclMat _filterOutput; - cv::ocl::oclMat _localBuffer; + UMat _filterOutput; + UMat _localBuffer; std::valarray _filteringCoeficientsTable; float _v0; @@ -180,19 +181,19 @@ protected: float _tau; float _gain; - void _spatiotemporalLPfilter(const cv::ocl::oclMat &inputFrame, cv::ocl::oclMat &LPfilterOutput, const unsigned int coefTableOffset = 0); - float _squaringSpatiotemporalLPfilter(const cv::ocl::oclMat &inputFrame, cv::ocl::oclMat &outputFrame, const unsigned int filterIndex = 0); - void _spatiotemporalLPfilter_Irregular(const cv::ocl::oclMat &inputFrame, cv::ocl::oclMat &outputFrame, const unsigned int filterIndex = 0); - void _localSquaringSpatioTemporalLPfilter(const cv::ocl::oclMat &inputFrame, cv::ocl::oclMat &LPfilterOutput, const unsigned int *integrationAreas, const unsigned int filterIndex = 0); - void _localLuminanceAdaptation(const cv::ocl::oclMat &inputFrame, const cv::ocl::oclMat &localLuminance, cv::ocl::oclMat &outputFrame, const bool updateLuminanceMean = true); - void _localLuminanceAdaptation(cv::ocl::oclMat &inputOutputFrame, const cv::ocl::oclMat &localLuminance); - void _localLuminanceAdaptationPosNegValues(const cv::ocl::oclMat &inputFrame, const cv::ocl::oclMat &localLuminance, float *outputFrame); - void _horizontalCausalFilter_addInput(const cv::ocl::oclMat &inputFrame, cv::ocl::oclMat &outputFrame); - void _horizontalAnticausalFilter(cv::ocl::oclMat &outputFrame); - void _verticalCausalFilter(cv::ocl::oclMat &outputFrame); - void _horizontalAnticausalFilter_Irregular(cv::ocl::oclMat &outputFrame, const cv::ocl::oclMat &spatialConstantBuffer); - void _verticalCausalFilter_Irregular(cv::ocl::oclMat &outputFrame, const cv::ocl::oclMat &spatialConstantBuffer); - void _verticalAnticausalFilter_multGain(cv::ocl::oclMat &outputFrame); + void _spatiotemporalLPfilter(const UMat &inputFrame, UMat &LPfilterOutput, const unsigned int coefTableOffset = 0); + void _spatiotemporalLPfilter_h(const UMat &inputFrame, UMat &LPfilterOutput, const unsigned int coefTableOffset = 0); + void _spatiotemporalLPfilter_v(UMat &LPfilterOutput, const unsigned int multichannel = 0); + float _squaringSpatiotemporalLPfilter(const UMat &inputFrame, UMat &outputFrame, const unsigned int filterIndex = 0); + void _spatiotemporalLPfilter_Irregular(const UMat &inputFrame, UMat &outputFrame, const unsigned int filterIndex = 0); + void _localSquaringSpatioTemporalLPfilter(const UMat &inputFrame, UMat &LPfilterOutput, const unsigned int *integrationAreas, const unsigned int filterIndex = 0); + void _localLuminanceAdaptation(const UMat &inputFrame, const UMat &localLuminance, UMat &outputFrame, const bool updateLuminanceMean = true); + void _localLuminanceAdaptation(UMat &inputOutputFrame, const UMat &localLuminance); + void _localLuminanceAdaptationPosNegValues(const UMat &inputFrame, const UMat &localLuminance, float *outputFrame); + void _horizontalCausalFilter_addInput(const UMat &inputFrame, UMat &outputFrame); + void _verticalCausalFilter(UMat &outputFrame); + void _verticalCausalFilter_multichannel(UMat &outputFrame); + void _verticalCausalFilter_Irregular(UMat &outputFrame, const UMat &spatialConstantBuffer); }; class MagnoRetinaFilter: public BasicRetinaFilter @@ -204,17 +205,17 @@ public: void resize(const unsigned int NBrows, const unsigned int NBcolumns); void setCoefficientsTable(const float parasolCells_beta, const float parasolCells_tau, const float parasolCells_k, const float amacrinCellsTemporalCutFrequency, const float localAdaptIntegration_tau, const float localAdaptIntegration_k); - const cv::ocl::oclMat &runFilter(const cv::ocl::oclMat &OPL_ON, const cv::ocl::oclMat &OPL_OFF); + const UMat &runFilter(const UMat &OPL_ON, const UMat &OPL_OFF); - inline const cv::ocl::oclMat &getMagnoON() const + inline const UMat &getMagnoON() const { return _magnoXOutputON; } - inline const cv::ocl::oclMat &getMagnoOFF() const + inline const UMat &getMagnoOFF() const { return _magnoXOutputOFF; } - inline const cv::ocl::oclMat &getMagnoYsaturated() const + inline const UMat &getMagnoYsaturated() const { return _magnoYsaturated; } @@ -227,19 +228,19 @@ public: return this->_filteringCoeficientsTable[2]; } private: - cv::ocl::oclMat _previousInput_ON; - cv::ocl::oclMat _previousInput_OFF; - cv::ocl::oclMat _amacrinCellsTempOutput_ON; - cv::ocl::oclMat _amacrinCellsTempOutput_OFF; - cv::ocl::oclMat _magnoXOutputON; - cv::ocl::oclMat _magnoXOutputOFF; - cv::ocl::oclMat _localProcessBufferON; - cv::ocl::oclMat _localProcessBufferOFF; - cv::ocl::oclMat _magnoYOutput; - cv::ocl::oclMat _magnoYsaturated; + UMat _previousInput_ON; + UMat _previousInput_OFF; + UMat _amacrinCellsTempOutput_ON; + UMat _amacrinCellsTempOutput_OFF; + UMat _magnoXOutputON; + UMat _magnoXOutputOFF; + UMat _localProcessBufferON; + UMat _localProcessBufferOFF; + UMat _magnoYOutput; + UMat _magnoYsaturated; float _temporalCoefficient; - void _amacrineCellsComputing(const cv::ocl::oclMat &OPL_ON, const cv::ocl::oclMat &OPL_OFF); + void _amacrineCellsComputing(const UMat &OPL_ON, const UMat &OPL_OFF); }; class ParvoRetinaFilter: public BasicRetinaFilter @@ -255,34 +256,34 @@ public: { BasicRetinaFilter::setLPfilterParameters(0, tau, k, 2); } - const cv::ocl::oclMat &runFilter(const cv::ocl::oclMat &inputFrame, const bool useParvoOutput = true); + const UMat &runFilter(const UMat &inputFrame, const bool useParvoOutput = true); - inline const cv::ocl::oclMat &getPhotoreceptorsLPfilteringOutput() const + inline const UMat &getPhotoreceptorsLPfilteringOutput() const { return _photoreceptorsOutput; } - inline const cv::ocl::oclMat &getHorizontalCellsOutput() const + inline const UMat &getHorizontalCellsOutput() const { return _horizontalCellsOutput; } - inline const cv::ocl::oclMat &getParvoON() const + inline const UMat &getParvoON() const { return _parvocellularOutputON; } - inline const cv::ocl::oclMat &getParvoOFF() const + inline const UMat &getParvoOFF() const { return _parvocellularOutputOFF; } - inline const cv::ocl::oclMat &getBipolarCellsON() const + inline const UMat &getBipolarCellsON() const { return _bipolarCellsOutputON; } - inline const cv::ocl::oclMat &getBipolarCellsOFF() const + inline const UMat &getBipolarCellsOFF() const { return _bipolarCellsOutputOFF; } @@ -297,15 +298,15 @@ public: return this->_filteringCoeficientsTable[5]; } private: - cv::ocl::oclMat _photoreceptorsOutput; - cv::ocl::oclMat _horizontalCellsOutput; - cv::ocl::oclMat _parvocellularOutputON; - cv::ocl::oclMat _parvocellularOutputOFF; - cv::ocl::oclMat _bipolarCellsOutputON; - cv::ocl::oclMat _bipolarCellsOutputOFF; - cv::ocl::oclMat _localAdaptationOFF; - cv::ocl::oclMat _localAdaptationON; - cv::ocl::oclMat _parvocellularOutputONminusOFF; + UMat _photoreceptorsOutput; + UMat _horizontalCellsOutput; + UMat _parvocellularOutputON; + UMat _parvocellularOutputOFF; + UMat _bipolarCellsOutputON; + UMat _bipolarCellsOutputOFF; + UMat _localAdaptationOFF; + UMat _localAdaptationON; + UMat _parvocellularOutputONminusOFF; void _OPL_OnOffWaysComputing(); }; class RetinaColor: public BasicRetinaFilter @@ -316,12 +317,12 @@ public: void clearAllBuffers(); void resize(const unsigned int NBrows, const unsigned int NBcolumns); - inline void runColorMultiplexing(const cv::ocl::oclMat &inputRGBFrame) + inline void runColorMultiplexing(const UMat &inputRGBFrame) { runColorMultiplexing(inputRGBFrame, _multiplexedFrame); } - void runColorMultiplexing(const cv::ocl::oclMat &demultiplexedInputFrame, cv::ocl::oclMat &multiplexedFrame); - void runColorDemultiplexing(const cv::ocl::oclMat &multiplexedColorFrame, const bool adaptiveFiltering = false, const float maxInputValue = 255.0); + void runColorMultiplexing(const UMat &demultiplexedInputFrame, UMat &multiplexedFrame); + void runColorDemultiplexing(const UMat &multiplexedColorFrame, const bool adaptiveFiltering = false, const float maxInputValue = 255.0); void setColorSaturation(const bool saturateColors = true, const float colorSaturationValue = 4.0) { @@ -334,29 +335,29 @@ public: setLPfilterParameters(beta, tau, k); } - bool applyKrauskopfLMS2Acr1cr2Transform(cv::ocl::oclMat &result); - bool applyLMS2LabTransform(cv::ocl::oclMat &result); - inline const cv::ocl::oclMat &getMultiplexedFrame() const + bool applyKrauskopfLMS2Acr1cr2Transform(UMat &result); + bool applyLMS2LabTransform(UMat &result); + inline const UMat &getMultiplexedFrame() const { return _multiplexedFrame; } - inline const cv::ocl::oclMat &getDemultiplexedColorFrame() const + inline const UMat &getDemultiplexedColorFrame() const { return _demultiplexedColorFrame; } - inline const cv::ocl::oclMat &getLuminance() const + inline const UMat &getLuminance() const { return _luminance; } - inline const cv::ocl::oclMat &getChrominance() const + inline const UMat &getChrominance() const { return _chrominance; } - void clipRGBOutput_0_maxInputValue(cv::ocl::oclMat &inputOutputBuffer, const float maxOutputValue = 255.0); + void clipRGBOutput_0_maxInputValue(UMat &inputOutputBuffer, const float maxOutputValue = 255.0); void normalizeRGBOutput_0_maxOutputValue(const float maxOutputValue = 255.0); - inline void setDemultiplexedColorFrame(const cv::ocl::oclMat &demultiplexedImage) + inline void setDemultiplexedColorFrame(const UMat &demultiplexedImage) { _demultiplexedColorFrame = demultiplexedImage; } @@ -372,26 +373,26 @@ protected: int _samplingMethod; bool _saturateColors; float _colorSaturationValue; - cv::ocl::oclMat _luminance; - cv::ocl::oclMat _multiplexedFrame; - cv::ocl::oclMat _RGBmosaic; - cv::ocl::oclMat _tempMultiplexedFrame; - cv::ocl::oclMat _demultiplexedTempBuffer; - cv::ocl::oclMat _demultiplexedColorFrame; - cv::ocl::oclMat _chrominance; - cv::ocl::oclMat _colorLocalDensity; - cv::ocl::oclMat _imageGradient; + UMat _luminance; + UMat _multiplexedFrame; + UMat _RGBmosaic; + UMat _tempMultiplexedFrame; + UMat _demultiplexedTempBuffer; + UMat _demultiplexedColorFrame; + UMat _chrominance; + UMat _colorLocalDensity; + UMat _imageGradient; float _pR, _pG, _pB; bool _objectInit; void _initColorSampling(); - void _adaptiveSpatialLPfilter(const cv::ocl::oclMat &inputFrame, const cv::ocl::oclMat &gradient, cv::ocl::oclMat &outputFrame); - void _adaptiveHorizontalCausalFilter_addInput(const cv::ocl::oclMat &inputFrame, const cv::ocl::oclMat &gradient, cv::ocl::oclMat &outputFrame); - void _adaptiveVerticalAnticausalFilter_multGain(const cv::ocl::oclMat &gradient, cv::ocl::oclMat &outputFrame); - void _computeGradient(const cv::ocl::oclMat &luminance, cv::ocl::oclMat &gradient); + void _adaptiveSpatialLPfilter_h(const UMat &inputFrame, const UMat &gradient, UMat &outputFrame); + void _adaptiveSpatialLPfilter_v(const UMat &gradient, UMat &outputFrame); + void _adaptiveHorizontalCausalFilter_addInput(const UMat &inputFrame, const UMat &gradient, UMat &outputFrame); + void _computeGradient(const UMat &luminance, UMat &gradient); void _normalizeOutputs_0_maxOutputValue(void); - void _applyImageColorSpaceConversion(const cv::ocl::oclMat &inputFrame, cv::ocl::oclMat &outputFrame, const float *transformTable); + void _applyImageColorSpaceConversion(const UMat &inputFrame, UMat &outputFrame, const float *transformTable); }; class RetinaFilter { @@ -401,8 +402,8 @@ public: void clearAllBuffers(); void resize(const unsigned int NBrows, const unsigned int NBcolumns); - bool checkInput(const cv::ocl::oclMat &input, const bool colorMode); - bool runFilter(const cv::ocl::oclMat &imageInput, const bool useAdaptiveFiltering = true, const bool processRetinaParvoMagnoMapping = false, const bool useColorMode = false, const bool inputIsColorMultiplexed = false); + bool checkInput(const UMat &input, const bool colorMode); + bool runFilter(const UMat &imageInput, const bool useAdaptiveFiltering = true, const bool processRetinaParvoMagnoMapping = false, const bool useColorMode = false, const bool inputIsColorMultiplexed = false); void setGlobalParameters(const float OPLspatialResponse1 = 0.7, const float OPLtemporalresponse1 = 1, const float OPLassymetryGain = 0, const float OPLspatialResponse2 = 5, const float OPLtemporalresponse2 = 1, const float LPfilterSpatialResponse = 5, const float LPfilterGain = 0, const float LPfilterTemporalresponse = 0, const float MovingContoursExtractorCoefficient = 5, const bool normalizeParvoOutput_0_maxOutputValue = false, const bool normalizeMagnoOutput_0_maxOutputValue = false, const float maxOutputValue = 255.0, const float maxInputValue = 255.0, const float meanValue = 128.0); @@ -467,16 +468,16 @@ public: { _colorEngine.setColorSaturation(saturateColors, colorSaturationValue); } - inline const cv::ocl::oclMat &getLocalAdaptation() const + inline const UMat &getLocalAdaptation() const { return _photoreceptorsPrefilter.getOutput(); } - inline const cv::ocl::oclMat &getPhotoreceptors() const + inline const UMat &getPhotoreceptors() const { return _ParvoRetinaFilter.getPhotoreceptorsLPfilteringOutput(); } - inline const cv::ocl::oclMat &getHorizontalCells() const + inline const UMat &getHorizontalCells() const { return _ParvoRetinaFilter.getHorizontalCellsOutput(); } @@ -484,20 +485,20 @@ public: { return _useParvoOutput; } - bool getParvoFoveaResponse(cv::ocl::oclMat &parvoFovealResponse); + bool getParvoFoveaResponse(UMat &parvoFovealResponse); inline void activateContoursProcessing(const bool useParvoOutput) { _useParvoOutput = useParvoOutput; } - const cv::ocl::oclMat &getContours(); + const UMat &getContours(); - inline const cv::ocl::oclMat &getContoursON() const + inline const UMat &getContoursON() const { return _ParvoRetinaFilter.getParvoON(); } - inline const cv::ocl::oclMat &getContoursOFF() const + inline const UMat &getContoursOFF() const { return _ParvoRetinaFilter.getParvoOFF(); } @@ -512,41 +513,41 @@ public: _useMagnoOutput = useMagnoOutput; } - inline const cv::ocl::oclMat &getMovingContours() const + inline const UMat &getMovingContours() const { return _MagnoRetinaFilter.getOutput(); } - inline const cv::ocl::oclMat &getMovingContoursSaturated() const + inline const UMat &getMovingContoursSaturated() const { return _MagnoRetinaFilter.getMagnoYsaturated(); } - inline const cv::ocl::oclMat &getMovingContoursON() const + inline const UMat &getMovingContoursON() const { return _MagnoRetinaFilter.getMagnoON(); } - inline const cv::ocl::oclMat &getMovingContoursOFF() const + inline const UMat &getMovingContoursOFF() const { return _MagnoRetinaFilter.getMagnoOFF(); } - inline const cv::ocl::oclMat &getRetinaParvoMagnoMappedOutput() const + inline const UMat &getRetinaParvoMagnoMappedOutput() const { return _retinaParvoMagnoMappedFrame; } - inline const cv::ocl::oclMat &getParvoContoursChannel() const + inline const UMat &getParvoContoursChannel() const { return _colorEngine.getLuminance(); } - inline const cv::ocl::oclMat &getParvoChrominance() const + inline const UMat &getParvoChrominance() const { return _colorEngine.getChrominance(); } - inline const cv::ocl::oclMat &getColorOutput() const + inline const UMat &getColorOutput() const { return _colorEngine.getDemultiplexedColorFrame(); } @@ -609,7 +610,7 @@ private: unsigned int _ellapsedFramesSinceLastReset; unsigned int _globalTemporalConstant; - cv::ocl::oclMat _retinaParvoMagnoMappedFrame; + UMat _retinaParvoMagnoMappedFrame; BasicRetinaFilter _photoreceptorsPrefilter; ParvoRetinaFilter _ParvoRetinaFilter; MagnoRetinaFilter _MagnoRetinaFilter; @@ -623,12 +624,60 @@ private: void _setInitPeriodCount(); void _processRetinaParvoMagnoMapping(); - void _runGrayToneMapping(const cv::ocl::oclMat &grayImageInput, cv::ocl::oclMat &grayImageOutput , const float PhotoreceptorsCompression = 0.6, const float ganglionCellsCompression = 0.6); + void _runGrayToneMapping(const UMat &grayImageInput, UMat &grayImageOutput , const float PhotoreceptorsCompression = 0.6, const float ganglionCellsCompression = 0.6); +}; + +class RetinaOCLImpl : public Retina +{ +public: + RetinaOCLImpl(Size getInputSize); + RetinaOCLImpl(Size getInputSize, const bool colorMode, int colorSamplingMethod = RETINA_COLOR_BAYER, const bool useRetinaLogSampling = false, const double reductionFactor = 1.0, const double samplingStrenght = 10.0); + virtual ~RetinaOCLImpl(); + + Size getInputSize(); + Size getOutputSize(); + + void setup(String retinaParameterFile = "", const bool applyDefaultSetupOnFailure = true); + void setup(cv::FileStorage &fs, const bool applyDefaultSetupOnFailure = true); + void setup(RetinaParameters newParameters); + + RetinaParameters getParameters(); + + const String printSetup(); + virtual void write(String fs) const; + virtual void write(FileStorage& fs) const; + + void setupOPLandIPLParvoChannel(const bool colorMode = true, const bool normaliseOutput = true, const float photoreceptorsLocalAdaptationSensitivity = 0.7, const float photoreceptorsTemporalConstant = 0.5, const float photoreceptorsSpatialConstant = 0.53, const float horizontalCellsGain = 0, const float HcellsTemporalConstant = 1, const float HcellsSpatialConstant = 7, const float ganglionCellsSensitivity = 0.7); + void setupIPLMagnoChannel(const bool normaliseOutput = true, const float parasolCells_beta = 0, const float parasolCells_tau = 0, const float parasolCells_k = 7, const float amacrinCellsTemporalCutFrequency = 1.2, const float V0CompressionParameter = 0.95, const float localAdaptintegration_tau = 0, const float localAdaptintegration_k = 7); + + void run(InputArray inputImage); + void getParvo(OutputArray retinaOutput_parvo); + void getMagno(OutputArray retinaOutput_magno); + + void setColorSaturation(const bool saturateColors = true, const float colorSaturationValue = 4.0); + void clearBuffers(); + void activateMovingContoursProcessing(const bool activate); + void activateContoursProcessing(const bool activate); + + // unimplemented interfaces: + void applyFastToneMapping(InputArray /*inputImage*/, OutputArray /*outputToneMappedImage*/); + void getParvoRAW(OutputArray /*retinaOutput_parvo*/); + void getMagnoRAW(OutputArray /*retinaOutput_magno*/); + const Mat getMagnoRAW() const; + const Mat getParvoRAW() const; + +protected: + RetinaParameters _retinaParameters; + UMat _inputBuffer; + RetinaFilter* _retinaFilter; + bool convertToColorPlanes(const UMat& input, UMat &output); + void convertToInterleaved(const UMat& input, bool colorMode, UMat &output); + void _init(const Size getInputSize, const bool colorMode, int colorSamplingMethod = RETINA_COLOR_BAYER, const bool useRetinaLogSampling = false, const double reductionFactor = 1.0, const double samplingStrenght = 10.0); }; } /* namespace ocl */ } /* namespace bioinspired */ } /* namespace cv */ -#endif /* HAVE_OPENCV_OCL */ +#endif /* HAVE_OPENCL */ #endif /* __OCL_RETINA_HPP__ */ diff --git a/modules/bioinspired/test/test_retina_ocl.cpp b/modules/bioinspired/test/test_retina_ocl.cpp index bfccdd557..8f3067e30 100644 --- a/modules/bioinspired/test/test_retina_ocl.cpp +++ b/modules/bioinspired/test/test_retina_ocl.cpp @@ -44,87 +44,39 @@ //M*/ #include "test_precomp.hpp" -#include "opencv2/opencv_modules.hpp" -#include "opencv2/bioinspired.hpp" -#include "opencv2/imgproc.hpp" -#include "opencv2/highgui.hpp" +#include "opencv2/ts/ocl_test.hpp" -#include "opencv2/core/ocl.hpp" // cv::ocl::haveOpenCL +#ifdef HAVE_OPENCL -#if defined(HAVE_OPENCV_OCL) - -#include "opencv2/ocl.hpp" #define RETINA_ITERATIONS 5 -static double checkNear(const cv::Mat &m1, const cv::Mat &m2) -{ - return cv::norm(m1, m2, cv::NORM_INF); -} - -#define PARAM_TEST_CASE(name, ...) struct name : testing::TestWithParam< std::tr1::tuple< __VA_ARGS__ > > -#define GET_PARAM(k) std::tr1::get< k >(GetParam()) - -static int oclInit = false; -static int oclAvailable = false; +namespace cvtest { +namespace ocl { PARAM_TEST_CASE(Retina_OCL, bool, int, bool, double, double) { bool colorMode; int colorSamplingMethod; bool useLogSampling; - double reductionFactor; - double samplingStrength; + float reductionFactor; + float samplingStrength; virtual void SetUp() { colorMode = GET_PARAM(0); colorSamplingMethod = GET_PARAM(1); useLogSampling = GET_PARAM(2); - reductionFactor = GET_PARAM(3); - samplingStrength = GET_PARAM(4); - - if (!oclInit) - { - if (cv::ocl::haveOpenCL()) - { - try - { - const cv::ocl::DeviceInfo& dev = cv::ocl::Context::getContext()->getDeviceInfo(); - std::cout << "Device name:" << dev.deviceName << std::endl; - oclAvailable = true; - } - catch (...) - { - std::cout << "Device name: N/A" << std::endl; - } - } - oclInit = true; - } + reductionFactor = static_cast(GET_PARAM(3)); + samplingStrength = static_cast(GET_PARAM(4)); } }; -TEST_P(Retina_OCL, Accuracy) +OCL_TEST_P(Retina_OCL, Accuracy) { - if (!oclAvailable) - { - std::cout << "SKIP test" << std::endl; - return; - } - - using namespace cv; Mat input = imread(cvtest::TS::ptr()->get_data_path() + "shared/lena.png", colorMode); CV_Assert(!input.empty()); - ocl::oclMat ocl_input(input); - - Ptr ocl_retina = bioinspired::createRetina_OCL( - input.size(), - colorMode, - colorSamplingMethod, - useLogSampling, - reductionFactor, - samplingStrength); - Ptr gold_retina = bioinspired::createRetina( + Ptr retina = bioinspired::createRetina( input.size(), colorMode, colorSamplingMethod, @@ -134,31 +86,35 @@ TEST_P(Retina_OCL, Accuracy) Mat gold_parvo; Mat gold_magno; - ocl::oclMat ocl_parvo; - ocl::oclMat ocl_magno; + UMat ocl_parvo; + UMat ocl_magno; for(int i = 0; i < RETINA_ITERATIONS; i ++) { - ocl_retina->run(ocl_input); - gold_retina->run(input); - - gold_retina->getParvo(gold_parvo); - gold_retina->getMagno(gold_magno); + OCL_OFF(retina->run(input)); + OCL_OFF(retina->getParvo(gold_parvo)); + OCL_OFF(retina->getMagno(gold_magno)); + OCL_OFF(retina->clearBuffers()); - ocl_retina->getParvo(ocl_parvo); - ocl_retina->getMagno(ocl_magno); + OCL_ON(retina->run(input)); + OCL_ON(retina->getParvo(ocl_parvo)); + OCL_ON(retina->getMagno(ocl_magno)); + OCL_ON(retina->clearBuffers()); - int eps = colorMode ? 2 : 1; + int eps = 1; - EXPECT_LE(checkNear(gold_parvo, (Mat)ocl_parvo), eps); - EXPECT_LE(checkNear(gold_magno, (Mat)ocl_magno), eps); + EXPECT_MAT_NEAR(gold_parvo, ocl_parvo, eps); + EXPECT_MAT_NEAR(gold_magno, ocl_magno, eps); } } -INSTANTIATE_TEST_CASE_P(Contrib, Retina_OCL, testing::Combine( +OCL_INSTANTIATE_TEST_CASE_P(Contrib, Retina_OCL, testing::Combine( testing::Bool(), testing::Values((int)cv::bioinspired::RETINA_COLOR_BAYER), testing::Values(false/*,true*/), testing::Values(1.0, 0.5), testing::Values(10.0, 5.0))); -#endif + +} } // namespace cvtest::ocl + +#endif // HAVE_OPENCL