From c3100eeb19082f65b45e260e0b7006dc892611aa Mon Sep 17 00:00:00 2001 From: Alexander Karsakov Date: Wed, 13 Aug 2014 12:03:06 +0400 Subject: [PATCH] Fixed buffer initialization in reduce kernel. Enabled OCL version of reduce for SUM, MAX, MIN modes. --- modules/core/src/matrix.cpp | 3 --- modules/core/src/opencl/reduce2.cl | 20 ++++++++++++++++---- modules/core/test/ocl/test_arithm.cpp | 2 +- 3 files changed, 17 insertions(+), 8 deletions(-) diff --git a/modules/core/src/matrix.cpp b/modules/core/src/matrix.cpp index af0fe2219b..c475bce731 100644 --- a/modules/core/src/matrix.cpp +++ b/modules/core/src/matrix.cpp @@ -3462,9 +3462,6 @@ static bool ocl_reduce(InputArray _src, OutputArray _dst, if (!doubleSupport && (sdepth == CV_64F || ddepth == CV_64F)) return false; - if ((op == CV_REDUCE_SUM && sdepth == CV_32F) || op == CV_REDUCE_MIN || op == CV_REDUCE_MAX) - return false; - if (op == CV_REDUCE_AVG) { if (sdepth < CV_32S && ddepth < CV_32S) diff --git a/modules/core/src/opencl/reduce2.cl b/modules/core/src/opencl/reduce2.cl index 457378cc13..645d69867b 100644 --- a/modules/core/src/opencl/reduce2.cl +++ b/modules/core/src/opencl/reduce2.cl @@ -108,7 +108,10 @@ __kernel void reduce_horz_opt(__global const uchar * srcptr, int src_step, int s int src_index = mad24(y, src_step, mad24(x, (int)sizeof(srcT) * cn, src_offset)); __global const srcT * src = (__global const srcT *)(srcptr + src_index); - bufT tmp[cn] = { INIT_VALUE }; + bufT tmp[cn]; + #pragma unroll + for (int c = 0; c < cn; ++c) + tmp[c] = INIT_VALUE; int src_step_mul = BUF_COLS * cn; for (int idx = x; idx < cols; idx += BUF_COLS, src += src_step_mul) @@ -140,7 +143,10 @@ __kernel void reduce_horz_opt(__global const uchar * srcptr, int src_step, int s int dst_index = mad24(y, dst_step, dst_offset); __global dstT * dst = (__global dstT *)(dstptr + dst_index); - bufT tmp[cn] = { INIT_VALUE }; + bufT tmp[cn]; + #pragma unroll + for (int c = 0; c < cn; ++c) + tmp[c] = INIT_VALUE; #pragma unroll for (int xin = 0; xin < BUF_COLS / 2; xin ++) @@ -179,7 +185,10 @@ __kernel void reduce(__global const uchar * srcptr, int src_step, int src_offset int dst_index = mad24(x, (int)sizeof(dstT0) * cn, dst_offset); __global dstT0 * dst = (__global dstT0 *)(dstptr + dst_index); - dstT tmp[cn] = { INIT_VALUE }; + dstT tmp[cn]; + #pragma unroll + for (int c = 0; c < cn; ++c) + tmp[c] = INIT_VALUE; for (int y = 0; y < rows; ++y, src_index += src_step) { @@ -209,7 +218,10 @@ __kernel void reduce(__global const uchar * srcptr, int src_step, int src_offset __global const srcT * src = (__global const srcT *)(srcptr + src_index); __global dstT * dst = (__global dstT *)(dstptr + dst_index); - dstT tmp[cn] = { INIT_VALUE }; + dstT tmp[cn]; + #pragma unroll + for (int c = 0; c < cn; ++c) + tmp[c] = INIT_VALUE; for (int x = 0; x < cols; ++x, src += cn) { diff --git a/modules/core/test/ocl/test_arithm.cpp b/modules/core/test/ocl/test_arithm.cpp index 3af01f3d50..1dd17f948f 100644 --- a/modules/core/test/ocl/test_arithm.cpp +++ b/modules/core/test/ocl/test_arithm.cpp @@ -1704,7 +1704,7 @@ OCL_TEST_P(ReduceSum, Mat) OCL_OFF(cv::reduce(src_roi, dst_roi, dim, CV_REDUCE_SUM, dtype)); OCL_ON(cv::reduce(usrc_roi, udst_roi, dim, CV_REDUCE_SUM, dtype)); - double eps = ddepth <= CV_32S ? 1 : 1e-4; + double eps = ddepth <= CV_32S ? 1 : 7e-4; OCL_EXPECT_MATS_NEAR(dst, eps); } }