From 2e68f892257103560349c2451a71c76b668b898e Mon Sep 17 00:00:00 2001
From: Alexander Alekhin
Date: Mon, 18 Sep 2017 19:04:46 +0300
Subject: [PATCH] ocl: update kernel global size adjustment

Prevents 10000x1 => 10000x8 transformation after getContinuousSize() call
---
 modules/core/src/ocl.cpp | 11 ++++++-----
 1 file changed, 6 insertions(+), 5 deletions(-)

diff --git a/modules/core/src/ocl.cpp b/modules/core/src/ocl.cpp
index 1741d67ed4..8fea1d2b1e 100644
--- a/modules/core/src/ocl.cpp
+++ b/modules/core/src/ocl.cpp
@@ -2269,7 +2269,7 @@ bool Kernel::run(int dims, size_t _globalsize[], size_t _localsize[],
         return false;
 
     cl_command_queue qq = getQueue(q);
-    size_t offset[CV_MAX_DIM] = {0}, globalsize[CV_MAX_DIM] = {1,1,1};
+    size_t globalsize[CV_MAX_DIM] = {1,1,1};
     size_t total = 1;
     CV_Assert(_globalsize != 0);
     for (int i = 0; i < dims; i++)
@@ -2278,15 +2278,16 @@ bool Kernel::run(int dims, size_t _globalsize[], size_t _localsize[],
             dims == 1 ? 64 : dims == 2 ? (i == 0 ? 256 : 8) : dims == 3 ? (8>>(int)(i>0)) : 1;
         CV_Assert( val > 0 );
         total *= _globalsize[i];
-        globalsize[i] = ((_globalsize[i] + val - 1)/val)*val;
+        if (_globalsize[i] == 1)
+            val = 1;
+        globalsize[i] = divUp(_globalsize[i], (unsigned int)val) * val;
     }
-    if( total == 0 )
-        return true;
+    CV_Assert(total > 0);
     if( p->haveTempDstUMats )
         sync = true;
     cl_event asyncEvent = 0;
     cl_int retval = clEnqueueNDRangeKernel(qq, p->handle, (cl_uint)dims,
-                                           offset, globalsize, _localsize, 0, 0,
+                                           NULL, globalsize, _localsize, 0, 0,
                                            sync ? 0 : &asyncEvent);
 #if CV_OPENCL_SHOW_RUN_ERRORS
     if (retval != CL_SUCCESS)