diff --git a/modules/imgproc/src/opencl/threshold.cl b/modules/imgproc/src/opencl/threshold.cl index 6282aa86f8..43f1ea2c96 100644 --- a/modules/imgproc/src/opencl/threshold.cl +++ b/modules/imgproc/src/opencl/threshold.cl @@ -56,26 +56,37 @@ __kernel void threshold(__global const uchar * srcptr, int src_step, int src_off T1 thresh, T1 max_val, T1 min_val) { int gx = get_global_id(0); - int gy = get_global_id(1); + int gy = get_global_id(1) * STRIDE_SIZE; - if (gx < cols && gy < rows) + if (gx < cols) { int src_index = mad24(gy, src_step, mad24(gx, (int)sizeof(T), src_offset)); int dst_index = mad24(gy, dst_step, mad24(gx, (int)sizeof(T), dst_offset)); - T sdata = *(__global const T *)(srcptr + src_index); - __global T * dst = (__global T *)(dstptr + dst_index); + #pragma unroll + for (int i = 0; i < STRIDE_SIZE; i++) + { + if (gy < rows) + { + T sdata = *(__global const T *)(srcptr + src_index); + __global T * dst = (__global T *)(dstptr + dst_index); -#ifdef THRESH_BINARY - dst[0] = sdata > (thresh) ? (T)(max_val) : (T)(0); -#elif defined THRESH_BINARY_INV - dst[0] = sdata > (thresh) ? (T)(0) : (T)(max_val); -#elif defined THRESH_TRUNC - dst[0] = clamp(sdata, (T)min_val, (T)(thresh)); -#elif defined THRESH_TOZERO - dst[0] = sdata > (thresh) ? sdata : (T)(0); -#elif defined THRESH_TOZERO_INV - dst[0] = sdata > (thresh) ? (T)(0) : sdata; -#endif + #ifdef THRESH_BINARY + dst[0] = sdata > (thresh) ? (T)(max_val) : (T)(0); + #elif defined THRESH_BINARY_INV + dst[0] = sdata > (thresh) ? (T)(0) : (T)(max_val); + #elif defined THRESH_TRUNC + dst[0] = clamp(sdata, (T)min_val, (T)(thresh)); + #elif defined THRESH_TOZERO + dst[0] = sdata > (thresh) ? sdata : (T)(0); + #elif defined THRESH_TOZERO_INV + dst[0] = sdata > (thresh) ? (T)(0) : sdata; + #endif + + gy++; + src_index += src_step; + dst_index += dst_step; + } + } } } diff --git a/modules/imgproc/src/thresh.cpp b/modules/imgproc/src/thresh.cpp index b32a4365cf..988fc9e9f6 100644 --- a/modules/imgproc/src/thresh.cpp +++ b/modules/imgproc/src/thresh.cpp @@ -833,9 +833,12 @@ static bool ocl_threshold( InputArray _src, OutputArray _dst, double & thresh, d const char * const thresholdMap[] = { "THRESH_BINARY", "THRESH_BINARY_INV", "THRESH_TRUNC", "THRESH_TOZERO", "THRESH_TOZERO_INV" }; + ocl::Device dev = ocl::Device::getDefault(); + int stride_size = dev.isIntel() && (dev.type() & ocl::Device::TYPE_GPU) ? 4 : 1; + ocl::Kernel k("threshold", ocl::imgproc::threshold_oclsrc, - format("-D %s -D T=%s -D T1=%s%s", thresholdMap[thresh_type], - ocl::typeToStr(ktype), ocl::typeToStr(depth), + format("-D %s -D T=%s -D T1=%s -D STRIDE_SIZE=%d%s", thresholdMap[thresh_type], + ocl::typeToStr(ktype), ocl::typeToStr(depth), stride_size, doubleSupport ? " -D DOUBLE_SUPPORT" : "")); if (k.empty()) return false; @@ -856,6 +859,7 @@ static bool ocl_threshold( InputArray _src, OutputArray _dst, double & thresh, d ocl::KernelArg::Constant(Mat(1, 1, depth, Scalar::all(min_val)))); size_t globalsize[2] = { dst.cols * cn / kercn, dst.rows }; + globalsize[1] = (globalsize[1] + stride_size - 1) / stride_size; return k.run(2, globalsize, NULL, false); }