diff --git a/modules/dnn/src/ocl4dnn/src/ocl4dnn_conv_spatial.cpp b/modules/dnn/src/ocl4dnn/src/ocl4dnn_conv_spatial.cpp index 5eee1da4a0..a1164273ac 100644 --- a/modules/dnn/src/ocl4dnn/src/ocl4dnn_conv_spatial.cpp +++ b/modules/dnn/src/ocl4dnn/src/ocl4dnn_conv_spatial.cpp @@ -1081,9 +1081,16 @@ bool OCL4DNNConvSpatial::convolve(const UMat &bottom, UMat &top, kernel.set(argIdx++, (uint16_t)output_h_); kernel.set(argIdx++, (uint16_t)pad_w_); kernel.set(argIdx++, (uint16_t)pad_h_); - if (!kernel.run_(3, config->global_work_size, - (config->use_null_local) ? NULL : config->local_work_size, - false)) + + size_t wgs = kernel.workGroupSize(); + if (!wgs) + { + CV_LOG_ERROR(NULL, "DNN/OpenCL: Can't query workGroupSize of Basic kernel"); + return false; + } + size_t lws[1] = { wgs }; + size_t gws[1] = { roundUp((size_t)output_w_ * output_h_ * M_, (unsigned)lws[0]) }; + if (!kernel.run_(1, gws, lws, false)) { CV_LOG_ERROR(NULL, "DNN/OpenCL: Basic kernel run failed"); return false; diff --git a/modules/dnn/src/opencl/conv_layer_spatial.cl b/modules/dnn/src/opencl/conv_layer_spatial.cl index e7bbacd4c4..455f0ed7ea 100644 --- a/modules/dnn/src/opencl/conv_layer_spatial.cl +++ b/modules/dnn/src/opencl/conv_layer_spatial.cl @@ -158,10 +158,14 @@ __kernel void ConvolveBasic( ) { __global Dtype* convolved_image = convolved_image_base + convolved_image_base_offset; - const int outputX = get_global_id(0); - const int outputY = get_global_id(1); - const int kernelNum = get_global_id(2) * ZPAR; - if (outputX < output_width && outputY < output_height) + const int out_idx = get_global_id(0); // 1D task layout: [output_width * output_height * OUTPUT_Z] + const int plane_size = output_width * output_height; + const int out_plane_idx = out_idx % plane_size; + const int outputZ = out_idx / plane_size; + const int outputY = out_plane_idx / output_width; + const int outputX = out_plane_idx % output_width; + const int kernelNum = outputZ * ZPAR; + if (kernelNum < OUTPUT_Z) { Dtype sum[ZPAR]; for (int kern = 0; kern < ZPAR; kern++)