Merge pull request #20774 from alalek:dnn_ocl_fix_conv_basic_workgroup

pull/20794/head
Alexander Alekhin 3 years ago
commit 4d587c341b
  1. 13
      modules/dnn/src/ocl4dnn/src/ocl4dnn_conv_spatial.cpp
  2. 12
      modules/dnn/src/opencl/conv_layer_spatial.cl

@@ -1081,9 +1081,16 @@ bool OCL4DNNConvSpatial<float>::convolve(const UMat &bottom, UMat &top,
kernel.set(argIdx++, (uint16_t)output_h_);
kernel.set(argIdx++, (uint16_t)pad_w_);
kernel.set(argIdx++, (uint16_t)pad_h_);
if (!kernel.run_(3, config->global_work_size,
(config->use_null_local) ? NULL : config->local_work_size,
false))
size_t wgs = kernel.workGroupSize();
if (!wgs)
{
CV_LOG_ERROR(NULL, "DNN/OpenCL: Can't query workGroupSize of Basic kernel");
return false;
}
size_t lws[1] = { wgs };
size_t gws[1] = { roundUp((size_t)output_w_ * output_h_ * M_, (unsigned)lws[0]) };
if (!kernel.run_(1, gws, lws, false))
{
CV_LOG_ERROR(NULL, "DNN/OpenCL: Basic kernel run failed");
return false;

@@ -158,10 +158,14 @@ __kernel void ConvolveBasic(
)
{
__global Dtype* convolved_image = convolved_image_base + convolved_image_base_offset;
const int outputX = get_global_id(0);
const int outputY = get_global_id(1);
const int kernelNum = get_global_id(2) * ZPAR;
if (outputX < output_width && outputY < output_height)
const int out_idx = get_global_id(0); // 1D task layout: [output_width * output_height * OUTPUT_Z]
const int plane_size = output_width * output_height;
const int out_plane_idx = out_idx % plane_size;
const int outputZ = out_idx / plane_size;
const int outputY = out_plane_idx / output_width;
const int outputX = out_plane_idx % output_width;
const int kernelNum = outputZ * ZPAR;
if (kernelNum < OUTPUT_Z)
{
Dtype sum[ZPAR];
for (int kern = 0; kern < ZPAR; kern++)

Loading…
Cancel
Save