Merge pull request #20794 from alalek:dnn_ocl_fix_conv_dwconv_workgroup

pull/20795/head
Alexander Alekhin 3 years ago
commit 327b98eb13
  1. modules/dnn/src/ocl4dnn/src/ocl4dnn_conv_spatial.cpp (15 lines changed)
  2. modules/dnn/src/opencl/conv_layer_spatial.cl (11 lines changed)

@@ -1034,12 +1034,15 @@ bool OCL4DNNConvSpatial<float>::convolve(const UMat &bottom, UMat &top,
 kernel.set(argIdx++, (uint16_t)output_w_);
 kernel.set(argIdx++, (uint16_t)output_h_);
-size_t global_size[3];
-global_size[0] = output_w_;
-global_size[1] = output_h_;
-global_size[2] = num_output_ * num_;
-
-if (!kernel.run_(3, global_size, NULL, false))
+size_t wgs = kernel.workGroupSize();
+if (!wgs)
+{
+    CV_LOG_ERROR(NULL, "DNN/OpenCL: Can't query workGroupSize of DWCONV kernel");
+    return false;
+}
+size_t lws[1] = { wgs };
+size_t gws[1] = { roundUp((size_t)output_w_ * output_h_ * num_output_ * num_, (unsigned)lws[0]) };
+if (!kernel.run_(1, gws, lws, false))
 {
     CV_LOG_ERROR(NULL, "DNN/OpenCL: DWCONV kernel run failed");
     return false;

@@ -1850,10 +1850,13 @@ __kernel void DWCONV(
 const ushort output_width,
 const ushort output_height) {
 __global Dtype* convolved_image = convolved_image_base + convolved_image_offset;
-const int outputX = get_global_id(0);
-const int outputY = get_global_id(1);
-const int outputZ = get_global_id(2);
-if(outputX < output_width && outputY < output_height)
+const int out_idx = get_global_id(0); // 1D task layout: [output_width * output_height * OUTPUT_Z]
+const int plane_size = output_width * output_height;
+const int out_plane_idx = out_idx % plane_size;
+const int outputZ = out_idx / plane_size;
+const int outputY = out_plane_idx / output_width;
+const int outputX = out_plane_idx % output_width;
+if (outputZ < OUTPUT_Z)
 {
     Dtype sum = 0.;

Loading…
Cancel
Save