Merge pull request #20648 from alalek:issue_20615

pull/20655/head
Alexander Alekhin 4 years ago
commit 7967683296
  1. 12
      modules/dnn/src/opencl/conv_spatial_helper.cl

@ -62,8 +62,8 @@ __kernel void TEMPLATE(copyWeightsSwizzled, Dtype)
//Original location //Original location
//Output location //Output location
int outputSublayer = channels / swizzleFactor; //int outputSublayer = channels / swizzleFactor;
int outputSublayerIndex = channels % swizzleFactor; //int outputSublayerIndex = channels % swizzleFactor;
int filter = sX / (kernel_w*kernel_h*channels); int filter = sX / (kernel_w*kernel_h*channels);
int kernel_X = sX % kernel_w; int kernel_X = sX % kernel_w;
@ -73,6 +73,10 @@ __kernel void TEMPLATE(copyWeightsSwizzled, Dtype)
int FP = filter / swizzleFactor; int FP = filter / swizzleFactor;
int F1 = filter % swizzleFactor; int F1 = filter % swizzleFactor;
weightOut[FP*(kernel_w*kernel_h*channels*swizzleFactor) + kernel_C*(kernel_w*kernel_h*swizzleFactor) + kernel_Y*(kernel_w*swizzleFactor) + kernel_X*swizzleFactor + F1] int idxOut = FP*(kernel_w*kernel_h*channels*swizzleFactor) + kernel_C*(kernel_w*kernel_h*swizzleFactor) + kernel_Y*(kernel_w*swizzleFactor) + kernel_X*swizzleFactor + F1;
= weightIn[filter*(kernel_w*kernel_h*channels) + kernel_C*(kernel_w*kernel_h) + kernel_Y*kernel_w + kernel_X]; int idxIn = filter*(kernel_w*kernel_h*channels) + kernel_C*(kernel_w*kernel_h) + kernel_Y*kernel_w + kernel_X;
// idxIn is not valid if (filter >= outputs): there is no input data for these elements. Output alignment gaps are filled with zeros.
Dtype v = (filter < outputs) ? weightIn[idxIn] : (Dtype)0;
weightOut[idxOut] = v;
} }

Loading…
Cancel
Save