Merge pull request #21426 from alalek:dnn_simd_unaligned_weights_fix

pull/21430/head^2
Alexander Alekhin 3 years ago
commit 5e327af327
  1. modules/dnn/src/layers/convolution_layer.cpp (5 lines changed)
  2. modules/dnn/src/layers/layers_common.simd.hpp (2 lines changed)

@@ -347,7 +347,9 @@ public:
if (!blobs.empty())
{
Mat wm = blobs[0].reshape(1, numOutput);
-if( wm.step1() % VEC_ALIGN != 0 )
+if ((wm.step1() % VEC_ALIGN != 0) ||
+!isAligned<VEC_ALIGN * sizeof(float)>(wm.data)
+)
{
int newcols = (int)alignSize(wm.step1(), VEC_ALIGN);
Mat wm_buffer = Mat(numOutput, newcols, wm.type());
@@ -1299,7 +1301,6 @@ public:
}
}
}
// now compute dot product of the weights
// and im2row-transformed part of the tensor
#if CV_TRY_AVX512_SKX

@@ -81,6 +81,8 @@ void fastConv( const float* weights, size_t wstep, const float* bias,
int blockSize, int vecsize, int vecsize_aligned,
const float* relu, bool initOutput )
{
+CV_Assert(isAligned<32>(weights));
int outCn = outShape[1];
size_t outPlaneSize = outShape[2]*outShape[3];
float r0 = 1.f, r1 = 1.f, r2 = 1.f;

Loading…
Cancel
Save