diff --git a/modules/dnn/src/layers/convolution_layer.cpp b/modules/dnn/src/layers/convolution_layer.cpp
index 1236ff5783..f5c158453d 100644
--- a/modules/dnn/src/layers/convolution_layer.cpp
+++ b/modules/dnn/src/layers/convolution_layer.cpp
@@ -347,7 +347,9 @@ public:
         if (!blobs.empty())
         {
             Mat wm = blobs[0].reshape(1, numOutput);
-            if( wm.step1() % VEC_ALIGN != 0 )
+            if ((wm.step1() % VEC_ALIGN != 0) ||
+                !isAligned<VEC_ALIGN * sizeof(float)>(wm.data)
+            )
             {
                 int newcols = (int)alignSize(wm.step1(), VEC_ALIGN);
                 Mat wm_buffer = Mat(numOutput, newcols, wm.type());
@@ -1299,7 +1301,6 @@ public:
                     }
                 }
             }
-
             // now compute dot product of the weights
             // and im2row-transformed part of the tensor
         #if CV_TRY_AVX512_SKX
diff --git a/modules/dnn/src/layers/layers_common.simd.hpp b/modules/dnn/src/layers/layers_common.simd.hpp
index accc644676..049d1f8b02 100644
--- a/modules/dnn/src/layers/layers_common.simd.hpp
+++ b/modules/dnn/src/layers/layers_common.simd.hpp
@@ -81,6 +81,8 @@ void fastConv( const float* weights, size_t wstep, const float* bias,
                int blockSize, int vecsize, int vecsize_aligned,
                const float* relu, bool initOutput )
 {
+    CV_Assert(isAligned<32>(weights));
+
     int outCn = outShape[1];
     size_t outPlaneSize = outShape[2]*outShape[3];
     float r0 = 1.f, r1 = 1.f, r2 = 1.f;
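For reference, here is a minimal standalone sketch (not OpenCV source) of the alignment logic this patch relies on. `isAlignedPtr` and `alignSizeUp` are hypothetical stand-ins for OpenCV's `cv::isAligned<N>` and `cv::alignSize`, and `VEC_ALIGN` is assumed to be 8 floats (32 bytes), which is what makes the repacking check in convolution_layer.cpp consistent with the new `isAligned<32>(weights)` assertion in fastConv:

```cpp
// Standalone sketch of the alignment check (hypothetical helpers, not OpenCV code).
#include <cstddef>
#include <cstdint>
#include <cstdio>
#include <vector>

// Stand-in for cv::isAligned<N>: true if the pointer is N-byte aligned (N a power of two).
template <int N>
static bool isAlignedPtr(const void* p)
{
    return (reinterpret_cast<std::uintptr_t>(p) & (N - 1)) == 0;
}

// Stand-in for cv::alignSize: round sz up to the next multiple of n (n a power of two).
static std::size_t alignSizeUp(std::size_t sz, int n)
{
    return (sz + n - 1) & ~(static_cast<std::size_t>(n) - 1);
}

int main()
{
    constexpr int VEC_ALIGN = 8;          // floats per SIMD block, as assumed above
    std::vector<float> w(100);
    const float* data = w.data() + 1;     // an offset view, almost certainly misaligned
    std::size_t step1 = 25;               // elements per weight row

    // The patched condition: repack the weights if either the row stride or the
    // base pointer would violate the 32-byte alignment fastConv now asserts.
    if (step1 % VEC_ALIGN != 0 || !isAlignedPtr<VEC_ALIGN * (int)sizeof(float)>(data))
    {
        std::size_t newcols = alignSizeUp(step1, VEC_ALIGN);
        std::printf("repack weights into a %zu-column aligned buffer\n", newcols);
    }
    return 0;
}
```

In other words, the first hunk fixes the producer (the check previously looked only at the row stride, so a weight matrix with an aligned stride but a misaligned base pointer slipped through), and the second hunk's CV_Assert documents the contract at the consumer, where the SIMD loads require 32-byte-aligned weight rows.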