diff --git a/modules/dnn/src/int8layers/convolution_layer.cpp b/modules/dnn/src/int8layers/convolution_layer.cpp index 05749885c0..ea29610222 100644 --- a/modules/dnn/src/int8layers/convolution_layer.cpp +++ b/modules/dnn/src/int8layers/convolution_layer.cpp @@ -159,7 +159,7 @@ public: enum { VEC_ALIGN = 32, DFT_TYPE = CV_8S }; Mat weightsMat; std::vector biasvec; - Mat outputMultiplier; + std::vector outputMultiplier; Mat activationLUT; Ptr activ; @@ -249,10 +249,14 @@ public: Mat biasMat = blobs[1]; biasvec.resize(numOutput+2); + + Mat outMult = blobs[2]; + outputMultiplier.resize(numOutput+2); for(int i = 0; i < numOutput; i++ ) + { biasvec[i] = biasMat.at(i); - - outputMultiplier = blobs[2]; + outputMultiplier[i] = outMult.at(i); + } } bool setActivation(const Ptr& layer) CV_OVERRIDE @@ -283,16 +287,17 @@ public: for (int i = 0; i < outCn; ++i) { - float off = outputMultiplier.at(i) * output_sc; + float off = outputMultiplier[i] * output_sc; if (!w.empty()) off *= w.at(i); if (!b.empty()) biasvec[i] += (int)std::round(b.at(i)/off); - outputMultiplier.at(i) = off/new_sc; + outputMultiplier[i] = off/new_sc; } biasvec[outCn] = biasvec[outCn+1] = biasvec[outCn-1]; + outputMultiplier[outCn] = outputMultiplier[outCn+1] = outputMultiplier[outCn-1]; } class ParallelConv : public cv::ParallelLoopBody @@ -315,7 +320,7 @@ public: bool useAVX512; int blk_size_cn; int inpZp, outZp; - const float* multiplier; + const std::vector* multiplier; ParallelConv() : input_(0), weights_(0), output_(0), ngroups_(0), nstripes_(0), @@ -323,7 +328,7 @@ public: , blk_size_cn(0), inpZp(0), outZp(0), multiplier(0) {} - static void run( const Mat& input, Mat& output, const Mat& weights, const Mat& multipliers, + static void run( const Mat& input, Mat& output, const Mat& weights, const std::vector& multipliers, const std::vector& biasvec, const Mat& activLUT, const std::vector& kernel_size, const std::vector& strides, const std::vector& pads_begin, const std::vector& pads_end, @@ -392,7 +397,7 @@ public: p.inpZp = inp_Zp; p.outZp = out_Zp; - p.multiplier = multipliers.ptr(0); + p.multiplier = &multipliers; p.ofstab_.resize(karea * ncn); int* ofstab = &p.ofstab_[0]; @@ -501,6 +506,7 @@ public: const int8_t* wptr_orig_ = weights_->ptr(); size_t wstep = weights_->step1(); const int* biasptr_ = &biasvec_->at(0); + const float* multptr_ = &multiplier->at(0); const int* lutptr_ = !activLUT_->empty() ? activLUT_->ptr() : 0; int* data_out0_ = output_->ptr(); AutoBuffer rowbuf0_; @@ -539,7 +545,7 @@ public: int startOutCn = (subsampleIdx % ngroups)*outCn; const int8_t* wptr_orig = wptr_orig_ + wstep*startOutCn; const int* biasptr = biasptr_ + startOutCn; - const float* multptr = multiplier + startOutCn; + const float* multptr = multptr_ + startOutCn; for( int cn0 = 0; cn0 < inpCn; cn0 += blk_size_cn ) {