From 74574dfae47711405a126971c87fb142b042f740 Mon Sep 17 00:00:00 2001
From: Alexander Nesterov
Date: Fri, 8 Feb 2019 13:12:33 -0100
Subject: [PATCH] Added optimization fuse

---
 modules/dnn/src/layers/convolution_layer.cpp | 90 +++++++++++++++-----
 modules/dnn/test/test_backends.cpp           |  9 +-
 modules/dnn/test/test_torch_importer.cpp     |  8 ++
 3 files changed, 86 insertions(+), 21 deletions(-)

diff --git a/modules/dnn/src/layers/convolution_layer.cpp b/modules/dnn/src/layers/convolution_layer.cpp
index 90645d531f..b872130ccd 100644
--- a/modules/dnn/src/layers/convolution_layer.cpp
+++ b/modules/dnn/src/layers/convolution_layer.cpp
@@ -61,6 +61,8 @@ namespace dnn
 class BaseConvolutionLayerImpl : public ConvolutionLayer
 {
 public:
+    bool newWeightAndBias;
+    std::vector<float> weightsMultipliers;
     BaseConvolutionLayerImpl(const LayerParams &params)
     {
         setParamsFrom(params);
@@ -84,6 +86,8 @@ public:
         CV_Assert(numOutput % ngroups == 0);
         CV_Assert(adjustPad.width < stride.width &&
                   adjustPad.height < stride.height);
+
+        newWeightAndBias = false;
     }
 
     void finalize(InputArrayOfArrays inputs_arr, OutputArrayOfArrays outputs_arr) CV_OVERRIDE
@@ -134,6 +138,20 @@ public:
                (dilation.height == 1 && dilation.width == 1);
     }
 
+    virtual bool tryFuse(Ptr<Layer>& top) CV_OVERRIDE
+    {
+        Mat w, b;
+        top->getScaleShift(w, b);
+        if (!w.empty() || !b.empty())
+        {
+            fuseWeights(w, b);
+            return true;
+        }
+        return false;
+    }
+
+    virtual void fuseWeights(const Mat& w_, const Mat& b_) = 0;
+
     virtual void applyHalideScheduler(Ptr<BackendNode>& node,
                                       const std::vector<Mat*> &inputs,
                                       const std::vector<Mat> &outputs,
@@ -184,11 +202,9 @@ class ConvolutionLayerImpl CV_FINAL : public BaseConvolutionLayerImpl
 public:
     enum { VEC_ALIGN = 8, DFT_TYPE = CV_32F };
     Mat weightsMat;
-    std::vector<float> weightsMultipliers;
     std::vector<float> biasvec;
     std::vector<float> reluslope;
     Ptr<ActivationLayer> activ;
-    bool newWeightAndBias;
     bool fusedBias;
 
 #ifdef HAVE_OPENCL
@@ -200,7 +216,6 @@ public:
 #endif
     ConvolutionLayerImpl(const LayerParams &params) : BaseConvolutionLayerImpl(params)
     {
-        newWeightAndBias = false;
         fusedBias = false;
 #ifdef HAVE_OPENCL
         newActiv = false;
@@ -346,19 +361,7 @@ public:
         return !activ.empty();
     }
 
-    virtual bool tryFuse(Ptr<Layer>& top) CV_OVERRIDE
-    {
-        Mat w, b;
-        top->getScaleShift(w, b);
-        if (!w.empty() || !b.empty())
-        {
-            fuseWeights(w, b);
-            return true;
-        }
-        return false;
-    }
-
-    void fuseWeights(const Mat& w_, const Mat& b_)
+    void fuseWeights(const Mat& w_, const Mat& b_) CV_OVERRIDE
     {
         // Convolution weights have OIHW data layout. Parameters fusion in case of
         // (conv(I) + b1 ) * w + b2
@@ -1238,6 +1241,45 @@ public:
 
         pad.width = pad_l;
         pad.height = pad_t;
+
+        weightsMultipliers.assign(numOutput, 1.0);
+        if (weightsMat.empty())
+        {
+            transpose(blobs[0].reshape(1, blobs[0].size[0]), weightsMat);
+            biasesMat = hasBias() ? blobs[1].reshape(1, numOutput)
+                                  : Mat::zeros(numOutput, 1, CV_32F);
+        }
+    }
+
+    void fuseWeights(const Mat& w_, const Mat& b_) CV_OVERRIDE
+    {
+        Mat w = w_.total() == 1 ? Mat(1, numOutput, CV_32F, Scalar(w_.at<float>(0))) : w_;
+        Mat b = b_.total() == 1 ? Mat(1, numOutput, CV_32F, Scalar(b_.at<float>(0))) : b_;
+
+        CV_Assert_N(!weightsMat.empty(),
+                    w.empty() || numOutput == w.total(),
+                    b.empty() || numOutput == b.total());
+
+        if (!w.empty())
+        {
+            transpose(blobs[0].reshape(1, blobs[0].size[0]), weightsMat);
+            weightsMat = weightsMat.reshape(1, numOutput);
+            for (int i = 0; i < numOutput; ++i)
+            {
+                double wi = w.at<float>(i);
+                weightsMultipliers[i] *= wi;
+                cv::multiply(weightsMat.row(i), weightsMultipliers[i], weightsMat.row(i));
+                biasesMat.at<float>(i) *= wi;
+            }
+            weightsMat = weightsMat.reshape(1, weightsMat.total() / blobs[0].size[0]);
+        }
+
+        if (!b.empty())
+        {
+            cv::add(biasesMat, b.reshape(1, numOutput), biasesMat);
+        }
+
+        newWeightAndBias = !w.empty() || !b.empty();
     }
 
     class MatMulInvoker : public ParallelLoopBody
@@ -1505,11 +1547,19 @@ public:
 
         if (umat_weights.empty())
         {
-            transpose(blobs[0].reshape(1, inpCn), umat_weights);
-            if (hasBias())
-                blobs[1].reshape(1, outCn).copyTo(umat_biases);
+            if (newWeightAndBias)
+            {
+                weightsMat.copyTo(umat_weights);
+                biasesMat.copyTo(umat_biases);
+            }
             else
-                umat_biases = UMat::zeros(outCn, 1, CV_32F);
+            {
+                transpose(blobs[0].reshape(1, inpCn), umat_weights);
+                if (hasBias())
+                    blobs[1].reshape(1, outCn).copyTo(umat_biases);
+                else
+                    umat_biases = UMat::zeros(outCn, 1, CV_32F);
+            }
         }
 
         String buildopt = format("-DT=%s ", ocl::typeToStr(inputs[0].type()));
diff --git a/modules/dnn/test/test_backends.cpp b/modules/dnn/test/test_backends.cpp
index 8485bbedad..10f7b02e11 100644
--- a/modules/dnn/test/test_backends.cpp
+++ b/modules/dnn/test/test_backends.cpp
@@ -305,9 +305,16 @@ TEST_P(DNNTestNetwork, DenseNet_121)
 TEST_P(DNNTestNetwork, FastNeuralStyle_eccv16)
 {
     if (backend == DNN_BACKEND_HALIDE ||
-        (backend == DNN_BACKEND_OPENCV && target == DNN_TARGET_OPENCL_FP16) ||
         (backend == DNN_BACKEND_INFERENCE_ENGINE && target == DNN_TARGET_MYRIAD))
         throw SkipTestException("");
+
+#if defined(INF_ENGINE_RELEASE)
+#if INF_ENGINE_RELEASE <= 2018050000
+    if (backend == DNN_BACKEND_INFERENCE_ENGINE && target == DNN_TARGET_OPENCL)
+        throw SkipTestException("");
+#endif
+#endif
+
     Mat img = imread(findDataFile("dnn/googlenet_1.png", false));
     Mat inp = blobFromImage(img, 1.0, Size(320, 240), Scalar(103.939, 116.779, 123.68), false, false);
     // Output image has values in range [-143.526, 148.539].
diff --git a/modules/dnn/test/test_torch_importer.cpp b/modules/dnn/test/test_torch_importer.cpp
index 4e00b10279..11e6ee49e8 100644
--- a/modules/dnn/test/test_torch_importer.cpp
+++ b/modules/dnn/test/test_torch_importer.cpp
@@ -394,6 +394,14 @@ TEST_P(Test_Torch_nets, ENet_accuracy)
 TEST_P(Test_Torch_nets, FastNeuralStyle_accuracy)
 {
     checkBackend();
+
+#if defined(INF_ENGINE_RELEASE)
+#if INF_ENGINE_RELEASE <= 2018050000
+    if (backend == DNN_BACKEND_INFERENCE_ENGINE && target == DNN_TARGET_OPENCL)
+        throw SkipTestException("");
+#endif
+#endif
+
     std::string models[] = {"dnn/fast_neural_style_eccv16_starry_night.t7",
                             "dnn/fast_neural_style_instance_norm_feathers.t7"};
     std::string targets[] = {"dnn/lena_starry_night.png", "dnn/lena_feathers.png"};
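
Note on the fusion the patch implements: tryFuse() folds a following per-channel scale/shift layer (w, b2) into the (de)convolution itself. As the comment in the convolution path states, (conv(I) + b1) * w + b2 equals a single convolution whose channel-o weights are W_o * w_o and whose bias is b1_o * w_o + b2_o; that is exactly what the per-row cv::multiply and the biasesMat update in fuseWeights() compute. The standalone C++ sketch below checks this identity numerically for one output channel of a 1-D convolution. It is illustrative only, not part of the patch or of OpenCV; conv1d and all constants are made up for the example.

// Minimal sketch, assuming a plain "valid" 1-D convolution with a scalar bias.
// Verifies (conv(I) + b1) * w + b2 == conv'(I) + b' with W' = W*w, b' = b1*w + b2.
#include <cstdio>
#include <vector>

// Plain valid 1-D convolution (correlation) with one output channel.
static std::vector<double> conv1d(const std::vector<double>& x,
                                  const std::vector<double>& k, double bias)
{
    std::vector<double> y(x.size() - k.size() + 1, bias);
    for (size_t i = 0; i < y.size(); ++i)
        for (size_t j = 0; j < k.size(); ++j)
            y[i] += x[i + j] * k[j];
    return y;
}

int main()
{
    const std::vector<double> input  = {1, 2, 3, 4, 5};
    const std::vector<double> kernel = {0.5, -1, 2};
    const double b1 = 0.25;        // convolution bias
    const double w = 3, b2 = -1;   // scale/shift of the following layer

    // Unfused: run the convolution, then apply the scale/shift layer.
    std::vector<double> ref = conv1d(input, kernel, b1);
    for (double& v : ref) v = v * w + b2;

    // Fused: scale the kernel and fold both biases, mirroring what
    // fuseWeights() does per output channel on weightsMat/biasesMat.
    std::vector<double> fusedKernel = kernel;
    for (double& v : fusedKernel) v *= w;
    std::vector<double> fused = conv1d(input, fusedKernel, b1 * w + b2);

    // Both columns print identical values.
    for (size_t i = 0; i < ref.size(); ++i)
        std::printf("ref=%8.3f  fused=%8.3f\n", ref[i], fused[i]);
    return 0;
}

Because the identity holds channel-wise, the OpenCL branch added above can simply upload the already-fused weightsMat/biasesMat when newWeightAndBias is set, instead of re-deriving umat_weights/umat_biases from blobs[0] and blobs[1].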