Added optimization fuse

6 years ago · 74574dfae4
parent 3132c8ee08
commit 74574dfae4
3 changed files with 86 additions and 21 deletions
--- a/modules/dnn/src/layers/convolution_layer.cpp
+++ b/modules/dnn/src/layers/convolution_layer.cpp
@ -61,6 +61,8 @@ namespace dnn
 class BaseConvolutionLayerImpl : public ConvolutionLayer
 {
 public:
    bool newWeightAndBias;
    std::vector<double> weightsMultipliers;
    BaseConvolutionLayerImpl(const LayerParams &params)
    {
        setParamsFrom(params);
@ -84,6 +86,8 @@ public:
        CV_Assert(numOutput % ngroups == 0);
        CV_Assert(adjustPad.width < stride.width &&
                  adjustPad.height < stride.height);
        newWeightAndBias = false;
    }
    void finalize(InputArrayOfArrays inputs_arr, OutputArrayOfArrays outputs_arr) CV_OVERRIDE
@ -134,6 +138,20 @@ public:
        (dilation.height == 1 && dilation.width == 1);
    }
    // Attempts to absorb the layer that follows this one.
    // Queries `top` for a scale/shift pair via getScaleShift(); when at least
    // one of the two is non-empty they are handed to fuseWeights() and the
    // method reports success. Returns false (and changes nothing) otherwise.
    virtual bool tryFuse(Ptr<Layer>& top) CV_OVERRIDE
    {
        Mat scale, shift;
        top->getScaleShift(scale, shift);

        // Nothing to fold in: the next layer exposes neither a scale nor a shift.
        if (scale.empty() && shift.empty())
            return false;

        fuseWeights(scale, shift);
        return true;
    }
    // Folds a scale (w_) and a shift (b_) into this layer's own parameters.
    // Called from tryFuse() with the values returned by getScaleShift() of the
    // following layer, and only when at least one of the two is non-empty.
    // Each concrete convolution/deconvolution layer provides its own implementation.
    virtual void fuseWeights(const Mat& w_, const Mat& b_) = 0;
    virtual void applyHalideScheduler(Ptr<BackendNode>& node,
                                      const std::vector<Mat*> &inputs,
                                      const std::vector<Mat> &outputs,
@ -184,11 +202,9 @@ class ConvolutionLayerImpl CV_FINAL : public BaseConvolutionLayerImpl
 public:
    enum { VEC_ALIGN = 8, DFT_TYPE = CV_32F };
    Mat weightsMat;
    std::vector<double> weightsMultipliers;
    std::vector<float> biasvec;
    std::vector<float> reluslope;
    Ptr<ActivationLayer> activ;
    bool newWeightAndBias;
    bool fusedBias;
 #ifdef HAVE_OPENCL
@ -200,7 +216,6 @@ public:
 #endif
    ConvolutionLayerImpl(const LayerParams &params) : BaseConvolutionLayerImpl(params)
    {
        newWeightAndBias = false;
        fusedBias = false;
 #ifdef HAVE_OPENCL
        newActiv = false;
@ -346,19 +361,7 @@ public:
        return !activ.empty();
    }
-    virtual bool tryFuse(Ptr<Layer>& top) CV_OVERRIDE
+    void fuseWeights(const Mat& w_, const Mat& b_) CV_OVERRIDE
    {
        Mat w, b;
        top->getScaleShift(w, b);
        if (!w.empty() || !b.empty())
        {
            fuseWeights(w, b);
            return true;
        }
        return false;
    }
    void fuseWeights(const Mat& w_, const Mat& b_)
    {
        // Convolution weights have OIHW data layout. Parameters fusion in case of
        // (conv(I) + b1 ) * w + b2
@ -1238,6 +1241,45 @@ public:
        pad.width = pad_l;
        pad.height = pad_t;
        weightsMultipliers.assign(numOutput, 1.0);
        if (weightsMat.empty())
        {
            transpose(blobs[0].reshape(1, blobs[0].size[0]), weightsMat);
            biasesMat = hasBias() ? blobs[1].reshape(1, numOutput)
                                  : Mat::zeros(numOutput, 1, CV_32F);
        }
    }
    // Folds a scale (w_) and a shift (b_) into weightsMat / biasesMat.
    //  - A single-element w_ or b_ is expanded to numOutput identical values.
    //  - Either argument may be empty; a non-empty one must hold numOutput elements.
    //  - weightsMultipliers keeps the running product of all scales applied so far,
    //    because weightsMat is rebuilt from blobs[0] on every scaled call.
    // Sets newWeightAndBias when anything was fused.
    void fuseWeights(const Mat& w_, const Mat& b_) CV_OVERRIDE
    {
        // Broadcast scalar inputs to one value per output channel.
        Mat w = w_;
        if (w_.total() == 1)
            w = Mat(1, numOutput, CV_32F, Scalar(w_.at<float>(0)));
        Mat b = b_;
        if (b_.total() == 1)
            b = Mat(1, numOutput, CV_32F, Scalar(b_.at<float>(0)));

        CV_Assert_N(!weightsMat.empty(),
                     w.empty() || numOutput == w.total(),
                     b.empty() || numOutput == b.total());

        const bool hasScale = !w.empty();
        const bool hasShift = !b.empty();

        if (hasScale)
        {
            // Start again from the unfused weights, viewed with one row per output channel.
            transpose(blobs[0].reshape(1, blobs[0].size[0]), weightsMat);
            weightsMat = weightsMat.reshape(1, numOutput);

            for (int ch = 0; ch < numOutput; ++ch)
            {
                const double factor = w.at<float>(ch);

                // Weights get the accumulated multiplier (they were just reset above);
                // the bias is already fused, so it only takes the new factor.
                weightsMultipliers[ch] *= factor;
                cv::multiply(weightsMat.row(ch), weightsMultipliers[ch], weightsMat.row(ch));
                biasesMat.at<float>(ch) *= factor;
            }

            // Restore the layout produced by the transpose above.
            weightsMat = weightsMat.reshape(1, weightsMat.total() / blobs[0].size[0]);
        }

        if (hasShift)
            cv::add(biasesMat, b.reshape(1, numOutput), biasesMat);

        newWeightAndBias = hasScale || hasShift;
    }
    class MatMulInvoker : public ParallelLoopBody
@ -1505,11 +1547,19 @@ public:
        if (umat_weights.empty())
        {
-            transpose(blobs[0].reshape(1, inpCn), umat_weights);
+            if (newWeightAndBias)
-            if (hasBias())
+            {
-                blobs[1].reshape(1, outCn).copyTo(umat_biases);
+                weightsMat.copyTo(umat_weights);
                biasesMat.copyTo(umat_biases);
            }
            else
-                umat_biases = UMat::zeros(outCn, 1, CV_32F);
+            {
                transpose(blobs[0].reshape(1, inpCn), umat_weights);
                if (hasBias())
                    blobs[1].reshape(1, outCn).copyTo(umat_biases);
                else
                    umat_biases = UMat::zeros(outCn, 1, CV_32F);
            }
        }
        String buildopt = format("-DT=%s ", ocl::typeToStr(inputs[0].type()));
--- a/modules/dnn/test/test_backends.cpp
+++ b/modules/dnn/test/test_backends.cpp
@ -305,9 +305,16 @@ TEST_P(DNNTestNetwork, DenseNet_121)
 TEST_P(DNNTestNetwork, FastNeuralStyle_eccv16)
 {
    if (backend == DNN_BACKEND_HALIDE ||
        (backend == DNN_BACKEND_OPENCV && target == DNN_TARGET_OPENCL_FP16) ||
        (backend == DNN_BACKEND_INFERENCE_ENGINE && target == DNN_TARGET_MYRIAD))
        throw SkipTestException("");
 #if defined(INF_ENGINE_RELEASE)
 #if INF_ENGINE_RELEASE <= 2018050000
    if (backend == DNN_BACKEND_INFERENCE_ENGINE && target == DNN_TARGET_OPENCL)
        throw SkipTestException("");
 #endif
 #endif
    Mat img = imread(findDataFile("dnn/googlenet_1.png", false));
    Mat inp = blobFromImage(img, 1.0, Size(320, 240), Scalar(103.939, 116.779, 123.68), false, false);
    // Output image has values in range [-143.526, 148.539].
--- a/modules/dnn/test/test_torch_importer.cpp
+++ b/modules/dnn/test/test_torch_importer.cpp
@ -394,6 +394,14 @@ TEST_P(Test_Torch_nets, ENet_accuracy)
 TEST_P(Test_Torch_nets, FastNeuralStyle_accuracy)
 {
    checkBackend();
 #if defined(INF_ENGINE_RELEASE)
 #if INF_ENGINE_RELEASE <= 2018050000
    if (backend == DNN_BACKEND_INFERENCE_ENGINE && target == DNN_TARGET_OPENCL)
        throw SkipTestException("");
 #endif
 #endif
    std::string models[] = {"dnn/fast_neural_style_eccv16_starry_night.t7",
                            "dnn/fast_neural_style_instance_norm_feathers.t7"};
    std::string targets[] = {"dnn/lena_starry_night.png", "dnn/lena_feathers.png"};