From a6baedd02c659b6a224dcaff31d977e4d918619f Mon Sep 17 00:00:00 2001
From: Dmitry Kurtaev
Date: Mon, 12 Feb 2018 18:55:27 +0300
Subject: [PATCH] Fix deconvolution layer. Add batch norm layer with
 mean-variance normalization from TensorFlow.

---
 modules/dnn/src/layers/convolution_layer.cpp |  37 ++++++-
 modules/dnn/src/layers/mvn_layer.cpp         |   8 ++
 modules/dnn/src/tensorflow/tf_importer.cpp   | 103 ++++++++++++++++---
 modules/dnn/test/test_tf_importer.cpp        |   6 ++
 4 files changed, 136 insertions(+), 18 deletions(-)

diff --git a/modules/dnn/src/layers/convolution_layer.cpp b/modules/dnn/src/layers/convolution_layer.cpp
index 64c2212f59..2b1ddb4952 100644
--- a/modules/dnn/src/layers/convolution_layer.cpp
+++ b/modules/dnn/src/layers/convolution_layer.cpp
@@ -1127,8 +1127,25 @@ public:
         int inpH = inputs[0][2];
         int inpW = inputs[0][3];

-        int outH = stride.height * (inpH - 1) + kernel.height - 2 * pad.height + adjustPad.height;
-        int outW = stride.width * (inpW - 1) + kernel.width - 2 * pad.width + adjustPad.width;
+        int outH = -1, outW = -1;
+        if (padMode.empty())
+        {
+            outH = stride.height * (inpH - 1) + kernel.height - 2 * pad.height + adjustPad.height;
+            outW = stride.width * (inpW - 1) + kernel.width - 2 * pad.width + adjustPad.width;
+        }
+        else if (padMode == "VALID")
+        {
+            outH = stride.height * (inpH - 1) + kernel.height + adjustPad.height;
+            outW = stride.width * (inpW - 1) + kernel.width + adjustPad.width;
+        }
+        else if (padMode == "SAME")
+        {
+            outH = stride.height * (inpH - 1) + 1 + adjustPad.height;
+            outW = stride.width * (inpW - 1) + 1 + adjustPad.width;
+        }
+        else
+            CV_Error(Error::StsError, "Unsupported padding mode " + padMode);
+
         int outCn = numOutput;
         CV_Assert(outCn % blobs[0].size[1] == 0);
@@ -1150,6 +1167,14 @@ public:
         return false;
     }

+    void finalize(const std::vector<Mat*> &inputs, std::vector<Mat> &outputs)
+    {
+        BaseConvolutionLayerImpl::finalize(inputs, outputs);
+        getConvPoolPaddings(Size(outputs[0].size[3], outputs[0].size[2]),
+                            Size(inputs[0]->size[3], inputs[0]->size[2]),
+                            kernel, stride, padMode, dilation, pad);
+    }
+
     class MatMulInvoker : public ParallelLoopBody
     {
     public:
@@ -1316,6 +1341,7 @@ public:
                     int kernel_h, int kernel_w,
                     int pad_h, int pad_w,
                     int stride_h, int stride_w,
+                    int height_col, int width_col,
                     float* data_im,
                     const float* biasvec,
                     bool is1x1)
@@ -1329,8 +1355,8 @@ public:
         t.kernel_h = kernel_h; t.kernel_w = kernel_w;
         t.pad_h = pad_h; t.pad_w = pad_w;
         t.stride_h = stride_h; t.stride_w = stride_w;
-        t.height_col = (height + 2 * pad_h - kernel_h) / stride_h + 1;
-        t.width_col = (width + 2 * pad_w - kernel_w) / stride_w + 1;
+        t.height_col = height_col;
+        t.width_col = width_col;
         t.nstripes = nstripes;
         t.is1x1 = is1x1;
         t.biasvec = biasvec;
@@ -1520,6 +1546,7 @@ public:
             const Mat& inp = *inputs[ii];
             Mat& out = outputs[ii];
             int numImg = inp.size[0];
+            int inpH = inp.size[2], inpW = inp.size[3];
            int outH = out.size[2], outW = out.size[3];

             Mat convBlob = inputs[ii]->reshape(1, numImg*inpCn);
@@ -1542,7 +1569,7 @@ public:

                 Col2ImInvoker::run(colMat.ptr<float>(), outGroupCn, outH, outW,
                                    kernel.height, kernel.width, pad.height, pad.width,
-                                   stride.height, stride.width, dstMat.ptr<float>(),
+                                   stride.height, stride.width, inpH, inpW, dstMat.ptr<float>(),
                                    curBiasMat.ptr<float>(), is1x1flag);
             }
         }
diff --git a/modules/dnn/src/layers/mvn_layer.cpp b/modules/dnn/src/layers/mvn_layer.cpp
index c911b741b4..a286fec5bb 100644
--- a/modules/dnn/src/layers/mvn_layer.cpp
+++ b/modules/dnn/src/layers/mvn_layer.cpp
@@ -273,6 +273,14 @@ public:
         int i, newRows = 1;
         for( i = 0; i < splitDim; i++ )
             newRows *= inpBlob.size[i];
+
+        if (inpBlob.total() == newRows)
+        {
+            // MVN is applied to single values in every row.
+            outBlob.setTo(0);
+            return;
+        }
+
         Mat inpMat = inpBlob.reshape(1, newRows);
         Mat outMat = outBlob.reshape(1, newRows);

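The early-out added to mvn_layer.cpp above is worth a note: mean-variance normalization computes y = (x - mean(x)) / sqrt(var(x) + eps) over each row, and when a row contains exactly one value the mean equals that value, so every output is zero by definition; returning zeros directly also avoids dividing by a near-zero variance. A minimal standalone sketch of that property (illustrative code, not the OpenCV implementation):

    #include <cmath>
    #include <cstdio>
    #include <vector>

    // Mean-variance normalization of one row: y = (x - mean) / sqrt(var + eps).
    // With a single value per row, mean == x, so the output is exactly 0;
    // the new early-out in MVNLayer returns that without any division.
    static void mvnRow(const std::vector<float>& x, std::vector<float>& y,
                       float eps = 1e-5f)
    {
        float mean = 0.f, var = 0.f;
        for (size_t i = 0; i < x.size(); ++i) mean += x[i];
        mean /= (float)x.size();
        for (size_t i = 0; i < x.size(); ++i) var += (x[i] - mean) * (x[i] - mean);
        var /= (float)x.size();
        y.resize(x.size());
        for (size_t i = 0; i < x.size(); ++i)
            y[i] = (x[i] - mean) / std::sqrt(var + eps);
    }

    int main()
    {
        std::vector<float> single(1, 42.f), y;
        mvnRow(single, y);
        std::printf("%f\n", y[0]);  // prints 0.000000
        return 0;
    }

This is the case the new mvn_batch_norm_1x1 test covers: 1x1 spatial maps leave one value per normalized row.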
diff --git a/modules/dnn/src/tensorflow/tf_importer.cpp b/modules/dnn/src/tensorflow/tf_importer.cpp
index ccb028ba1c..f6103f0619 100644
--- a/modules/dnn/src/tensorflow/tf_importer.cpp
+++ b/modules/dnn/src/tensorflow/tf_importer.cpp
@@ -1160,8 +1160,35 @@ void TFImporter::populateNet(Net dstNet)
             int id;
             if (scaleMat.total() == 1)  // is a scalar.
             {
-                layerParams.set("scale", scaleMat.at<float>(0));
-                id = dstNet.addLayer(name, "Power", layerParams);
+                // Try to match with a LeakyRelu:
+                // node {
+                //   name: "LeakyRelu/mul"
+                //   op: "Mul"
+                //   input: "LeakyRelu/alpha"
+                //   input: "input"
+                // }
+                // node {
+                //   name: "LeakyRelu/Maximum"
+                //   op: "Maximum"
+                //   input: "LeakyRelu/mul"
+                //   input: "input"
+                // }
+                StrIntVector next_layers = getNextLayers(net, name, "Maximum");
+                if (!next_layers.empty())
+                {
+                    int maximumLayerIdx = next_layers[0].second;
+                    ExcludeLayer(net, maximumLayerIdx, 0, false);
+                    layers_to_ignore.insert(next_layers[0].first);
+
+                    layerParams.set("negative_slope", scaleMat.at<float>(0));
+                    id = dstNet.addLayer(name, "ReLU", layerParams);
+                }
+                else
+                {
+                    // Just a multiplication.
+                    layerParams.set("scale", scaleMat.at<float>(0));
+                    id = dstNet.addLayer(name, "Power", layerParams);
+                }
             }
             else  // is a vector
             {
@@ -1241,16 +1268,37 @@ void TFImporter::populateNet(Net dstNet)
             if (layer.input_size() != 5)
                 CV_Error(Error::StsNotImplemented,
                          "Expected gamma, beta, mean and std");
+            Pin inpId = parsePin(layer.input(0));
+
+            bool isTraining = hasLayerAttr(layer, "is_training") && getLayerAttr(layer, "is_training").b();

             layerParams.blobs.resize(4);
-            // gamma
-            blobFromTensor(getConstBlob(layer, value_id, 1), layerParams.blobs[2]);
-            // beta
-            blobFromTensor(getConstBlob(layer, value_id, 2), layerParams.blobs[3]);
-            // mean
-            blobFromTensor(getConstBlob(layer, value_id, 3), layerParams.blobs[0]);
-            // std
-            blobFromTensor(getConstBlob(layer, value_id, 4), layerParams.blobs[1]);
+            Mat gamma, beta, mean, std;
+            blobFromTensor(getConstBlob(layer, value_id, 1), gamma);
+            blobFromTensor(getConstBlob(layer, value_id, 2), beta);
+            if (isTraining)
+            {
+                mean = Mat::zeros(1, beta.total(), CV_32F);
+                std = Mat::ones(1, beta.total(), CV_32F);
+
+                // Add an extra layer: Mean-Variance normalization
+                LayerParams mvnParams;
+                std::string mvnName = name + "/MVN";
+                CV_Assert(layer_id.find(mvnName) == layer_id.end());
+                int mvnId = dstNet.addLayer(mvnName, "MVN", mvnParams);
+                layer_id[mvnName] = mvnId;
+                connect(layer_id, dstNet, inpId, mvnId, 0);
+                inpId = Pin(mvnName);
+            }
+            else
+            {
+                blobFromTensor(getConstBlob(layer, value_id, 3), mean);
+                blobFromTensor(getConstBlob(layer, value_id, 4), std);
+            }
+            layerParams.blobs[0] = mean;
+            layerParams.blobs[1] = std;
+            layerParams.blobs[2] = gamma;
+            layerParams.blobs[3] = beta;

             if (hasLayerAttr(layer, "epsilon"))
                 layerParams.set("eps", getLayerAttr(layer, "epsilon").f());
@@ -1262,7 +1310,7 @@ void TFImporter::populateNet(Net dstNet)
             layer_id[name] = id;

             // one input only
-            connect(layer_id, dstNet, parsePin(layer.input(0)), id, 0);
+            connect(layer_id, dstNet, inpId, id, 0);
         }
         else if (type == "Conv2DBackpropInput")
         {
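A note on the isTraining branch above: a graph saved with is_training == true carries no usable moving_mean/moving_variance constants, so the importer inserts an MVN layer to normalize activations at run time and gives the BatchNorm layer identity statistics (mean 0, std 1), leaving only the learned gamma/beta affine transform. (The earlier Mul/Maximum match relies on a similar identity: max(x, a*x) == LeakyReLU(x) for 0 < a < 1.) A small numeric sketch of the batch-norm decomposition, standalone and illustrative rather than importer code:

    #include <cmath>
    #include <cstdio>

    // Fused batch norm: y = gamma * (x - mean) / sqrt(var + eps) + beta.
    static float batchNorm(float x, float gamma, float beta,
                           float mean, float var, float eps = 1e-5f)
    {
        return gamma * (x - mean) / std::sqrt(var + eps) + beta;
    }

    int main()
    {
        const float gamma = 1.5f, beta = -0.25f;
        const float xHat = 0.7f;  // stands in for the output of the MVN layer
        // With identity statistics (mean 0, var 1), batch norm reduces,
        // up to eps, to the plain affine transform gamma * xHat + beta.
        std::printf("%f vs %f\n",
                    batchNorm(xHat, gamma, beta, 0.f, 1.f),
                    gamma * xHat + beta);
        return 0;
    }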
@@ -1293,13 +1341,42 @@ void TFImporter::populateNet(Net dstNet)
             kernelFromTensor(getConstBlob(layer, value_id, 1), layerParams.blobs[0]);

             const int* kshape = layerParams.blobs[0].size.p;
-            layerParams.set("kernel_h", kshape[2]);
-            layerParams.set("kernel_w", kshape[3]);
+            const int kernelH = kshape[2];
+            const int kernelW = kshape[3];
+            layerParams.set("kernel_h", kernelH);
+            layerParams.set("kernel_w", kernelW);
             layerParams.set("num_output", kshape[1]);

             setStrides(layerParams, layer);
             setPadding(layerParams, layer);

+            // For a convolution layer, the output shape is computed as
+            //   o = 1 + (i - k + 2*p) / s
+            // i - input size, o - output size, k - kernel size, p - pad, s - stride.
+            // In TensorFlow, p == 0 for padMode == 'VALID' and p == (k - 1) / 2
+            // for padMode == 'SAME' (considering that k is odd).
+            // SAME:  o = 1 + (i - 1) / s
+            // VALID: o = 1 + (i - k) / s
+            // A deconvolution layer's output shape is computed as the inverse:
+            // SAME:  o = 1 + (i - 1)*s
+            // VALID: o = (i - 1)*s + k
+            // If output_shape differs from these formulas, adjusting padding is applied.
+
+            const int strideY = layerParams.get<int>("stride_h");
+            const int strideX = layerParams.get<int>("stride_w");
+            Mat outShape = getTensorContent(getConstBlob(layer, value_id, 0));
+            const int outH = outShape.at<int>(1);
+            const int outW = outShape.at<int>(2);
+            if (layerParams.get<String>("pad_mode") == "SAME")
+            {
+                layerParams.set("adj_w", (outW - 1) % strideX);
+                layerParams.set("adj_h", (outH - 1) % strideY);
+            }
+            else if (layerParams.get<String>("pad_mode") == "VALID")
+            {
+                layerParams.set("adj_w", (outW - kernelW) % strideX);
+                layerParams.set("adj_h", (outH - kernelH) % strideY);
+            }
+
             int id = dstNet.addLayer(name, "Deconvolution", layerParams);
             layer_id[name] = id;
diff --git a/modules/dnn/test/test_tf_importer.cpp b/modules/dnn/test/test_tf_importer.cpp
index 9cebd23823..1210d12e93 100644
--- a/modules/dnn/test/test_tf_importer.cpp
+++ b/modules/dnn/test/test_tf_importer.cpp
@@ -150,6 +150,8 @@ TEST(Test_TensorFlow, batch_norm)
     runTensorFlowNet("batch_norm");
     runTensorFlowNet("fused_batch_norm");
     runTensorFlowNet("batch_norm_text", DNN_TARGET_CPU, true);
+    runTensorFlowNet("mvn_batch_norm");
+    runTensorFlowNet("mvn_batch_norm_1x1");
 }

 OCL_TEST(Test_TensorFlow, batch_norm)
@@ -170,6 +172,10 @@ TEST(Test_TensorFlow, pooling)
 TEST(Test_TensorFlow, deconvolution)
 {
     runTensorFlowNet("deconvolution");
+    runTensorFlowNet("deconvolution_same");
+    runTensorFlowNet("deconvolution_stride_2_same");
+    runTensorFlowNet("deconvolution_adj_pad_valid");
+    runTensorFlowNet("deconvolution_adj_pad_same");
 }

 OCL_TEST(Test_TensorFlow, deconvolution)
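The new deconvolution tests exercise the SAME/VALID output-shape and adjusting-pad arithmetic documented in the Conv2DBackpropInput hunk. A self-contained check of those formulas (illustrative helper, not library code):

    #include <cassert>

    // Deconvolution output size by TensorFlow padding mode:
    //   SAME:  o = 1 + (i - 1)*s + adj
    //   VALID: o = (i - 1)*s + k + adj
    // where adj is the adjusting pad chosen so that o matches the requested
    // output_shape: adj = (o - 1) % s for SAME and adj = (o - k) % s for VALID.
    static int deconvOut(int i, int k, int s, bool same, int adj)
    {
        return same ? 1 + (i - 1) * s + adj : (i - 1) * s + k + adj;
    }

    int main()
    {
        // SAME, stride 2: input 3 gives base output 5;
        // output_shape 6 requires adj = (6 - 1) % 2 = 1.
        assert(deconvOut(3, 3, 2, true, (6 - 1) % 2) == 6);
        // VALID, stride 2, kernel 3: input 3 gives base output 7;
        // output_shape 8 requires adj = (8 - 3) % 2 = 1.
        assert(deconvOut(3, 3, 2, false, (8 - 3) % 2) == 8);
        return 0;
    }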