From a6baedd02c659b6a224dcaff31d977e4d918619f Mon Sep 17 00:00:00 2001
From: Dmitry Kurtaev
Date: Mon, 12 Feb 2018 18:55:27 +0300
Subject: [PATCH] Fix deconvolution layer. Add batch norm layer with
 mean-variance normalization from TensorFlow.

---
 modules/dnn/src/layers/convolution_layer.cpp |  37 ++++++-
 modules/dnn/src/layers/mvn_layer.cpp         |   8 ++
 modules/dnn/src/tensorflow/tf_importer.cpp   | 103 ++++++++++++++++---
 modules/dnn/test/test_tf_importer.cpp        |   6 ++
 4 files changed, 136 insertions(+), 18 deletions(-)

diff --git a/modules/dnn/src/layers/convolution_layer.cpp b/modules/dnn/src/layers/convolution_layer.cpp
index 64c2212f59..2b1ddb4952 100644
--- a/modules/dnn/src/layers/convolution_layer.cpp
+++ b/modules/dnn/src/layers/convolution_layer.cpp
@@ -1127,8 +1127,25 @@ public:
         int inpH = inputs[0][2];
         int inpW = inputs[0][3];

-        int outH = stride.height * (inpH - 1) + kernel.height - 2 * pad.height + adjustPad.height;
-        int outW = stride.width * (inpW - 1) + kernel.width - 2 * pad.width + adjustPad.width;
+        int outH = -1, outW = -1;
+        if (padMode.empty())
+        {
+            outH = stride.height * (inpH - 1) + kernel.height - 2 * pad.height + adjustPad.height;
+            outW = stride.width * (inpW - 1) + kernel.width - 2 * pad.width + adjustPad.width;
+        }
+        else if (padMode == "VALID")
+        {
+            outH = stride.height * (inpH - 1) + kernel.height + adjustPad.height;
+            outW = stride.width * (inpW - 1) + kernel.width + adjustPad.width;
+        }
+        else if (padMode == "SAME")
+        {
+            outH = stride.height * (inpH - 1) + 1 + adjustPad.height;
+            outW = stride.width * (inpW - 1) + 1 + adjustPad.width;
+        }
+        else
+            CV_Error(Error::StsError, "Unsupported padding mode " + padMode);
+
         int outCn = numOutput;
         CV_Assert(outCn % blobs[0].size[1] == 0);
@@ -1150,6 +1167,14 @@ public:
         return false;
     }

+    void finalize(const std::vector<Mat*> &inputs, std::vector<Mat> &outputs)
+    {
+        BaseConvolutionLayerImpl::finalize(inputs, outputs);
+        getConvPoolPaddings(Size(outputs[0].size[3], outputs[0].size[2]),
+                            Size(inputs[0]->size[3], inputs[0]->size[2]),
+                            kernel, stride, padMode, dilation, pad);
+    }
+
     class MatMulInvoker : public ParallelLoopBody
     {
     public:
@@ -1316,6 +1341,7 @@ public:
                     int kernel_h, int kernel_w,
                     int pad_h, int pad_w,
                     int stride_h, int stride_w,
+                    int height_col, int width_col,
                     float* data_im,
                     const float* biasvec,
                     bool is1x1)
@@ -1329,8 +1355,8 @@ public:
         t.kernel_h = kernel_h; t.kernel_w = kernel_w;
         t.pad_h = pad_h; t.pad_w = pad_w;
         t.stride_h = stride_h; t.stride_w = stride_w;
-        t.height_col = (height + 2 * pad_h - kernel_h) / stride_h + 1;
-        t.width_col = (width + 2 * pad_w - kernel_w) / stride_w + 1;
+        t.height_col = height_col;
+        t.width_col = width_col;
         t.nstripes = nstripes;
         t.is1x1 = is1x1;
         t.biasvec = biasvec;
@@ -1520,6 +1546,7 @@ public:
             const Mat& inp = *inputs[ii];
             Mat& out = outputs[ii];
             int numImg = inp.size[0];
+            int inpH = inp.size[2], inpW = inp.size[3];
            int outH = out.size[2], outW = out.size[3];

             Mat convBlob = inputs[ii]->reshape(1, numImg*inpCn);
@@ -1542,7 +1569,7 @@ public:

                 Col2ImInvoker::run(colMat.ptr<float>(), outGroupCn, outH, outW,
                                    kernel.height, kernel.width, pad.height, pad.width,
-                                   stride.height, stride.width, dstMat.ptr<float>(),
+                                   stride.height, stride.width, inpH, inpW, dstMat.ptr<float>(),
                                    curBiasMat.ptr<float>(), is1x1flag);
             }
         }
diff --git a/modules/dnn/src/layers/mvn_layer.cpp b/modules/dnn/src/layers/mvn_layer.cpp
index c911b741b4..a286fec5bb 100644
--- a/modules/dnn/src/layers/mvn_layer.cpp
+++ b/modules/dnn/src/layers/mvn_layer.cpp
@@ -273,6 +273,14 @@ public:
         int i, newRows = 1;
         for( i = 0; i < splitDim; i++ )
             newRows *= inpBlob.size[i];
+
+        if (inpBlob.total() == newRows)
+        {
+            // MVN is applied to single values in every row.
+            outBlob.setTo(0);
+            return;
+        }
+
         Mat inpMat = inpBlob.reshape(1, newRows);
         Mat outMat = outBlob.reshape(1, newRows);

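The early-out added to mvn_layer.cpp above is worth a note: mean-variance normalization computes y = (x - mean(x)) / sqrt(var(x) + eps) over each row, and when a row contains exactly one value the mean equals that value, so every output is zero by definition; returning zeros directly also avoids dividing by a near-zero variance. A minimal standalone sketch of that property (illustrative code, not the OpenCV implementation):

    #include <cmath>
    #include <cstdio>
    #include <vector>

    // Mean-variance normalization of one row: y = (x - mean) / sqrt(var + eps).
    // With a single value per row, mean == x, so the output is exactly 0;
    // the new early-out in MVNLayer returns that without any division.
    static void mvnRow(const std::vector<float>& x, std::vector<float>& y,
                       float eps = 1e-5f)
    {
        float mean = 0.f, var = 0.f;
        for (size_t i = 0; i < x.size(); ++i) mean += x[i];
        mean /= (float)x.size();
        for (size_t i = 0; i < x.size(); ++i) var += (x[i] - mean) * (x[i] - mean);
        var /= (float)x.size();
        y.resize(x.size());
        for (size_t i = 0; i < x.size(); ++i)
            y[i] = (x[i] - mean) / std::sqrt(var + eps);
    }

    int main()
    {
        std::vector<float> single(1, 42.f), y;
        mvnRow(single, y);
        std::printf("%f\n", y[0]);  // prints 0.000000
        return 0;
    }

This is the case the new mvn_batch_norm_1x1 test covers: 1x1 spatial maps leave one value per normalized row.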
diff --git a/modules/dnn/src/tensorflow/tf_importer.cpp b/modules/dnn/src/tensorflow/tf_importer.cpp
index ccb028ba1c..f6103f0619 100644
--- a/modules/dnn/src/tensorflow/tf_importer.cpp
+++ b/modules/dnn/src/tensorflow/tf_importer.cpp
@@ -1160,8 +1160,35 @@ void TFImporter::populateNet(Net dstNet)
             int id;
             if (scaleMat.total() == 1)  // is a scalar.
             {
-                layerParams.set("scale", scaleMat.at<float>(0));
-                id = dstNet.addLayer(name, "Power", layerParams);
+                // Try to match with a LeakyRelu:
+                // node {
+                //   name: "LeakyRelu/mul"
+                //   op: "Mul"
+                //   input: "LeakyRelu/alpha"
+                //   input: "input"
+                // }
+                // node {
+                //   name: "LeakyRelu/Maximum"
+                //   op: "Maximum"
+                //   input: "LeakyRelu/mul"
+                //   input: "input"
+                // }
+                StrIntVector next_layers = getNextLayers(net, name, "Maximum");
+                if (!next_layers.empty())
+                {
+                    int maximumLayerIdx = next_layers[0].second;
+                    ExcludeLayer(net, maximumLayerIdx, 0, false);
+                    layers_to_ignore.insert(next_layers[0].first);
+
+                    layerParams.set("negative_slope", scaleMat.at<float>(0));
+                    id = dstNet.addLayer(name, "ReLU", layerParams);
+                }
+                else
+                {
+                    // Just a multiplication.
+                    layerParams.set("scale", scaleMat.at<float>(0));
+                    id = dstNet.addLayer(name, "Power", layerParams);
+                }
             }
             else  // is a vector
             {
@@ -1241,16 +1268,37 @@ void TFImporter::populateNet(Net dstNet)
             if (layer.input_size() != 5)
                 CV_Error(Error::StsNotImplemented,
                          "Expected gamma, beta, mean and std");
+            Pin inpId = parsePin(layer.input(0));
+
+            bool isTraining = hasLayerAttr(layer, "is_training") && getLayerAttr(layer, "is_training").b();

             layerParams.blobs.resize(4);
-            // gamma
-            blobFromTensor(getConstBlob(layer, value_id, 1), layerParams.blobs[2]);
-            // beta
-            blobFromTensor(getConstBlob(layer, value_id, 2), layerParams.blobs[3]);
-            // mean
-            blobFromTensor(getConstBlob(layer, value_id, 3), layerParams.blobs[0]);
-            // std
-            blobFromTensor(getConstBlob(layer, value_id, 4), layerParams.blobs[1]);
+            Mat gamma, beta, mean, std;
+            blobFromTensor(getConstBlob(layer, value_id, 1), gamma);
+            blobFromTensor(getConstBlob(layer, value_id, 2), beta);
+            if (isTraining)
+            {
+                mean = Mat::zeros(1, beta.total(), CV_32F);
+                std = Mat::ones(1, beta.total(), CV_32F);
+
+                // Add an extra layer: Mean-Variance normalization
+                LayerParams mvnParams;
+                std::string mvnName = name + "/MVN";
+                CV_Assert(layer_id.find(mvnName) == layer_id.end());
+                int mvnId = dstNet.addLayer(mvnName, "MVN", mvnParams);
+                layer_id[mvnName] = mvnId;
+                connect(layer_id, dstNet, inpId, mvnId, 0);
+                inpId = Pin(mvnName);
+            }
+            else
+            {
+                blobFromTensor(getConstBlob(layer, value_id, 3), mean);
+                blobFromTensor(getConstBlob(layer, value_id, 4), std);
+            }
+            layerParams.blobs[0] = mean;
+            layerParams.blobs[1] = std;
+            layerParams.blobs[2] = gamma;
+            layerParams.blobs[3] = beta;

             if (hasLayerAttr(layer, "epsilon"))
                 layerParams.set("eps", getLayerAttr(layer, "epsilon").f());
@@ -1262,7 +1310,7 @@ void TFImporter::populateNet(Net dstNet)
             layer_id[name] = id;

             // one input only
-            connect(layer_id, dstNet, parsePin(layer.input(0)), id, 0);
+            connect(layer_id, dstNet, inpId, id, 0);
         }
         else if (type == "Conv2DBackpropInput")
         {
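A note on the isTraining branch above: a graph saved with is_training == true carries no usable moving_mean/moving_variance constants, so the importer inserts an MVN layer to normalize activations at run time and gives the BatchNorm layer identity statistics (mean 0, std 1), leaving only the learned gamma/beta affine transform. (The earlier Mul/Maximum match relies on a similar identity: max(x, a*x) == LeakyReLU(x) for 0 < a < 1.) A small numeric sketch of the batch-norm decomposition, standalone and illustrative rather than importer code:

    #include <cmath>
    #include <cstdio>

    // Fused batch norm: y = gamma * (x - mean) / sqrt(var + eps) + beta.
    static float batchNorm(float x, float gamma, float beta,
                           float mean, float var, float eps = 1e-5f)
    {
        return gamma * (x - mean) / std::sqrt(var + eps) + beta;
    }

    int main()
    {
        const float gamma = 1.5f, beta = -0.25f;
        const float xHat = 0.7f;  // stands in for the output of the MVN layer
        // With identity statistics (mean 0, var 1), batch norm reduces,
        // up to eps, to the plain affine transform gamma * xHat + beta.
        std::printf("%f vs %f\n",
                    batchNorm(xHat, gamma, beta, 0.f, 1.f),
                    gamma * xHat + beta);
        return 0;
    }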
@@ -1293,13 +1341,42 @@ void TFImporter::populateNet(Net dstNet)
             kernelFromTensor(getConstBlob(layer, value_id, 1), layerParams.blobs[0]);

             const int* kshape = layerParams.blobs[0].size.p;
-            layerParams.set("kernel_h", kshape[2]);
-            layerParams.set("kernel_w", kshape[3]);
+            const int kernelH = kshape[2];
+            const int kernelW = kshape[3];
+            layerParams.set("kernel_h", kernelH);
+            layerParams.set("kernel_w", kernelW);
             layerParams.set("num_output", kshape[1]);

             setStrides(layerParams, layer);
             setPadding(layerParams, layer);

+            // For a convolution layer, the output shape is computed as
+            //   o = 1 + (i - k + 2*p) / s
+            // i - input size, o - output size, k - kernel size, p - pad, s - stride.
+            // In TensorFlow, p == 0 for padMode == 'VALID' and p == (k - 1) / 2
+            // for padMode == 'SAME' (considering that k is odd).
+            // SAME:  o = 1 + (i - 1) / s
+            // VALID: o = 1 + (i - k) / s
+            // A deconvolution layer's output shape is computed as the inverse:
+            // SAME:  o = 1 + (i - 1)*s
+            // VALID: o = (i - 1)*s + k
+            // If output_shape differs from these formulas, adjusting padding is applied.
+
+            const int strideY = layerParams.get<int>("stride_h");
+            const int strideX = layerParams.get<int>("stride_w");
+            Mat outShape = getTensorContent(getConstBlob(layer, value_id, 0));
+            const int outH = outShape.at<int>(1);
+            const int outW = outShape.at<int>(2);
+            if (layerParams.get<String>("pad_mode") == "SAME")
+            {
+                layerParams.set("adj_w", (outW - 1) % strideX);
+                layerParams.set("adj_h", (outH - 1) % strideY);
+            }
+            else if (layerParams.get<String>("pad_mode") == "VALID")
+            {
+                layerParams.set("adj_w", (outW - kernelW) % strideX);
+                layerParams.set("adj_h", (outH - kernelH) % strideY);
+            }
+
             int id = dstNet.addLayer(name, "Deconvolution", layerParams);
             layer_id[name] = id;
diff --git a/modules/dnn/test/test_tf_importer.cpp b/modules/dnn/test/test_tf_importer.cpp
index 9cebd23823..1210d12e93 100644
--- a/modules/dnn/test/test_tf_importer.cpp
+++ b/modules/dnn/test/test_tf_importer.cpp
@@ -150,6 +150,8 @@ TEST(Test_TensorFlow, batch_norm)
     runTensorFlowNet("batch_norm");
     runTensorFlowNet("fused_batch_norm");
     runTensorFlowNet("batch_norm_text", DNN_TARGET_CPU, true);
+    runTensorFlowNet("mvn_batch_norm");
+    runTensorFlowNet("mvn_batch_norm_1x1");
 }

 OCL_TEST(Test_TensorFlow, batch_norm)
@@ -170,6 +172,10 @@ TEST(Test_TensorFlow, pooling)
 TEST(Test_TensorFlow, deconvolution)
 {
     runTensorFlowNet("deconvolution");
+    runTensorFlowNet("deconvolution_same");
+    runTensorFlowNet("deconvolution_stride_2_same");
+    runTensorFlowNet("deconvolution_adj_pad_valid");
+    runTensorFlowNet("deconvolution_adj_pad_same");
 }

 OCL_TEST(Test_TensorFlow, deconvolution)
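The new deconvolution tests exercise the SAME/VALID output-shape and adjusting-pad arithmetic documented in the Conv2DBackpropInput hunk. A self-contained check of those formulas (illustrative helper, not library code):

    #include <cassert>

    // Deconvolution output size by TensorFlow padding mode:
    //   SAME:  o = 1 + (i - 1)*s + adj
    //   VALID: o = (i - 1)*s + k + adj
    // where adj is the adjusting pad chosen so that o matches the requested
    // output_shape: adj = (o - 1) % s for SAME and adj = (o - k) % s for VALID.
    static int deconvOut(int i, int k, int s, bool same, int adj)
    {
        return same ? 1 + (i - 1) * s + adj : (i - 1) * s + k + adj;
    }

    int main()
    {
        // SAME, stride 2: input 3 gives base output 5;
        // output_shape 6 requires adj = (6 - 1) % 2 = 1.
        assert(deconvOut(3, 3, 2, true, (6 - 1) % 2) == 6);
        // VALID, stride 2, kernel 3: input 3 gives base output 7;
        // output_shape 8 requires adj = (8 - 3) % 2 = 1.
        assert(deconvOut(3, 3, 2, false, (8 - 3) % 2) == 8);
        return 0;
    }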