From 024b43ca06dc7ab5e612aeee55fb44cdb534c49d Mon Sep 17 00:00:00 2001 From: Smirnov Egor Date: Mon, 19 Jul 2021 18:24:15 +0300 Subject: [PATCH] implement asymmetric padding for conv2d, max_pool and conv2d_backprop_input --- modules/dnn/src/tensorflow/tf_importer.cpp | 112 +++++++++++++++++++-- modules/dnn/test/test_tf_importer.cpp | 10 ++ 2 files changed, 112 insertions(+), 10 deletions(-) diff --git a/modules/dnn/src/tensorflow/tf_importer.cpp b/modules/dnn/src/tensorflow/tf_importer.cpp index 39c2309394..426710989e 100644 --- a/modules/dnn/src/tensorflow/tf_importer.cpp +++ b/modules/dnn/src/tensorflow/tf_importer.cpp @@ -404,12 +404,53 @@ void setKSize(LayerParams &layerParams, const tensorflow::NodeDef &layer) } } -void setPadding(LayerParams &layerParams, const tensorflow::NodeDef &layer) +void setPadMode(LayerParams &layerParams, const tensorflow::NodeDef &layer) { if (hasLayerAttr(layer, "padding")) layerParams.set("pad_mode", getLayerAttr(layer, "padding").s()); } +bool getExplicitPadding(LayerParams &layerParams, const tensorflow::NodeDef &layer, int64_t (&pads)[8]) +{ + if (!layerParams.has("pad_mode") || + layerParams.get("pad_mode").getStringValue() != "EXPLICIT") + { + return false; + } + + CV_Assert(hasLayerAttr(layer, "explicit_paddings")); + + const tensorflow::AttrValue& protoPads = getLayerAttr(layer, "explicit_paddings"); + if (protoPads.list().i_size() != 8) + { + CV_Error(Error::StsNotImplemented, "Unsupported asymmetric padding configuration."); + } + + int n = sizeof(pads) / sizeof(pads[0]); + for (int i = 0; i < n; ++i) + { + pads[i] = protoPads.list().i(i); + } + + if (getDataLayout(layer) != DATA_LAYOUT_NCHW) + { + CV_LOG_DEBUG(NULL, "DNN/TF: Data format " << getLayerAttr(layer, "data_format").s() << ", assuming NHWC."); + // Perhaps, we have NHWC padding dimensions order. + // N H W C + // 0 1 2 3 4 5 6 7 + std::swap(pads[2], pads[6]); + std::swap(pads[3], pads[7]); + // N C W H + // 0 1 2 3 4 5 6 7 + std::swap(pads[4], pads[6]); + std::swap(pads[5], pads[7]); + // N C H W + // 0 1 2 3 4 5 6 7 + } + + return true; +} + Pin parsePin(const std::string &name) { Pin pin(name); @@ -510,6 +551,7 @@ protected: private: void addPermuteLayer(const int* order, const std::string& permName, Pin& inpId); + void setPadding(LayerParams &layerParams, const tensorflow::NodeDef &layer, std::string& inputName, float value = 0.); typedef void (TFImporter::*TFImporterNodeParser)(tensorflow::GraphDef&, const tensorflow::NodeDef&, LayerParams&); typedef std::map DispatchMap; @@ -551,6 +593,31 @@ private: void parseCustomLayer (tensorflow::GraphDef& net, const tensorflow::NodeDef& layer, LayerParams& layerParams); }; +void TFImporter::setPadding(LayerParams &layerParams, const tensorflow::NodeDef &layer, std::string& inputName, float value) +{ + setPadMode(layerParams, layer); + int64_t pads[8]; + + if (!getExplicitPadding(layerParams, layer, pads)) + { + return; + } + + LayerParams padLp; + padLp.name = layer.name() + "/pad"; + padLp.type = "Padding"; + padLp.set("paddings", DictValue::arrayInt(pads, sizeof(pads) / sizeof(pads[0]))); + padLp.set("value", value); + + int id = dstNet.addLayer(padLp.name, padLp.type, padLp); + layer_id[padLp.name] = id; + + connect(layer_id, dstNet, parsePin(inputName), id, 0); + inputName = padLp.name; + + layerParams.set("pad_mode", "VALID"); +} + const TFImporter::DispatchMap TFImporter::buildDispatchMap() { static DispatchMap dispatch; @@ -787,7 +854,7 @@ void TFImporter::parseConvolution(tensorflow::GraphDef& net, const tensorflow::N setStrides(layerParams, layer); if (!layerParams.has("pad_w") && !layerParams.has("pad_h")) - setPadding(layerParams, layer); + setPadding(layerParams, layer, input); // The final node of dilated convolution subgraph. next_layers = getNextLayers(net, name, "BatchToSpaceND"); @@ -1232,20 +1299,21 @@ void TFImporter::parseMaxPool(tensorflow::GraphDef& net, const tensorflow::NodeD { const std::string& name = layer.name(); const int num_inputs = layer.input_size(); + std::string inputName = layer.input(0); CV_CheckGT(num_inputs, 0, ""); layerParams.set("pool", "max"); setKSize(layerParams, layer); setStrides(layerParams, layer); - setPadding(layerParams, layer); + setPadding(layerParams, layer, inputName, -std::numeric_limits::infinity()); // Test_TensorFlow_nets.EAST_text_detection/1, NGRAPH/CPU layerParams.set("ceil_mode", false); int id = dstNet.addLayer(name, "Pooling", layerParams); layer_id[name] = id; - connectToAllBlobs(layer_id, dstNet, parsePin(layer.input(0)), id, num_inputs); + connectToAllBlobs(layer_id, dstNet, parsePin(inputName), id, num_inputs); } void TFImporter::parseAvgPool(tensorflow::GraphDef& net, const tensorflow::NodeDef& layer, LayerParams& layerParams) @@ -1258,7 +1326,7 @@ void TFImporter::parseAvgPool(tensorflow::GraphDef& net, const tensorflow::NodeD layerParams.set("ave_pool_padded_area", false); setKSize(layerParams, layer); setStrides(layerParams, layer); - setPadding(layerParams, layer); + setPadMode(layerParams, layer); int id = dstNet.addLayer(name, "Pooling", layerParams); layer_id[name] = id; @@ -1673,7 +1741,7 @@ void TFImporter::parseConv2DBackpropInput(tensorflow::GraphDef& net, const tenso // input: "weights" // input: "input" - const std::string& name = layer.name(); + std::string name = layer.name(); const int num_inputs = layer.input_size(); CV_CheckEQ(num_inputs, 3, "Expected output shape, weights and input nodes"); @@ -1704,7 +1772,21 @@ void TFImporter::parseConv2DBackpropInput(tensorflow::GraphDef& net, const tenso layerParams.set("num_output", kshape[1]); setStrides(layerParams, layer); - setPadding(layerParams, layer); + setPadMode(layerParams, layer); + int64_t pads[8]; + bool explicit_pads = getExplicitPadding(layerParams, layer, pads); + int64_t begs[4] = {}; + int64_t ends[4] = {-1, -1, -1, -1}; + if (explicit_pads) + { + name += "/deconv"; + layerParams.set("pad_mode", "VALID"); + for (int i = 2; i < 4; ++i) // begins=[0, 0, a, b], ends=[-1, -1, c, d] + { + begs[i] = pads[2*i]; + ends[i] = -1 - pads[2*i + 1]; + } + } // For convolution layer, output shape computes as // o = 1 + (i - k + 2*p) / s @@ -1721,8 +1803,9 @@ void TFImporter::parseConv2DBackpropInput(tensorflow::GraphDef& net, const tenso const int strideY = layerParams.get("stride_h"); const int strideX = layerParams.get("stride_w"); Mat outShape = getTensorContent(getConstBlob(layer, value_id, 0)); - const int outH = outShape.at(1); - const int outW = outShape.at(2); + int shift = (getDataLayout(layer) == DATA_LAYOUT_NCHW); + const int outH = outShape.at(1 + shift) + begs[2] - 1 - ends[2]; + const int outW = outShape.at(2 + shift) + begs[3] - 1 - ends[3]; if (layerParams.get("pad_mode") == "SAME") { layerParams.set("adj_w", (outW - 1) % strideX); @@ -1738,6 +1821,16 @@ void TFImporter::parseConv2DBackpropInput(tensorflow::GraphDef& net, const tenso // one input only connect(layer_id, dstNet, parsePin(layer.input(2)), id, 0); + if (explicit_pads) // If we have explicit paddings, remove extra data + { + layerParams.set("begin", DictValue::arrayInt(begs, sizeof(begs) / sizeof(begs[0]))); + layerParams.set("end", DictValue::arrayInt(ends, sizeof(ends) / sizeof(ends[0]))); + + int id = dstNet.addLayer(layer.name(), "Slice", layerParams); + layer_id[layer.name()] = id; + + connect(layer_id, dstNet, parsePin(name), id, 0); + } } void TFImporter::parseBlockLSTM(tensorflow::GraphDef& net, const tensorflow::NodeDef& layer, LayerParams& layerParams) @@ -2717,7 +2810,6 @@ void TFImporter::populateNet() addConstNodes(netBin, value_id, layers_to_ignore); addConstNodes(netTxt, value_id, layers_to_ignore); - for (int li = 0; li < layersSize; li++) { const tensorflow::NodeDef& layer = net.node(li); diff --git a/modules/dnn/test/test_tf_importer.cpp b/modules/dnn/test/test_tf_importer.cpp index 4ba4f29322..4f7840f9e4 100644 --- a/modules/dnn/test/test_tf_importer.cpp +++ b/modules/dnn/test/test_tf_importer.cpp @@ -203,6 +203,16 @@ TEST_P(Test_TensorFlow_layers, padding) runTensorFlowNet("keras_pad_concat"); } +TEST_P(Test_TensorFlow_layers, padding_asymmetric) +{ + runTensorFlowNet("conv2d_asymmetric_pads_nchw"); + runTensorFlowNet("conv2d_asymmetric_pads_nhwc"); + runTensorFlowNet("max_pool2d_asymmetric_pads_nchw"); + runTensorFlowNet("max_pool2d_asymmetric_pads_nhwc"); + runTensorFlowNet("conv2d_backprop_input_asymmetric_pads_nchw"); + runTensorFlowNet("conv2d_backprop_input_asymmetric_pads_nhwc"); +} + TEST_P(Test_TensorFlow_layers, padding_same) { // Reference output values are in range [0.0006, 2.798]