From 222149b9c6d097f6bf608e731a8f52566b26156a Mon Sep 17 00:00:00 2001
From: Dmitry Kurtaev <dmitry.kurtaev+github@gmail.com>
Date: Fri, 22 Sep 2017 12:12:03 +0300
Subject: [PATCH] Refactored Padding layer

---
 .../dnn/include/opencv2/dnn/all_layers.hpp |  19 +++
 modules/dnn/src/layers/padding_layer.cpp   | 117 +++++++++---------
 modules/dnn/src/tensorflow/tf_importer.cpp |  63 +++-------
 modules/dnn/src/torch/torch_importer.cpp   |  51 ++++++--
 modules/dnn/test/test_halide_layers.cpp    |  22 ++++
 modules/dnn/test/test_tf_importer.cpp      |   1 +
 modules/dnn/test/test_torch_importer.cpp   |   6 +
 7 files changed, 166 insertions(+), 113 deletions(-)

diff --git a/modules/dnn/include/opencv2/dnn/all_layers.hpp b/modules/dnn/include/opencv2/dnn/all_layers.hpp
index cf47c70a4e..c576e52498 100644
--- a/modules/dnn/include/opencv2/dnn/all_layers.hpp
+++ b/modules/dnn/include/opencv2/dnn/all_layers.hpp
@@ -337,6 +337,25 @@ CV__DNN_EXPERIMENTAL_NS_BEGIN
         static Ptr create(const LayerParams& params);
     };
 
+    /**
+     * @brief Adds extra values for specific axes.
+     * @param paddings Vector of paddings in the format
+     * @code
+     * [ pad_before, pad_after,  // [0]th dimension
+     *   pad_before, pad_after,  // [1]st dimension
+     *   ...
+     *   pad_before, pad_after ] // [n]th dimension
+     * @endcode
+     * that gives the number of values padded before and after every
+     * dimension, starting from the first one. The remaining dimensions
+     * are not padded.
+     * @param value Value used for padding. Defaults to zero.
+     * @param input_dims Torch's parameter. If @p input_dims is not equal to the
+     *                   actual input dimensionality, the `[0]th` dimension
+     *                   is considered a batch dimension and @p paddings are shifted
+     *                   by one dimension. Defaults to `-1`, which means the
+     *                   @p paddings are applied as given.
+     */
     class CV_EXPORTS PaddingLayer : public Layer
     {
     public:
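
For illustration, a minimal sketch of how the parameters documented above
could be filled from user code. It is not part of the patch; the layer name
and the padding amounts are arbitrary examples:

    #include <opencv2/dnn.hpp>

    using namespace cv::dnn;

    LayerParams makePaddingParams()
    {
        LayerParams lp;
        lp.type = "Padding";
        lp.name = "pad";  // Hypothetical layer name.
        // Keep dimensions 0 and 1, pad dimension 2 by 1 before / 1 after
        // and dimension 3 by 2 before / 2 after (e.g. height and width
        // of an NCHW blob).
        int paddings[] = {0, 0,  0, 0,  1, 1,  2, 2};
        lp.set("paddings", DictValue::arrayInt(paddings, 8));
        lp.set("value", 0.5f);  // Optional: pad with 0.5 instead of zero.
        return lp;
    }
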
diff --git a/modules/dnn/src/layers/padding_layer.cpp b/modules/dnn/src/layers/padding_layer.cpp
index f5a6a52cb6..393c8474ed 100644
--- a/modules/dnn/src/layers/padding_layer.cpp
+++ b/modules/dnn/src/layers/padding_layer.cpp
@@ -2,7 +2,7 @@
 // It is subject to the license terms in the LICENSE file found in the top-level directory
 // of this distribution and at http://opencv.org/license.html.
 
-// Copyright (C) 2016, Intel Corporation, all rights reserved.
+// Copyright (C) 2017, Intel Corporation, all rights reserved.
 // Third party copyrights are property of their respective owners.
 
 /*
@@ -24,14 +24,20 @@ public:
     PaddingLayerImpl(const LayerParams &params)
     {
         setParamsFrom(params);
-        paddingDim = params.get<int>("padding_dim");
-        padding = params.get<int>("padding");
-        inputDims = params.get<int>("input_dims", 0);
-        index = params.get<int>("index", 0);
-        paddingValue = params.get<float>("value", 0);
-
-        if(paddingDim < 0 || padding < 0)
-            CV_Error(cv::Error::StsNotImplemented, "Negative padding and dim aren't supported");
+        paddingValue = params.get<float>("value", 0);
+        inputDims = params.get<int>("input_dims", -1);
+
+        CV_Assert(params.has("paddings"));
+        const DictValue& paddingsParam = params.get("paddings");
+        CV_Assert((paddingsParam.size() & 1) == 0);
+
+        paddings.resize(paddingsParam.size() / 2);
+        for (int i = 0; i < paddings.size(); ++i)
+        {
+            paddings[i].first = paddingsParam.get<int>(i * 2);      // Pad before.
+            paddings[i].second = paddingsParam.get<int>(i * 2 + 1); // Pad after.
+            CV_Assert(paddings[i].first >= 0, paddings[i].second >= 0);
+        }
     }
 
     bool getMemoryShapes(const std::vector<MatShape> &inputs,
@@ -39,24 +45,48 @@ public:
                          const int requiredOutputs,
                          std::vector<MatShape> &outputs,
                          std::vector<MatShape> &internals) const
     {
-        outputs.clear();
-        for(int i = 0; i < inputs.size(); i++)
+        CV_Assert(inputs.size() == 1);
+        const MatShape& inpShape = inputs[0];
+        CV_Assert(inpShape.size() >= paddings.size());
+        CV_Assert(inputDims == -1 || inpShape.size() == inputDims || inpShape.size() > paddings.size());
+
+        outputs.resize(1, inpShape);
+        int offset = (inputDims == -1 ? 0 : (inpShape.size() > inputDims ? 1 : 0));
+        for (int i = 0; i < paddings.size(); ++i)
         {
-            MatShape shape = inputs[i];
-            int dim = getPadDim(shape);
-            CV_Assert(dim < shape.size());
-
-            shape[dim] += padding;
-            outputs.push_back(shape);
+            outputs[0][offset + i] = inpShape[offset + i] + paddings[i].first + paddings[i].second;
         }
         return false;
     }
 
+    void finalize(const std::vector<Mat*> &inputs, std::vector<Mat> &outputs)
+    {
+        // Compute dstRanges.
+        const MatSize& inpShape = inputs[0]->size;
+        dstRanges.resize(paddings.size());
+
+        int offset = 0;
+        if (inputDims != -1 && inputs[0]->dims != inputDims)
+        {
+            dstRanges.insert(dstRanges.begin(), Range::all());
+            offset = 1;
+        }
+
+        for (int i = 0; i < paddings.size(); ++i)
+        {
+            dstRanges[offset + i].start = paddings[i].first;
+            dstRanges[offset + i].end = paddings[i].first + inpShape[offset + i];
+        }
+
+        // Add the rest of dimensions.
+        for (int i = dstRanges.size(); i < inputs[0]->dims; ++i)
+            dstRanges.push_back(Range::all());
+    }
+
     virtual bool supportBackend(int backendId)
     {
         return backendId == DNN_BACKEND_DEFAULT ||
-               backendId == DNN_BACKEND_HALIDE && haveHalide();
+               backendId == DNN_BACKEND_HALIDE && haveHalide() && dstRanges.size() == 4;
     }
 
     void forward(std::vector<Mat*> &inputs, std::vector<Mat> &outputs, std::vector<Mat> &internals)
@@ -64,50 +94,18 @@ public:
     {
         CV_TRACE_FUNCTION();
         CV_TRACE_ARG_VALUE(name, "name", name.c_str());
 
-        for(int i = 0; i < inputs.size(); i++)
-        {
-            outputs[i] = paddingValue;
-            const Mat& inp = *inputs[i];
-            Mat& out = outputs[i];
-            int dims = inp.dims;
-            MatShape inShape(inp.size.p, inp.size.p + dims);
-            MatShape outShape(out.size.p, out.size.p + dims);
-            int dim = getPadDim(inShape);
-
-            int actualIndex = index;
-            if(index == 0)
-                actualIndex = inShape[dim];
-
-            std::vector<std::pair<Range, Range> > srcDstRanges;
-            srcDstRanges.push_back(std::make_pair(Range(0, actualIndex), Range(0, actualIndex)));
-            srcDstRanges.push_back(std::make_pair(Range(actualIndex, inShape[dim]),
-                                                  Range(actualIndex + padding, outShape[dim])));
-
-            std::vector<Range> srcRanges(dims, Range::all()), dstRanges = srcRanges;
-
-            for(int j = 0; j < srcDstRanges.size(); j++)
-            {
-                if(!srcDstRanges[j].first.empty())
-                {
-                    srcRanges[dim] = srcDstRanges[j].first;
-                    dstRanges[dim] = srcDstRanges[j].second;
-                    Mat dst = out(&dstRanges[0]);
-                    Mat src = inp(&srcRanges[0]).clone();
-                    src.copyTo(dst);
-                }
-            }
-        }
-    }
-
-    int getPadDim(const MatShape& shape) const
-    {
-        return inputDims > 0 && (int)shape.size() > inputDims ?
-                paddingDim + 1 : paddingDim;
+        outputs[0].setTo(paddingValue);
+        inputs[0]->copyTo(outputs[0](dstRanges));
     }
 
     virtual Ptr<BackendNode> initHalide(const std::vector<Ptr<BackendWrapper> > &inputs)
     {
 #ifdef HAVE_HALIDE
         int inW, inH, inC, inN;
+        int minN = std::max(dstRanges[0].start, 0);
+        int minC = std::max(dstRanges[1].start, 0);
+        int minY = std::max(dstRanges[2].start, 0);
+        int minX = std::max(dstRanges[3].start, 0);
         Halide::Buffer<float> inputBuffer = halideBuffer(inputs[0]);
         getCanonicalSize(inputBuffer, &inW, &inH, &inC, &inN);
 
@@ -115,13 +113,16 @@ public:
         Halide::Func top = (name.empty() ? Halide::Func() : Halide::Func(name));
         Halide::Func padded =
             Halide::BoundaryConditions::constant_exterior(inputBuffer, paddingValue);
-        top(x, y, c, n) = padded(x, y, c, n);
+        top(x, y, c, n) = padded(x - minX, y - minY, c - minC, n - minN);
         return Ptr<BackendNode>(new HalideBackendNode(top));
 #endif  // HAVE_HALIDE
         return Ptr<BackendNode>();
     }
 
-    int paddingDim, padding, inputDims, index;
+private:
+    std::vector<std::pair<int, int> > paddings;  // Pairs pad before, pad after.
+    std::vector<Range> dstRanges;
+    int inputDims;
     float paddingValue;
 };
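
The rewritten forward() relies on finalize() having stored, for each output
dimension, the destination interval that receives the input. The following
standalone sketch (OpenCV core only, shapes chosen arbitrarily) shows the same
Range-based copy on a 2x2 matrix padded by one row on top and two columns on
the left:

    #include <opencv2/core.hpp>
    #include <vector>

    int main()
    {
        cv::Mat input = (cv::Mat_<float>(2, 2) << 1, 2, 3, 4);
        cv::Mat output(3, 4, CV_32F, cv::Scalar(-1));  // Pre-filled with paddingValue.

        std::vector<cv::Range> dstRanges;
        dstRanges.push_back(cv::Range(1, 3));  // Rows: pad_before=1, pad_after=0.
        dstRanges.push_back(cv::Range(2, 4));  // Cols: pad_before=2, pad_after=0.

        input.copyTo(output(&dstRanges[0]));   // Same pattern as outputs[0](dstRanges).
        // output is now:
        // -1 -1 -1 -1
        // -1 -1  1  2
        // -1 -1  3  4
        return 0;
    }
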
diff --git a/modules/dnn/src/tensorflow/tf_importer.cpp b/modules/dnn/src/tensorflow/tf_importer.cpp
index 67565cc591..db18fbf40c 100644
--- a/modules/dnn/src/tensorflow/tf_importer.cpp
+++ b/modules/dnn/src/tensorflow/tf_importer.cpp
@@ -931,51 +931,28 @@ void TFImporter::populateNet(Net dstNet)
         }
         else if (type == "Pad")
         {
-            tensorflow::TensorProto paddings = getConstBlob(layer, value_id, 1);
-            MatShape shape;
-            blobShapeFromTensor(paddings, shape);
-            if (shape[0] != 4)
-                CV_Error(Error::StsError, "Expected NHWC data format");
-
-            // Copy tensor with paddings.
-            std::vector<int32_t> values(shape[0] * 2);
-            CV_Assert(sizeof(int32_t) * values.size() ==
-                      paddings.tensor_content().size());
-            memcpy(&values[0], &paddings.tensor_content()[0],
-                   paddings.tensor_content().size());
-
-            // Allow only one padding operation per layer.
-            bool padded = false;
-            for (int i = 0; i < values.size(); ++i)
+            Mat paddings = getTensorContent(getConstBlob(layer, value_id, 1));
+            CV_Assert(paddings.type() == CV_32SC1);
+            if (paddings.total() == 8)
             {
-                if (values[i])
-                {
-                    if (padded)
-                        CV_Error(Error::StsError,
-                                 "Only single padding operation per layer is supported");
-                    padded = true;
-
-                    int axis = i / 2;
-                    // Remap NHWC to NCHW.
-                    // 0 -> 0
-                    // 1 -> 2
-                    // 2 -> 3
-                    // 3 -> 1
-                    if (axis != 0)
-                        axis = axis % 3 + 1;
-
-                    layerParams.set("padding_dim", axis);
-                    if (i % 2)  // Pad after
-                        layerParams.set("padding", values[i]);
-                    else  // Pad before
-                        layerParams.set("padding", -1 * values[i]);
-
-                    int id = dstNet.addLayer(name, "Padding", layerParams);
-                    layer_id[name] = id;
-
-                    connect(layer_id, dstNet, parsePin(layer.input(0)), id, 0);
-                }
+                // Perhaps the paddings are in NHWC order. Remap them to NCHW:
+                //  N    H    W    C
+                // 0 1  2 3  4 5  6 7
+                std::swap(*paddings.ptr<int32_t>(0, 2), *paddings.ptr<int32_t>(0, 6));
+                std::swap(*paddings.ptr<int32_t>(0, 3), *paddings.ptr<int32_t>(0, 7));
+                //  N    C    W    H
+                // 0 1  2 3  4 5  6 7
+                std::swap(*paddings.ptr<int32_t>(0, 4), *paddings.ptr<int32_t>(0, 6));
+                std::swap(*paddings.ptr<int32_t>(0, 5), *paddings.ptr<int32_t>(0, 7));
+                //  N    C    H    W
+                // 0 1  2 3  4 5  6 7
             }
+            layerParams.set("paddings", DictValue::arrayInt((int*)paddings.data, paddings.total()));
+
+            int id = dstNet.addLayer(name, "Padding", layerParams);
+            layer_id[name] = id;
+
+            connect(layer_id, dstNet, parsePin(layer.input(0)), id, 0);
         }
         else if (type == "FusedBatchNorm")
         {
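
The two swap pairs above reorder per-dimension (pad_before, pad_after) pairs
from NHWC to NCHW. A worked example (the padding values are made up):

    #include <algorithm>
    #include <cstdio>

    int main()
    {
        // NHWC paddings: N=(0,0), H=(1,1), W=(2,2), C=(3,3).
        int p[8] = {0, 0, 1, 1, 2, 2, 3, 3};
        std::swap(p[2], p[6]); std::swap(p[3], p[7]);  // N H W C -> N C W H
        std::swap(p[4], p[6]); std::swap(p[5], p[7]);  // N C W H -> N C H W
        for (int i = 0; i < 8; ++i)
            std::printf("%d ", p[i]);                  // Prints: 0 0 3 3 1 1 2 2
        return 0;
    }
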
diff --git a/modules/dnn/src/torch/torch_importer.cpp b/modules/dnn/src/torch/torch_importer.cpp
index 56c55d6639..c183184b42 100644
--- a/modules/dnn/src/torch/torch_importer.cpp
+++ b/modules/dnn/src/torch/torch_importer.cpp
@@ -714,23 +714,25 @@ struct TorchImporter : public ::cv::dnn::Importer
             readTorchTable(scalarParams, tensorParams);
             newModule->apiType = "Padding";
 
-            CV_Assert(scalarParams.has("pad") &&
-                      scalarParams.has("dim"));
+            CV_Assert(scalarParams.has("pad") && scalarParams.has("dim"));
+            if (scalarParams.has("index") && scalarParams.get<int>("index") != 1)
+                CV_Error(Error::StsNotImplemented, "Padding with offset is not implemented");
 
-            layerParams.set("padding_dim",
-                            static_cast<int>(scalarParams.get<double>("dim") - 1));
-            layerParams.set("padding", static_cast<int>(scalarParams.get<double>("pad")));
+            if (scalarParams.has("value"))
+                layerParams.set("value", scalarParams.get<float>("value"));
 
             if (scalarParams.has("nInputDim"))
-                layerParams.set("input_dims",
-                                static_cast<int>(scalarParams.get<double>("nInputDim")));
+                layerParams.set("input_dims", scalarParams.get<int>("nInputDim"));
 
-            if (scalarParams.has("value"))
-                layerParams.set("value", scalarParams.get<double>("value"));
+            int dim = scalarParams.get<int>("dim") - 1;  // In Lua we start from 1.
+            int pad = scalarParams.get<int>("pad");
 
-            if (scalarParams.has("index"))
-                layerParams.set("index",
-                                static_cast<int>(scalarParams.get<double>("index") - 1));
+            std::vector<int> paddings((dim + 1) * 2, 0);
+            if (pad > 0)
+                paddings[dim * 2 + 1] = pad;  // Pad after (right).
+            else
+                paddings[dim * 2] = -pad;     // Pad before (left).
+            layerParams.set("paddings", DictValue::arrayInt(&paddings[0], paddings.size()));
 
             curModule->modules.push_back(newModule);
         }
@@ -867,6 +869,31 @@ struct TorchImporter : public ::cv::dnn::Importer
             layerParams.set("scale", scalarParams.get<float>("constant_scalar"));
             curModule->modules.push_back(newModule);
         }
+        else if (nnName == "SpatialZeroPadding")
+        {
+            readTorchTable(scalarParams, tensorParams);
+            CV_Assert(scalarParams.has("pad_l"), scalarParams.has("pad_r"),
+                      scalarParams.has("pad_t"), scalarParams.has("pad_b"));
+            int padTop = scalarParams.get<int>("pad_t");
+            int padLeft = scalarParams.get<int>("pad_l");
+            int padRight = scalarParams.get<int>("pad_r");
+            int padBottom = scalarParams.get<int>("pad_b");
+            if (padTop < 0 || padLeft < 0 || padRight < 0 || padBottom < 0)
+                CV_Error(Error::StsNotImplemented, "SpatialZeroPadding in cropping mode is not implemented");
+
+            newModule->apiType = "Padding";
+
+            // Torch's SpatialZeroPadding works with 3- or 4-dimensional input.
+            // Add input_dims=3 so the batch dimension is skipped if it is present.
+            std::vector<int> paddings(6, 0);  // CHW
+            paddings[2] = padTop;
+            paddings[3] = padBottom;
+            paddings[4] = padLeft;
+            paddings[5] = padRight;
+            layerParams.set("paddings", DictValue::arrayInt(&paddings[0], paddings.size()));
+            layerParams.set("input_dims", 3);
+            curModule->modules.push_back(newModule);
+        }
         else
         {
             CV_Error(Error::StsNotImplemented, "Unknown nn class \"" + className + "\"");
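
To trace the nn.Padding conversion above on concrete numbers, consider a
hypothetical module with dim=2 and pad=-3, which becomes a one-sided
"pad before" on zero-based dimension 1:

    #include <cstdio>
    #include <vector>

    int main()
    {
        int dim = 2 - 1;  // Lua's dim=2, converted to a zero-based index.
        int pad = -3;
        std::vector<int> paddings((dim + 1) * 2, 0);
        if (pad > 0)
            paddings[dim * 2 + 1] = pad;  // Pad after.
        else
            paddings[dim * 2] = -pad;     // Pad before.
        for (size_t i = 0; i < paddings.size(); ++i)
            std::printf("%d ", paddings[i]);  // Prints: 0 0 3 0
        return 0;
    }
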
diff --git a/modules/dnn/test/test_halide_layers.cpp b/modules/dnn/test/test_halide_layers.cpp
index 79f767a134..c0a5c80fbc 100644
--- a/modules/dnn/test/test_halide_layers.cpp
+++ b/modules/dnn/test/test_halide_layers.cpp
@@ -34,6 +34,28 @@ static void test(LayerParams& params, Mat& input)
     normAssert(outputDefault, outputHalide);
 }
 
+////////////////////////////////////////////////////////////////////////////////
+// Padding
+////////////////////////////////////////////////////////////////////////////////
+TEST(Padding_Halide, Accuracy)
+{
+    static const int kNumRuns = 10;
+    std::vector<int> paddings(8);
+    for (int t = 0; t < kNumRuns; ++t)
+    {
+        for (int i = 0; i < paddings.size(); ++i)
+            paddings[i] = rand() % 5;
+
+        LayerParams lp;
+        lp.set("paddings", DictValue::arrayInt(&paddings[0], paddings.size()));
+        lp.type = "Padding";
+        lp.name = "testLayer";
+
+        Mat input({1 + rand() % 10, 1 + rand() % 10, 1 + rand() % 10, 1 + rand() % 10}, CV_32F);
+        test(lp, input);
+    }
+}
+
 ////////////////////////////////////////////////////////////////////////////////
 // Convolution
 ////////////////////////////////////////////////////////////////////////////////
diff --git a/modules/dnn/test/test_tf_importer.cpp b/modules/dnn/test/test_tf_importer.cpp
index 57227fff05..9a3f168e56 100644
--- a/modules/dnn/test/test_tf_importer.cpp
+++ b/modules/dnn/test/test_tf_importer.cpp
@@ -103,6 +103,7 @@ TEST(Test_TensorFlow, padding)
 {
     runTensorFlowNet("padding_same");
     runTensorFlowNet("padding_valid");
+    runTensorFlowNet("spatial_padding");
 }
 
 TEST(Test_TensorFlow, eltwise_add_mul)
diff --git a/modules/dnn/test/test_torch_importer.cpp b/modules/dnn/test/test_torch_importer.cpp
index fa444edfa9..ec20ef077e 100644
--- a/modules/dnn/test/test_torch_importer.cpp
+++ b/modules/dnn/test/test_torch_importer.cpp
@@ -190,6 +190,12 @@ TEST(Torch_Importer, net_normalize)
     runTorchNet("net_normalize", "", false, true);
 }
 
+TEST(Torch_Importer, net_padding)
+{
+    runTorchNet("net_padding", "", false, true);
+    runTorchNet("net_spatial_zero_padding", "", false, true);
+}
+
 TEST(Torch_Importer, ENet_accuracy)
 {
     Net net;