diff --git a/modules/dnn/include/opencv2/dnn/all_layers.hpp b/modules/dnn/include/opencv2/dnn/all_layers.hpp index 9a4734a939..b65f07555d 100644 --- a/modules/dnn/include/opencv2/dnn/all_layers.hpp +++ b/modules/dnn/include/opencv2/dnn/all_layers.hpp @@ -263,14 +263,6 @@ CV__DNN_EXPERIMENTAL_NS_BEGIN static Ptr create(const LayerParams& params); }; - class CV_EXPORTS LPNormalizeLayer : public Layer - { - public: - float pnorm, epsilon; - - static Ptr create(const LayerParams& params); - }; - class CV_EXPORTS InnerProductLayer : public Layer { public: @@ -545,9 +537,37 @@ CV__DNN_EXPERIMENTAL_NS_BEGIN static Ptr create(const LayerParams& params); }; + /** + * @brief \f$ L_p \f$ - normalization layer. + * @param p Normalization factor. The most common `p = 1` for \f$ L_1 \f$ - + * normalization or `p = 2` for \f$ L_2 \f$ - normalization or a custom one. + * @param eps Parameter \f$ \epsilon \f$ to prevent a division by zero. + * @param across_spatial If true, normalize an input across all non-batch dimensions. + * Otherwise normalize an every channel separately. + * + * Across spatial: + * @f[ + * norm = \sqrt[p]{\epsilon + \sum_{x, y, c} |src(x, y, c)|^p } \\ + * dst(x, y, c) = \frac{ src(x, y, c) }{norm} + * @f] + * + * Channel wise normalization: + * @f[ + * norm(c) = \sqrt[p]{\epsilon + \sum_{x, y} |src(x, y, c)|^p } \\ + * dst(x, y, c) = \frac{ src(x, y, c) }{norm(c)} + * @f] + * + * Where `x, y` - spatial cooridnates, `c` - channel. + * + * An every sample in the batch is normalized separately. Optionally, + * output is scaled by the trained parameters. + */ class NormalizeBBoxLayer : public Layer { public: + float pnorm, epsilon; + bool acrossSpatial; + static Ptr create(const LayerParams& params); }; diff --git a/modules/dnn/src/init.cpp b/modules/dnn/src/init.cpp index 1943b406b9..2527b58dda 100644 --- a/modules/dnn/src/init.cpp +++ b/modules/dnn/src/init.cpp @@ -92,7 +92,6 @@ void initializeLayerFactory() CV_DNN_REGISTER_LAYER_CLASS(InnerProduct, InnerProductLayer); CV_DNN_REGISTER_LAYER_CLASS(Softmax, SoftmaxLayer); CV_DNN_REGISTER_LAYER_CLASS(MVN, MVNLayer); - CV_DNN_REGISTER_LAYER_CLASS(LPNormalize, LPNormalizeLayer); CV_DNN_REGISTER_LAYER_CLASS(ReLU, ReLULayer); CV_DNN_REGISTER_LAYER_CLASS(ReLU6, ReLU6Layer); diff --git a/modules/dnn/src/layers/lp_normalize_layer.cpp b/modules/dnn/src/layers/lp_normalize_layer.cpp deleted file mode 100644 index bba82aa694..0000000000 --- a/modules/dnn/src/layers/lp_normalize_layer.cpp +++ /dev/null @@ -1,78 +0,0 @@ -// This file is part of OpenCV project. -// It is subject to the license terms in the LICENSE file found in the top-level directory -// of this distribution and at http://opencv.org/license.html. -// -// Copyright (C) 2017, Intel Corporation, all rights reserved. -// Third party copyrights are property of their respective owners. - -#include "../precomp.hpp" -#include "layers_common.hpp" -#include -namespace cv { namespace dnn { - -class LPNormalizeLayerImpl : public LPNormalizeLayer -{ -public: - - LPNormalizeLayerImpl(const LayerParams& params) - { - setParamsFrom(params); - pnorm = params.get("p", 2); - epsilon = params.get("eps", 1e-10f); - CV_Assert(pnorm > 0); - } - - bool getMemoryShapes(const std::vector &inputs, - const int requiredOutputs, - std::vector &outputs, - std::vector &internals) const - { - Layer::getMemoryShapes(inputs, requiredOutputs, outputs, internals); - if (pnorm != 1 && pnorm != 2) - { - internals.resize(1, inputs[0]); - } - return true; - } - - virtual bool supportBackend(int backendId) - { - return backendId == DNN_BACKEND_DEFAULT; - } - - void forward(std::vector &inputs, std::vector &outputs, std::vector &internals) - { - CV_TRACE_FUNCTION(); - CV_TRACE_ARG_VALUE(name, "name", name.c_str()); - - CV_Assert(inputs[0]->total() == outputs[0].total()); - float norm; - if (pnorm == 1) - norm = cv::norm(*inputs[0], NORM_L1); - else if (pnorm == 2) - norm = cv::norm(*inputs[0], NORM_L2); - else - { - cv::pow(abs(*inputs[0]), pnorm, internals[0]); - norm = pow((float)sum(internals[0])[0], 1.0f / pnorm); - } - multiply(*inputs[0], 1.0f / (norm + epsilon), outputs[0]); - } - - int64 getFLOPS(const std::vector &inputs, - const std::vector &) const - { - int64 flops = 0; - for (int i = 0; i < inputs.size(); i++) - flops += 3 * total(inputs[i]); - return flops; - } -}; - -Ptr LPNormalizeLayer::create(const LayerParams& params) -{ - return Ptr(new LPNormalizeLayerImpl(params)); -} - -} // namespace dnn -} // namespace cv diff --git a/modules/dnn/src/layers/normalize_bbox_layer.cpp b/modules/dnn/src/layers/normalize_bbox_layer.cpp index 43acd35406..020d1d385a 100644 --- a/modules/dnn/src/layers/normalize_bbox_layer.cpp +++ b/modules/dnn/src/layers/normalize_bbox_layer.cpp @@ -43,83 +43,18 @@ #include "../precomp.hpp" #include "layers_common.hpp" -#include -#include - -namespace cv -{ -namespace dnn -{ - -namespace -{ - const std::string layerName = "NormalizeBBox"; -} +namespace cv { namespace dnn { class NormalizeBBoxLayerImpl : public NormalizeBBoxLayer { - float _eps; - bool _across_spatial; - bool _channel_shared; public: - bool getParameterDict(const LayerParams ¶ms, - const std::string ¶meterName, - DictValue& result) - { - if (!params.has(parameterName)) - { - return false; - } - - result = params.get(parameterName); - return true; - } - - template - T getParameter(const LayerParams ¶ms, - const std::string ¶meterName, - const size_t &idx=0, - const bool required=true, - const T& defaultValue=T()) + NormalizeBBoxLayerImpl(const LayerParams& params) { - DictValue dictValue; - bool success = getParameterDict(params, parameterName, dictValue); - if(!success) - { - if(required) - { - std::string message = layerName; - message += " layer parameter does not contain "; - message += parameterName; - message += " parameter."; - CV_Error(Error::StsBadArg, message); - } - else - { - return defaultValue; - } - } - return dictValue.get(idx); - } - - NormalizeBBoxLayerImpl(const LayerParams ¶ms) - { - _eps = getParameter(params, "eps", 0, false, 1e-10f); - _across_spatial = getParameter(params, "across_spatial"); - _channel_shared = getParameter(params, "channel_shared"); setParamsFrom(params); - } - - void checkInputs(const std::vector &inputs) - { - CV_Assert(inputs.size() > 0); - CV_Assert(inputs[0]->dims == 4 && inputs[0]->type() == CV_32F); - for (size_t i = 1; i < inputs.size(); i++) - { - CV_Assert(inputs[i]->dims == 4 && inputs[i]->type() == CV_32F); - CV_Assert(inputs[i]->size == inputs[0]->size); - } - CV_Assert(inputs[0]->dims > 2); + pnorm = params.get("p", 2); + epsilon = params.get("eps", 1e-10f); + acrossSpatial = params.get("across_spatial", true); + CV_Assert(pnorm > 0); } bool getMemoryShapes(const std::vector &inputs, @@ -127,17 +62,11 @@ public: std::vector &outputs, std::vector &internals) const { - bool inplace = Layer::getMemoryShapes(inputs, requiredOutputs, outputs, internals); - size_t channels = inputs[0][1]; - size_t rows = inputs[0][2]; - size_t cols = inputs[0][3]; - size_t channelSize = rows * cols; - - internals.assign(1, shape(channels, channelSize)); - internals.push_back(shape(channels, 1)); - internals.push_back(shape(1, channelSize)); - - return inplace; + CV_Assert(inputs.size() == 1); + Layer::getMemoryShapes(inputs, requiredOutputs, outputs, internals); + internals.resize(1, inputs[0]); + internals[0][0] = 1; // Batch size. + return true; } void forward(std::vector &inputs, std::vector &outputs, std::vector &internals) @@ -145,60 +74,46 @@ public: CV_TRACE_FUNCTION(); CV_TRACE_ARG_VALUE(name, "name", name.c_str()); - checkInputs(inputs); - - Mat& buffer = internals[0], sumChannelMultiplier = internals[1], - sumSpatialMultiplier = internals[2]; - - sumChannelMultiplier.setTo(1.0); - sumSpatialMultiplier.setTo(1.0); + CV_Assert(inputs.size() == 1 && outputs.size() == 1); + CV_Assert(inputs[0]->total() == outputs[0].total()); const Mat& inp0 = *inputs[0]; + Mat& buffer = internals[0]; size_t num = inp0.size[0]; size_t channels = inp0.size[1]; - size_t channelSize = inp0.size[2] * inp0.size[3]; - - Mat zeroBuffer(channels, channelSize, CV_32F, Scalar(0)); - Mat absDiff; - Mat scale = blobs[0]; - for (size_t j = 0; j < inputs.size(); j++) + size_t channelSize = inp0.total() / (num * channels); + for (size_t n = 0; n < num; ++n) { - for (size_t n = 0; n < num; ++n) - { - Mat src = Mat(channels, channelSize, CV_32F, inputs[j]->ptr(n)); - Mat dst = Mat(channels, channelSize, CV_32F, outputs[j].ptr(n)); - - buffer = src.mul(src); - - if (_across_spatial) - { - absdiff(buffer, zeroBuffer, absDiff); - - // add eps to avoid overflow - double absSum = sum(absDiff)[0] + _eps; + Mat src = Mat(channels, channelSize, CV_32F, (void*)inp0.ptr(n)); + Mat dst = Mat(channels, channelSize, CV_32F, (void*)outputs[0].ptr(n)); - float norm = sqrt(absSum); - dst = src / norm; - } - else - { - Mat norm(channelSize, 1, buffer.type()); // 1 x channelSize - - // (_channels x channelSize)T * _channels x 1 -> channelSize x 1 - gemm(buffer, sumChannelMultiplier, 1, norm, 0, norm, GEMM_1_T); + cv::pow(abs(src), pnorm, buffer); - // compute norm - pow(norm, 0.5f, norm); + if (acrossSpatial) + { + // add eps to avoid overflow + float absSum = sum(buffer)[0] + epsilon; + float norm = pow(absSum, 1.0f / pnorm); + multiply(src, 1.0f / norm, dst); + } + else + { + Mat norm; + reduce(buffer, norm, 0, REDUCE_SUM); + norm += epsilon; - // scale the layer - // _channels x 1 * (channelSize x 1)T -> _channels x channelSize - gemm(sumChannelMultiplier, norm, 1, buffer, 0, buffer, GEMM_2_T); + // compute inverted norm to call multiply instead divide + cv::pow(norm, -1.0f / pnorm, norm); - dst = src / buffer; - } + repeat(norm, channels, 1, buffer); + multiply(src, buffer, dst); + } + if (!blobs.empty()) + { // scale the output - if (_channel_shared) + Mat scale = blobs[0]; + if (scale.total() == 1) { // _scale: 1 x 1 dst *= scale.at(0, 0); @@ -206,15 +121,13 @@ public: else { // _scale: _channels x 1 - // _channels x 1 * 1 x channelSize -> _channels x channelSize - gemm(scale, sumSpatialMultiplier, 1, buffer, 0, buffer); - - dst = dst.mul(buffer); + CV_Assert(scale.total() == channels); + repeat(scale, 1, dst.cols, buffer); + multiply(dst, buffer, dst); } } } } - }; diff --git a/modules/dnn/src/torch/torch_importer.cpp b/modules/dnn/src/torch/torch_importer.cpp index cc120bec9a..e0699f25c7 100644 --- a/modules/dnn/src/torch/torch_importer.cpp +++ b/modules/dnn/src/torch/torch_importer.cpp @@ -706,7 +706,7 @@ struct TorchImporter : public ::cv::dnn::Importer if (scalarParams.has("eps")) layerParams.set("eps", scalarParams.get("eps")); - newModule->apiType = "LPNormalize"; + newModule->apiType = "Normalize"; curModule->modules.push_back(newModule); } else if (nnName == "Padding")