Merge remote-tracking branch 'upstream/3.4' into merge-3.4

pull/17418/head
Alexander Alekhin 5 years ago
commit c3e8a82c9c
25 changed files:
  1. doc/mymath.sty (2 lines changed)
  2. modules/core/include/opencv2/core/eigen.hpp (106 lines changed)
  3. modules/core/src/system.cpp (2 lines changed)
  4. modules/core/test/test_mat.cpp (80 lines changed)
  5. modules/dnn/perf/perf_net.cpp (29 lines changed)
  6. modules/dnn/src/dnn.cpp (34 lines changed)
  7. modules/dnn/src/dnn_common.hpp (34 lines changed)
  8. modules/dnn/src/ie_ngraph.cpp (34 lines changed)
  9. modules/dnn/src/ie_ngraph.hpp (6 lines changed)
  10. modules/dnn/src/layers/normalize_bbox_layer.cpp (2 lines changed)
  11. modules/dnn/src/precomp.hpp (9 lines changed)
  12. modules/dnn/src/tensorflow/tf_graph_simplifier.cpp (16 lines changed)
  13. modules/dnn/src/tensorflow/tf_importer.cpp (28 lines changed)
  14. modules/dnn/test/test_darknet_importer.cpp (109 lines changed)
  15. modules/dnn/test/test_tf_importer.cpp (38 lines changed)
  16. modules/imgproc/include/opencv2/imgproc.hpp (2 lines changed)
  17. modules/imgproc/src/drawing.cpp (7 lines changed)
  18. modules/imgproc/test/test_drawing.cpp (38 lines changed)
  19. modules/videoio/src/cap_msmf.cpp (11 lines changed)
  20. samples/dnn/mask_rcnn.py (2 lines changed)
  21. samples/dnn/object_detection.cpp (56 lines changed)
  22. samples/dnn/object_detection.py (23 lines changed)
  23. samples/dnn/segmentation.py (4 lines changed)
  24. samples/dnn/tf_text_graph_common.py (2 lines changed)
  25. samples/dnn/tf_text_graph_efficientdet.py (236 lines changed)

@@ -28,7 +28,7 @@
#3 & \mbox{#4}\\
#5 & \mbox{#6}\\
\end{array} \right.}
\newcommand{\forkthree}[8]{
\newcommand{\forkfour}[8]{
\left\{
\begin{array}{l l}
#1 & \mbox{#2}\\

@@ -47,6 +47,11 @@
#include "opencv2/core.hpp"
#if EIGEN_WORLD_VERSION == 3 && EIGEN_MAJOR_VERSION >= 3
#include <unsupported/Eigen/CXX11/Tensor>
#define OPENCV_EIGEN_TENSOR_SUPPORT
#endif // EIGEN_WORLD_VERSION == 3 && EIGEN_MAJOR_VERSION >= 3
#if defined _MSC_VER && _MSC_VER >= 1200
#pragma warning( disable: 4714 ) //__forceinline is not inlined
#pragma warning( disable: 4127 ) //conditional expression is constant
@@ -59,6 +64,107 @@ namespace cv
//! @addtogroup core_eigen
//! @{
#ifdef OPENCV_EIGEN_TENSOR_SUPPORT
/** @brief Converts an Eigen::Tensor to a cv::Mat.
The method converts an Eigen::Tensor with shape (H x W x C) to a cv::Mat where:
H = number of rows
W = number of columns
C = number of channels
Usage:
\code
Eigen::Tensor<float, 3, Eigen::RowMajor> a_tensor(...);
// populate tensor with values
Mat a_mat;
eigen2cv(a_tensor, a_mat);
\endcode
*/
template <typename _Tp, int _layout> static inline
void eigen2cv( const Eigen::Tensor<_Tp, 3, _layout> &src, OutputArray dst )
{
if( !(_layout & Eigen::RowMajorBit) )
{
const std::array<int, 3> shuffle{2, 1, 0};
Eigen::Tensor<_Tp, 3, !_layout> row_major_tensor = src.swap_layout().shuffle(shuffle);
Mat _src(src.dimension(0), src.dimension(1), CV_MAKETYPE(DataType<_Tp>::type, src.dimension(2)), row_major_tensor.data());
_src.copyTo(dst);
}
else
{
Mat _src(src.dimension(0), src.dimension(1), CV_MAKETYPE(DataType<_Tp>::type, src.dimension(2)), (void *)src.data());
_src.copyTo(dst);
}
}
/** @brief Converts a cv::Mat to an Eigen::Tensor.
The method converts a cv::Mat to an Eigen Tensor with shape (H x W x C) where:
H = number of rows
W = number of columns
C = number of channels
Usage:
\code
Mat a_mat(...);
// populate Mat with values
Eigen::Tensor<float, 3, Eigen::RowMajor> a_tensor(...);
cv2eigen(a_mat, a_tensor);
\endcode
*/
template <typename _Tp, int _layout> static inline
void cv2eigen( const Mat &src, Eigen::Tensor<_Tp, 3, _layout> &dst )
{
if( !(_layout & Eigen::RowMajorBit) )
{
Eigen::Tensor<_Tp, 3, !_layout> row_major_tensor(src.rows, src.cols, src.channels());
Mat _dst(src.rows, src.cols, CV_MAKETYPE(DataType<_Tp>::type, src.channels()), row_major_tensor.data());
if (src.type() == _dst.type())
src.copyTo(_dst);
else
src.convertTo(_dst, _dst.type());
const std::array<int, 3> shuffle{2, 1, 0};
dst = row_major_tensor.swap_layout().shuffle(shuffle);
}
else
{
dst.resize(src.rows, src.cols, src.channels());
Mat _dst(src.rows, src.cols, CV_MAKETYPE(DataType<_Tp>::type, src.channels()), dst.data());
if (src.type() == _dst.type())
src.copyTo(_dst);
else
src.convertTo(_dst, _dst.type());
}
}
/** @brief Maps cv::Mat data to an Eigen::TensorMap.
The method wraps an existing Mat data array with an Eigen TensorMap of shape (H x W x C) where:
H = number of rows
W = number of columns
C = number of channels
Explicit instantiation of the return type is required.
@note Caller should be aware of the lifetime of the cv::Mat instance and take appropriate safety measures.
The cv::Mat instance will retain ownership of the data and the Eigen::TensorMap will lose access when the cv::Mat data is deallocated.
The example below initializes a cv::Mat and produces an Eigen::TensorMap:
\code
float arr[] = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11};
Mat a_mat(2, 2, CV_32FC3, arr);
Eigen::TensorMap<Eigen::Tensor<float, 3, Eigen::RowMajor>> a_tensormap = cv2eigen_tensormap<float>(a_mat);
\endcode
*/
template <typename _Tp> static inline
Eigen::TensorMap<Eigen::Tensor<_Tp, 3, Eigen::RowMajor>> cv2eigen_tensormap(const cv::InputArray &src)
{
Mat mat = src.getMat();
CV_CheckTypeEQ(mat.type(), CV_MAKETYPE(traits::Type<_Tp>::value, mat.channels()), "");
return Eigen::TensorMap<Eigen::Tensor<_Tp, 3, Eigen::RowMajor>>((_Tp *)mat.data, mat.rows, mat.cols, mat.channels());
}
#endif // OPENCV_EIGEN_TENSOR_SUPPORT
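The three conversions above compose into a full Mat/tensor round trip. A minimal sketch, assuming OpenCV is built against Eigen 3.3+ so that OPENCV_EIGEN_TENSOR_SUPPORT is defined:

#include <iostream>
#include <opencv2/core.hpp>
#include <opencv2/core/eigen.hpp>

int main()
{
    // 2x3 image with 3 float channels
    cv::Mat mat(2, 3, CV_32FC3, cv::Scalar(1, 2, 3));

    // Mat -> Tensor (copies data; handles row- and column-major layouts)
    Eigen::Tensor<float, 3, Eigen::RowMajor> tensor;
    cv::cv2eigen(mat, tensor);

    // Tensor -> Mat (copies data back)
    cv::Mat roundTrip;
    cv::eigen2cv(tensor, roundTrip);
    std::cout << cv::norm(mat, roundTrip, cv::NORM_INF) << std::endl; // prints 0

    // Mat -> TensorMap (zero-copy view; valid only while `mat` owns its data)
    auto view = cv::cv2eigen_tensormap<float>(mat);
    std::cout << view(0, 0, 2) << std::endl; // channel 2 of pixel (0,0): prints 3
    return 0;
}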
template<typename _Tp, int _rows, int _cols, int _options, int _maxRows, int _maxCols> static inline
void eigen2cv( const Eigen::Matrix<_Tp, _rows, _cols, _options, _maxRows, _maxCols>& src, OutputArray dst )
{

@@ -1888,7 +1888,7 @@ inline size_t parseOption(const std::string &value)
}
cv::String valueStr = value.substr(0, pos);
cv::String suffixStr = value.substr(pos, value.length() - pos);
int v = atoi(valueStr.c_str());
size_t v = (size_t)std::stoull(valueStr);
if (suffixStr.length() == 0)
return v;
else if (suffixStr == "MB" || suffixStr == "Mb" || suffixStr == "mb")
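The switch from atoi() to std::stoull() matters because these options hold byte counts that can exceed INT_MAX. A standalone sketch of the same suffix-aware pattern; parseSizeOption is a hypothetical name, and the KB/MB branches mirror the surrounding function:

#include <cstddef>
#include <stdexcept>
#include <string>

// Parses strings like "512", "16kb", or "6MB" into a byte count.
// atoi() would overflow above INT_MAX; std::stoull() keeps the full size_t range.
static size_t parseSizeOption(const std::string& value)
{
    size_t pos = value.find_first_not_of("0123456789");
    std::string valueStr = value.substr(0, pos);
    std::string suffixStr = (pos == std::string::npos) ? std::string() : value.substr(pos);
    size_t v = (size_t)std::stoull(valueStr);
    if (suffixStr.empty())
        return v;
    if (suffixStr == "MB" || suffixStr == "Mb" || suffixStr == "mb")
        return v * 1024 * 1024;
    if (suffixStr == "KB" || suffixStr == "Kb" || suffixStr == "kb")
        return v * 1024;
    throw std::invalid_argument("Unknown size suffix: " + suffixStr);
}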

@@ -2074,6 +2074,86 @@ TEST(Core_Eigen, eigen2cv_check_Mat_type)
}
#endif // HAVE_EIGEN
#ifdef OPENCV_EIGEN_TENSOR_SUPPORT
TEST(Core_Eigen, cv2eigen_check_tensor_conversion)
{
Mat A(2, 3, CV_32FC3);
float value = 0;
for(int row=0; row<A.rows; row++)
for(int col=0; col<A.cols; col++)
for(int ch=0; ch<A.channels(); ch++)
A.at<Vec3f>(row,col)[ch] = value++;
Eigen::Tensor<float, 3, Eigen::RowMajor> row_tensor;
cv2eigen(A, row_tensor);
float* mat_ptr = (float*)A.data;
float* tensor_ptr = row_tensor.data();
for (int i=0; i< row_tensor.size(); i++)
ASSERT_FLOAT_EQ(mat_ptr[i], tensor_ptr[i]);
Eigen::Tensor<float, 3, Eigen::ColMajor> col_tensor;
cv2eigen(A, col_tensor);
value = 0;
for(int row=0; row<A.rows; row++)
for(int col=0; col<A.cols; col++)
for(int ch=0; ch<A.channels(); ch++)
ASSERT_FLOAT_EQ(value++, col_tensor(row,col,ch));
}
#endif // OPENCV_EIGEN_TENSOR_SUPPORT
#ifdef OPENCV_EIGEN_TENSOR_SUPPORT
TEST(Core_Eigen, eigen2cv_check_tensor_conversion)
{
Eigen::Tensor<float, 3, Eigen::RowMajor> row_tensor(2,3,3);
Eigen::Tensor<float, 3, Eigen::ColMajor> col_tensor(2,3,3);
float value = 0;
for(int row=0; row<row_tensor.dimension(0); row++)
for(int col=0; col<row_tensor.dimension(1); col++)
for(int ch=0; ch<row_tensor.dimension(2); ch++)
{
row_tensor(row,col,ch) = value;
col_tensor(row,col,ch) = value;
value++;
}
Mat A;
eigen2cv(row_tensor, A);
float* tensor_ptr = row_tensor.data();
float* mat_ptr = (float*)A.data;
for (int i=0; i< row_tensor.size(); i++)
ASSERT_FLOAT_EQ(tensor_ptr[i], mat_ptr[i]);
Mat B;
eigen2cv(col_tensor, B);
value = 0;
for(int row=0; row<B.rows; row++)
for(int col=0; col<B.cols; col++)
for(int ch=0; ch<B.channels(); ch++)
ASSERT_FLOAT_EQ(value++, B.at<Vec3f>(row,col)[ch]);
}
#endif // OPENCV_EIGEN_TENSOR_SUPPORT
#ifdef OPENCV_EIGEN_TENSOR_SUPPORT
TEST(Core_Eigen, cv2eigen_tensormap_check_tensormap_access)
{
float arr[] = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11};
Mat a_mat(2, 2, CV_32FC3, arr);
Eigen::TensorMap<Eigen::Tensor<float, 3, Eigen::RowMajor>> a_tensor = cv2eigen_tensormap<float>(a_mat);
for(int i=0; i<a_mat.rows; i++) {
for (int j=0; j<a_mat.cols; j++) {
for (int ch=0; ch<a_mat.channels(); ch++) {
ASSERT_FLOAT_EQ(a_mat.at<Vec3f>(i,j)[ch], a_tensor(i,j,ch));
ASSERT_EQ(&a_mat.at<Vec3f>(i,j)[ch], &a_tensor(i,j,ch));
}
}
}
}
#endif // OPENCV_EIGEN_TENSOR_SUPPORT
TEST(Mat, regression_12943) // memory usage: ~4.5 Gb
{
applyTestTag(CV_TEST_TAG_MEMORY_6GB);

@@ -197,9 +197,23 @@ PERF_TEST_P_(DNNTestNetwork, YOLOv3)
if (backend == DNN_BACKEND_HALIDE)
throw SkipTestException("");
Mat sample = imread(findDataFile("dnn/dog416.png"));
cvtColor(sample, sample, COLOR_BGR2RGB);
Mat inp;
sample.convertTo(inp, CV_32FC3);
processNet("dnn/yolov3.weights", "dnn/yolov3.cfg", "", inp / 255);
sample.convertTo(inp, CV_32FC3, 1.0f / 255, 0);
processNet("dnn/yolov3.weights", "dnn/yolov3.cfg", "", inp);
}
PERF_TEST_P_(DNNTestNetwork, YOLOv4)
{
if (backend == DNN_BACKEND_HALIDE)
throw SkipTestException("");
if (target == DNN_TARGET_MYRIAD)
throw SkipTestException("");
Mat sample = imread(findDataFile("dnn/dog416.png"));
cvtColor(sample, sample, COLOR_BGR2RGB);
Mat inp;
sample.convertTo(inp, CV_32FC3, 1.0f / 255, 0);
processNet("dnn/yolov4.weights", "dnn/yolov4.cfg", "", inp);
}
PERF_TEST_P_(DNNTestNetwork, EAST_text_detection)
@@ -235,6 +249,17 @@ PERF_TEST_P_(DNNTestNetwork, Inception_v2_Faster_RCNN)
Mat(cv::Size(800, 600), CV_32FC3));
}
PERF_TEST_P_(DNNTestNetwork, EfficientDet)
{
if (backend == DNN_BACKEND_HALIDE || target != DNN_TARGET_CPU)
throw SkipTestException("");
Mat sample = imread(findDataFile("dnn/dog416.png"));
resize(sample, sample, Size(512, 512));
Mat inp;
sample.convertTo(inp, CV_32FC3, 1.0/255);
processNet("dnn/efficientdet-d0.pb", "dnn/efficientdet-d0.pbtxt", "", inp);
}
INSTANTIATE_TEST_CASE_P(/*nothing*/, DNNTestNetwork, dnnBackendsAndTargets());
} // namespace

@@ -1141,17 +1141,26 @@ static Ptr<BackendWrapper> wrapMat(int backendId, int targetId, cv::Mat& m)
static int g_networkId = 0;
struct Net::Impl
detail::NetImplBase::NetImplBase()
: networkId(CV_XADD(&g_networkId, 1))
, networkDumpCounter(0)
, dumpLevel(DNN_NETWORK_DUMP)
{
// nothing
}
std::string detail::NetImplBase::getDumpFileNameBase()
{
std::string dumpFileNameBase = cv::format("ocv_dnn_net_%05d_%02d", networkId, networkDumpCounter++);
return dumpFileNameBase;
}
struct Net::Impl : public detail::NetImplBase
{
typedef std::map<int, LayerShapes> LayersShapesMap;
typedef std::map<int, LayerData> MapIdToLayerData;
const int networkId; // network global identifier
int networkDumpCounter; // dump counter
Impl()
: networkId(CV_XADD(&g_networkId, 1))
, networkDumpCounter(0)
{
//allocate fake net input layer
netInputLayer = Ptr<DataLayer>(new DataLayer());
@@ -1366,7 +1375,7 @@ struct Net::Impl
{
CV_TRACE_FUNCTION();
if (DNN_NETWORK_DUMP > 0 && networkDumpCounter == 0)
if (dumpLevel && networkDumpCounter == 0)
{
dumpNetworkToFile();
}
@@ -1470,7 +1479,7 @@ struct Net::Impl
netWasAllocated = true;
if (DNN_NETWORK_DUMP > 0)
if (dumpLevel)
{
dumpNetworkToFile();
}
@@ -2178,7 +2187,7 @@ struct Net::Impl
}
if (net.empty()) {
net = Ptr<InfEngineNgraphNet>(new InfEngineNgraphNet());
net = Ptr<InfEngineNgraphNet>(new InfEngineNgraphNet(*this));
}
if (!fused) {
@@ -2222,7 +2231,7 @@ struct Net::Impl
}
}
else {
net = Ptr<InfEngineNgraphNet>(new InfEngineNgraphNet());
net = Ptr<InfEngineNgraphNet>(new InfEngineNgraphNet(*this));
}
if (!fused)
@@ -3406,7 +3415,8 @@ struct Net::Impl
void dumpNetworkToFile()
{
#ifndef OPENCV_DNN_DISABLE_NETWORK_AUTO_DUMP
String dumpFileName = cv::format("ocv_dnn_net_%05d_%02d.dot", networkId, networkDumpCounter++);
string dumpFileNameBase = getDumpFileNameBase();
string dumpFileName = dumpFileNameBase + ".dot";
try
{
string dumpStr = dump();
@@ -3465,7 +3475,7 @@ Net Net::Impl::createNetworkFromModelOptimizer(InferenceEngine::CNNNetwork& ieNe
{
auto fake_node = std::make_shared<ngraph::op::Parameter>(ngraph::element::f32, ngraph::Shape{});
Ptr<InfEngineNgraphNode> backendNodeNGraph(new InfEngineNgraphNode(fake_node));
backendNodeNGraph->net = Ptr<InfEngineNgraphNet>(new InfEngineNgraphNet(ieNet));
backendNodeNGraph->net = Ptr<InfEngineNgraphNet>(new InfEngineNgraphNet(*(cvNet.impl), ieNet));
backendNode = backendNodeNGraph;
}
else

@@ -0,0 +1,34 @@
// This file is part of OpenCV project.
// It is subject to the license terms in the LICENSE file found in the top-level directory
// of this distribution and at http://opencv.org/license.html.
#ifndef __OPENCV_DNN_COMMON_HPP__
#define __OPENCV_DNN_COMMON_HPP__
#include <opencv2/dnn.hpp>
namespace cv { namespace dnn {
CV__DNN_INLINE_NS_BEGIN
#define IS_DNN_OPENCL_TARGET(id) (id == DNN_TARGET_OPENCL || id == DNN_TARGET_OPENCL_FP16)
Mutex& getInitializationMutex();
void initializeLayerFactory();
namespace detail {
struct NetImplBase
{
const int networkId; // network global identifier
int networkDumpCounter; // dump counter
int dumpLevel; // level of information dumps (initialized through OPENCV_DNN_NETWORK_DUMP parameter)
NetImplBase();
std::string getDumpFileNameBase();
};
} // namespace detail
CV__DNN_INLINE_NS_END
}} // namespace
#endif // __OPENCV_DNN_COMMON_HPP__

@@ -6,6 +6,9 @@
// Third party copyrights are property of their respective owners.
#include "precomp.hpp"
#include <fstream>
#include "ie_ngraph.hpp"
#include <opencv2/dnn/shape_utils.hpp>
@@ -22,6 +25,8 @@ namespace cv { namespace dnn {
#ifdef HAVE_DNN_NGRAPH
static bool DNN_IE_SERIALIZE = utils::getConfigurationParameterBool("OPENCV_DNN_IE_SERIALIZE", false);
// For networks whose input layer has an empty name, IE generates a name like id[some_number].
// OpenCV lets users pass an empty input name; to prevent unexpected naming,
// we use a predefined name instead.
@@ -295,13 +300,16 @@ void InfEngineNgraphNode::setName(const std::string& name) {
node->set_friendly_name(name);
}
InfEngineNgraphNet::InfEngineNgraphNet()
InfEngineNgraphNet::InfEngineNgraphNet(detail::NetImplBase& netImpl)
: netImpl_(netImpl)
{
hasNetOwner = false;
device_name = "CPU";
}
InfEngineNgraphNet::InfEngineNgraphNet(InferenceEngine::CNNNetwork& net) : cnn(net)
InfEngineNgraphNet::InfEngineNgraphNet(detail::NetImplBase& netImpl, InferenceEngine::CNNNetwork& net)
: netImpl_(netImpl)
, cnn(net)
{
hasNetOwner = true;
device_name = "CPU";
@@ -440,9 +448,27 @@ void InfEngineNgraphNet::init(Target targetId)
ngraph_function->validate_nodes_and_infer_types();
}
cnn = InferenceEngine::CNNNetwork(ngraph_function);
#ifdef _DEBUG // TODO
//cnn.serialize("/tmp/cnn.xml", "/tmp/cnn.bin");
if (DNN_IE_SERIALIZE)
{
#ifndef OPENCV_DNN_DISABLE_NETWORK_AUTO_DUMP
std::string dumpFileNameBase = netImpl_.getDumpFileNameBase();
try
{
cnn.serialize(dumpFileNameBase + "_ngraph.xml", dumpFileNameBase + "_ngraph.bin");
}
catch (const std::exception& e)
{
std::ofstream out((dumpFileNameBase + "_ngraph.error").c_str(), std::ios::out);
out << "Exception: " << e.what() << std::endl;
}
catch (...)
{
std::ofstream out((dumpFileNameBase + "_ngraph.error").c_str(), std::ios::out);
out << "Can't dump: unknown exception" << std::endl;
}
#endif
}
}
switch (targetId)

@@ -34,8 +34,8 @@ class InfEngineNgraphNode;
class InfEngineNgraphNet
{
public:
InfEngineNgraphNet();
InfEngineNgraphNet(InferenceEngine::CNNNetwork& net);
InfEngineNgraphNet(detail::NetImplBase& netImpl);
InfEngineNgraphNet(detail::NetImplBase& netImpl, InferenceEngine::CNNNetwork& net);
void addOutput(const std::string& name);
@@ -55,6 +55,8 @@ public:
void reset();
private:
detail::NetImplBase& netImpl_;
void release();
int getNumComponents();
void dfs(std::shared_ptr<ngraph::Node>& node, std::vector<std::shared_ptr<ngraph::Node>>& comp,

@@ -354,7 +354,7 @@ public:
weight = std::make_shared<ngraph::op::Constant>(
ngraph::element::f32, ngraph::Shape(shape), blobs[0].data);
}
auto mul = std::make_shared<ngraph::op::v1::Multiply>(norm, weight, ngraph::op::AutoBroadcastType::NUMPY);
auto mul = std::make_shared<ngraph::op::v0::Multiply>(norm, weight, ngraph::op::AutoBroadcastType::NUMPY);
return Ptr<BackendNode>(new InfEngineNgraphNode(mul));
}
#endif // HAVE_DNN_NGRAPH

@@ -73,11 +73,4 @@
#include <opencv2/dnn.hpp>
#include <opencv2/dnn/all_layers.hpp>
namespace cv { namespace dnn {
CV__DNN_INLINE_NS_BEGIN
#define IS_DNN_OPENCL_TARGET(id) (id == DNN_TARGET_OPENCL || id == DNN_TARGET_OPENCL_FP16)
Mutex& getInitializationMutex();
void initializeLayerFactory();
CV__DNN_INLINE_NS_END
}} // namespace
#include "dnn_common.hpp"

@@ -725,6 +725,21 @@ private:
bool negativeScales;
};
class ClipByValueSubgraph : public TFSubgraph
{
public:
ClipByValueSubgraph()
{
int input = addNodeToMatch("");
int maxValue = addNodeToMatch("Const");
int minimum = addNodeToMatch("Minimum", input, maxValue);
int minValue = addNodeToMatch("Const");
addNodeToMatch("Maximum", minimum, minValue);
setFusedNode("ClipByValue", input, minValue, maxValue);
}
};
void simplifySubgraphs(tensorflow::GraphDef& net)
{
std::vector<Ptr<Subgraph> > subgraphs;
@@ -749,6 +764,7 @@ void simplifySubgraphs(tensorflow::GraphDef& net)
subgraphs.push_back(Ptr<Subgraph>(new PReLUSubgraph(false)));
subgraphs.push_back(Ptr<Subgraph>(new FlattenProdSubgraph()));
subgraphs.push_back(Ptr<Subgraph>(new ResizeBilinearSubgraphDown()));
subgraphs.push_back(Ptr<Subgraph>(new ClipByValueSubgraph()));
for (int i = 0; i < net.node_size(); ++i)
{

@@ -1542,22 +1542,32 @@ void TFImporter::populateNet(Net dstNet)
connect(layer_id, dstNet, parsePin(layer.input(0)), id, 0);
}
else if (type == "Mul")
else if (type == "Mul" || type == "RealDiv")
{
bool haveConst = false;
for(int ii = 0; !haveConst && ii < layer.input_size(); ++ii)
int constId = -1;
for(int ii = 0; ii < layer.input_size(); ++ii)
{
Pin input = parsePin(layer.input(ii));
haveConst = value_id.find(input.name) != value_id.end();
if (value_id.find(input.name) != value_id.end())
{
constId = ii;
break;
}
}
CV_Assert(!haveConst || layer.input_size() == 2);
CV_Assert((constId != -1) || (layer.input_size() == 2));
if (haveConst)
if (constId != -1)
{
// Multiplication by constant.
CV_Assert(layer.input_size() == 2);
Mat scaleMat = getTensorContent(getConstBlob(layer, value_id));
CV_Assert(scaleMat.type() == CV_32FC1);
if (type == "RealDiv")
{
if (constId == 0)
CV_Error(Error::StsNotImplemented, "Division of constant over variable");
scaleMat = 1.0f / scaleMat;
}
int id;
if (scaleMat.total() == 1) // is a scalar.
@@ -1659,11 +1669,15 @@ void TFImporter::populateNet(Net dstNet)
int id;
if (equalInpShapes || netInputShapes.empty())
{
layerParams.set("operation", "prod");
layerParams.set("operation", type == "RealDiv" ? "div" : "prod");
id = dstNet.addLayer(name, "Eltwise", layerParams);
}
else
{
if (type == "RealDiv")
CV_Error(Error::StsNotImplemented, "Division of non equal tensors");
id = dstNet.addLayer(name, "Scale", layerParams);
}
layer_id[name] = id;

@@ -245,6 +245,13 @@ public:
nms_boxes.push_back(box);
nms_confidences.push_back(conf);
nms_classIds.push_back(class_id);
#if 0 // use to update test reference data
std::cout << b << ", " << class_id << ", " << conf << "f, "
<< box.x << "f, " << box.y << "f, "
<< box.x + box.width << "f, " << box.y + box.height << "f,"
<< std::endl;
#endif
}
normAssertDetections(refClassIds[b], refConfidences[b], refBoxes[b], nms_classIds,
@@ -413,6 +420,9 @@ TEST_P(Test_Darknet_nets_async, Accuracy)
std::string prefix = get<0>(GetParam());
if (targetId == DNN_TARGET_MYRIAD && prefix == "yolov4") // NC_OUT_OF_MEMORY
applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_MYRIAD, CV_TEST_TAG_DNN_SKIP_IE_VERSION);
if (backendId != DNN_BACKEND_INFERENCE_ENGINE_NN_BUILDER_2019 && backendId != DNN_BACKEND_INFERENCE_ENGINE_NGRAPH)
throw SkipTestException("No support for async forward");
@@ -457,7 +467,7 @@ TEST_P(Test_Darknet_nets_async, Accuracy)
}
INSTANTIATE_TEST_CASE_P(/**/, Test_Darknet_nets_async, Combine(
Values("yolo-voc", "tiny-yolo-voc", "yolov3"),
Values("yolo-voc", "tiny-yolo-voc", "yolov3", "yolov4"),
dnnBackendsAndTargets()
));
@@ -471,15 +481,21 @@ TEST_P(Test_Darknet_nets, YOLOv3)
applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_MYRIAD, CV_TEST_TAG_DNN_SKIP_IE_NGRAPH);
// batchId, classId, confidence, left, top, right, bottom
Mat ref = (Mat_<float>(9, 7) << 0, 7, 0.952983f, 0.614622f, 0.150257f, 0.901369f, 0.289251f, // a truck
0, 1, 0.987908f, 0.150913f, 0.221933f, 0.742255f, 0.74626f, // a bicycle
0, 16, 0.998836f, 0.160024f, 0.389964f, 0.417885f, 0.943716f, // a dog (COCO)
1, 9, 0.384801f, 0.659824f, 0.372389f, 0.673926f, 0.429412f, // a traffic light
1, 9, 0.733283f, 0.376029f, 0.315694f, 0.401776f, 0.395165f, // a traffic light
1, 9, 0.785352f, 0.665503f, 0.373543f, 0.688893f, 0.439245f, // a traffic light
1, 0, 0.980052f, 0.195856f, 0.378454f, 0.258626f, 0.629258f, // a person
1, 2, 0.989633f, 0.450719f, 0.463353f, 0.496305f, 0.522258f, // a car
1, 2, 0.997412f, 0.647584f, 0.459939f, 0.821038f, 0.663947f); // a car
const int N0 = 3;
const int N1 = 6;
static const float ref_[/* (N0 + N1) * 7 */] = {
0, 16, 0.998836f, 0.160024f, 0.389964f, 0.417885f, 0.943716f,
0, 1, 0.987908f, 0.150913f, 0.221933f, 0.742255f, 0.746261f,
0, 7, 0.952983f, 0.614621f, 0.150257f, 0.901368f, 0.289251f,
1, 2, 0.997412f, 0.647584f, 0.459939f, 0.821037f, 0.663947f,
1, 2, 0.989633f, 0.450719f, 0.463353f, 0.496306f, 0.522258f,
1, 0, 0.980053f, 0.195856f, 0.378454f, 0.258626f, 0.629257f,
1, 9, 0.785341f, 0.665503f, 0.373543f, 0.688893f, 0.439244f,
1, 9, 0.733275f, 0.376029f, 0.315694f, 0.401776f, 0.395165f,
1, 9, 0.384815f, 0.659824f, 0.372389f, 0.673927f, 0.429412f,
};
Mat ref(N0 + N1, 7, CV_32FC1, (void*)ref_);
double scoreDiff = 8e-5, iouDiff = 3e-4;
if (target == DNN_TARGET_OPENCL_FP16 || target == DNN_TARGET_MYRIAD)
@@ -506,8 +522,8 @@ TEST_P(Test_Darknet_nets, YOLOv3)
#endif
{
SCOPED_TRACE("batch size 1");
testDarknetModel(config_file, weights_file, ref.rowRange(0, 3), scoreDiff, iouDiff);
SCOPED_TRACE("batch size 1");
testDarknetModel(config_file, weights_file, ref.rowRange(0, N0), scoreDiff, iouDiff);
}
#if defined(INF_ENGINE_RELEASE)
@@ -529,6 +545,75 @@ TEST_P(Test_Darknet_nets, YOLOv3)
}
}
TEST_P(Test_Darknet_nets, YOLOv4)
{
applyTestTag(CV_TEST_TAG_LONG, (target == DNN_TARGET_CPU ? CV_TEST_TAG_MEMORY_1GB : CV_TEST_TAG_MEMORY_2GB));
#if defined(INF_ENGINE_RELEASE)
if (target == DNN_TARGET_MYRIAD) // NC_OUT_OF_MEMORY
applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_MYRIAD, CV_TEST_TAG_DNN_SKIP_IE_VERSION);
#endif
// batchId, classId, confidence, left, top, right, bottom
const int N0 = 3;
const int N1 = 7;
static const float ref_[/* (N0 + N1) * 7 */] = {
0, 16, 0.992194f, 0.172375f, 0.402458f, 0.403918f, 0.932801f,
0, 1, 0.988326f, 0.166708f, 0.228236f, 0.737208f, 0.735803f,
0, 7, 0.94639f, 0.602523f, 0.130399f, 0.901623f, 0.298452f,
1, 2, 0.99761f, 0.646556f, 0.45985f, 0.816041f, 0.659067f,
1, 0, 0.988913f, 0.201726f, 0.360282f, 0.266181f, 0.631728f,
1, 2, 0.98233f, 0.452007f, 0.462217f, 0.495612f, 0.521687f,
1, 9, 0.919195f, 0.374642f, 0.316524f, 0.398126f, 0.393714f,
1, 9, 0.856303f, 0.666842f, 0.372215f, 0.685539f, 0.44141f,
1, 9, 0.313516f, 0.656791f, 0.374734f, 0.671959f, 0.438371f,
1, 9, 0.256625f, 0.940232f, 0.326931f, 0.967586f, 0.374002f,
};
Mat ref(N0 + N1, 7, CV_32FC1, (void*)ref_);
double scoreDiff = (target == DNN_TARGET_OPENCL_FP16 || target == DNN_TARGET_MYRIAD) ? 0.006 : 8e-5;
double iouDiff = (target == DNN_TARGET_OPENCL_FP16 || target == DNN_TARGET_MYRIAD) ? 0.042 : 3e-4;
std::string config_file = "yolov4.cfg";
std::string weights_file = "yolov4.weights";
#if defined(INF_ENGINE_RELEASE)
if ((backend == DNN_BACKEND_INFERENCE_ENGINE_NN_BUILDER_2019 ||
backend == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH) && target == DNN_TARGET_MYRIAD &&
getInferenceEngineVPUType() == CV_DNN_INFERENCE_ENGINE_VPU_TYPE_MYRIAD_X)
{
scoreDiff = 0.04;
iouDiff = 0.2;
}
#endif
{
SCOPED_TRACE("batch size 1");
testDarknetModel(config_file, weights_file, ref.rowRange(0, N0), scoreDiff, iouDiff);
}
{
SCOPED_TRACE("batch size 2");
#if defined(INF_ENGINE_RELEASE)
if (backend == DNN_BACKEND_INFERENCE_ENGINE_NN_BUILDER_2019)
{
if (target == DNN_TARGET_OPENCL)
applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_OPENCL, CV_TEST_TAG_DNN_SKIP_IE_VERSION);
else if (target == DNN_TARGET_OPENCL_FP16 && INF_ENGINE_VER_MAJOR_LE(202010000))
applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_OPENCL_FP16, CV_TEST_TAG_DNN_SKIP_IE_VERSION);
else if (target == DNN_TARGET_MYRIAD &&
getInferenceEngineVPUType() == CV_DNN_INFERENCE_ENGINE_VPU_TYPE_MYRIAD_X)
applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_MYRIAD_X);
}
#endif
testDarknetModel(config_file, weights_file, ref, scoreDiff, iouDiff);
}
}
INSTANTIATE_TEST_CASE_P(/**/, Test_Darknet_nets, dnnBackendsAndTargets());
TEST_P(Test_Darknet_layers, shortcut)

@@ -1056,6 +1056,11 @@ TEST_P(Test_TensorFlow_layers, tf2_dense)
runTensorFlowNet("tf2_dense");
}
TEST_P(Test_TensorFlow_layers, clip_by_value)
{
runTensorFlowNet("clip_by_value");
}
TEST_P(Test_TensorFlow_layers, tf2_prelu)
{
if (backend == DNN_BACKEND_INFERENCE_ENGINE_NN_BUILDER_2019)
@@ -1205,4 +1210,37 @@ TEST_P(Test_TensorFlow_nets, Mask_RCNN)
expectNoFallbacks(net);
}
TEST_P(Test_TensorFlow_nets, EfficientDet)
{
if (target != DNN_TARGET_CPU)
{
if (target == DNN_TARGET_OPENCL_FP16) applyTestTag(CV_TEST_TAG_DNN_SKIP_OPENCL_FP16);
if (target == DNN_TARGET_OPENCL) applyTestTag(CV_TEST_TAG_DNN_SKIP_OPENCL);
if (target == DNN_TARGET_MYRIAD) applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_MYRIAD);
}
checkBackend();
std::string proto = findDataFile("dnn/efficientdet-d0.pbtxt");
std::string model = findDataFile("dnn/efficientdet-d0.pb");
Net net = readNetFromTensorflow(model, proto);
Mat img = imread(findDataFile("dnn/dog416.png"));
Mat blob = blobFromImage(img, 1.0/255, Size(512, 512), Scalar(123.675, 116.28, 103.53));
net.setPreferableBackend(backend);
net.setPreferableTarget(target);
net.setInput(blob);
// Output has shape 1x1xNx7 where N is the number of detections.
// Every detection is a vector of values [id, classId, confidence, left, top, right, bottom]
// (see the parsing sketch after this test).
Mat out = net.forward();
// Reference values come from the corresponding TensorFlow model test.
Mat ref = (Mat_<float>(3, 7) << 0, 1, 0.8437444, 0.153996080160141, 0.20534580945968628, 0.7463544607162476, 0.7414066195487976,
0, 17, 0.8245924, 0.16657517850399017, 0.3996818959712982, 0.4111558794975281, 0.9306337833404541,
0, 7, 0.8039304, 0.6118435263633728, 0.13175517320632935, 0.9065558314323425, 0.2943994700908661);
double scoreDiff = (target == DNN_TARGET_OPENCL_FP16 || target == DNN_TARGET_MYRIAD) ? 4e-3 : 1e-5;
double iouDiff = (target == DNN_TARGET_OPENCL_FP16 || target == DNN_TARGET_MYRIAD) ? 2e-3 : 1e-4;
normAssertDetections(ref, out, "", 0.5, scoreDiff, iouDiff);
expectNoFallbacksFromIE(net);
}
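For reference, the 1x1xNx7 layout noted in the test above can be consumed as sketched below; dumpDetections and its confThreshold parameter are illustrative, not part of the patch:

#include <cstdio>
#include <opencv2/core.hpp>

// out: the 1x1xNx7 blob returned by net.forward(); box coordinates are relative (0..1)
static void dumpDetections(const cv::Mat& out, float confThreshold)
{
    const float* data = (const float*)out.data;
    for (size_t i = 0; i + 6 < out.total(); i += 7)
    {
        float confidence = data[i + 2];
        if (confidence < confThreshold)
            continue;
        int classId = (int)data[i + 1];
        std::printf("class=%d conf=%.3f box=[%.3f %.3f %.3f %.3f]\n",
                    classId, confidence,
                    data[i + 3], data[i + 4], data[i + 5], data[i + 6]);
    }
}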
}

@@ -1545,7 +1545,7 @@ The function smooths an image using the kernel:
\f[\texttt{K} = \frac{1}{\texttt{ksize.width*ksize.height}} \begin{bmatrix} 1 & 1 & 1 & \cdots & 1 & 1 \\ 1 & 1 & 1 & \cdots & 1 & 1 \\ \hdotsfor{6} \\ 1 & 1 & 1 & \cdots & 1 & 1 \\ \end{bmatrix}\f]
The call `blur(src, dst, ksize, anchor, borderType)` is equivalent to `boxFilter(src, dst, src.type(),
The call `blur(src, dst, ksize, anchor, borderType)` is equivalent to `boxFilter(src, dst, src.type(), ksize,
anchor, true, borderType)`.
@param src input image; it can have any number of channels, which are processed independently, but
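The corrected equivalence is easy to sanity-check; a small sketch assuming the default anchor and border arguments spelled out in the docs:

#include <opencv2/core.hpp>
#include <opencv2/imgproc.hpp>

int main()
{
    cv::Mat src(64, 64, CV_8UC3), a, b;
    cv::randu(src, cv::Scalar::all(0), cv::Scalar::all(255));
    cv::Size ksize(5, 5);
    cv::Point anchor(-1, -1);
    cv::blur(src, a, ksize, anchor, cv::BORDER_DEFAULT);
    cv::boxFilter(src, b, src.type(), ksize, anchor, /*normalize=*/true, cv::BORDER_DEFAULT);
    CV_Assert(cv::norm(a, b, cv::NORM_INF) == 0); // identical outputs
    return 0;
}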

@@ -949,6 +949,7 @@ void ellipse2Poly( Point2d center, Size2d axes, int angle,
int delta, std::vector<Point2d>& pts )
{
CV_INSTRUMENT_REGION();
CV_Assert(0 < delta && delta <= 180);
float alpha, beta;
int i;
@@ -2378,7 +2379,9 @@ void cv::fillPoly(InputOutputArray img, InputArrayOfArrays pts,
{
CV_INSTRUMENT_REGION();
int i, ncontours = (int)pts.total();
bool manyContours = pts.kind() == _InputArray::STD_VECTOR_VECTOR ||
pts.kind() == _InputArray::STD_VECTOR_MAT;
int i, ncontours = manyContours ? (int)pts.total() : 1;
if( ncontours == 0 )
return;
AutoBuffer<Point*> _ptsptr(ncontours);
@@ -2388,7 +2391,7 @@ void cv::fillPoly(InputOutputArray img, InputArrayOfArrays pts,
for( i = 0; i < ncontours; i++ )
{
Mat p = pts.getMat(i);
Mat p = pts.getMat(manyContours ? i : -1);
CV_Assert(p.checkVector(2, CV_32S) >= 0);
ptsptr[i] = p.ptr<Point>();
npts[i] = p.rows*p.cols*p.channels()/2;

@@ -641,4 +641,42 @@ TEST(Drawing, regression_16308)
EXPECT_NE(0, (int)img.at<uchar>(99, 50));
}
TEST(Drawing, fillpoly_circle)
{
Mat img_c(640, 480, CV_8UC3, Scalar::all(0));
Mat img_fp = img_c.clone(), img_fcp = img_c.clone(), img_fp3 = img_c.clone();
Point center1(img_c.cols/2, img_c.rows/2);
Point center2(img_c.cols/10, img_c.rows*3/4);
Point center3 = Point(img_c.cols, img_c.rows) - center2;
int radius = img_c.rows/4;
int radius_small = img_c.cols/15;
Scalar color(0, 0, 255);
circle(img_c, center1, radius, color, -1);
// check that circle, fillConvexPoly and fillPoly
// give almost the same result when asked to draw a single circle
vector<Point> vtx;
ellipse2Poly(center1, Size(radius, radius), 0, 0, 360, 1, vtx);
fillConvexPoly(img_fcp, vtx, color);
fillPoly(img_fp, vtx, color);
double diff_fp = cv::norm(img_c, img_fp, NORM_L1)/(255*radius*2*CV_PI);
double diff_fcp = cv::norm(img_c, img_fcp, NORM_L1)/(255*radius*2*CV_PI);
EXPECT_LT(diff_fp, 1.);
EXPECT_LT(diff_fcp, 1.);
// check that fillPoly can draw 3 disjoint circles at once
circle(img_c, center2, radius_small, color, -1);
circle(img_c, center3, radius_small, color, -1);
vector<vector<Point> > vtx3(3);
vtx3[0] = vtx;
ellipse2Poly(center2, Size(radius_small, radius_small), 0, 0, 360, 1, vtx3[1]);
ellipse2Poly(center3, Size(radius_small, radius_small), 0, 0, 360, 1, vtx3[2]);
fillPoly(img_fp3, vtx3, color);
double diff_fp3 = cv::norm(img_c, img_fp3, NORM_L1)/(255*(radius+radius_small*2)*2*CV_PI);
EXPECT_LT(diff_fp3, 1.);
}
}} // namespace

@@ -492,12 +492,14 @@ public:
}
}
}
std::pair<MediaID, MediaType> findBest(const MediaType& newType)
std::pair<MediaID, MediaType> findBestVideoFormat(const MediaType& newType)
{
std::pair<MediaID, MediaType> best;
std::map<MediaID, MediaType>::const_iterator i = formats.begin();
for (; i != formats.end(); ++i)
{
if (i->second.majorType != MFMediaType_Video)
continue;
if (newType.isEmpty()) // file input - choose first returned media type
{
best = *i;
@@ -775,7 +777,12 @@ bool CvCapture_MSMF::configureOutput(MediaType newType, cv::uint32_t outFormat)
{
FormatStorage formats;
formats.read(videoFileSource.Get());
std::pair<FormatStorage::MediaID, MediaType> bestMatch = formats.findBest(newType);
std::pair<FormatStorage::MediaID, MediaType> bestMatch = formats.findBestVideoFormat(newType);
if (bestMatch.second.isEmpty())
{
CV_LOG_DEBUG(NULL, "Can not find video stream with requested parameters");
return false;
}
dwStreamIndex = bestMatch.first.stream;
nativeFormat = bestMatch.second;
MediaType newFormat = nativeFormat;

@@ -43,7 +43,7 @@ def showLegend(classes):
for i in range(len(classes)):
block = legend[i * blockHeight:(i + 1) * blockHeight]
block[:,:] = colors[i]
cv.putText(block, classes[i], (0, blockHeight/2), cv.FONT_HERSHEY_SIMPLEX, 0.5, (255, 255, 255))
cv.putText(block, classes[i], (0, blockHeight//2), cv.FONT_HERSHEY_SIMPLEX, 0.5, (255, 255, 255))
cv.namedWindow('Legend', cv.WINDOW_NORMAL)
cv.imshow('Legend', legend)

@@ -45,7 +45,7 @@ std::vector<std::string> classes;
inline void preprocess(const Mat& frame, Net& net, Size inpSize, float scale,
const Scalar& mean, bool swapRB);
void postprocess(Mat& frame, const std::vector<Mat>& out, Net& net);
void postprocess(Mat& frame, const std::vector<Mat>& out, Net& net, int backend);
void drawPred(int classId, float conf, int left, int top, int right, int bottom, Mat& frame);
@@ -148,7 +148,8 @@ int main(int argc, char** argv)
// Load a model.
Net net = readNet(modelPath, configPath, parser.get<String>("framework"));
net.setPreferableBackend(parser.get<int>("backend"));
int backend = parser.get<int>("backend");
net.setPreferableBackend(backend);
net.setPreferableTarget(parser.get<int>("target"));
std::vector<String> outNames = net.getUnconnectedOutLayersNames();
@@ -245,7 +246,7 @@ int main(int argc, char** argv)
std::vector<Mat> outs = predictionsQueue.get();
Mat frame = processedFramesQueue.get();
postprocess(frame, outs, net);
postprocess(frame, outs, net, backend);
if (predictionsQueue.counter > 1)
{
@@ -285,7 +286,7 @@ int main(int argc, char** argv)
std::vector<Mat> outs;
net.forward(outs, outNames);
postprocess(frame, outs, net);
postprocess(frame, outs, net, backend);
// Put efficiency information.
std::vector<double> layersTimes;
@@ -319,7 +320,7 @@ inline void preprocess(const Mat& frame, Net& net, Size inpSize, float scale,
}
}
void postprocess(Mat& frame, const std::vector<Mat>& outs, Net& net)
void postprocess(Mat& frame, const std::vector<Mat>& outs, Net& net, int backend)
{
static std::vector<int> outLayers = net.getUnconnectedOutLayers();
static std::string outLayerType = net.getLayer(outLayers[0])->type;
@@ -396,11 +397,48 @@ void postprocess(Mat& frame, const std::vector<Mat>& outs, Net& net)
else
CV_Error(Error::StsNotImplemented, "Unknown output layer type: " + outLayerType);
std::vector<int> indices;
NMSBoxes(boxes, confidences, confThreshold, nmsThreshold, indices);
for (size_t i = 0; i < indices.size(); ++i)
// NMS is used inside the Region layer only on DNN_BACKEND_OPENCV; for other backends we need NMS in the sample.
// NMS is also required if the number of outputs > 1.
if (outLayers.size() > 1 || (outLayerType == "Region" && backend != DNN_BACKEND_OPENCV))
{
std::map<int, std::vector<size_t> > class2indices;
for (size_t i = 0; i < classIds.size(); i++)
{
if (confidences[i] >= confThreshold)
{
class2indices[classIds[i]].push_back(i);
}
}
std::vector<Rect> nmsBoxes;
std::vector<float> nmsConfidences;
std::vector<int> nmsClassIds;
for (std::map<int, std::vector<size_t> >::iterator it = class2indices.begin(); it != class2indices.end(); ++it)
{
std::vector<Rect> localBoxes;
std::vector<float> localConfidences;
std::vector<size_t> classIndices = it->second;
for (size_t i = 0; i < classIndices.size(); i++)
{
localBoxes.push_back(boxes[classIndices[i]]);
localConfidences.push_back(confidences[classIndices[i]]);
}
std::vector<int> nmsIndices;
NMSBoxes(localBoxes, localConfidences, confThreshold, nmsThreshold, nmsIndices);
for (size_t i = 0; i < nmsIndices.size(); i++)
{
size_t idx = nmsIndices[i];
nmsBoxes.push_back(localBoxes[idx]);
nmsConfidences.push_back(localConfidences[idx]);
nmsClassIds.push_back(it->first);
}
}
boxes = nmsBoxes;
classIds = nmsClassIds;
confidences = nmsConfidences;
}
for (size_t idx = 0; idx < boxes.size(); ++idx)
{
int idx = indices[i];
Rect box = boxes[idx];
drawPred(classIds[idx], confidences[idx], box.x, box.y,
box.x + box.width, box.y + box.height, frame);

@@ -141,9 +141,6 @@ def postprocess(frame, outs):
# Network produces an output blob with shape NxC where N is the number of
# detected objects and C is the number of classes + 4, where the first 4
# numbers are [center_x, center_y, width, height]
classIds = []
confidences = []
boxes = []
for out in outs:
for detection in out:
scores = detection[5:]
@@ -163,9 +160,25 @@ def postprocess(frame, outs):
print('Unknown output layer type: ' + lastLayer.type)
exit()
indices = cv.dnn.NMSBoxes(boxes, confidences, confThreshold, nmsThreshold)
# NMS is used inside the Region layer only on DNN_BACKEND_OPENCV; for other backends we need NMS in the sample.
# NMS is also required if the number of outputs > 1.
if len(outNames) > 1 or lastLayer.type == 'Region' and args.backend != cv.dnn.DNN_BACKEND_OPENCV:
indices = []
classIds = np.array(classIds)
boxes = np.array(boxes)
confidences = np.array(confidences)
unique_classes = set(classIds)
for cl in unique_classes:
class_indices = np.where(classIds == cl)[0]
conf = confidences[class_indices]
box = boxes[class_indices].tolist()
nms_indices = cv.dnn.NMSBoxes(box, conf, confThreshold, nmsThreshold)
nms_indices = nms_indices[:, 0] if len(nms_indices) else []
indices.extend(class_indices[nms_indices])
else:
indices = np.arange(0, len(classIds))
for i in indices:
i = i[0]
box = boxes[i]
left = box[0]
top = box[1]

@@ -65,7 +65,7 @@ def showLegend(classes):
for i in range(len(classes)):
block = legend[i * blockHeight:(i + 1) * blockHeight]
block[:,:] = colors[i]
cv.putText(block, classes[i], (0, blockHeight/2), cv.FONT_HERSHEY_SIMPLEX, 0.5, (255, 255, 255))
cv.putText(block, classes[i], (0, blockHeight//2), cv.FONT_HERSHEY_SIMPLEX, 0.5, (255, 255, 255))
cv.namedWindow('Legend', cv.WINDOW_NORMAL)
cv.imshow('Legend', legend)
@@ -76,7 +76,7 @@ net = cv.dnn.readNet(args.model, args.config, args.framework)
net.setPreferableBackend(args.backend)
net.setPreferableTarget(args.target)
winName = 'Deep learning image classification in OpenCV'
winName = 'Deep learning semantic segmentation in OpenCV'
cv.namedWindow(winName, cv.WINDOW_NORMAL)
cap = cv.VideoCapture(args.input if args.input else 0)

@@ -269,7 +269,7 @@ def parseTextGraph(filePath):
def removeIdentity(graph_def):
identities = {}
for node in graph_def.node:
if node.op == 'Identity':
if node.op == 'Identity' or node.op == 'IdentityN':
identities[node.name] = node.input[0]
graph_def.node.remove(node)

@@ -0,0 +1,236 @@
# This file is part of OpenCV project.
# It is subject to the license terms in the LICENSE file found in the top-level directory
# of this distribution and at http://opencv.org/license.html.
#
# Copyright (C) 2020, Intel Corporation, all rights reserved.
# Third party copyrights are property of their respective owners.
#
# Use this script to get the text graph representation (.pbtxt) of an EfficientDet
# deep learning network trained with https://github.com/google/automl.
# Then you can import it together with the binary frozen graph (.pb) using the readNetFromTensorflow() function.
# See details and examples on the following wiki page: https://github.com/opencv/opencv/wiki/TensorFlow-Object-Detection-API
import argparse
import re
from math import sqrt
from tf_text_graph_common import *
class AnchorGenerator:
def __init__(self, min_level, aspect_ratios, num_scales, anchor_scale):
self.min_level = min_level
self.aspect_ratios = aspect_ratios
self.anchor_scale = anchor_scale
self.scales = [2**(float(s) / num_scales) for s in range(num_scales)]
def get(self, layer_id):
widths = []
heights = []
for s in self.scales:
for a in self.aspect_ratios:
base_anchor_size = 2**(self.min_level + layer_id) * self.anchor_scale
heights.append(base_anchor_size * s * a[1])
widths.append(base_anchor_size * s * a[0])
return widths, heights
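A worked example of the math above, using the script's defaults (min_level=3, num_scales=3, anchor_scale=4.0, aspect ratio pairs (1.0, 1.0), (1.4, 0.7), (0.7, 1.4)): for layer_id=0 the base anchor size is 2**3 * 4.0 = 32 px and the scales are 2**0, 2**(1/3), 2**(2/3) ≈ 1.00, 1.26, 1.59, so the first scale alone contributes anchors of (width, height) = (32, 32), (44.8, 22.4) and (22.4, 44.8).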
def createGraph(modelPath, outputPath, min_level, aspect_ratios, num_scales,
anchor_scale, num_classes, image_width, image_height):
print('Min level: %d' % min_level)
print('Anchor scale: %f' % anchor_scale)
print('Num scales: %d' % num_scales)
print('Aspect ratios: %s' % str(aspect_ratios))
print('Number of classes: %d' % num_classes)
print('Input image size: %dx%d' % (image_width, image_height))
# Read the graph.
_inpNames = ['image_arrays']
outNames = ['detections']
writeTextGraph(modelPath, outputPath, outNames)
graph_def = parseTextGraph(outputPath)
def getUnconnectedNodes():
unconnected = []
for node in graph_def.node:
if node.op == 'Const':
continue
unconnected.append(node.name)
for inp in node.input:
if inp in unconnected:
unconnected.remove(inp)
return unconnected
nodesToKeep = ['truediv'] # Keep preprocessing nodes
removeIdentity(graph_def)
scopesToKeep = ('image_arrays', 'efficientnet', 'resample_p6', 'resample_p7',
'fpn_cells', 'class_net', 'box_net', 'Reshape', 'concat')
addConstNode('scale_w', [2.0], graph_def)
addConstNode('scale_h', [2.0], graph_def)
nodesToKeep += ['scale_w', 'scale_h']
for node in graph_def.node:
if re.match('efficientnet-(.*)/blocks_\d+/se/mul_1', node.name):
node.input[0], node.input[1] = node.input[1], node.input[0]
if re.match('fpn_cells/cell_\d+/fnode\d+/resample(.*)/nearest_upsampling/Reshape_1$', node.name):
node.op = 'ResizeNearestNeighbor'
node.input[1] = 'scale_w'
node.input.append('scale_h')
for inpNode in graph_def.node:
if inpNode.name == node.name[:node.name.rfind('_')]:
node.input[0] = inpNode.input[0]
if re.match('box_net/box-predict(_\d)*/separable_conv2d$', node.name):
node.addAttr('loc_pred_transposed', True)
# Replace RealDiv with Mul by the inverse scale for compatibility
if node.op == 'RealDiv':
for inpNode in graph_def.node:
if inpNode.name != node.input[1] or not 'value' in inpNode.attr:
continue
tensor = inpNode.attr['value']['tensor'][0]
if not 'float_val' in tensor:
continue
scale = float(inpNode.attr['value']['tensor'][0]['float_val'][0])
addConstNode(inpNode.name + '/inv', [1.0 / scale], graph_def)
nodesToKeep.append(inpNode.name + '/inv')
node.input[1] = inpNode.name + '/inv'
node.op = 'Mul'
break
def to_remove(name, op):
if name in nodesToKeep:
return False
return op == 'Const' or not name.startswith(scopesToKeep)
removeUnusedNodesAndAttrs(to_remove, graph_def)
# Attach unconnected preprocessing
assert(graph_def.node[1].name == 'truediv' and graph_def.node[1].op == 'RealDiv')
graph_def.node[1].input.insert(0, 'image_arrays')
graph_def.node[2].input.insert(0, 'truediv')
priors_generator = AnchorGenerator(min_level, aspect_ratios, num_scales, anchor_scale)
priorBoxes = []
for i in range(5):
inpName = ''
for node in graph_def.node:
if node.name == 'Reshape_%d' % (i * 2 + 1):
inpName = node.input[0]
break
priorBox = NodeDef()
priorBox.name = 'PriorBox_%d' % i
priorBox.op = 'PriorBox'
priorBox.input.append(inpName)
priorBox.input.append(graph_def.node[0].name) # image_tensor
priorBox.addAttr('flip', False)
priorBox.addAttr('clip', False)
widths, heights = priors_generator.get(i)
priorBox.addAttr('width', widths)
priorBox.addAttr('height', heights)
priorBox.addAttr('variance', [1.0, 1.0, 1.0, 1.0])
graph_def.node.extend([priorBox])
priorBoxes.append(priorBox.name)
addConstNode('concat/axis_flatten', [-1], graph_def)
def addConcatNode(name, inputs, axisNodeName):
concat = NodeDef()
concat.name = name
concat.op = 'ConcatV2'
for inp in inputs:
concat.input.append(inp)
concat.input.append(axisNodeName)
graph_def.node.extend([concat])
addConcatNode('PriorBox/concat', priorBoxes, 'concat/axis_flatten')
sigmoid = NodeDef()
sigmoid.name = 'concat/sigmoid'
sigmoid.op = 'Sigmoid'
sigmoid.input.append('concat')
graph_def.node.extend([sigmoid])
addFlatten(sigmoid.name, sigmoid.name + '/Flatten', graph_def)
addFlatten('concat_1', 'concat_1/Flatten', graph_def)
detectionOut = NodeDef()
detectionOut.name = 'detection_out'
detectionOut.op = 'DetectionOutput'
detectionOut.input.append('concat_1/Flatten')
detectionOut.input.append(sigmoid.name + '/Flatten')
detectionOut.input.append('PriorBox/concat')
detectionOut.addAttr('num_classes', num_classes)
detectionOut.addAttr('share_location', True)
detectionOut.addAttr('background_label_id', num_classes + 1)
detectionOut.addAttr('nms_threshold', 0.6)
detectionOut.addAttr('confidence_threshold', 0.2)
detectionOut.addAttr('top_k', 100)
detectionOut.addAttr('keep_top_k', 100)
detectionOut.addAttr('code_type', "CENTER_SIZE")
graph_def.node.extend([detectionOut])
graph_def.node[0].attr['shape'] = {
'shape': {
'dim': [
{'size': -1},
{'size': image_height},
{'size': image_width},
{'size': 3}
]
}
}
while True:
unconnectedNodes = getUnconnectedNodes()
unconnectedNodes.remove(detectionOut.name)
if not unconnectedNodes:
break
for name in unconnectedNodes:
for i in range(len(graph_def.node)):
if graph_def.node[i].name == name:
del graph_def.node[i]
break
# Save as text
graph_def.save(outputPath)
if __name__ == "__main__":
parser = argparse.ArgumentParser(description='Run this script to get a text graph of '
'an EfficientDet model trained with the AutoML framework. '
'Then pass it together with the .pb file to the cv::dnn::readNetFromTensorflow function.')
parser.add_argument('--input', required=True, help='Path to frozen TensorFlow graph.')
parser.add_argument('--output', required=True, help='Path to output text graph.')
parser.add_argument('--min_level', default=3, type=int, help='Parameter from training config')
parser.add_argument('--num_scales', default=3, type=int, help='Parameter from training config')
parser.add_argument('--anchor_scale', default=4.0, type=float, help='Parameter from training config')
parser.add_argument('--aspect_ratios', default=[1.0, 1.0, 1.4, 0.7, 0.7, 1.4],
nargs='+', type=float, help='Parameter from training config')
parser.add_argument('--num_classes', default=90, type=int, help='Number of classes to detect')
parser.add_argument('--width', default=512, type=int, help='Network input width')
parser.add_argument('--height', default=512, type=int, help='Network input height')
args = parser.parse_args()
ar = args.aspect_ratios
assert(len(ar) % 2 == 0)
ar = list(zip(ar[::2], ar[1::2]))
createGraph(args.input, args.output, args.min_level, ar, args.num_scales,
args.anchor_scale, args.num_classes, args.width, args.height)