Merge remote-tracking branch 'upstream/3.4' into merge-3.4

5 years ago · 4cb9faf6c9
parent 55f2370f36 a67228cd73
commit 4cb9faf6c9
28 changed files with 1300 additions and 349 deletions
--- a/modules/dnn/src/dnn.cpp
+++ b/modules/dnn/src/dnn.cpp
@ -109,6 +109,22 @@ public:
 #ifdef HAVE_INF_ENGINE
    static inline bool checkIETarget(Target target)
    {
+#if INF_ENGINE_VER_MAJOR_GT(INF_ENGINE_RELEASE_2019R3)
+        // Lightweight detection
+        const std::vector<std::string> devices = getCore().GetAvailableDevices();
+        for (std::vector<std::string>::const_iterator i = devices.begin(); i != devices.end(); ++i)
+        {
+            if (std::string::npos != i->find("MYRIAD") && target == DNN_TARGET_MYRIAD)
+                return true;
+            else if (std::string::npos != i->find("FPGA") && target == DNN_TARGET_FPGA)
+                return true;
+            else if (std::string::npos != i->find("CPU") && target == DNN_TARGET_CPU)
+                return true;
+            else if (std::string::npos != i->find("GPU") && (target == DNN_TARGET_OPENCL || target == DNN_TARGET_OPENCL_FP16))
+                return true;
+        }
+        return false;
+#else
        cv::dnn::Net net;
        cv::dnn::LayerParams lp;
        lp.set("kernel_size", 1);
@ -132,6 +148,7 @@ public:
            return false;
        }
        return true;
+#endif
    }
 #endif

--- a/modules/dnn/src/graph_simplifier.cpp
+++ b/modules/dnn/src/graph_simplifier.cpp
@ -0,0 +1,207 @@
+// This file is part of OpenCV project.
+// It is subject to the license terms in the LICENSE file found in the top-level directory
+// of this distribution and at http://opencv.org/license.html.
+
+// Copyright (C) 2020, Intel Corporation, all rights reserved.
+// Third party copyrights are property of their respective owners.
+
+#include "precomp.hpp"
+
+#include "graph_simplifier.hpp"
+
+#include <queue>
+
+namespace cv { namespace dnn {
+
+// Defined out of line; the members are standard containers and strings that
+// clean up after themselves, so there is nothing to release here.
+Subgraph::~Subgraph()
+{
+}
+
+// Convenience overload taking up to four input ids. The number of arguments
+// that differ from the -1 placeholder is counted and that many leading
+// arguments are forwarded to the vector-based overload, so the supplied ids
+// are expected to occupy the first positions. Returns the id of the new node.
+int Subgraph::addNodeToMatch(const std::string& op, int input_0, int input_1,
+                             int input_2, int input_3)
+{
+    const int candidates[4] = {input_0, input_1, input_2, input_3};
+    int used = 0;
+    for (const int* it = candidates; it != candidates + 4; ++it)
+    {
+        if (*it != -1)
+            ++used;
+    }
+    const std::vector<int> ids(candidates, candidates + used);
+    return addNodeToMatch(op, ids);
+}
+
+// Registers a pattern node of type <op> whose inputs are the previously added
+// pattern nodes listed in <inputs_>. Returns the id of the new node.
+// Every input id must refer to an already registered node. Negative ids (for
+// example a -1 placeholder that slipped through the fixed-arity overload) are
+// rejected here because match() and replace() use these ids as indices.
+int Subgraph::addNodeToMatch(const std::string& op, const std::vector<int>& inputs_)
+{
+    const int numNodes = (int)nodes.size();
+    for (size_t i = 0; i < inputs_.size(); ++i)
+    {
+        CV_Assert(inputs_[i] >= 0 && inputs_[i] < numNodes);
+    }
+    nodes.push_back(op);
+    inputs.push_back(inputs_);
+    return numNodes;  // Id of the node that has just been appended.
+}
+
+// Convenience overload taking up to six input ids for the fused node. Each
+// argument is checked against the number of registered pattern nodes, the
+// arguments that differ from the -1 placeholder are counted, and that many
+// leading arguments are forwarded to the vector-based overload.
+void Subgraph::setFusedNode(const std::string& op, int input_0, int input_1,
+                            int input_2, int input_3, int input_4, int input_5)
+{
+    const int nodeInputs[6] = {input_0, input_1, input_2, input_3, input_4, input_5};
+    int used = 0;
+    for (int i = 0; i != 6; ++i)
+    {
+        CV_Assert(nodeInputs[i] < (int)nodes.size());
+        if (nodeInputs[i] != -1)
+            ++used;
+    }
+    const std::vector<int> ids(nodeInputs, nodeInputs + used);
+    setFusedNode(op, ids);
+}
+
+// Stores the description of the node that replaces a matched subgraph: its
+// operation name and the ids of the pattern nodes that become its inputs.
+void Subgraph::setFusedNode(const std::string& op, const std::vector<int>& inputs_)
+{
+    fusedNodeOp = op;
+    fusedNodeInputs = inputs_;
+}
+
+// Resolves input number <inpId> of <node> to the index of the node of <net>
+// that carries the same name. Raises Error::StsParseError when no node of
+// <net> has that name.
+int Subgraph::getInputNodeId(const Ptr<ImportGraphWrapper>& net,
+                             const Ptr<ImportNodeWrapper>& node,
+                             int inpId)
+{
+    CV_Assert(inpId < node->getNumInputs());
+
+    // An operation that produces several tensors is referenced with the tensor
+    // index after a ':' character, for example "input:0". Only the part before
+    // the last ':' is compared with node names.
+    const std::string fullName = node->getInputName(inpId);
+    const std::string name = fullName.substr(0, fullName.rfind(':'));
+
+    const int numNodes = net->getNumNodes();
+    int found = -1;
+    for (int i = 0; i < numNodes && found < 0; ++i)
+    {
+        if (net->getNodeName(i) == name)
+            found = i;
+    }
+    if (found < 0)
+        CV_Error(Error::StsParseError, "Input node with name " + name + " not found");
+    return found;
+}
+
+// Breadth-first comparison of the graph around node <nodeId> with the
+// registered pattern. The pattern node added last is compared with graph node
+// <nodeId>; then the inputs of every accepted node are compared with the
+// inputs the pattern expects for it.
+//  - A pattern input with an empty type accepts any producer and is not followed.
+//  - A graph input of type "Const" is accepted only where the pattern also
+//    expects "Const"; such inputs are not added to the result.
+// Returns true on success. In that case <matchedNodesIds> holds the matched
+// graph node ids in ascending order and <targetNodesIds>[i] is the pattern node
+// paired with <matchedNodesIds>[i]. When false is returned the output vectors
+// may hold partial results and must not be used.
+// A pattern without nodes matches nothing.
+bool Subgraph::match(const Ptr<ImportGraphWrapper>& net, int nodeId,
+                     std::vector<int>& matchedNodesIds,
+                     std::vector<int>& targetNodesIds)
+{
+    matchedNodesIds.clear();
+    targetNodesIds.clear();
+
+    // Without this guard the root pattern id computed below would be -1 and
+    // would be used as an index.
+    if (nodes.empty())
+        return false;
+
+    std::queue<int> nodesToMatch;
+    std::queue<int> targetNodes;
+    nodesToMatch.push(nodeId);
+    targetNodes.push((int)nodes.size() - 1);
+    while (!nodesToMatch.empty())
+    {
+        const int nodeToMatch = nodesToMatch.front();
+        const int targetNodeId = targetNodes.front();
+        nodesToMatch.pop();
+        targetNodes.pop();
+
+        // A graph node that is reachable through several paths is processed once.
+        if (std::find(matchedNodesIds.begin(), matchedNodesIds.end(), nodeToMatch) !=
+            matchedNodesIds.end())
+            continue;
+
+        const Ptr<ImportNodeWrapper> node = net->getNode(nodeToMatch);
+        if (node->getType() != nodes[targetNodeId])
+            return false;
+
+        const std::vector<int>& inputNodes = inputs[targetNodeId];
+        if ((int)inputNodes.size() != node->getNumInputs())
+            return false;
+
+        for (int j = 0; j < (int)inputNodes.size(); ++j)
+        {
+            const std::string& expectedType = nodes[inputNodes[j]];
+            if (expectedType.empty())  // Unknown input node type.
+                continue;
+            const int inpNodeId = getInputNodeId(net, node, j);
+            const Ptr<ImportNodeWrapper> inpNode = net->getNode(inpNodeId);
+            if (inpNode->getType() != "Const")
+            {
+                nodesToMatch.push(inpNodeId);
+                targetNodes.push(inputNodes[j]);
+            }
+            else if (expectedType != "Const")
+                return false;
+        }
+        matchedNodesIds.push_back(nodeToMatch);
+        targetNodesIds.push_back(targetNodeId);
+    }
+
+    // Sort both outputs together by graph node id so that the pairing between
+    // a graph node and its pattern node is preserved.
+    const int n = (int)matchedNodesIds.size();
+    std::vector<std::pair<int, int> > elements(n);
+    for (int i = 0; i < n; ++i)
+        elements[i] = std::make_pair(matchedNodesIds[i], targetNodesIds[i]);
+    std::sort(elements.begin(), elements.end());
+    for (int i = 0; i < n; ++i)
+    {
+        matchedNodesIds[i] = elements[i].first;
+        targetNodesIds[i] = elements[i].second;
+    }
+    return true;
+}
+
+// Fuses a subgraph previously accepted by match(). Every matched node except
+// the one listed last is removed from <net>; the remaining node gets the fused
+// operation type and the input names that correspond to the fused node inputs.
+// finalize() is called afterwards so that subclasses can adjust the result.
+// <matchedNodesIds> must be non-empty and sorted in ascending order, and
+// <targetNodesIds>[i] must be the pattern node paired with <matchedNodesIds>[i],
+// which is what match() produces.
+void Subgraph::replace(const Ptr<ImportGraphWrapper>& net, const std::vector<int>& matchedNodesIds,
+                       const std::vector<int>& targetNodesIds)
+{
+    // back() and the paired indexing below are only valid for a non-empty match
+    // that has a pattern id for every matched node.
+    CV_Assert(!matchedNodesIds.empty());
+    CV_Assert(targetNodesIds.size() >= matchedNodesIds.size());
+
+    // Extract names of input nodes.
+    std::vector<std::string> inputsNames(fusedNodeInputs.size());
+    for (size_t i = 0; i < fusedNodeInputs.size(); ++i)
+    {
+        std::string inpName;
+        // Find input node name looking at inputs of fused nodes.
+        for (size_t j = 0; j < matchedNodesIds.size() && inpName.empty(); ++j)
+        {
+            Ptr<ImportNodeWrapper> node = net->getNode(matchedNodesIds[j]);
+            const std::vector<int>& inpIndices = inputs[targetNodesIds[j]];
+
+            CV_Assert(node->getNumInputs() == (int)inpIndices.size());
+            for (size_t k = 0; k < inpIndices.size(); ++k)
+            {
+                if (inpIndices[k] == fusedNodeInputs[i])
+                {
+                    inpName = node->getInputName((int)k);
+                    break;
+                }
+            }
+        }
+        CV_Assert(!inpName.empty());
+        inputsNames[i] = inpName;
+    }
+
+    // Remove matched nodes except the last one. Indices in ascending order are
+    // expected; removing from the highest index down keeps the ids that are
+    // still to be removed valid.
+    Ptr<ImportNodeWrapper> node = net->getNode(matchedNodesIds.back());
+    for (int i = (int)matchedNodesIds.size() - 2; i >= 0; --i)
+        net->removeNode(matchedNodesIds[i]);
+
+    // Modify the last node to be a fused one.
+    node->setType(fusedNodeOp);
+    node->setInputNames(inputsNames);
+
+    // Resolve the new inputs by name and hand them to the subclass hook.
+    std::vector<Ptr<ImportNodeWrapper> > inputNodes(inputsNames.size());
+    for (size_t i = 0; i < inputsNames.size(); ++i)
+    {
+        inputNodes[i] = net->getNode(getInputNodeId(net, node, (int)i));
+    }
+    finalize(net, node, inputNodes);
+}
+
+// Default hook called at the end of replace(); it intentionally does nothing.
+// Subclasses override it to adjust the fused node.
+void Subgraph::finalize(const Ptr<ImportGraphWrapper>& /*net*/,
+                        const Ptr<ImportNodeWrapper>& /*fusedNode*/,
+                        std::vector<Ptr<ImportNodeWrapper> >& /*inputs*/)
+{
+}
+
+// Tries every pattern at every node of <net> and fuses the first pattern that
+// matches at a node. At most one pattern is applied per node.
+void simplifySubgraphs(const Ptr<ImportGraphWrapper>& net,
+                       const std::vector<Ptr<Subgraph> >& patterns)
+{
+    int numNodes = net->getNumNodes();
+    std::vector<int> matchedNodesIds, targetNodesIds;
+    for (int i = 0; i < numNodes; ++i)
+    {
+        for (size_t j = 0; j < patterns.size(); ++j)
+        {
+            if (!patterns[j]->match(net, i, matchedNodesIds, targetNodesIds))
+                continue;
+
+            patterns[j]->replace(net, matchedNodesIds, targetNodesIds);
+
+            // replace() removes every matched node except the last listed one,
+            // so the graph shrinks by that many nodes.
+            const int numRemoved = (int)matchedNodesIds.size() - 1;
+
+            // Nodes removed at or before position i shift the rest of the graph
+            // towards the front. Move the cursor back by the same amount,
+            // otherwise the nodes that directly follow the fused node would
+            // never be tried as the start of a match.
+            int shift = 0;
+            for (int k = 0; k < numRemoved; ++k)
+                shift += (int)(matchedNodesIds[k] <= i);
+
+            numNodes -= numRemoved;
+            i -= shift;
+            break;
+        }
+    }
+}
+
+}}  // namespace cv::dnn
--- a/modules/dnn/src/graph_simplifier.hpp
+++ b/modules/dnn/src/graph_simplifier.hpp
@ -0,0 +1,100 @@
+// This file is part of OpenCV project.
+// It is subject to the license terms in the LICENSE file found in the top-level directory
+// of this distribution and at http://opencv.org/license.html.
+
+// Copyright (C) 2020, Intel Corporation, all rights reserved.
+// Third party copyrights are property of their respective owners.
+
+#ifndef __OPENCV_DNN_GRAPH_SIMPLIFIER_HPP__
+#define __OPENCV_DNN_GRAPH_SIMPLIFIER_HPP__
+
+#include <string>
+
+#include <opencv2/core.hpp>
+
+namespace cv { namespace dnn {
+
+// Framework-independent view of a single node of an imported graph.
+class ImportNodeWrapper
+{
+public:
+    virtual ~ImportNodeWrapper() {}
+
+    // Number of inputs of the node.
+    virtual int getNumInputs() const = 0;
+    // Name of input number <idx>.
+    virtual std::string getInputName(int idx) const = 0;
+
+    // Operation type of the node.
+    virtual std::string getType() const = 0;
+    // Changes the operation type of the node.
+    virtual void setType(const std::string& type) = 0;
+
+    // Replaces the list of input names of the node.
+    virtual void setInputNames(const std::vector<std::string>& inputs) = 0;
+};
+
+// Framework-independent view of an imported graph as an indexed list of nodes.
+class ImportGraphWrapper
+{
+public:
+    virtual ~ImportGraphWrapper() {}
+
+    // Wrapper of the node stored at index <idx>.
+    virtual Ptr<ImportNodeWrapper> getNode(int idx) const = 0;
+    // Number of nodes in the graph.
+    virtual int getNumNodes() const = 0;
+    // Name of node <idx>; input names of other nodes are resolved against it.
+    virtual std::string getNodeName(int idx) const = 0;
+
+    // Removes node <idx> from the graph.
+    virtual void removeNode(int idx) = 0;
+};
+
+// Interface to match and replace subgraphs. A pattern is described node by
+// node with addNodeToMatch(), the replacement with setFusedNode().
+class Subgraph
+{
+public:
+    virtual ~Subgraph();
+
+    // Adds a node of type <op> to the pattern. The remaining arguments are ids
+    // of previously added pattern nodes that are expected to be its inputs;
+    // -1 marks an unused slot. Returns the id of the newly added node.
+    // TODO: Replace inputs to std::vector<int> in C++11
+    int addNodeToMatch(const std::string& op,
+                       int input_0 = -1, int input_1 = -1,
+                       int input_2 = -1, int input_3 = -1);
+
+    // Same as above with the input ids passed as a vector.
+    int addNodeToMatch(const std::string& op, const std::vector<int>& inputs_);
+
+    // Describes the resulting node. All the matched nodes of the subgraph,
+    // excluding input nodes, are fused into this single node of type <op>.
+    // The remaining arguments are ids of pattern nodes that become its inputs;
+    // -1 marks an unused slot.
+    // TODO: Replace inputs to std::vector<int> in C++11
+    void setFusedNode(const std::string& op,
+                      int input_0 = -1, int input_1 = -1,
+                      int input_2 = -1, int input_3 = -1,
+                      int input_4 = -1, int input_5 = -1);
+
+    // Same as above with the input ids passed as a vector.
+    void setFusedNode(const std::string& op, const std::vector<int>& inputs_);
+
+    // Returns the index in <net> of the node that produces input <inpId> of <node>.
+    static int getInputNodeId(const Ptr<ImportGraphWrapper>& net,
+                              const Ptr<ImportNodeWrapper>& node,
+                              int inpId);
+
+    // Matches the subgraph that ends at node <nodeId> against the pattern.
+    // Const nodes are skipped during matching. Returns true if the nodes are
+    // matched and can be fused; the ids of the matched graph nodes and of the
+    // corresponding pattern nodes are written to the two output vectors.
+    virtual bool match(const Ptr<ImportGraphWrapper>& net, int nodeId,
+                       std::vector<int>& matchedNodesIds,
+                       std::vector<int>& targetNodesIds);
+
+    // Fuses a matched subgraph using the ids produced by match().
+    void replace(const Ptr<ImportGraphWrapper>& net,
+                 const std::vector<int>& matchedNodesIds,
+                 const std::vector<int>& targetNodesIds);
+
+    // Called by replace() with the fused node and the nodes that feed it.
+    virtual void finalize(const Ptr<ImportGraphWrapper>& net,
+                          const Ptr<ImportNodeWrapper>& fusedNode,
+                          std::vector<Ptr<ImportNodeWrapper> >& inputs);
+
+private:
+    // Types of the nodes to be matched in the origin graph.
+    std::vector<std::string> nodes;
+    // For every pattern node, the ids of the pattern nodes that are its inputs.
+    std::vector<std::vector<int> > inputs;
+
+    // Operation name of the resulting fused node.
+    std::string fusedNodeOp;
+    // Ids of the pattern nodes that are the inputs of the fused node.
+    std::vector<int> fusedNodeInputs;
+};
+
+void simplifySubgraphs(const Ptr<ImportGraphWrapper>& net,
+                       const std::vector<Ptr<Subgraph> >& patterns);
+
+}}  // namespace dnn, namespace cv
+
+#endif  // __OPENCV_DNN_GRAPH_SIMPLIFIER_HPP__
--- a/modules/dnn/src/ie_ngraph.cpp
+++ b/modules/dnn/src/ie_ngraph.cpp
@ -168,21 +168,26 @@ void InfEngineNgraphNet::init(Target targetId)
 {
    if (!hasNetOwner)
    {
-        if (targetId == DNN_TARGET_OPENCL_FP16 || targetId == DNN_TARGET_MYRIAD) {
+        if (targetId == DNN_TARGET_OPENCL_FP16)
+        {
            auto nodes = ngraph_function->get_ordered_ops();
-            for (auto& node : nodes) {
+            for (auto& node : nodes)
+            {
                auto parameter = std::dynamic_pointer_cast<ngraph::op::Parameter>(node);
-                if (parameter && parameter->get_element_type() == ngraph::element::f32) {
+                if (parameter && parameter->get_element_type() == ngraph::element::f32)
+                {
                    parameter->set_element_type(ngraph::element::f16);
                }
                auto constant = std::dynamic_pointer_cast<ngraph::op::Constant>(node);
-                if (constant && constant->get_element_type() == ngraph::element::f32) {
-                    auto data = constant->get_vector<float>();
-                    std::vector<ngraph::float16> new_data(data.size());
-                    for (size_t i = 0; i < data.size(); ++i) {
-                        new_data[i] = ngraph::float16(data[i]);
-                    }
-                    auto new_const = std::make_shared<ngraph::op::Constant>(ngraph::element::f16, constant->get_shape(), new_data);
+                if (constant && constant->get_element_type() == ngraph::element::f32)
+                {
+                    const float* floatsData = constant->get_data_ptr<float>();
+                    size_t total = ngraph::shape_size(constant->get_shape());
+                    Mat floats(1, total, CV_32F, (void*)floatsData);
+                    Mat halfs;
+                    cv::convertFp16(floats, halfs);
+
+                    auto new_const = std::make_shared<ngraph::op::Constant>(ngraph::element::f16, constant->get_shape(), halfs.data);
                    new_const->set_friendly_name(constant->get_friendly_name());
                    ngraph::replace_node(constant, new_const);
                }
--- a/modules/dnn/src/layers/concat_layer.cpp
+++ b/modules/dnn/src/layers/concat_layer.cpp
@ -114,7 +114,8 @@ public:
        return backendId == DNN_BACKEND_OPENCV ||
               backendId == DNN_BACKEND_CUDA ||
               (backendId == DNN_BACKEND_HALIDE && haveHalide() && axis == 1 && !padding) ||  // By channels
-               ((backendId == DNN_BACKEND_INFERENCE_ENGINE_NN_BUILDER_2019 || backendId == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH) && haveInfEngine() && !padding) ||
+               (backendId == DNN_BACKEND_INFERENCE_ENGINE_NN_BUILDER_2019 && haveInfEngine() && !padding) ||
+               backendId == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH ||
               (backendId == DNN_BACKEND_VKCOM && haveVulkan() && !padding);
    }

@ -351,14 +352,45 @@ public:
    virtual Ptr<BackendNode> initNgraph(const std::vector<Ptr<BackendWrapper> >& inputs,
                                        const std::vector<Ptr<BackendNode> >& nodes) CV_OVERRIDE
    {
+        InferenceEngine::DataPtr data = ngraphDataNode(inputs[0]);
+        const int numDims = data->getDims().size();
+        const int cAxis = clamp(axis, numDims);
+        std::vector<size_t> maxDims(numDims, 0);
+
        CV_Assert(inputs.size() == nodes.size());
        ngraph::NodeVector inp_nodes;
-        for (auto& node : nodes) {
-            inp_nodes.push_back(node.dynamicCast<InfEngineNgraphNode>()->node);
-        }
+        for (int i = 0; i < nodes.size(); ++i)
+        {
+            inp_nodes.push_back(nodes[i].dynamicCast<InfEngineNgraphNode>()->node);

-        InferenceEngine::DataPtr data = ngraphDataNode(inputs[0]);
-        auto concat = std::make_shared<ngraph::op::Concat>(inp_nodes, clamp(axis, data->getDims().size()));
+            std::vector<size_t> inpShape = ngraphDataNode(inputs[i])->getDims();
+            for (int i = 0; i < numDims; ++i)
+                maxDims[i] = std::max(maxDims[i], inpShape[i]);
+        }
+        for (int i = 0; i < inp_nodes.size(); ++i)
+        {
+            bool needPadding = false;
+            std::vector<size_t> inpShape = ngraphDataNode(inputs[i])->getDims();
+            std::vector<int64_t> begins(inpShape.size(), 0), ends(inpShape.size(), 0);
+            for (int j = 0; j < inpShape.size(); ++j)
+            {
+                if (j != cAxis && inpShape[j] != maxDims[j])
+                {
+                    needPadding = true;
+                    begins[j] = static_cast<int64_t>((maxDims[j] - inpShape[j]) / 2);
+                    ends[j] = static_cast<int64_t>(maxDims[j] - inpShape[j] - begins[j]);
+                }
+            }
+            if (needPadding)
+            {
+                inp_nodes[i] = std::make_shared<ngraph::op::v1::Pad>(
+                    inp_nodes[i],
+                    std::make_shared<ngraph::op::Constant>(ngraph::element::i64, ngraph::Shape{begins.size()}, begins.data()),
+                    std::make_shared<ngraph::op::Constant>(ngraph::element::i64, ngraph::Shape{ends.size()}, ends.data()),
+                    ngraph::op::PadMode::CONSTANT);
+            }
+        }
+        auto concat = std::make_shared<ngraph::op::Concat>(inp_nodes, cAxis);
        return Ptr<BackendNode>(new InfEngineNgraphNode(concat));
    }
 #endif  // HAVE_DNN_NGRAPH
--- a/modules/dnn/src/layers/normalize_bbox_layer.cpp
+++ b/modules/dnn/src/layers/normalize_bbox_layer.cpp
@ -75,7 +75,10 @@ public:
            if (pnorm != 2)
                return false;

-            return preferableTarget == DNN_TARGET_MYRIAD ? !acrossSpatial : startAxis == 1;
+            if (backendId == DNN_BACKEND_INFERENCE_ENGINE_NN_BUILDER_2019 && preferableTarget == DNN_TARGET_MYRIAD)
+                return !acrossSpatial;
+
+            return startAxis == 1;
        }
        return backendId == DNN_BACKEND_OPENCV ||
               (backendId == DNN_BACKEND_CUDA && (pnorm == 1 || pnorm == 2));
@ -373,7 +376,6 @@ public:
        }
        else
        {
-            // weight->get_shape().size() > 1 ~> channel_shared = false
            weight = std::make_shared<ngraph::op::Constant>(
                                      ngraph::element::f32, ngraph::Shape(shape), blobs[0].data);
        }
--- a/modules/dnn/src/layers/pooling_layer.cpp
+++ b/modules/dnn/src/layers/pooling_layer.cpp
@ -203,7 +203,7 @@ public:
 #endif
        }
        else if (backendId == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH) {
-            return type != STOCHASTIC;
+            return !computeMaxIdx && type != STOCHASTIC;
        }
        else
        {
--- a/modules/dnn/src/onnx/onnx_graph_simplifier.cpp
+++ b/modules/dnn/src/onnx/onnx_graph_simplifier.cpp
@ -0,0 +1,157 @@
+// This file is part of OpenCV project.
+// It is subject to the license terms in the LICENSE file found in the top-level directory
+// of this distribution and at http://opencv.org/license.html.
+
+// Copyright (C) 2020, Intel Corporation, all rights reserved.
+// Third party copyrights are property of their respective owners.
+
+#include "../precomp.hpp"
+
+#include "../graph_simplifier.hpp"
+#include "onnx_graph_simplifier.hpp"
+
+#include <queue>
+
+namespace cv { namespace dnn {
+CV__DNN_INLINE_NS_BEGIN
+
+// Node wrapper over an ONNX NodeProto. A wrapper built without a NodeProto
+// stands for a fake node (a graph input): it reports no inputs and an empty
+// type, and it cannot be modified.
+class ONNXNodeWrapper : public ImportNodeWrapper
+{
+public:
+    ONNXNodeWrapper(opencv_onnx::NodeProto* _node = 0) : node(_node) {}
+
+    virtual int getNumInputs() const CV_OVERRIDE
+    {
+        if (!node)
+            return 0;
+        return node->input_size();
+    }
+
+    virtual std::string getInputName(int idx) const CV_OVERRIDE
+    {
+        CV_Assert_N(node, idx < node->input_size());
+        return node->input(idx);
+    }
+
+    virtual std::string getType() const CV_OVERRIDE
+    {
+        if (!node)
+            return std::string();
+        return node->op_type();
+    }
+
+    virtual void setType(const std::string& type) CV_OVERRIDE
+    {
+        CV_Assert(node);
+        node->set_op_type(type);
+    }
+
+    virtual void setInputNames(const std::vector<std::string>& inputs) CV_OVERRIDE
+    {
+        CV_Assert(node);
+        node->clear_input();
+        for (std::vector<std::string>::const_iterator it = inputs.begin(); it != inputs.end(); ++it)
+            node->add_input(*it);
+    }
+
+    // Wrapped protobuf node; null for a fake input node.
+    opencv_onnx::NodeProto* node;
+};
+
+// Graph wrapper over an ONNX GraphProto. ONNX keeps graph inputs separately
+// from nodes, so indices [0, numInputs) address the graph inputs and the
+// following indices address the real nodes.
+class ONNXGraphWrapper : public ImportGraphWrapper
+{
+public:
+    ONNXGraphWrapper(opencv_onnx::GraphProto& _net)
+        : numInputs(_net.input_size()), net(_net)
+    {
+    }
+
+    // Graph inputs are returned as wrappers without a NodeProto.
+    virtual Ptr<ImportNodeWrapper> getNode(int idx) const CV_OVERRIDE
+    {
+        opencv_onnx::NodeProto* const node =
+            (idx >= numInputs) ? net.mutable_node(idx - numInputs) : 0;
+        return makePtr<ONNXNodeWrapper>(node);
+    }
+
+    virtual int getNumNodes() const CV_OVERRIDE
+    {
+        const int numRealNodes = net.node_size();
+        return numInputs + numRealNodes;
+    }
+
+    // A real node is identified by the name of its first output.
+    virtual std::string getNodeName(int idx) const CV_OVERRIDE
+    {
+        if (idx >= numInputs)
+            return net.node(idx - numInputs).output(0);
+        return net.input(idx).name();
+    }
+
+    // Only real nodes can be removed.
+    virtual void removeNode(int idx) CV_OVERRIDE
+    {
+        CV_Assert(idx >= numInputs);
+        const int protoIdx = idx - numInputs;
+        net.mutable_node()->DeleteSubrange(protoIdx, 1);
+    }
+
+private:
+    int numInputs;                 // Number of graph inputs at construction time.
+    opencv_onnx::GraphProto& net;  // Wrapped graph; modified in place.
+};
+
+// Pattern that fuses Exp -> ReduceSum -> Div (the Div takes the Exp output and
+// the ReduceSum output) into a single Softmax node. The axis of the fused node
+// is taken from the "axes" attribute of the ReduceSum node.
+class SoftMaxSubgraph : public Subgraph
+{
+public:
+    SoftMaxSubgraph() : axis(0)
+    {
+        int input = addNodeToMatch("");
+        int inpExp = addNodeToMatch("Exp", input);
+        int sum = addNodeToMatch("ReduceSum", inpExp);
+        addNodeToMatch("Div", inpExp, sum);
+        setFusedNode("Softmax", input);
+    }
+
+    // Runs the generic matching and then reads the reduction axis. Raises
+    // Error::StsNotImplemented if the matched ReduceSum has no "axes" attribute
+    // or reduces over more than one axis.
+    virtual bool match(const Ptr<ImportGraphWrapper>& net, int nodeId,
+                       std::vector<int>& matchedNodesIds,
+                       std::vector<int>& targetNodesIds) CV_OVERRIDE
+    {
+        if (Subgraph::match(net, nodeId, matchedNodesIds, targetNodesIds))
+        {
+            // Matched ids are sorted in ascending order; the second one is
+            // taken to be the ReduceSum node (after Exp, before Div).
+            Ptr<ImportNodeWrapper> sum = net->getNode(matchedNodesIds[1]);
+            opencv_onnx::NodeProto* node = sum.dynamicCast<ONNXNodeWrapper>()->node;
+
+            for (int i = 0; i < node->attribute_size(); i++)
+            {
+                // Bind by reference: copying a whole AttributeProto per
+                // iteration is not needed for read-only access.
+                const opencv_onnx::AttributeProto& attr = node->attribute(i);
+                if (attr.name() != "axes")
+                    continue;
+                if (attr.ints_size() != 1)
+                    CV_Error(Error::StsNotImplemented, format("Unexpected number of axes: %d", attr.ints_size()));
+                axis = static_cast<int>(attr.ints(0));
+                return true;
+            }
+            CV_Error(Error::StsNotImplemented, "Missed axes attribute");
+        }
+        return false;
+    }
+
+    // Stores the axis found by match() as the "axis" attribute of the fused node.
+    virtual void finalize(const Ptr<ImportGraphWrapper>&,
+                          const Ptr<ImportNodeWrapper>& fusedNode,
+                          std::vector<Ptr<ImportNodeWrapper> >&) CV_OVERRIDE
+    {
+        opencv_onnx::NodeProto* node = fusedNode.dynamicCast<ONNXNodeWrapper>()->node;
+        opencv_onnx::AttributeProto* attr = node->add_attribute();
+        attr->set_name("axis");
+        attr->set_i(axis);
+    }
+
+private:
+    int axis;  // Reduction axis of the last successful match; 0 until then.
+};
+
+// Applies the ONNX-specific fusion patterns (currently only the Softmax one)
+// to <net>; the graph is modified in place.
+void simplifySubgraphs(opencv_onnx::GraphProto& net)
+{
+    std::vector<Ptr<Subgraph> > patterns;
+    patterns.push_back(makePtr<SoftMaxSubgraph>());
+
+    const Ptr<ImportGraphWrapper> graph(new ONNXGraphWrapper(net));
+    simplifySubgraphs(graph, patterns);
+}
+
+CV__DNN_INLINE_NS_END
+}}  // namespace cv::dnn
--- a/modules/dnn/src/onnx/onnx_graph_simplifier.hpp
+++ b/modules/dnn/src/onnx/onnx_graph_simplifier.hpp
@ -0,0 +1,30 @@
+// This file is part of OpenCV project.
+// It is subject to the license terms in the LICENSE file found in the top-level directory
+// of this distribution and at http://opencv.org/license.html.
+
+// Copyright (C) 2020, Intel Corporation, all rights reserved.
+// Third party copyrights are property of their respective owners.
+
+#ifndef __OPENCV_DNN_ONNX_SIMPLIFIER_HPP__
+#define __OPENCV_DNN_ONNX_SIMPLIFIER_HPP__
+
+#include "../precomp.hpp"
+
+#if defined(__GNUC__) && __GNUC__ >= 5
+#pragma GCC diagnostic push
+#pragma GCC diagnostic ignored "-Wsuggest-override"
+#endif
+#include "opencv-onnx.pb.h"
+#if defined(__GNUC__) && __GNUC__ >= 5
+#pragma GCC diagnostic pop
+#endif
+
+namespace cv { namespace dnn {
+CV__DNN_INLINE_NS_BEGIN
+
+void simplifySubgraphs(opencv_onnx::GraphProto& net);
+
+CV__DNN_INLINE_NS_END
+}}  // namespace dnn, namespace cv
+
+#endif  // __OPENCV_DNN_ONNX_SIMPLIFIER_HPP__
--- a/modules/dnn/src/onnx/onnx_importer.cpp
+++ b/modules/dnn/src/onnx/onnx_importer.cpp
@ -26,6 +26,8 @@
 #pragma GCC diagnostic pop
 #endif

+#include "onnx_graph_simplifier.hpp"
+
 namespace cv {
 namespace dnn {
 CV__DNN_INLINE_NS_BEGIN
@ -326,6 +328,9 @@ void ONNXImporter::populateNet(Net dstNet)
 {
    CV_Assert(model_proto.has_graph());
    opencv_onnx::GraphProto graph_proto = model_proto.graph();
+
+    simplifySubgraphs(graph_proto);
+
    std::map<std::string, Mat> constBlobs = getGraphTensors(graph_proto);
    // List of internal blobs shapes.
    std::map<std::string, MatShape> outShapes;
--- a/modules/dnn/src/op_inf_engine.cpp
+++ b/modules/dnn/src/op_inf_engine.cpp
@ -574,6 +574,21 @@ InferenceEngine::Core& getCore()
 #if !defined(OPENCV_DNN_IE_VPU_TYPE_DEFAULT)
 static bool detectMyriadX_()
 {
+#if INF_ENGINE_VER_MAJOR_GT(INF_ENGINE_RELEASE_2019R3)
+    // Lightweight detection
+    InferenceEngine::Core& ie = getCore();
+    const std::vector<std::string> devices = ie.GetAvailableDevices();
+    for (std::vector<std::string>::const_iterator i = devices.begin(); i != devices.end(); ++i)
+    {
+        if (i->find("MYRIAD") != std::string::npos)
+        {
+            const std::string name = ie.GetMetric(*i, METRIC_KEY(FULL_DEVICE_NAME)).as<std::string>();
+            CV_LOG_INFO(NULL, "Myriad device: " << name);
+            return name.find("MyriadX") != std::string::npos  || name.find("Myriad X") != std::string::npos;
+        }
+    }
+    return false;
+#else
    InferenceEngine::Builder::Network builder("");
    InferenceEngine::idx_t inpId = builder.addLayer(
                                   InferenceEngine::Builder::InputLayer().setPort(InferenceEngine::Port({1})));
@ -634,6 +649,7 @@ static bool detectMyriadX_()
        return false;
    }
    return true;
+#endif
 }
 #endif  // !defined(OPENCV_DNN_IE_VPU_TYPE_DEFAULT)

--- a/modules/dnn/src/tensorflow/tf_graph_simplifier.cpp
+++ b/modules/dnn/src/tensorflow/tf_graph_simplifier.cpp
@ -9,6 +9,7 @@

 #ifdef HAVE_PROTOBUF

+#include "../graph_simplifier.hpp"
 #include "tf_graph_simplifier.hpp"
 #include <queue>

@ -18,203 +19,87 @@ CV__DNN_INLINE_NS_BEGIN
 using ::google::protobuf::RepeatedField;
 using ::google::protobuf::MapPair;

-class Subgraph  // Interface to match and replace TensorFlow subgraphs.
+class TFNodeWrapper : public ImportNodeWrapper
 {
 public:
-    virtual ~Subgraph() {}
+    TFNodeWrapper(tensorflow::NodeDef* _node) : node(_node) {}

-    // Add a node to be matched in the origin graph. Specify ids of nodes that
-    // are expected to be inputs. Returns id of a newly added node.
-    // TODO: Replace inputs to std::vector<int> in C++11
-    int addNodeToMatch(const std::string& op, int input_0 = -1, int input_1 = -1,
-                       int input_2 = -1, int input_3 = -1)
+    virtual int getNumInputs() const CV_OVERRIDE
    {
-        int nodeInputs[] = {input_0, input_1, input_2, input_3};
-        int numInputs = 0;
-        for (int i = 0; i < 4; ++i)
-        {
-            numInputs += (int)(nodeInputs[i] != -1);
-        }
-        return addNodeToMatch(op, std::vector<int>(&nodeInputs[0], &nodeInputs[0] + numInputs));
+        return node->input_size();
    }

-    int addNodeToMatch(const std::string& op, const std::vector<int>& inputs_)
+    virtual std::string getInputName(int idx) const CV_OVERRIDE
    {
-        for (int i = 0; i < inputs_.size(); ++i)
-        {
-            CV_Assert(inputs_[i] < (int)nodes.size());
-        }
-        nodes.push_back(op);
-        inputs.push_back(inputs_);
-        return nodes.size() - 1;
+        return node->input(idx);
    }

-    // Specify resulting node. All the matched nodes in subgraph excluding
-    // input nodes will be fused into this single node.
-    // TODO: Replace inputs to std::vector<int> in C++11
-    void setFusedNode(const std::string& op, int input_0 = -1, int input_1 = -1,
-                      int input_2 = -1, int input_3 = -1, int input_4 = -1,
-                      int input_5 = -1)
+    virtual std::string getType() const CV_OVERRIDE
    {
-        int nodeInputs[] = {input_0, input_1, input_2, input_3, input_4, input_5};
-        int numInputs = 0;
-        for (int i = 0; i < 6; ++i)
-        {
-            CV_Assert(nodeInputs[i] < (int)nodes.size());
-            numInputs += (int)(nodeInputs[i] != -1);
-        }
-        setFusedNode(op, std::vector<int>(&nodeInputs[0], &nodeInputs[0] + numInputs));
+        return node->op();
    }

-    void setFusedNode(const std::string& op, const std::vector<int>& inputs_)
+    virtual void setType(const std::string& type) CV_OVERRIDE
    {
-        fusedNodeInputs = inputs_;
-        fusedNodeOp = op;
+        node->set_op(type);
    }

-    static int getInputNodeId(const tensorflow::GraphDef& net,
-                              const tensorflow::NodeDef& node,
-                              int inpId)
+    virtual void setInputNames(const std::vector<std::string>& inputs) CV_OVERRIDE
    {
-        CV_Assert(inpId < node.input_size());
-        std::string name = node.input(inpId);
-        // If operation produces several tensors, they are specified by index
-        // after ':' character. In example, "input:0".
-        name = name.substr(0, name.rfind(':'));
-        const int numNodes = net.node_size();
-        for (int i = 0; i < numNodes; ++i)
-        {
-            if (net.node(i).name() == name)
-                return i;
-        }
-        CV_Error(Error::StsParseError, "Input node with name " + name + " not found");
+        node->clear_input();
+        for (int i = 0; i < inputs.size(); ++i)
+            node->add_input(inputs[i]);
    }

-    // Match TensorFlow subgraph starting from <nodeId> with a set of nodes to be fused.
-    // Const nodes are skipped during matching. Returns true if nodes are matched and can be fused.
-    virtual bool match(const tensorflow::GraphDef& net, int nodeId,
-                       std::vector<int>& matchedNodesIds,
-                       std::vector<int>& targetNodesIds)
-    {
-        matchedNodesIds.clear();
-        targetNodesIds.clear();
-
-        std::queue<int> nodesToMatch;
-        std::queue<int> targetNodes;
-        nodesToMatch.push(nodeId);
-        targetNodes.push(nodes.size() - 1);
-        while (!nodesToMatch.empty())
-        {
-            int nodeToMatch = nodesToMatch.front();
-            int targetNodeId = targetNodes.front();
-            nodesToMatch.pop();
-            targetNodes.pop();
-
-            if (std::find(matchedNodesIds.begin(), matchedNodesIds.end(), nodeToMatch) !=
-                matchedNodesIds.end())
-                continue;
-
-            const tensorflow::NodeDef& node = net.node(nodeToMatch);
-            if (node.op() != nodes[targetNodeId])
-                return false;
-
-            std::vector<int>& inputNodes = inputs[targetNodeId];
-            if (inputNodes.size() != node.input_size())
-                return false;
+    tensorflow::NodeDef* node;
+};

-            for (int j = 0; j < inputNodes.size(); ++j)
-            {
-                if (nodes[inputNodes[j]].empty())  // Unknown input node type.
-                    continue;
-                nodeId = getInputNodeId(net, node, j);
-                const tensorflow::NodeDef& inpNode = net.node(nodeId);
-                if (inpNode.op() != "Const")
-                {
-                    nodesToMatch.push(nodeId);
-                    targetNodes.push(inputNodes[j]);
-                }
-                else if (nodes[inputNodes[j]] != "Const")
-                    return false;
-            }
-            matchedNodesIds.push_back(nodeToMatch);
-            targetNodesIds.push_back(targetNodeId);
-        }
+class TFGraphWrapper : public ImportGraphWrapper
+{
+public:
+    TFGraphWrapper(tensorflow::GraphDef& _net) : net(_net) {}

-        const int n = matchedNodesIds.size();
-        std::vector<std::pair<int, int> > elements(n);
-        for (int i = 0; i < n; ++i)
-            elements[i] = std::make_pair(matchedNodesIds[i], targetNodesIds[i]);
-        std::sort(elements.begin(), elements.end());
-        for (int i = 0; i < n; ++i)
-        {
-            matchedNodesIds[i] = elements[i].first;
-            targetNodesIds[i] = elements[i].second;
-        }
-        return true;
+    virtual Ptr<ImportNodeWrapper> getNode(int idx) const CV_OVERRIDE
+    {
+        return makePtr<TFNodeWrapper>(net.mutable_node(idx));
    }

-    // Fuse matched subgraph.
-    void replace(tensorflow::GraphDef& net, const std::vector<int>& matchedNodesIds,
-                 const std::vector<int>& targetNodesIds)
+    virtual int getNumNodes() const CV_OVERRIDE
    {
-        // Extract names of input nodes.
-        std::vector<std::string> inputsNames(fusedNodeInputs.size());
-        for (int i = 0; i < fusedNodeInputs.size(); ++i)
-        {
-            std::string inpName;
-            // Find input node name looking at inputs of fused nodes.
-            for (int j = 0; j < matchedNodesIds.size() && inpName.empty(); ++j)
-            {
-                const tensorflow::NodeDef &node = net.node(matchedNodesIds[j]);
-                std::vector<int>& inpIndices = inputs[targetNodesIds[j]];
-
-                CV_Assert(node.input_size() == inpIndices.size());
-                for (int k = 0; k < inpIndices.size(); ++k)
-                {
-                    if (inpIndices[k] == fusedNodeInputs[i])
-                    {
-                        inpName = node.input(k);
-                        break;
-                    }
-                }
-            }
-            CV_Assert(!inpName.empty());
-            inputsNames[i] = inpName;
-        }
-
-        // Remove matched nodes except the last one. Indices in ascending order are expected.
-        tensorflow::NodeDef* node = net.mutable_node(matchedNodesIds.back());
-        for (int i = matchedNodesIds.size() - 2; i >= 0; --i)
-            net.mutable_node()->DeleteSubrange(matchedNodesIds[i], 1);
+        return net.node_size();
+    }

-        // Modify the last node to be a fused one.
-        node->set_op(fusedNodeOp);
-        node->clear_input();
-        for (int i = 0; i < inputsNames.size(); ++i)
-        {
-            node->add_input(inputsNames[i]);
-        }
+    virtual std::string getNodeName(int idx) const CV_OVERRIDE
+    {
+        return net.node(idx).name();
+    }

-        std::vector<tensorflow::NodeDef*> inputNodes(inputsNames.size());
-        for (int i = 0; i < inputsNames.size(); ++i)
-        {
-            inputNodes[i] = net.mutable_node(getInputNodeId(net, *node, i));
-        }
-        finalize(net, node, inputNodes);
+    virtual void removeNode(int idx) CV_OVERRIDE
+    {
+        net.mutable_node()->DeleteSubrange(idx, 1);
    }

-    virtual void finalize(tensorflow::GraphDef&, tensorflow::NodeDef*,
-                          std::vector<tensorflow::NodeDef*>&) {}
+    tensorflow::GraphDef& net;
+};

-private:
-    std::vector<std::string> nodes;         // Nodes to be matched in the origin graph.
-    std::vector<std::vector<int> > inputs;  // Connections of an every node to it's inputs.
+class TFSubgraph : public Subgraph
+{
+    virtual void finalize(const Ptr<ImportGraphWrapper>& netWrapper,
+                          const Ptr<ImportNodeWrapper>& fusedNodeWrapper,
+                          std::vector<Ptr<ImportNodeWrapper> >& inputs) CV_OVERRIDE
+    {
+        std::vector<tensorflow::NodeDef*> inputNodes(inputs.size());
+        for (int i = 0; i < inputs.size(); ++i)
+            inputNodes[i] = inputs[i].dynamicCast<TFNodeWrapper>()->node;
+        finalize(netWrapper.dynamicCast<TFGraphWrapper>()->net,
+                 fusedNodeWrapper.dynamicCast<TFNodeWrapper>()->node, inputNodes);
+    }

-    std::string fusedNodeOp;           // Operation name of resulting fused node.
-    std::vector<int> fusedNodeInputs;  // Inputs of fused node.
+    virtual void finalize(tensorflow::GraphDef&, tensorflow::NodeDef* fusedNode,
+                          std::vector<tensorflow::NodeDef*>& inputNodes) {}
 };

-class BatchNormSubgraph : public Subgraph
+class BatchNormSubgraph : public TFSubgraph
 {
 public:
    BatchNormSubgraph()
@ -250,7 +135,7 @@ public:
    }
 };

-class BatchNormNoGammaSubgraph : public Subgraph
+class BatchNormNoGammaSubgraph : public TFSubgraph
 {
 public:
    BatchNormNoGammaSubgraph()
@ -366,20 +251,21 @@ public:
        setFusedNode("Relu6", input);
    }

-    virtual bool match(const tensorflow::GraphDef& net, int nodeId,
+    virtual bool match(const Ptr<ImportGraphWrapper>& net, int nodeId,
                       std::vector<int>& matchedNodesIds,
                       std::vector<int>& targetNodesIds) CV_OVERRIDE
    {
        if (!Subgraph::match(net, nodeId, matchedNodesIds, targetNodesIds))
            return false;
-        Mat maxValue = getTensorContent(net.node(matchedNodesIds.front() + 1).attr().at("value").tensor());
+        tensorflow::NodeDef* node = net->getNode(matchedNodesIds.front() + 1).dynamicCast<TFNodeWrapper>()->node;
+        Mat maxValue = getTensorContent(node->attr().at("value").tensor());
        return maxValue.type() == CV_32FC1 && maxValue.total() == 1 && maxValue.at<float>(0) == 6;
    }
 };

 // Keras' reshape stores output shape in separate Const nodes by one value.
 // Need to merge them into a single Const node.
-class ReshapeKerasSubgraph : public Subgraph
+class ReshapeKerasSubgraph : public TFSubgraph
 {
 public:
    ReshapeKerasSubgraph(int _numOutDims) : numOutDims(_numOutDims)
@ -402,15 +288,15 @@ public:
        setFusedNode("Reshape", ids);
    }

-    virtual bool match(const tensorflow::GraphDef& net, int nodeId,
+    virtual bool match(const Ptr<ImportGraphWrapper>& net, int nodeId,
                       std::vector<int>& matchedNodesIds,
                       std::vector<int>& targetNodesIds) CV_OVERRIDE
    {
-        const tensorflow::NodeDef& node = net.node(nodeId);
-        if (node.input_size() == 0)
+        Ptr<ImportNodeWrapper> node = net->getNode(nodeId);
+        if (node->getNumInputs() == 0)
            return false;

-        inpName = node.input(0);
+        inpName = node->getInputName(0);
        return Subgraph::match(net, nodeId, matchedNodesIds, targetNodesIds);
    }

@ -457,7 +343,7 @@ public:
    }
 };

-class DeconvolutionValidKerasSubgraph : public Subgraph
+class DeconvolutionValidKerasSubgraph : public TFSubgraph
 {
 public:
    DeconvolutionValidKerasSubgraph()
@ -518,7 +404,7 @@ public:
    }
 };

-class DeconvolutionSameKerasSubgraph : public Subgraph
+class DeconvolutionSameKerasSubgraph : public TFSubgraph
 {
 public:
    DeconvolutionSameKerasSubgraph()
@ -608,7 +494,7 @@ public:
 };

 // In case of resizing by factor.
-class UpsamplingKerasSubgraph : public Subgraph
+class UpsamplingKerasSubgraph : public TFSubgraph
 {
 public:
    UpsamplingKerasSubgraph(const std::string& type)
@ -703,7 +589,7 @@ public:
    }
 };

-class KerasMVNSubgraph : public Subgraph
+class KerasMVNSubgraph : public TFSubgraph
 {
 public:
    KerasMVNSubgraph()
@ -758,20 +644,7 @@ void simplifySubgraphs(tensorflow::GraphDef& net)
    subgraphs.push_back(Ptr<Subgraph>(new ReshapeAsShapeSubgraph()));
    subgraphs.push_back(Ptr<Subgraph>(new KerasMVNSubgraph()));

-    int numNodes = net.node_size();
-    std::vector<int> matchedNodesIds, targetNodesIds;
-    for (int i = 0; i < numNodes; ++i)
-    {
-        for (int j = 0; j < subgraphs.size(); ++j)
-        {
-            if (subgraphs[j]->match(net, i, matchedNodesIds, targetNodesIds))
-            {
-                subgraphs[j]->replace(net, matchedNodesIds, targetNodesIds);
-                numNodes -= matchedNodesIds.size() - 1;  // #matchedNodes removed and one added.
-                break;
-            }
-        }
-    }
+    simplifySubgraphs(Ptr<ImportGraphWrapper>(new TFGraphWrapper(net)), subgraphs);
 }

 void RemoveIdentityOps(tensorflow::GraphDef& net)
--- a/modules/dnn/test/test_backends.cpp
+++ b/modules/dnn/test/test_backends.cpp
@ -197,8 +197,8 @@ TEST_P(DNNTestNetwork, MobileNet_SSD_Caffe_Different_Width_Height)
    if (backend == DNN_BACKEND_HALIDE)
        applyTestTag(CV_TEST_TAG_DNN_SKIP_HALIDE);
 #if defined(INF_ENGINE_RELEASE)
-    if (backend == DNN_BACKEND_INFERENCE_ENGINE_NN_BUILDER_2019 && target == DNN_TARGET_MYRIAD
-            && getInferenceEngineVPUType() == CV_DNN_INFERENCE_ENGINE_VPU_TYPE_MYRIAD_X)
+    if ((backend == DNN_BACKEND_INFERENCE_ENGINE_NN_BUILDER_2019 || backend == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH) &&
+        target == DNN_TARGET_MYRIAD && getInferenceEngineVPUType() == CV_DNN_INFERENCE_ENGINE_VPU_TYPE_MYRIAD_X)
        applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_MYRIAD_X);
 #endif
    Mat sample = imread(findDataFile("dnn/street.png"));
@ -249,8 +249,8 @@ TEST_P(DNNTestNetwork, MobileNet_SSD_v1_TensorFlow_Different_Width_Height)
    if (backend == DNN_BACKEND_HALIDE)
        applyTestTag(CV_TEST_TAG_DNN_SKIP_HALIDE);
 #if defined(INF_ENGINE_RELEASE)
-    if (backend == DNN_BACKEND_INFERENCE_ENGINE_NN_BUILDER_2019 && target == DNN_TARGET_MYRIAD
-            && getInferenceEngineVPUType() == CV_DNN_INFERENCE_ENGINE_VPU_TYPE_MYRIAD_X)
+    if ((backend == DNN_BACKEND_INFERENCE_ENGINE_NN_BUILDER_2019 || backend == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH) &&
+        target == DNN_TARGET_MYRIAD && getInferenceEngineVPUType() == CV_DNN_INFERENCE_ENGINE_VPU_TYPE_MYRIAD_X)
        applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_MYRIAD_X);
 #endif
 #if defined(INF_ENGINE_RELEASE) && INF_ENGINE_VER_MAJOR_EQ(2019020000)
--- a/modules/dnn/test/test_caffe_importer.cpp
+++ b/modules/dnn/test/test_caffe_importer.cpp
@ -691,9 +691,11 @@ TEST_P(Test_Caffe_nets, FasterRCNN_zf)
        (target == DNN_TARGET_CPU ? CV_TEST_TAG_MEMORY_512MB : CV_TEST_TAG_MEMORY_1GB),
        CV_TEST_TAG_DEBUG_LONG
    );
-    if (backend == DNN_BACKEND_INFERENCE_ENGINE_NN_BUILDER_2019 && target == DNN_TARGET_OPENCL_FP16)
+    if ((backend == DNN_BACKEND_INFERENCE_ENGINE_NN_BUILDER_2019 ||
+         backend == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH) && target == DNN_TARGET_OPENCL_FP16)
        applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_OPENCL_FP16);
-    if (backend == DNN_BACKEND_INFERENCE_ENGINE_NN_BUILDER_2019 && target == DNN_TARGET_MYRIAD)
+    if ((backend == DNN_BACKEND_INFERENCE_ENGINE_NN_BUILDER_2019 ||
+         backend == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH) && target == DNN_TARGET_MYRIAD)
        applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_MYRIAD);
    if (target == DNN_TARGET_CUDA_FP16)
        applyTestTag(CV_TEST_TAG_DNN_SKIP_CUDA_FP16);
@ -710,9 +712,11 @@ TEST_P(Test_Caffe_nets, RFCN)
        CV_TEST_TAG_LONG,
        CV_TEST_TAG_DEBUG_VERYLONG
    );
-    if (backend == DNN_BACKEND_INFERENCE_ENGINE_NN_BUILDER_2019 && target == DNN_TARGET_OPENCL_FP16)
+    if ((backend == DNN_BACKEND_INFERENCE_ENGINE_NN_BUILDER_2019 ||
+         backend == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH) && target == DNN_TARGET_OPENCL_FP16)
        applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_OPENCL_FP16);
-    if (backend == DNN_BACKEND_INFERENCE_ENGINE_NN_BUILDER_2019 && target == DNN_TARGET_MYRIAD)
+    if ((backend == DNN_BACKEND_INFERENCE_ENGINE_NN_BUILDER_2019 ||
+         backend == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH) && target == DNN_TARGET_MYRIAD)
        applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_MYRIAD);
    float scoreDiff = default_l1, iouDiff = default_lInf;
    if (backend == DNN_BACKEND_OPENCV && target == DNN_TARGET_OPENCL_FP16)
--- a/modules/dnn/test/test_darknet_importer.cpp
+++ b/modules/dnn/test/test_darknet_importer.cpp
@ -307,8 +307,8 @@ TEST_P(Test_Darknet_nets, YoloVoc)
        applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_OPENCL_FP16);
 #endif
 #if defined(INF_ENGINE_RELEASE)
-    if (backend == DNN_BACKEND_INFERENCE_ENGINE_NN_BUILDER_2019 && target == DNN_TARGET_MYRIAD
-            && getInferenceEngineVPUType() == CV_DNN_INFERENCE_ENGINE_VPU_TYPE_MYRIAD_X)
+    if ((backend == DNN_BACKEND_INFERENCE_ENGINE_NN_BUILDER_2019 || backend == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH) &&
+        target == DNN_TARGET_MYRIAD && getInferenceEngineVPUType() == CV_DNN_INFERENCE_ENGINE_VPU_TYPE_MYRIAD_X)
        applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_MYRIAD_X);  // need to update check function
 #endif

@ -352,8 +352,8 @@ TEST_P(Test_Darknet_nets, TinyYoloVoc)
    applyTestTag(CV_TEST_TAG_MEMORY_512MB);

 #if defined(INF_ENGINE_RELEASE)
-    if (backend == DNN_BACKEND_INFERENCE_ENGINE_NN_BUILDER_2019 && target == DNN_TARGET_MYRIAD
-            && getInferenceEngineVPUType() == CV_DNN_INFERENCE_ENGINE_VPU_TYPE_MYRIAD_X)
+    if ((backend == DNN_BACKEND_INFERENCE_ENGINE_NN_BUILDER_2019 || backend == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH) &&
+        target == DNN_TARGET_MYRIAD && getInferenceEngineVPUType() == CV_DNN_INFERENCE_ENGINE_VPU_TYPE_MYRIAD_X)
        applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_MYRIAD_X);  // need to update check function
 #endif
    // batchId, classId, confidence, left, top, right, bottom
@ -486,7 +486,8 @@ TEST_P(Test_Darknet_nets, YOLOv3)
    std::string weights_file = "yolov3.weights";

 #if defined(INF_ENGINE_RELEASE)
-    if (backend == DNN_BACKEND_INFERENCE_ENGINE_NN_BUILDER_2019 && target == DNN_TARGET_MYRIAD &&
+    if ((backend == DNN_BACKEND_INFERENCE_ENGINE_NN_BUILDER_2019 ||
+         backend == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH) && target == DNN_TARGET_MYRIAD &&
        getInferenceEngineVPUType() == CV_DNN_INFERENCE_ENGINE_VPU_TYPE_MYRIAD_X)
    {
        scoreDiff = 0.04;
--- a/modules/dnn/test/test_halide_layers.cpp
+++ b/modules/dnn/test/test_halide_layers.cpp
@ -357,11 +357,6 @@ TEST_P(MaxPooling, Accuracy)
        applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_MYRIAD_X, CV_TEST_TAG_DNN_SKIP_IE_VERSION);
 #endif

-#if defined(INF_ENGINE_RELEASE)
-    if (backendId == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH && stride != Size(1, 1) && pad != Size(0, 0))
-        applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_NGRAPH);
-#endif
-
    LayerParams lp;
    lp.set("pool", "max");
    lp.set("kernel_w", kernel.width);
@ -399,7 +394,8 @@ TEST_P(FullyConnected, Accuracy)
    bool hasBias = get<3>(GetParam());
    Backend backendId = get<0>(get<4>(GetParam()));
    Target targetId = get<1>(get<4>(GetParam()));
-    if (backendId == DNN_BACKEND_INFERENCE_ENGINE_NN_BUILDER_2019 && (targetId == DNN_TARGET_OPENCL_FP16 ||
+    if ((backendId == DNN_BACKEND_INFERENCE_ENGINE_NN_BUILDER_2019 ||
+         backendId == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH) && (targetId == DNN_TARGET_OPENCL_FP16 ||
       (targetId == DNN_TARGET_MYRIAD && getInferenceEngineVPUType() == CV_DNN_INFERENCE_ENGINE_VPU_TYPE_MYRIAD_X))) {
        applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_OPENCL_FP16);
        applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_MYRIAD_X);
--- a/modules/dnn/test/test_ie_models.cpp
+++ b/modules/dnn/test/test_ie_models.cpp
@ -134,12 +134,13 @@ static const std::vector<std::string> getOpenVINOTestModelsList()
    return result;
 }

-static inline void genData(const std::vector<size_t>& dims, Mat& m, Blob::Ptr& dataPtr)
+static inline void genData(const InferenceEngine::TensorDesc& desc, Mat& m, Blob::Ptr& dataPtr)
 {
+    const std::vector<size_t>& dims = desc.getDims();
    m.create(std::vector<int>(dims.begin(), dims.end()), CV_32F);
    randu(m, -1, 1);

-    dataPtr = make_shared_blob<float>({Precision::FP32, dims, Layout::ANY}, (float*)m.data);
+    dataPtr = make_shared_blob<float>(desc, (float*)m.data);
 }

 void runIE(Target target, const std::string& xmlPath, const std::string& binPath,
@ -238,7 +239,7 @@ void runIE(Target target, const std::string& xmlPath, const std::string& binPath
    BlobMap inputBlobs;
    for (auto& it : net.getInputsInfo())
    {
-        genData(it.second->getTensorDesc().getDims(), inputsMap[it.first], inputBlobs[it.first]);
+        genData(it.second->getTensorDesc(), inputsMap[it.first], inputBlobs[it.first]);
    }
    infRequest.SetInput(inputBlobs);

@ -247,7 +248,7 @@ void runIE(Target target, const std::string& xmlPath, const std::string& binPath
    BlobMap outputBlobs;
    for (auto& it : net.getOutputsInfo())
    {
-        genData(it.second->getTensorDesc().getDims(), outputsMap[it.first], outputBlobs[it.first]);
+        genData(it.second->getTensorDesc(), outputsMap[it.first], outputBlobs[it.first]);
    }
    infRequest.SetOutput(outputBlobs);

--- a/modules/dnn/test/test_layers.cpp
+++ b/modules/dnn/test/test_layers.cpp
@ -864,6 +864,8 @@ TEST_P(Test_Caffe_layers, PriorBox_squares)
 {
    if (backend == DNN_BACKEND_INFERENCE_ENGINE_NN_BUILDER_2019 && target == DNN_TARGET_MYRIAD)
        applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_MYRIAD, CV_TEST_TAG_DNN_SKIP_IE_NN_BUILDER);
+    if (backend == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH && target == DNN_TARGET_MYRIAD)
+        applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_MYRIAD, CV_TEST_TAG_DNN_SKIP_IE_NGRAPH);
    LayerParams lp;
    lp.name = "testPriorBox";
    lp.type = "PriorBox";
@ -1301,7 +1303,7 @@ static void test_dldt_fused_output(Backend backend, Target target)
    }
    net.setPreferableBackend(backend);
    net.setPreferableTarget(target);
-    net.setInput(Mat({1, 1, 1, 1}, CV_32FC1, Scalar(1)));
+    net.setInput(Mat({1, 1, 2, 3}, CV_32FC1, Scalar(1)));
    net.forward();
 }

@ -1340,7 +1342,7 @@ TEST_P(Test_DLDT_layers, multiple_networks)
        nets[i].addLayerToPrev(lp.name, lp.type, lp);
        nets[i].setPreferableBackend(backend);
        nets[i].setPreferableTarget(target);
-        nets[i].setInput(Mat({1, 1, 1, 1}, CV_32FC1, Scalar(1)));
+        nets[i].setInput(Mat({1, 1, 2, 3}, CV_32FC1, Scalar(1)));
    }
    Mat out_1 = nets[0].forward();
    Mat out_2 = nets[1].forward();
--- a/modules/dnn/test/test_onnx_importer.cpp
+++ b/modules/dnn/test/test_onnx_importer.cpp
@ -369,9 +369,12 @@ TEST_P(Test_ONNX_layers, Div)
    net.setPreferableBackend(backend);
    net.setPreferableTarget(target);

-    Mat inp1 = blobFromNPY(_tf("data/input_div_0.npy"));
-    Mat inp2 = blobFromNPY(_tf("data/input_div_1.npy"));
+    // Reference output values range is -68.80928, 2.991873. So to avoid computational
+    // difference for FP16 we'll perform reversed division (just swap inputs).
+    Mat inp1 = blobFromNPY(_tf("data/input_div_1.npy"));
+    Mat inp2 = blobFromNPY(_tf("data/input_div_0.npy"));
    Mat ref  = blobFromNPY(_tf("data/output_div.npy"));
+    cv::divide(1.0, ref, ref);
    checkBackend(&inp1, &ref);

    net.setInput(inp1, "0");
@ -421,6 +424,7 @@ TEST_P(Test_ONNX_layers, Softmax)
 {
    testONNXModels("softmax");
    testONNXModels("log_softmax", npy, 0, 0, false, false);
+    testONNXModels("softmax_unfused");
 }

 TEST_P(Test_ONNX_layers, Split_EltwiseMax)
@ -473,6 +477,9 @@ TEST_P(Test_ONNX_nets, Googlenet)
    if (backend == DNN_BACKEND_INFERENCE_ENGINE_NN_BUILDER_2019)
        applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_NN_BUILDER);

+    if (backend == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH)
+        applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_NGRAPH);
+
    const String model = _tf("models/googlenet.onnx", false);

    Net net = readNetFromONNX(model);
@ -516,7 +523,7 @@ TEST_P(Test_ONNX_nets, RCNN_ILSVRC13)
        applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_MYRIAD_X, CV_TEST_TAG_DNN_SKIP_IE_NN_BUILDER, CV_TEST_TAG_DNN_SKIP_IE_VERSION);
 #endif
    // Reference output values are in range [-4.992, -1.161]
-    testONNXModels("rcnn_ilsvrc13", pb, 0.0045);
+    testONNXModels("rcnn_ilsvrc13", pb, 0.0046);
 }

 TEST_P(Test_ONNX_nets, VGG16_bn)
@ -583,10 +590,12 @@ TEST_P(Test_ONNX_nets, TinyYolov2)
    )
        applyTestTag(target == DNN_TARGET_OPENCL ? CV_TEST_TAG_DNN_SKIP_IE_OPENCL : CV_TEST_TAG_DNN_SKIP_IE_OPENCL_FP16, CV_TEST_TAG_DNN_SKIP_IE_NN_BUILDER);

-    if (backend == DNN_BACKEND_INFERENCE_ENGINE_NN_BUILDER_2019 && target == DNN_TARGET_MYRIAD
-            && getInferenceEngineVPUType() == CV_DNN_INFERENCE_ENGINE_VPU_TYPE_MYRIAD_X
+    if (target == DNN_TARGET_MYRIAD && getInferenceEngineVPUType() == CV_DNN_INFERENCE_ENGINE_VPU_TYPE_MYRIAD_X
    )
-        applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_MYRIAD_X, CV_TEST_TAG_DNN_SKIP_IE_NN_BUILDER);
+        applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_MYRIAD_X,
+                     backend == DNN_BACKEND_INFERENCE_ENGINE_NN_BUILDER_2019 ?
+                     CV_TEST_TAG_DNN_SKIP_IE_NN_BUILDER :
+                     CV_TEST_TAG_DNN_SKIP_IE_NGRAPH);
 #endif

    // output range: [-11; 8]
@ -628,6 +637,12 @@ TEST_P(Test_ONNX_nets, LResNet100E_IR)
        if (target == DNN_TARGET_OPENCL)      applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_OPENCL, CV_TEST_TAG_DNN_SKIP_IE_NN_BUILDER);
        if (target == DNN_TARGET_MYRIAD)      applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_MYRIAD, CV_TEST_TAG_DNN_SKIP_IE_NN_BUILDER);
    }
+    if (backend == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH)
+    {
+        if (target == DNN_TARGET_OPENCL_FP16) applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_OPENCL_FP16, CV_TEST_TAG_DNN_SKIP_IE_NGRAPH);
+        if (target == DNN_TARGET_OPENCL)      applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_OPENCL, CV_TEST_TAG_DNN_SKIP_IE_NGRAPH);
+        if (target == DNN_TARGET_MYRIAD)      applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_MYRIAD, CV_TEST_TAG_DNN_SKIP_IE_NGRAPH);
+    }

    double l1 = default_l1, lInf = default_lInf;
    // output range: [-3; 3]
@ -652,10 +667,11 @@ TEST_P(Test_ONNX_nets, LResNet100E_IR)
 TEST_P(Test_ONNX_nets, Emotion_ferplus)
 {
 #if defined(INF_ENGINE_RELEASE)
-    if (backend == DNN_BACKEND_INFERENCE_ENGINE_NN_BUILDER_2019 && target == DNN_TARGET_MYRIAD
-            && getInferenceEngineVPUType() == CV_DNN_INFERENCE_ENGINE_VPU_TYPE_MYRIAD_X
-    )
-        applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_MYRIAD_X, CV_TEST_TAG_DNN_SKIP_IE_NN_BUILDER);
+    if (target == DNN_TARGET_MYRIAD && getInferenceEngineVPUType() == CV_DNN_INFERENCE_ENGINE_VPU_TYPE_MYRIAD_X)
+        applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_MYRIAD_X,
+                     backend == DNN_BACKEND_INFERENCE_ENGINE_NN_BUILDER_2019 ?
+                     CV_TEST_TAG_DNN_SKIP_IE_NN_BUILDER :
+                     CV_TEST_TAG_DNN_SKIP_IE_NGRAPH);
 #endif

    double l1 = default_l1;
@ -692,7 +708,8 @@ TEST_P(Test_ONNX_nets, DenseNet121)
 TEST_P(Test_ONNX_nets, Inception_v1)
 {
 #if defined(INF_ENGINE_RELEASE)
-    if (backend == DNN_BACKEND_INFERENCE_ENGINE_NN_BUILDER_2019 && target == DNN_TARGET_MYRIAD)
+    if ((backend == DNN_BACKEND_INFERENCE_ENGINE_NN_BUILDER_2019 ||
+         backend == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH) && target == DNN_TARGET_MYRIAD)
        applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_MYRIAD);
 #endif
    testONNXModels("inception_v1", pb);
--- a/modules/dnn/test/test_tf_importer.cpp
+++ b/modules/dnn/test/test_tf_importer.cpp
@ -261,10 +261,13 @@ TEST_P(Test_TensorFlow_layers, ave_pool_same)
 {
    // Reference output values are in range [-0.519531, 0.112976]
 #if defined(INF_ENGINE_RELEASE) && INF_ENGINE_VER_MAJOR_GE(2019010000)
-    if (backend == DNN_BACKEND_INFERENCE_ENGINE_NN_BUILDER_2019 && target == DNN_TARGET_MYRIAD
-            && getInferenceEngineVPUType() == CV_DNN_INFERENCE_ENGINE_VPU_TYPE_MYRIAD_X
-    )
-        applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_MYRIAD_X, CV_TEST_TAG_DNN_SKIP_IE_NN_BUILDER, CV_TEST_TAG_DNN_SKIP_IE_VERSION);
+    if (target == DNN_TARGET_MYRIAD && getInferenceEngineVPUType() == CV_DNN_INFERENCE_ENGINE_VPU_TYPE_MYRIAD_X)
+    {
+        if (backend == DNN_BACKEND_INFERENCE_ENGINE_NN_BUILDER_2019)
+            applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_MYRIAD_X, CV_TEST_TAG_DNN_SKIP_IE_NN_BUILDER, CV_TEST_TAG_DNN_SKIP_IE_VERSION);
+        else if (backend == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH)
+            applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_MYRIAD_X, CV_TEST_TAG_DNN_SKIP_IE_NGRAPH, CV_TEST_TAG_DNN_SKIP_IE_VERSION);
+    }
 #endif
    runTensorFlowNet("ave_pool_same");
 }
@ -399,6 +402,8 @@ TEST_P(Test_TensorFlow_layers, l2_normalize_3d)
 #if defined(INF_ENGINE_RELEASE)
    if (backend == DNN_BACKEND_INFERENCE_ENGINE_NN_BUILDER_2019 && target == DNN_TARGET_MYRIAD)
        applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_MYRIAD, CV_TEST_TAG_DNN_SKIP_IE_NN_BUILDER);
+    if (backend == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH && target == DNN_TARGET_MYRIAD)
+        applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_MYRIAD, CV_TEST_TAG_DNN_SKIP_IE_NGRAPH);
 #endif

    runTensorFlowNet("l2_normalize_3d");
@ -409,11 +414,15 @@ class Test_TensorFlow_nets : public DNNTestLayer {};
 TEST_P(Test_TensorFlow_nets, MobileNet_SSD)
 {
 #if defined(INF_ENGINE_RELEASE)
-    if (backend == DNN_BACKEND_INFERENCE_ENGINE_NN_BUILDER_2019 && target == DNN_TARGET_MYRIAD)
+    if (target == DNN_TARGET_MYRIAD)
    {
 #if INF_ENGINE_VER_MAJOR_GE(2019020000)
        if (getInferenceEngineVPUType() == CV_DNN_INFERENCE_ENGINE_VPU_TYPE_MYRIAD_X)
-            applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_MYRIAD_X, CV_TEST_TAG_DNN_SKIP_IE_NN_BUILDER, CV_TEST_TAG_DNN_SKIP_IE_VERSION);
+            applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_MYRIAD_X,
+                         backend == DNN_BACKEND_INFERENCE_ENGINE_NN_BUILDER_2019 ?
+                             CV_TEST_TAG_DNN_SKIP_IE_NN_BUILDER :
+                             CV_TEST_TAG_DNN_SKIP_IE_NGRAPH,
+                         CV_TEST_TAG_DNN_SKIP_IE_VERSION);
 #endif
    }
 #endif
@ -554,6 +563,10 @@ TEST_P(Test_TensorFlow_nets, Faster_RCNN)
    if (backend == DNN_BACKEND_INFERENCE_ENGINE_NN_BUILDER_2019 &&
        (INF_ENGINE_VER_MAJOR_LT(2019020000) || target != DNN_TARGET_CPU))
        applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_NN_BUILDER, CV_TEST_TAG_DNN_SKIP_IE_VERSION);
+
+    if (INF_ENGINE_VER_MAJOR_GT(2019030000) &&
+        backend == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH && target == DNN_TARGET_MYRIAD)
+        applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_MYRIAD, CV_TEST_TAG_DNN_SKIP_IE_NGRAPH);
 #endif
    // segfault: inference-engine/thirdparty/clDNN/src/gpu/detection_output_cpu.cpp:111:
    // Assertion `prior_height > 0' failed.
--- a/modules/dnn/test/test_torch_importer.cpp
+++ b/modules/dnn/test/test_torch_importer.cpp
@ -239,6 +239,8 @@ TEST_P(Test_Torch_layers, net_conv_gemm_lrn)
 {
    if (backend == DNN_BACKEND_INFERENCE_ENGINE_NN_BUILDER_2019 && target == DNN_TARGET_MYRIAD)
        applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_MYRIAD, CV_TEST_TAG_DNN_SKIP_IE_NN_BUILDER);
+    if (backend == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH && target == DNN_TARGET_MYRIAD)
+        applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_MYRIAD, CV_TEST_TAG_DNN_SKIP_IE_NGRAPH);
    double l1 = 0.0, lInf = 0.0;
    if (target == DNN_TARGET_OPENCL_FP16)
    {
@ -398,6 +400,13 @@ TEST_P(Test_Torch_nets, ENet_accuracy)
        if (target == DNN_TARGET_MYRIAD)      applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_MYRIAD, CV_TEST_TAG_DNN_SKIP_IE_NN_BUILDER);
        throw SkipTestException("");
    }
+    if (backend == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH && target != DNN_TARGET_CPU)
+    {
+        if (target == DNN_TARGET_OPENCL_FP16) applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_OPENCL_FP16, CV_TEST_TAG_DNN_SKIP_IE_NGRAPH);
+        if (target == DNN_TARGET_OPENCL)      applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_OPENCL, CV_TEST_TAG_DNN_SKIP_IE_NGRAPH);
+        if (target == DNN_TARGET_MYRIAD)      applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_MYRIAD, CV_TEST_TAG_DNN_SKIP_IE_NGRAPH);
+        throw SkipTestException("");
+    }

    Net net;
    {
@ -450,6 +459,9 @@ TEST_P(Test_Torch_nets, FastNeuralStyle_accuracy)
    if (backend == DNN_BACKEND_INFERENCE_ENGINE_NN_BUILDER_2019 && target == DNN_TARGET_MYRIAD
            && getInferenceEngineVPUType() == CV_DNN_INFERENCE_ENGINE_VPU_TYPE_MYRIAD_X)
        applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_MYRIAD_X, CV_TEST_TAG_DNN_SKIP_IE_NN_BUILDER);
+    if (backend == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH && target == DNN_TARGET_MYRIAD
+            && getInferenceEngineVPUType() == CV_DNN_INFERENCE_ENGINE_VPU_TYPE_MYRIAD_X)
+        applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_MYRIAD_X, CV_TEST_TAG_DNN_SKIP_IE_NGRAPH);
 #endif

    checkBackend();
--- a/modules/photo/src/seamless_cloning_impl.cpp
+++ b/modules/photo/src/seamless_cloning_impl.cpp
@ -57,12 +57,8 @@ void Cloning::computeGradientX( const Mat &img, Mat &gx)
    }
    else if (img.channels() == 1)
    {
-        Mat tmp[3];
-        for(int chan = 0 ; chan < 3 ; ++chan)
-        {
-            filter2D(img, tmp[chan], CV_32F, kernel);
-        }
-        merge(tmp, 3, gx);
+        filter2D(img, gx, CV_32F, kernel);
+        cvtColor(gx, gx, COLOR_GRAY2BGR);
    }
 }

@ -78,12 +74,8 @@ void Cloning::computeGradientY( const Mat &img, Mat &gy)
    }
    else if (img.channels() == 1)
    {
-        Mat tmp[3];
-        for(int chan = 0 ; chan < 3 ; ++chan)
-        {
-            filter2D(img, tmp[chan], CV_32F, kernel);
-        }
-        merge(tmp, 3, gy);
+        filter2D(img, gy, CV_32F, kernel);
+        cvtColor(gy, gy, COLOR_GRAY2BGR);
    }
 }

--- a/modules/python/src2/cv2.cpp
+++ b/modules/python/src2/cv2.cpp
@ -13,11 +13,14 @@
 #   define Py_LIMITED_API 0x03030000
 #endif

-#include <math.h>
+#include <cmath>
 #include <Python.h>
+#include <limits>

 #if PY_MAJOR_VERSION < 3
 #undef CVPY_DYNAMIC_INIT
+#else
+#define CV_PYTHON_3 1
 #endif

 #if defined(_MSC_VER) && (_MSC_VER > 1800)
@ -40,16 +43,17 @@

 #include <type_traits>  // std::enable_if

+#define CV_HAS_CONVERSION_ERROR(x) (((x) == -1) && PyErr_Occurred())
+
+
 class ArgInfo
 {
 public:
-    const char * name;
+    const char* name;
    bool outputarg;
    // more fields may be added if necessary

-    ArgInfo(const char * name_, bool outputarg_)
-        : name(name_)
-        , outputarg(outputarg_) {}
+    ArgInfo(const char* name_, bool outputarg_) : name(name_), outputarg(outputarg_) {}

 private:
    ArgInfo(const ArgInfo&) = delete;
@ -162,6 +166,135 @@ catch (const cv::Exception &e) \

 using namespace cv;

+
+namespace {
+template<class T>
+NPY_TYPES asNumpyType()
+{
+    return NPY_OBJECT;
+}
+
+template<>
+NPY_TYPES asNumpyType<bool>()
+{
+    return NPY_BOOL;
+}
+
+#define CV_GENERATE_INTEGRAL_TYPE_NPY_CONVERSION(src, dst) \
+    template<>                                             \
+    NPY_TYPES asNumpyType<src>()                           \
+    {                                                      \
+        return NPY_##dst;                                  \
+    }                                                      \
+    template<>                                             \
+    NPY_TYPES asNumpyType<u##src>()                        \
+    {                                                      \
+        return NPY_U##dst;                                 \
+    }
+
+CV_GENERATE_INTEGRAL_TYPE_NPY_CONVERSION(int8_t, INT8);
+
+CV_GENERATE_INTEGRAL_TYPE_NPY_CONVERSION(int16_t, INT16);
+
+CV_GENERATE_INTEGRAL_TYPE_NPY_CONVERSION(int32_t, INT32);
+
+CV_GENERATE_INTEGRAL_TYPE_NPY_CONVERSION(int64_t, INT64);
+
+#undef CV_GENERATE_INTEGRAL_TYPE_NPY_CONVERSION
+
+template<>
+NPY_TYPES asNumpyType<float>()
+{
+    return NPY_FLOAT;
+}
+
+template<>
+NPY_TYPES asNumpyType<double>()
+{
+    return NPY_DOUBLE;
+}
+
+template <class T>
+PyArray_Descr* getNumpyTypeDescriptor()
+{
+    return PyArray_DescrFromType(asNumpyType<T>());
+}
+
+template <>
+PyArray_Descr* getNumpyTypeDescriptor<size_t>()
+{
+#if SIZE_MAX == ULONG_MAX
+    return PyArray_DescrFromType(NPY_ULONG);
+#elif SIZE_MAX == ULLONG_MAX
+    return PyArray_DescrFromType(NPY_ULONGLONG);
+#else
+    return PyArray_DescrFromType(NPY_UINT);
+#endif
+}
+
+template <class T, class U>
+bool isRepresentable(U value) {
+    return (std::numeric_limits<T>::min() <= value) && (value <= std::numeric_limits<T>::max());
+}
+
+template<class T>
+bool canBeSafelyCasted(PyObject* obj, PyArray_Descr* to)
+{
+    return PyArray_CanCastTo(PyArray_DescrFromScalar(obj), to) != 0;
+}
+
+
+template<>
+bool canBeSafelyCasted<size_t>(PyObject* obj, PyArray_Descr* to)
+{
+    PyArray_Descr* from = PyArray_DescrFromScalar(obj);
+    if (PyArray_CanCastTo(from, to))
+    {
+        return true;
+    }
+    else
+    {
+        // False negative scenarios:
+        // - Signed input is positive so it can be safely cast to unsigned output
+        // - Input has wider limits but value is representable within output limits
+        // - All the above
+        if (PyDataType_ISSIGNED(from))
+        {
+            int64_t input = 0;
+            PyArray_CastScalarToCtype(obj, &input, getNumpyTypeDescriptor<int64_t>());
+            return (input >= 0) && isRepresentable<size_t>(static_cast<uint64_t>(input));
+        }
+        else
+        {
+            uint64_t input = 0;
+            PyArray_CastScalarToCtype(obj, &input, getNumpyTypeDescriptor<uint64_t>());
+            return isRepresentable<size_t>(input);
+        }
+        return false;
+    }
+}
+
+
+template<class T>
+bool parseNumpyScalar(PyObject* obj, T& value)
+{
+    if (PyArray_CheckScalar(obj))
+    {
+        // According to the numpy documentation:
+        // There are 21 statically-defined PyArray_Descr objects for the built-in data-types
+        // So descriptor pointer is not owning.
+        PyArray_Descr* to = getNumpyTypeDescriptor<T>();
+        if (canBeSafelyCasted<T>(obj, to))
+        {
+            PyArray_CastScalarToCtype(obj, &value, to);
+            return true;
+        }
+    }
+    return false;
+}
+
+} // namespace
+
 typedef std::vector<uchar> vector_uchar;
 typedef std::vector<char> vector_char;
 typedef std::vector<int> vector_int;
@ -271,6 +404,11 @@ NumpyAllocator g_numpyAllocator;

 enum { ARG_NONE = 0, ARG_MAT = 1, ARG_SCALAR = 2 };

+static bool isBool(PyObject* obj) CV_NOEXCEPT
+{
+    return PyArray_IsScalar(obj, Bool) || PyBool_Check(obj);
+}
+
 // special case, when the converter needs full ArgInfo structure
 static bool pyopencv_to(PyObject* o, Mat& m, const ArgInfo& info)
 {
@ -581,14 +719,22 @@ PyObject* pyopencv_from(const bool& value)
 template<>
 bool pyopencv_to(PyObject* obj, bool& value, const ArgInfo& info)
 {
-    CV_UNUSED(info);
-    if(!obj || obj == Py_None)
+    if (!obj || obj == Py_None)
+    {
        return true;
-    int _val = PyObject_IsTrue(obj);
-    if(_val < 0)
-        return false;
-    value = _val > 0;
-    return true;
+    }
+    if (isBool(obj) || PyArray_IsIntegerScalar(obj))
+    {
+        npy_bool npy_value = NPY_FALSE;
+        const int ret_code = PyArray_BoolConverter(obj, &npy_value);
+        if (ret_code >= 0)
+        {
+            value = (npy_value == NPY_TRUE);
+            return true;
+        }
+    }
+    failmsg("Argument '%s' is not convertable to bool", info.name);
+    return false;
 }

 template<>
@ -600,11 +746,62 @@ PyObject* pyopencv_from(const size_t& value)
 template<>
 bool pyopencv_to(PyObject* obj, size_t& value, const ArgInfo& info)
 {
-    CV_UNUSED(info);
-    if(!obj || obj == Py_None)
+    if (!obj || obj == Py_None)
+    {
        return true;
-    value = (int)PyLong_AsUnsignedLong(obj);
-    return value != (size_t)-1 || !PyErr_Occurred();
+    }
+    if (isBool(obj))
+    {
+        failmsg("Argument '%s' must be integer type, not bool", info.name);
+        return false;
+    }
+    if (PyArray_IsIntegerScalar(obj))
+    {
+        if (PyLong_Check(obj))
+        {
+#if defined(CV_PYTHON_3)
+            value = PyLong_AsSize_t(obj);
+#else
+    #if ULONG_MAX == SIZE_MAX
+            value = PyLong_AsUnsignedLong(obj);
+    #else
+            value = PyLong_AsUnsignedLongLong(obj);
+    #endif
+#endif
+        }
+#if !defined(CV_PYTHON_3)
+        // Python 2.x has PyIntObject which is not a subtype of PyLongObject
+        // Overflow check here is unnecessary because object will be converted to long on the
+        // interpreter side
+        else if (PyInt_Check(obj))
+        {
+            const long res = PyInt_AsLong(obj);
+            if (res < 0) {
+                failmsg("Argument '%s' can not be safely parsed to 'size_t'", info.name);
+                return false;
+            }
+    #if ULONG_MAX == SIZE_MAX
+            value = PyInt_AsUnsignedLongMask(obj);
+    #else
+            value = PyInt_AsUnsignedLongLongMask(obj);
+    #endif
+        }
+#endif
+        else
+        {
+            const bool isParsed = parseNumpyScalar<size_t>(obj, value);
+            if (!isParsed) {
+                failmsg("Argument '%s' can not be safely parsed to 'size_t'", info.name);
+                return false;
+            }
+        }
+    }
+    else
+    {
+        failmsg("Argument '%s' is required to be an integer", info.name);
+        return false;
+    }
+    return !PyErr_Occurred();
 }

 template<>
@ -616,16 +813,25 @@ PyObject* pyopencv_from(const int& value)
 template<>
 bool pyopencv_to(PyObject* obj, int& value, const ArgInfo& info)
 {
-    CV_UNUSED(info);
-    if(!obj || obj == Py_None)
+    if (!obj || obj == Py_None)
+    {
        return true;
-    if(PyInt_Check(obj))
-        value = (int)PyInt_AsLong(obj);
-    else if(PyLong_Check(obj))
-        value = (int)PyLong_AsLong(obj);
+    }
+    if (isBool(obj))
+    {
+        failmsg("Argument '%s' must be integer, not bool", info.name);
+        return false;
+    }
+    if (PyArray_IsIntegerScalar(obj))
+    {
+        value = PyArray_PyIntAsInt(obj);
+    }
    else
+    {
+        failmsg("Argument '%s' is required to be an integer", info.name);
        return false;
-    return value != -1 || !PyErr_Occurred();
+    }
+    return !CV_HAS_CONVERSION_ERROR(value);
 }

 // There is conflict between "size_t" and "unsigned int".
@ -680,13 +886,39 @@ PyObject* pyopencv_from(const double& value)
 template<>
 bool pyopencv_to(PyObject* obj, double& value, const ArgInfo& info)
 {
-    CV_UNUSED(info);
-    if(!obj || obj == Py_None)
+    if (!obj || obj == Py_None)
+    {
        return true;
-    if(!!PyInt_CheckExact(obj))
-        value = (double)PyInt_AS_LONG(obj);
+    }
+    if (isBool(obj))
+    {
+        failmsg("Argument '%s' must be double, not bool", info.name);
+        return false;
+    }
+    if (PyArray_IsPythonNumber(obj))
+    {
+        if (PyLong_Check(obj))
+        {
+            value = PyLong_AsDouble(obj);
+        }
+        else
+        {
+            value = PyFloat_AsDouble(obj);
+        }
+    }
+    else if (PyArray_CheckScalar(obj))
+    {
+        const bool isParsed = parseNumpyScalar<double>(obj, value);
+        if (!isParsed) {
+            failmsg("Argument '%s' can not be safely parsed to 'double'", info.name);
+            return false;
+        }
+    }
    else
-        value = PyFloat_AsDouble(obj);
+    {
+        failmsg("Argument '%s' can not be treated as a double", info.name);
+        return false;
+    }
    return !PyErr_Occurred();
 }

@ -699,13 +931,41 @@ PyObject* pyopencv_from(const float& value)
 template<>
 bool pyopencv_to(PyObject* obj, float& value, const ArgInfo& info)
 {
-    CV_UNUSED(info);
-    if(!obj || obj == Py_None)
+    if (!obj || obj == Py_None)
+    {
        return true;
-    if(!!PyInt_CheckExact(obj))
-        value = (float)PyInt_AS_LONG(obj);
+    }
+    if (isBool(obj))
+    {
+        failmsg("Argument '%s' must be float, not bool", info.name);
+        return false;
+    }
+    if (PyArray_IsPythonNumber(obj))
+    {
+        if (PyLong_Check(obj))
+        {
+            double res = PyLong_AsDouble(obj);
+            value = static_cast<float>(res);
+        }
+        else
+        {
+            double res = PyFloat_AsDouble(obj);
+            value = static_cast<float>(res);
+        }
+    }
+    else if (PyArray_CheckScalar(obj))
+    {
+       const bool isParsed = parseNumpyScalar<float>(obj, value);
+        if (!isParsed) {
+            failmsg("Argument '%s' can not be safely parsed to 'float'", info.name);
+            return false;
+        }
+    }
    else
-        value = (float)PyFloat_AsDouble(obj);
+    {
+        failmsg("Argument '%s' can't be treated as a float", info.name);
+        return false;
+    }
    return !PyErr_Occurred();
 }

@ -1781,7 +2041,7 @@ static bool init_body(PyObject * m)
 #pragma GCC visibility push(default)
 #endif

-#if PY_MAJOR_VERSION >= 3
+#if defined(CV_PYTHON_3)
 // === Python 3

 static struct PyModuleDef cv2_moduledef =
--- a/modules/python/src2/gen2.py
+++ b/modules/python/src2/gen2.py
@ -4,12 +4,14 @@ from __future__ import print_function
 import hdr_parser, sys, re, os
 from string import Template
 from pprint import pprint
+from collections import namedtuple

 if sys.version_info[0] >= 3:
    from io import StringIO
 else:
    from cStringIO import StringIO

+
 forbidden_arg_types = ["void*"]

 ignored_arg_types = ["RNG*"]
@ -172,18 +174,48 @@ gen_template_prop_init = Template("""
 gen_template_rw_prop_init = Template("""
    {(char*)"${member}", (getter)pyopencv_${name}_get_${member}, (setter)pyopencv_${name}_set_${member}, (char*)"${member}", NULL},""")

+class FormatStrings:
+    string = 's'
+    unsigned_char = 'b'
+    short_int = 'h'
+    int = 'i'
+    unsigned_int = 'I'
+    long = 'l'
+    unsigned_long = 'k'
+    long_long = 'L'
+    unsigned_long_long = 'K'
+    size_t = 'n'
+    float = 'f'
+    double = 'd'
+    object = 'O'
+
+ArgTypeInfo = namedtuple('ArgTypeInfo',
+                        ['atype', 'format_str', 'default_value',
+                         'strict_conversion'])
+# strict_conversion is False by default
+ArgTypeInfo.__new__.__defaults__ = (False,)
+
 simple_argtype_mapping = {
-    "bool": ("bool", "b", "0"),
-    "size_t": ("size_t", "I", "0"),
-    "int": ("int", "i", "0"),
-    "float": ("float", "f", "0.f"),
-    "double": ("double", "d", "0"),
-    "c_string": ("char*", "s", '(char*)""')
+    "bool": ArgTypeInfo("bool", FormatStrings.unsigned_char, "0", True),
+    "size_t": ArgTypeInfo("size_t", FormatStrings.unsigned_long_long, "0", True),
+    "int": ArgTypeInfo("int", FormatStrings.int, "0", True),
+    "float": ArgTypeInfo("float", FormatStrings.float, "0.f", True),
+    "double": ArgTypeInfo("double", FormatStrings.double, "0", True),
+    "c_string": ArgTypeInfo("char*", FormatStrings.string, '(char*)""')
 }

+
 def normalize_class_name(name):
    return re.sub(r"^cv\.", "", name).replace(".", "_")

+
+def get_type_format_string(arg_type_info):
+    if arg_type_info.strict_conversion:
+        return FormatStrings.object
+    else:
+        return arg_type_info.format_str
+
+
 class ClassProp(object):
    def __init__(self, decl):
        self.tp = decl[0].replace("*", "_ptr")
@ -575,7 +607,7 @@ class FuncInfo(object):
                fullname = selfinfo.wname + "." + fullname

        all_code_variants = []
-        declno = -1
+
        for v in self.variants:
            code_decl = ""
            code_ret = ""
@ -583,7 +615,6 @@ class FuncInfo(object):

            code_args = "("
            all_cargs = []
-            parse_arglist = []

            if v.isphantom and ismethod and not self.is_static:
                code_args += "_self_"
@ -616,22 +647,22 @@ class FuncInfo(object):
                if any(tp in codegen.enums.keys() for tp in tp_candidates):
                    defval0 = "static_cast<%s>(%d)" % (a.tp, 0)

-                amapping = simple_argtype_mapping.get(tp, (tp, "O", defval0))
+                arg_type_info = simple_argtype_mapping.get(tp, ArgTypeInfo(tp, FormatStrings.object, defval0, True))
                parse_name = a.name
                if a.py_inputarg:
-                    if amapping[1] == "O":
+                    if arg_type_info.strict_conversion:
                        code_decl += "    PyObject* pyobj_%s = NULL;\n" % (a.name,)
                        parse_name = "pyobj_" + a.name
                        if a.tp == 'char':
-                            code_cvt_list.append("convert_to_char(pyobj_%s, &%s, %s)"% (a.name, a.name, a.crepr()))
+                            code_cvt_list.append("convert_to_char(pyobj_%s, &%s, %s)" % (a.name, a.name, a.crepr()))
                        else:
                            code_cvt_list.append("pyopencv_to(pyobj_%s, %s, %s)" % (a.name, a.name, a.crepr()))

-                all_cargs.append([amapping, parse_name])
+                all_cargs.append([arg_type_info, parse_name])

                defval = a.defval
                if not defval:
-                    defval = amapping[2]
+                    defval = arg_type_info.default_value
                else:
                    if "UMat" in tp:
                        if "Mat" in defval and "UMat" not in defval:
@ -640,14 +671,14 @@ class FuncInfo(object):
                        if "Mat" in defval and "GpuMat" not in defval:
                            defval = defval.replace("Mat", "cuda::GpuMat")
                # "tp arg = tp();" is equivalent to "tp arg;" in the case of complex types
-                if defval == tp + "()" and amapping[1] == "O":
+                if defval == tp + "()" and arg_type_info.format_str == FormatStrings.object:
                    defval = ""
                if a.outputarg and not a.inputarg:
                    defval = ""
                if defval:
-                    code_decl += "    %s %s=%s;\n" % (amapping[0], a.name, defval)
+                    code_decl += "    %s %s=%s;\n" % (arg_type_info.atype, a.name, defval)
                else:
-                    code_decl += "    %s %s;\n" % (amapping[0], a.name)
+                    code_decl += "    %s %s;\n" % (arg_type_info.atype, a.name)

                if not code_args.endswith("("):
                    code_args += ", "
@ -689,12 +720,16 @@ class FuncInfo(object):
            if v.rettype:
                tp = v.rettype
                tp1 = tp.replace("*", "_ptr")
-                amapping = simple_argtype_mapping.get(tp, (tp, "O", "0"))
-                all_cargs.append(amapping)
+                default_info = ArgTypeInfo(tp, FormatStrings.object, "0")
+                arg_type_info = simple_argtype_mapping.get(tp, default_info)
+                all_cargs.append(arg_type_info)

            if v.args and v.py_arglist:
                # form the format spec for PyArg_ParseTupleAndKeywords
-                fmtspec = "".join([all_cargs[argno][0][1] for aname, argno in v.py_arglist])
+                fmtspec = "".join([
+                    get_type_format_string(all_cargs[argno][0])
+                    for aname, argno in v.py_arglist
+                ])
                if v.py_noptargs > 0:
                    fmtspec = fmtspec[:-v.py_noptargs] + "|" + fmtspec[-v.py_noptargs:]
                fmtspec += ":" + fullname
@ -722,10 +757,6 @@ class FuncInfo(object):
            else:
                # there is more than 1 return parameter; form the tuple out of them
                fmtspec = "N"*len(v.py_outlist)
-                backcvt_arg_list = []
-                for aname, argno in v.py_outlist:
-                    amapping = all_cargs[argno][0]
-                    backcvt_arg_list.append("%s(%s)" % (amapping[2], aname))
                code_ret = "return Py_BuildValue(\"(%s)\", %s)" % \
                    (fmtspec, ", ".join(["pyopencv_from(" + aname + ")" for aname, argno in v.py_outlist]))

--- a/modules/python/test/test_misc.py
+++ b/modules/python/test/test_misc.py
@ -136,13 +136,12 @@ class Arguments(NewOpenCVTests):
                             msg=get_conversion_error_msg(convertible_false, 'bool: false', actual))

    def test_parse_to_bool_not_convertible(self):
-        for not_convertible in (1.2, np.float(2.3), 's', 'str', (1, 2), [1, 2], complex(1, 1), None,
+        for not_convertible in (1.2, np.float(2.3), 's', 'str', (1, 2), [1, 2], complex(1, 1),
                                complex(imag=2), complex(1.1), np.array([1, 0], dtype=np.bool)):
            with self.assertRaises((TypeError, OverflowError),
                                   msg=get_no_exception_msg(not_convertible)):
                _ = cv.utils.dumpBool(not_convertible)

-    @unittest.skip('Wrong conversion behavior')
    def test_parse_to_bool_convertible_extra(self):
        try_to_convert = partial(self._try_to_convert, cv.utils.dumpBool)
        _, max_size_t = get_limits(ctypes.c_size_t)
@ -151,7 +150,6 @@ class Arguments(NewOpenCVTests):
            self.assertEqual('bool: true', actual,
                             msg=get_conversion_error_msg(convertible_true, 'bool: true', actual))

-    @unittest.skip('Wrong conversion behavior')
    def test_parse_to_bool_not_convertible_extra(self):
        for not_convertible in (np.array([False]), np.array([True], dtype=np.bool)):
            with self.assertRaises((TypeError, OverflowError),
@ -172,12 +170,11 @@ class Arguments(NewOpenCVTests):
        min_int, max_int = get_limits(ctypes.c_int)
        for not_convertible in (1.2, np.float(4), float(3), np.double(45), 's', 'str',
                                np.array([1, 2]), (1,), [1, 2], min_int - 1, max_int + 1,
-                                complex(1, 1), complex(imag=2), complex(1.1), None):
+                                complex(1, 1), complex(imag=2), complex(1.1)):
            with self.assertRaises((TypeError, OverflowError, ValueError),
                                   msg=get_no_exception_msg(not_convertible)):
                _ = cv.utils.dumpInt(not_convertible)

-    @unittest.skip('Wrong conversion behavior')
    def test_parse_to_int_not_convertible_extra(self):
        for not_convertible in (np.bool_(True), True, False, np.float32(2.3),
                                np.array([3, ], dtype=int), np.array([-2, ], dtype=np.int32),
@ -189,7 +186,7 @@ class Arguments(NewOpenCVTests):
    def test_parse_to_size_t_convertible(self):
        try_to_convert = partial(self._try_to_convert, cv.utils.dumpSizeT)
        _, max_uint = get_limits(ctypes.c_uint)
-        for convertible in (2, True, False, max_uint, (12), np.uint8(34), np.int8(12), np.int16(23),
+        for convertible in (2, max_uint, (12), np.uint8(34), np.int8(12), np.int16(23),
                            np.int32(123), np.int64(344), np.uint64(3), np.uint16(2), np.uint32(5),
                            np.uint(44)):
            expected = 'size_t: {0:d}'.format(convertible).lower()
@ -198,14 +195,15 @@ class Arguments(NewOpenCVTests):
                             msg=get_conversion_error_msg(convertible, expected, actual))

    def test_parse_to_size_t_not_convertible(self):
-        for not_convertible in (1.2, np.float(4), float(3), np.double(45), 's', 'str',
-                                np.array([1, 2]), (1,), [1, 2], np.float64(6), complex(1, 1),
-                                complex(imag=2), complex(1.1), None):
+        min_long, _ = get_limits(ctypes.c_long)
+        for not_convertible in (1.2, True, False, np.bool_(True), np.float(4), float(3),
+                                np.double(45), 's', 'str', np.array([1, 2]), (1,), [1, 2],
+                                np.float64(6), complex(1, 1), complex(imag=2), complex(1.1),
+                                -1, min_long, np.int8(-35)):
            with self.assertRaises((TypeError, OverflowError),
                                   msg=get_no_exception_msg(not_convertible)):
                _ = cv.utils.dumpSizeT(not_convertible)

-    @unittest.skip('Wrong conversion behavior')
    def test_parse_to_size_t_convertible_extra(self):
        try_to_convert = partial(self._try_to_convert, cv.utils.dumpSizeT)
        _, max_size_t = get_limits(ctypes.c_size_t)
@ -215,7 +213,6 @@ class Arguments(NewOpenCVTests):
            self.assertEqual(expected, actual,
                             msg=get_conversion_error_msg(convertible, expected, actual))

-    @unittest.skip('Wrong conversion behavior')
    def test_parse_to_size_t_not_convertible_extra(self):
        for not_convertible in (np.bool_(True), True, False, np.array([123, ], dtype=np.uint8),):
            with self.assertRaises((TypeError, OverflowError),
@ -251,13 +248,12 @@ class Arguments(NewOpenCVTests):
                             msg=get_conversion_error_msg(inf, expected, actual))

    def test_parse_to_float_not_convertible(self):
-        for not_convertible in ('s', 'str', (12,), [1, 2], None, np.array([1, 2], dtype=np.float),
+        for not_convertible in ('s', 'str', (12,), [1, 2], np.array([1, 2], dtype=np.float),
                                np.array([1, 2], dtype=np.double), complex(1, 1), complex(imag=2),
                                complex(1.1)):
            with self.assertRaises((TypeError), msg=get_no_exception_msg(not_convertible)):
                _ = cv.utils.dumpFloat(not_convertible)

-    @unittest.skip('Wrong conversion behavior')
    def test_parse_to_float_not_convertible_extra(self):
        for not_convertible in (np.bool_(False), True, False, np.array([123, ], dtype=int),
                                np.array([1., ]), np.array([False]),
@ -289,13 +285,12 @@ class Arguments(NewOpenCVTests):
                          "Actual: {}".format(type(nan).__name__, actual))

    def test_parse_to_double_not_convertible(self):
-        for not_convertible in ('s', 'str', (12,), [1, 2], None, np.array([1, 2], dtype=np.float),
+        for not_convertible in ('s', 'str', (12,), [1, 2], np.array([1, 2], dtype=np.float),
                                np.array([1, 2], dtype=np.double), complex(1, 1), complex(imag=2),
                                complex(1.1)):
            with self.assertRaises((TypeError), msg=get_no_exception_msg(not_convertible)):
                _ = cv.utils.dumpDouble(not_convertible)

-    @unittest.skip('Wrong conversion behavior')
    def test_parse_to_double_not_convertible_extra(self):
        for not_convertible in (np.bool_(False), True, False, np.array([123, ], dtype=int),
                                np.array([1., ]), np.array([False]),
--- a/modules/python/test/test_norm.py
+++ b/modules/python/test/test_norm.py
@ -0,0 +1,173 @@
+#!/usr/bin/env python
+
+from itertools import product
+from functools import reduce
+
+import numpy as np
+import cv2 as cv
+
+from tests_common import NewOpenCVTests
+
+
+def norm_inf(x, y=None):
+    def norm(vec):
+        return np.linalg.norm(vec.flatten(), np.inf)
+
+    x = x.astype(np.float64)
+    return norm(x) if y is None else norm(x - y.astype(np.float64))
+
+
+def norm_l1(x, y=None):
+    def norm(vec):
+        return np.linalg.norm(vec.flatten(), 1)
+
+    x = x.astype(np.float64)
+    return norm(x) if y is None else norm(x - y.astype(np.float64))
+
+
+def norm_l2(x, y=None):
+    def norm(vec):
+        return np.linalg.norm(vec.flatten())
+
+    x = x.astype(np.float64)
+    return norm(x) if y is None else norm(x - y.astype(np.float64))
+
+
+def norm_l2sqr(x, y=None):
+    def norm(vec):
+        return np.square(vec).sum()
+
+    x = x.astype(np.float64)
+    return norm(x) if y is None else norm(x - y.astype(np.float64))
+
+
+def norm_hamming(x, y=None):
+    def norm(vec):
+        return sum(bin(i).count('1') for i in vec.flatten())
+
+    return norm(x) if y is None else norm(np.bitwise_xor(x, y))
+
+
+def norm_hamming2(x, y=None):
+    def norm(vec):
+        def element_norm(element):
+            binary_str = bin(element).split('b')[-1]
+            if len(binary_str) % 2 == 1:
+                binary_str = '0' + binary_str
+            gen = filter(lambda p: p != '00',
+                         (binary_str[i:i+2]
+                          for i in range(0, len(binary_str), 2)))
+            return sum(1 for _ in gen)
+
+        return sum(element_norm(element) for element in vec.flatten())
+
+    return norm(x) if y is None else norm(np.bitwise_xor(x, y))
+
+
+norm_type_under_test = {
+    cv.NORM_INF: norm_inf,
+    cv.NORM_L1: norm_l1,
+    cv.NORM_L2: norm_l2,
+    cv.NORM_L2SQR: norm_l2sqr,
+    cv.NORM_HAMMING: norm_hamming,
+    cv.NORM_HAMMING2: norm_hamming2
+}
+
+norm_name = {
+    cv.NORM_INF: 'inf',
+    cv.NORM_L1: 'L1',
+    cv.NORM_L2: 'L2',
+    cv.NORM_L2SQR: 'L2SQR',
+    cv.NORM_HAMMING: 'Hamming',
+    cv.NORM_HAMMING2: 'Hamming2'
+}
+
+
+def get_element_types(norm_type):
+    if norm_type in (cv.NORM_HAMMING, cv.NORM_HAMMING2):
+        return (np.uint8,)
+    else:
+        return (np.uint8, np.int8, np.uint16, np.int16, np.int32, np.float32,
+                np.float64)
+
+
+def generate_vector(shape, dtype):
+    if np.issubdtype(dtype, np.integer):
+        return np.random.randint(0, 100, shape).astype(dtype)
+    else:
+        return np.random.normal(10., 12.5, shape).astype(dtype)
+
+
+shapes = (1, 2, 3, 5, 7, 16, (1, 1), (2, 2), (3, 5), (1, 7))
+
+
+class norm_test(NewOpenCVTests):
+
+    def test_norm_for_one_array(self):
+        np.random.seed(123)
+        for norm_type, norm in norm_type_under_test.items():
+            element_types = get_element_types(norm_type)
+            for shape, element_type in product(shapes, element_types):
+                array = generate_vector(shape, element_type)
+                expected = norm(array)
+                actual = cv.norm(array, norm_type)
+                self.assertAlmostEqual(
+                    expected, actual, places=2,
+                    msg='Array {0} of {1} and norm {2}'.format(
+                        array, element_type.__name__, norm_name[norm_type]
+                    )
+                )
+
+    def test_norm_for_two_arrays(self):
+        np.random.seed(456)
+        for norm_type, norm in norm_type_under_test.items():
+            element_types = get_element_types(norm_type)
+            for shape, element_type in product(shapes, element_types):
+                first = generate_vector(shape, element_type)
+                second = generate_vector(shape, element_type)
+                expected = norm(first, second)
+                actual = cv.norm(first, second, norm_type)
+                self.assertAlmostEqual(
+                    expected, actual, places=2,
+                    msg='Arrays {0} {1} of type {2} and norm {3}'.format(
+                        first, second, element_type.__name__,
+                        norm_name[norm_type]
+                    )
+                )
+
+    def test_norm_fails_for_wrong_type(self):
+        for norm_type in (cv.NORM_HAMMING, cv.NORM_HAMMING2):
+            with self.assertRaises(Exception,
+                                   msg='Type is not checked {0}'.format(
+                                       norm_name[norm_type]
+                                   )):
+                cv.norm(np.array([1, 2], dtype=np.int32), norm_type)
+
+    def test_norm_fails_for_array_and_scalar(self):
+        for norm_type in norm_type_under_test:
+            with self.assertRaises(Exception,
+                                   msg='Exception is not thrown for {0}'.format(
+                                       norm_name[norm_type]
+                                   )):
+                cv.norm(np.array([1, 2], dtype=np.uint8), 123, norm_type)
+
+    def test_norm_fails_for_scalar_and_array(self):
+        for norm_type in norm_type_under_test:
+            with self.assertRaises(Exception,
+                                   msg='Exception is not thrown for {0}'.format(
+                                       norm_name[norm_type]
+                                   )):
+                cv.norm(4, np.array([1, 2], dtype=np.uint8), norm_type)
+
+    def test_norm_fails_for_array_and_norm_type_as_scalar(self):
+        for norm_type in norm_type_under_test:
+            with self.assertRaises(Exception,
+                                   msg='Exception is not thrown for {0}'.format(
+                                       norm_name[norm_type]
+                                   )):
+                cv.norm(np.array([3, 4, 5], dtype=np.uint8),
+                        norm_type, normType=norm_type)
+
+
+if __name__ == '__main__':
+    NewOpenCVTests.bootstrap()
--- a/samples/python/digits.py
+++ b/samples/python/digits.py
@ -70,13 +70,8 @@ def deskew(img):
    img = cv.warpAffine(img, M, (SZ, SZ), flags=cv.WARP_INVERSE_MAP | cv.INTER_LINEAR)
    return img

-class StatModel(object):
-    def load(self, fn):
-        self.model.load(fn)  # Known bug: https://github.com/opencv/opencv/issues/4969
-    def save(self, fn):
-        self.model.save(fn)

-class KNearest(StatModel):
+class KNearest(object):
    def __init__(self, k = 3):
        self.k = k
        self.model = cv.ml.KNearest_create()
@ -88,7 +83,13 @@ class KNearest(StatModel):
        _retval, results, _neigh_resp, _dists = self.model.findNearest(samples, self.k)
        return results.ravel()

-class SVM(StatModel):
+    # Replace the wrapped model with the object returned by
+    # cv.ml.KNearest_load(fn); the result is assigned rather than loaded
+    # into the existing self.model.
+    def load(self, fn):
+        self.model = cv.ml.KNearest_load(fn)
+
+    # Forward to the wrapped model's save(), passing fn through unchanged.
+    def save(self, fn):
+        self.model.save(fn)
+
+class SVM(object):
    def __init__(self, C = 1, gamma = 0.5):
        self.model = cv.ml.SVM_create()
        self.model.setGamma(gamma)
@ -102,6 +103,11 @@ class SVM(StatModel):
    def predict(self, samples):
        return self.model.predict(samples)[1].ravel()

+    # Replace the wrapped model with the object returned by
+    # cv.ml.SVM_load(fn); the result is assigned rather than loaded into
+    # the existing self.model.
+    def load(self, fn):
+        self.model = cv.ml.SVM_load(fn)
+
+    # Forward to the wrapped model's save(), passing fn through unchanged.
+    def save(self, fn):
+        self.model.save(fn)

 def evaluate_model(model, digits, samples, labels):
    resp = model.predict(samples)
--- a/samples/python/digits_video.py
+++ b/samples/python/digits_video.py
@ -1,4 +1,12 @@
 #!/usr/bin/env python
+'''
+Digit recognition from video.
+
+Run digits.py first to train and save the SVM model.
+
+Usage:
+  digits_video.py [{camera_id|video_file}]
+'''

 # Python 2/3 compatibility
 from __future__ import print_function
@ -28,11 +36,7 @@ def main():
        print('"%s" not found, run digits.py first' % classifier_fn)
        return

-    if True:
-        model = cv.ml.SVM_load(classifier_fn)
-    else:
-        model = cv.ml.SVM_create()
-        model.load_(classifier_fn) #Known bug: https://github.com/opencv/opencv/issues/4969
+    model = cv.ml.SVM_load(classifier_fn)

    while True:
        _ret, frame = cap.read()