From f3eef792eb6ea5c54c8ecb2cbcd30d514f62ec4f Mon Sep 17 00:00:00 2001 From: Dmitry Kurtaev Date: Sun, 16 Feb 2020 22:12:14 +0300 Subject: [PATCH] Enable Mask R-CNN with Inference Engine. Full coverage with nGraph --- modules/dnn/src/dnn.cpp | 32 +++++++++++---- modules/dnn/src/ie_ngraph.cpp | 9 ++-- .../dnn/src/layers/crop_and_resize_layer.cpp | 41 +++++++++++++++++++ modules/dnn/src/layers/scale_layer.cpp | 39 ++++++++++-------- modules/dnn/test/test_tf_importer.cpp | 31 ++++++++++++-- 5 files changed, 119 insertions(+), 33 deletions(-) diff --git a/modules/dnn/src/dnn.cpp b/modules/dnn/src/dnn.cpp index be3ce435b3..9e2d255bb0 100644 --- a/modules/dnn/src/dnn.cpp +++ b/modules/dnn/src/dnn.cpp @@ -1294,13 +1294,15 @@ struct Net::Impl #endif clear(); + this->blobsToKeep = blobsToKeep_; + allocateLayers(blobsToKeep_); MapIdToLayerData::iterator it = layers.find(0); CV_Assert(it != layers.end()); it->second.skip = netInputLayer->skip; - initBackend(); + initBackend(blobsToKeep_); if (!netWasAllocated ) { @@ -1313,7 +1315,6 @@ struct Net::Impl } netWasAllocated = true; - this->blobsToKeep = blobsToKeep_; if (DNN_NETWORK_DUMP > 0) { @@ -1440,7 +1441,7 @@ struct Net::Impl ldOut.consumers.push_back(LayerPin(inLayerId, outNum)); } - void initBackend() + void initBackend(const std::vector& blobsToKeep_) { CV_TRACE_FUNCTION(); if (preferableBackend == DNN_BACKEND_OPENCV) @@ -1450,7 +1451,7 @@ struct Net::Impl else if (preferableBackend == DNN_BACKEND_INFERENCE_ENGINE_NN_BUILDER_2019) { #ifdef HAVE_INF_ENGINE - initInfEngineBackend(); + initInfEngineBackend(blobsToKeep_); #else CV_Assert(false && "This OpenCV version is built without Inference Engine API support"); #endif @@ -1458,7 +1459,7 @@ struct Net::Impl else if (preferableBackend == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH) { #ifdef HAVE_DNN_NGRAPH - initNgraphBackend(); + initNgraphBackend(blobsToKeep_); #else CV_Error(Error::StsNotImplemented, "This OpenCV version is built without support of Inference Engine + nGraph"); #endif @@ -1560,7 +1561,7 @@ struct Net::Impl } } - void initInfEngineBackend() + void initInfEngineBackend(const std::vector& blobsToKeep_) { CV_TRACE_FUNCTION(); CV_Assert_N(preferableBackend == DNN_BACKEND_INFERENCE_ENGINE_NN_BUILDER_2019, haveInfEngine()); @@ -1750,6 +1751,15 @@ struct Net::Impl CV_Assert(!ieNode.empty()); ieNode->net = net; + for (const auto& pin : blobsToKeep_) + { + if (pin.lid == ld.id) + { + ieNode->net->addOutput(ieNode->layer.getName()); + break; + } + } + // Convert weights in FP16 for specific targets. if ((preferableTarget == DNN_TARGET_OPENCL_FP16 || preferableTarget == DNN_TARGET_MYRIAD || @@ -1856,7 +1866,7 @@ struct Net::Impl } } - void initNgraphBackend() + void initNgraphBackend(const std::vector& blobsToKeep_) { CV_TRACE_FUNCTION(); CV_Assert_N(preferableBackend == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH, haveInfEngine()); @@ -2045,6 +2055,14 @@ struct Net::Impl // TF EAST_text_detection ieNode->net->setUnconnectedNodes(ieNode); } + for (const auto& pin : blobsToKeep_) + { + if (pin.lid == ld.id) + { + ieNode->net->addOutput(ieNode->node->get_friendly_name()); + break; + } + } ieNode->net->setNodePtr(&ieNode->node); net->addBlobs(ld.inputBlobsWrappers); diff --git a/modules/dnn/src/ie_ngraph.cpp b/modules/dnn/src/ie_ngraph.cpp index 2a00880c42..d7df547412 100644 --- a/modules/dnn/src/ie_ngraph.cpp +++ b/modules/dnn/src/ie_ngraph.cpp @@ -231,11 +231,10 @@ void InfEngineNgraphNet::init(Target targetId) } } } - } else { - for (const auto& name : requestedOutputs) - { - cnn.addOutput(name); - } + } + for (const auto& name : requestedOutputs) + { + cnn.addOutput(name); } for (const auto& it : cnn.getInputsInfo()) diff --git a/modules/dnn/src/layers/crop_and_resize_layer.cpp b/modules/dnn/src/layers/crop_and_resize_layer.cpp index de87107d43..ba11c33508 100644 --- a/modules/dnn/src/layers/crop_and_resize_layer.cpp +++ b/modules/dnn/src/layers/crop_and_resize_layer.cpp @@ -5,6 +5,7 @@ // Copyright (C) 2018, Intel Corporation, all rights reserved. // Third party copyrights are property of their respective owners. #include "../precomp.hpp" +#include "../ie_ngraph.hpp" #include "layers_common.hpp" namespace cv { namespace dnn { @@ -20,6 +21,11 @@ public: outHeight = params.get("height"); } + virtual bool supportBackend(int backendId) CV_OVERRIDE + { + return backendId == DNN_BACKEND_OPENCV || backendId == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH; + } + bool getMemoryShapes(const std::vector &inputs, const int requiredOutputs, std::vector &outputs, @@ -111,6 +117,41 @@ public: } } +#ifdef HAVE_DNN_NGRAPH + virtual Ptr initNgraph(const std::vector >& inputs, + const std::vector >& nodes) CV_OVERRIDE + { + // Slice second input: from 1x1xNx7 to 1x1xNx5 + auto input = nodes[0].dynamicCast()->node; + auto rois = nodes[1].dynamicCast()->node; + + std::vector dims = rois->get_shape(), offsets(4, 0); + offsets[3] = 2; + dims[3] = 7; + + auto lower_bounds = std::make_shared(ngraph::element::i64, + ngraph::Shape{offsets.size()}, offsets.data()); + auto upper_bounds = std::make_shared(ngraph::element::i64, + ngraph::Shape{dims.size()}, dims.data()); + auto strides = std::make_shared(ngraph::element::i64, + ngraph::Shape{dims.size()}, std::vector((int64_t)dims.size(), 1)); + auto slice = std::make_shared(rois, + lower_bounds, upper_bounds, strides, std::vector{}, std::vector{}); + + // Reshape rois from 4D to 2D + std::vector shapeData = {dims[2], 5}; + auto shape = std::make_shared(ngraph::element::i64, ngraph::Shape{2}, shapeData.data()); + auto reshape = std::make_shared(slice, shape, true); + + auto roiPooling = + std::make_shared(input, reshape, + ngraph::Shape{(size_t)outHeight, (size_t)outWidth}, + 1.0f, "bilinear"); + + return Ptr(new InfEngineNgraphNode(roiPooling)); + } +#endif // HAVE_DNN_NGRAPH + private: int outWidth, outHeight; }; diff --git a/modules/dnn/src/layers/scale_layer.cpp b/modules/dnn/src/layers/scale_layer.cpp index 620b65741c..ea2d117901 100644 --- a/modules/dnn/src/layers/scale_layer.cpp +++ b/modules/dnn/src/layers/scale_layer.cpp @@ -53,7 +53,8 @@ public: virtual bool supportBackend(int backendId) CV_OVERRIDE { return backendId == DNN_BACKEND_OPENCV || backendId == DNN_BACKEND_HALIDE || - ((backendId == DNN_BACKEND_INFERENCE_ENGINE_NN_BUILDER_2019 || backendId == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH) && axis == 1); + (backendId == DNN_BACKEND_INFERENCE_ENGINE_NN_BUILDER_2019 && axis == 1) || + (backendId == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH && axis > 0); } void forward(InputArrayOfArrays inputs_arr, OutputArrayOfArrays outputs_arr, OutputArrayOfArrays internals_arr) CV_OVERRIDE @@ -233,22 +234,26 @@ public: auto ieInpNode = nodes[0].dynamicCast()->node; std::vector shape(ieInpNode->get_shape().size(), 1); - shape[1] = numChannels; - auto weight = hasWeights ? - std::make_shared(ngraph::element::f32, - ngraph::Shape(shape), blobs[0].data) : - std::make_shared(ngraph::element::f32, - ngraph::Shape(shape), std::vector(numChannels, 1).data()); - - auto bias = hasBias ? - std::make_shared(ngraph::element::f32, - ngraph::Shape(shape), blobs.back().data) : - std::make_shared(ngraph::element::f32, - ngraph::Shape(shape), std::vector(numChannels, 0).data()); - - auto scale_node = std::make_shared(ieInpNode, weight, ngraph::op::AutoBroadcastType::NUMPY); - auto scale_shift = std::make_shared(scale_node, bias, ngraph::op::AutoBroadcastType::NUMPY); - return Ptr(new InfEngineNgraphNode(scale_shift)); + int cAxis = clamp(axis, shape.size()); + shape[cAxis] = numChannels; + + auto node = ieInpNode; + if (hasWeights) + { + auto weight = std::make_shared(ngraph::element::f32, + ngraph::Shape(shape), blobs[0].data); + node = std::make_shared(node, weight, ngraph::op::AutoBroadcastType::NUMPY); + } + if (hasBias || !hasWeights) + { + auto bias = hasBias ? + std::make_shared(ngraph::element::f32, + ngraph::Shape(shape), blobs.back().data) : + std::make_shared(ngraph::element::f32, + ngraph::Shape(shape), std::vector(numChannels, 0).data()); + node = std::make_shared(node, bias, ngraph::op::AutoBroadcastType::NUMPY); + } + return Ptr(new InfEngineNgraphNode(node)); } #endif // HAVE_DNN_NGRAPH diff --git a/modules/dnn/test/test_tf_importer.cpp b/modules/dnn/test/test_tf_importer.cpp index 0abb1a4b1c..ecbf776184 100644 --- a/modules/dnn/test/test_tf_importer.cpp +++ b/modules/dnn/test/test_tf_importer.cpp @@ -914,8 +914,16 @@ TEST(Test_TensorFlow, two_inputs) normAssert(out, firstInput + secondInput); } -TEST(Test_TensorFlow, Mask_RCNN) +TEST_P(Test_TensorFlow_nets, Mask_RCNN) { + static const double kMaskThreshold = 0.5; + + if (backend == DNN_BACKEND_INFERENCE_ENGINE_NN_BUILDER_2019 && target == DNN_TARGET_MYRIAD) + applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_MYRIAD, CV_TEST_TAG_DNN_SKIP_IE_NN_BUILDER); + + if (target == DNN_TARGET_MYRIAD && getInferenceEngineVPUType() == CV_DNN_INFERENCE_ENGINE_VPU_TYPE_MYRIAD_X) + applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_MYRIAD_X); + applyTestTag(CV_TEST_TAG_MEMORY_1GB, CV_TEST_TAG_DEBUG_VERYLONG); Mat img = imread(findDataFile("dnn/street.png")); std::string proto = findDataFile("dnn/mask_rcnn_inception_v2_coco_2018_01_28.pbtxt"); @@ -926,7 +934,8 @@ TEST(Test_TensorFlow, Mask_RCNN) Mat refMasks = blobFromNPY(path("mask_rcnn_inception_v2_coco_2018_01_28.detection_masks.npy")); Mat blob = blobFromImage(img, 1.0f, Size(800, 800), Scalar(), true, false); - net.setPreferableBackend(DNN_BACKEND_OPENCV); + net.setPreferableBackend(backend); + net.setPreferableTarget(target); net.setInput(blob); @@ -940,7 +949,10 @@ TEST(Test_TensorFlow, Mask_RCNN) Mat outDetections = outs[0]; Mat outMasks = outs[1]; - normAssertDetections(refDetections, outDetections, "", /*threshold for zero confidence*/1e-5); + + double scoreDiff = (target == DNN_TARGET_OPENCL_FP16 || target == DNN_TARGET_MYRIAD) ? 0.019 : 2e-5; + double iouDiff = (target == DNN_TARGET_OPENCL_FP16 || target == DNN_TARGET_MYRIAD) ? 0.018 : default_lInf; + normAssertDetections(refDetections, outDetections, "", /*threshold for zero confidence*/1e-5, scoreDiff, iouDiff); // Output size of masks is NxCxHxW where // N - number of detected boxes @@ -964,7 +976,18 @@ TEST(Test_TensorFlow, Mask_RCNN) outMasks(srcRanges).copyTo(masks(dstRanges)); } cv::Range topRefMasks[] = {Range::all(), Range(0, numDetections), Range::all(), Range::all()}; - normAssert(masks, refMasks(&topRefMasks[0])); + refMasks = refMasks(&topRefMasks[0]); + + // make binary masks + cv::threshold(masks.reshape(1, 1), masks, kMaskThreshold, 1, THRESH_BINARY); + cv::threshold(refMasks.reshape(1, 1), refMasks, kMaskThreshold, 1, THRESH_BINARY); + + double inter = cv::countNonZero(masks & refMasks); + double area = cv::countNonZero(masks | refMasks); + EXPECT_GE(inter / area, 0.99); + + if (backend == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH) + expectNoFallbacks(net); } }