From 9fef09fe89090117bbe56905796dd8d05b482229 Mon Sep 17 00:00:00 2001 From: Josh Bradley Date: Sat, 23 May 2020 14:25:01 -0400 Subject: [PATCH 01/13] Merge pull request #17320 from jgbradley1:add-eigen-tensor-conversions * add eigen tensor conversion functions * add eigen tensor conversion tests * add support for column major order * update eigen tensor tests * fix coding style and add conditional compilation * fix conditional compilation checks * remove whitespace * rearrange functions for easier reading * reformat function documentation and add tensormap unit test * cleanup documentation of unit test * remove condition duplication * check Eigen major version, not minor version * restrict to Eigen v3.3.0+ * add documentation note and add type checking to cv2eigen_tensormap() --- modules/core/include/opencv2/core/eigen.hpp | 106 ++++++++++++++++++++ modules/core/test/test_mat.cpp | 80 +++++++++++++++ 2 files changed, 186 insertions(+) diff --git a/modules/core/include/opencv2/core/eigen.hpp b/modules/core/include/opencv2/core/eigen.hpp index 741648edb8..8c250efe4c 100644 --- a/modules/core/include/opencv2/core/eigen.hpp +++ b/modules/core/include/opencv2/core/eigen.hpp @@ -47,6 +47,11 @@ #include "opencv2/core.hpp" +#if EIGEN_WORLD_VERSION == 3 && EIGEN_MAJOR_VERSION >= 3 +#include +#define OPENCV_EIGEN_TENSOR_SUPPORT +#endif // EIGEN_WORLD_VERSION == 3 && EIGEN_MAJOR_VERSION >= 3 + #if defined _MSC_VER && _MSC_VER >= 1200 #pragma warning( disable: 4714 ) //__forceinline is not inlined #pragma warning( disable: 4127 ) //conditional expression is constant @@ -59,6 +64,107 @@ namespace cv //! @addtogroup core_eigen //! @{ +#ifdef OPENCV_EIGEN_TENSOR_SUPPORT +/** @brief Converts an Eigen::Tensor to a cv::Mat. 
+ +The method converts an Eigen::Tensor with shape (H x W x C) to a cv::Mat where: + H = number of rows + W = number of columns + C = number of channels + +Usage: +\code +Eigen::Tensor a_tensor(...); +// populate tensor with values +Mat a_mat; +eigen2cv(a_tensor, a_mat); +\endcode +*/ +template static inline +void eigen2cv( const Eigen::Tensor<_Tp, 3, _layout> &src, OutputArray dst ) +{ + if( !(_layout & Eigen::RowMajorBit) ) + { + const std::array shuffle{2, 1, 0}; + Eigen::Tensor<_Tp, 3, !_layout> row_major_tensor = src.swap_layout().shuffle(shuffle); + Mat _src(src.dimension(0), src.dimension(1), CV_MAKETYPE(DataType<_Tp>::type, src.dimension(2)), row_major_tensor.data()); + _src.copyTo(dst); + } + else + { + Mat _src(src.dimension(0), src.dimension(1), CV_MAKETYPE(DataType<_Tp>::type, src.dimension(2)), (void *)src.data()); + _src.copyTo(dst); + } +} + +/** @brief Converts a cv::Mat to an Eigen::Tensor. + +The method converts a cv::Mat to an Eigen Tensor with shape (H x W x C) where: + H = number of rows + W = number of columns + C = number of channels + +Usage: +\code +Mat a_mat(...); +// populate Mat with values +Eigen::Tensor a_tensor(...); +cv2eigen(a_mat, a_tensor); +\endcode +*/ +template static inline +void cv2eigen( const Mat &src, Eigen::Tensor<_Tp, 3, _layout> &dst ) +{ + if( !(_layout & Eigen::RowMajorBit) ) + { + Eigen::Tensor<_Tp, 3, !_layout> row_major_tensor(src.rows, src.cols, src.channels()); + Mat _dst(src.rows, src.cols, CV_MAKETYPE(DataType<_Tp>::type, src.channels()), row_major_tensor.data()); + if (src.type() == _dst.type()) + src.copyTo(_dst); + else + src.convertTo(_dst, _dst.type()); + const std::array shuffle{2, 1, 0}; + dst = row_major_tensor.swap_layout().shuffle(shuffle); + } + else + { + dst.resize(src.rows, src.cols, src.channels()); + Mat _dst(src.rows, src.cols, CV_MAKETYPE(DataType<_Tp>::type, src.channels()), dst.data()); + if (src.type() == _dst.type()) + src.copyTo(_dst); + else + src.convertTo(_dst, _dst.type()); + } +} + 
+/** @brief Maps cv::Mat data to an Eigen::TensorMap. + +The method wraps an existing Mat data array with an Eigen TensorMap of shape (H x W x C) where: + H = number of rows + W = number of columns + C = number of channels + +Explicit instantiation of the return type is required. + +@note Caller should be aware of the lifetime of the cv::Mat instance and take appropriate safety measures. +The cv::Mat instance will retain ownership of the data and the Eigen::TensorMap will lose access when the cv::Mat data is deallocated. + +The example below initializes a cv::Mat and produces an Eigen::TensorMap: +\code +float arr[] = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11}; +Mat a_mat(2, 2, CV_32FC3, arr); +Eigen::TensorMap> a_tensormap = cv2eigen_tensormap(a_mat); +\endcode +*/ +template static inline +Eigen::TensorMap> cv2eigen_tensormap(const cv::InputArray &src) +{ + Mat mat = src.getMat(); + CV_CheckTypeEQ(mat.type(), CV_MAKETYPE(traits::Type<_Tp>::value, mat.channels()), ""); + return Eigen::TensorMap>((_Tp *)mat.data, mat.rows, mat.cols, mat.channels()); +} +#endif // OPENCV_EIGEN_TENSOR_SUPPORT + template static inline void eigen2cv( const Eigen::Matrix<_Tp, _rows, _cols, _options, _maxRows, _maxCols>& src, OutputArray dst ) { diff --git a/modules/core/test/test_mat.cpp b/modules/core/test/test_mat.cpp index 58eafd0748..43e0078ec8 100644 --- a/modules/core/test/test_mat.cpp +++ b/modules/core/test/test_mat.cpp @@ -2084,6 +2084,86 @@ TEST(Core_Eigen, eigen2cv_check_Mat_type) } #endif // HAVE_EIGEN +#ifdef OPENCV_EIGEN_TENSOR_SUPPORT +TEST(Core_Eigen, cv2eigen_check_tensor_conversion) +{ + Mat A(2, 3, CV_32FC3); + float value = 0; + for(int row=0; row(row,col)[ch] = value++; + + Eigen::Tensor row_tensor; + cv2eigen(A, row_tensor); + + float* mat_ptr = (float*)A.data; + float* tensor_ptr = row_tensor.data(); + for (int i=0; i< row_tensor.size(); i++) + ASSERT_FLOAT_EQ(mat_ptr[i], tensor_ptr[i]); + + Eigen::Tensor col_tensor; + cv2eigen(A, col_tensor); + value = 0; + for(int 
row=0; row row_tensor(2,3,3); + Eigen::Tensor col_tensor(2,3,3); + float value = 0; + for(int row=0; row(row,col)[ch]); +} +#endif // OPENCV_EIGEN_TENSOR_SUPPORT + +#ifdef OPENCV_EIGEN_TENSOR_SUPPORT +TEST(Core_Eigen, cv2eigen_tensormap_check_tensormap_access) +{ + float arr[] = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11}; + Mat a_mat(2, 2, CV_32FC3, arr); + Eigen::TensorMap> a_tensor = cv2eigen_tensormap(a_mat); + + for(int i=0; i(i,j)[ch], a_tensor(i,j,ch)); + ASSERT_EQ(&a_mat.at(i,j)[ch], &a_tensor(i,j,ch)); + } + } + } +} +#endif // OPENCV_EIGEN_TENSOR_SUPPORT + TEST(Mat, regression_12943) // memory usage: ~4.5 Gb { applyTestTag(CV_TEST_TAG_MEMORY_6GB); From 5393185add52af1cdd929f203084364e3ae2f66e Mon Sep 17 00:00:00 2001 From: "Michal W. Tarnowski" Date: Mon, 25 May 2020 00:46:41 +0200 Subject: [PATCH 02/13] Merge pull request #17360 from mwtarnowski:fix-documentation-imgproc-blur * fix documentation for cv::blur * correct the position of ksize parameter --- modules/imgproc/include/opencv2/imgproc.hpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/modules/imgproc/include/opencv2/imgproc.hpp b/modules/imgproc/include/opencv2/imgproc.hpp index 21bb012dcc..f6b5339194 100644 --- a/modules/imgproc/include/opencv2/imgproc.hpp +++ b/modules/imgproc/include/opencv2/imgproc.hpp @@ -1507,7 +1507,7 @@ The function smooths an image using the kernel: \f[\texttt{K} = \frac{1}{\texttt{ksize.width*ksize.height}} \begin{bmatrix} 1 & 1 & 1 & \cdots & 1 & 1 \\ 1 & 1 & 1 & \cdots & 1 & 1 \\ \hdotsfor{6} \\ 1 & 1 & 1 & \cdots & 1 & 1 \\ \end{bmatrix}\f] -The call `blur(src, dst, ksize, anchor, borderType)` is equivalent to `boxFilter(src, dst, src.type(), +The call `blur(src, dst, ksize, anchor, borderType)` is equivalent to `boxFilter(src, dst, src.type(), ksize, anchor, true, borderType)`. 
@param src input image; it can have any number of channels, which are processed independently, but From d5e8792f5502e451951b2ab5209838d753a7c369 Mon Sep 17 00:00:00 2001 From: Liubov Batanina Date: Mon, 25 May 2020 15:34:11 +0300 Subject: [PATCH 03/13] Merge pull request #17332 from l-bat:fix_nms Fixed NMSBoxes bug * Added NMS for each class * Updated cpp sample * Fixed errors * Refactoring * Added NMS for IE --- samples/dnn/object_detection.cpp | 56 +++++++++++++++++++++++++++----- samples/dnn/object_detection.py | 23 ++++++++++--- 2 files changed, 65 insertions(+), 14 deletions(-) diff --git a/samples/dnn/object_detection.cpp b/samples/dnn/object_detection.cpp index 83ed10db5d..c7e42430fe 100644 --- a/samples/dnn/object_detection.cpp +++ b/samples/dnn/object_detection.cpp @@ -45,7 +45,7 @@ std::vector classes; inline void preprocess(const Mat& frame, Net& net, Size inpSize, float scale, const Scalar& mean, bool swapRB); -void postprocess(Mat& frame, const std::vector& out, Net& net); +void postprocess(Mat& frame, const std::vector& out, Net& net, int backend); void drawPred(int classId, float conf, int left, int top, int right, int bottom, Mat& frame); @@ -148,7 +148,8 @@ int main(int argc, char** argv) // Load a model. Net net = readNet(modelPath, configPath, parser.get("framework")); - net.setPreferableBackend(parser.get("backend")); + int backend = parser.get("backend"); + net.setPreferableBackend(backend); net.setPreferableTarget(parser.get("target")); std::vector outNames = net.getUnconnectedOutLayersNames(); @@ -245,7 +246,7 @@ int main(int argc, char** argv) std::vector outs = predictionsQueue.get(); Mat frame = processedFramesQueue.get(); - postprocess(frame, outs, net); + postprocess(frame, outs, net, backend); if (predictionsQueue.counter > 1) { @@ -285,7 +286,7 @@ int main(int argc, char** argv) std::vector outs; net.forward(outs, outNames); - postprocess(frame, outs, net); + postprocess(frame, outs, net, backend); // Put efficiency information. 
std::vector layersTimes; @@ -319,7 +320,7 @@ inline void preprocess(const Mat& frame, Net& net, Size inpSize, float scale, } } -void postprocess(Mat& frame, const std::vector& outs, Net& net) +void postprocess(Mat& frame, const std::vector& outs, Net& net, int backend) { static std::vector outLayers = net.getUnconnectedOutLayers(); static std::string outLayerType = net.getLayer(outLayers[0])->type; @@ -396,11 +397,48 @@ void postprocess(Mat& frame, const std::vector& outs, Net& net) else CV_Error(Error::StsNotImplemented, "Unknown output layer type: " + outLayerType); - std::vector indices; - NMSBoxes(boxes, confidences, confThreshold, nmsThreshold, indices); - for (size_t i = 0; i < indices.size(); ++i) + // NMS is used inside Region layer only on DNN_BACKEND_OPENCV for another backends we need NMS in sample + // or NMS is required if number of outputs > 1 + if (outLayers.size() > 1 || (outLayerType == "Region" && backend != DNN_BACKEND_OPENCV)) + { + std::map > class2indices; + for (size_t i = 0; i < classIds.size(); i++) + { + if (confidences[i] >= confThreshold) + { + class2indices[classIds[i]].push_back(i); + } + } + std::vector nmsBoxes; + std::vector nmsConfidences; + std::vector nmsClassIds; + for (std::map >::iterator it = class2indices.begin(); it != class2indices.end(); ++it) + { + std::vector localBoxes; + std::vector localConfidences; + std::vector classIndices = it->second; + for (size_t i = 0; i < classIndices.size(); i++) + { + localBoxes.push_back(boxes[classIndices[i]]); + localConfidences.push_back(confidences[classIndices[i]]); + } + std::vector nmsIndices; + NMSBoxes(localBoxes, localConfidences, confThreshold, nmsThreshold, nmsIndices); + for (size_t i = 0; i < nmsIndices.size(); i++) + { + size_t idx = nmsIndices[i]; + nmsBoxes.push_back(localBoxes[idx]); + nmsConfidences.push_back(localConfidences[idx]); + nmsClassIds.push_back(it->first); + } + } + boxes = nmsBoxes; + classIds = nmsClassIds; + confidences = nmsConfidences; + } + + for 
(size_t idx = 0; idx < boxes.size(); ++idx) { - int idx = indices[i]; Rect box = boxes[idx]; drawPred(classIds[idx], confidences[idx], box.x, box.y, box.x + box.width, box.y + box.height, frame); diff --git a/samples/dnn/object_detection.py b/samples/dnn/object_detection.py index d4ea40f935..babac0dbe8 100644 --- a/samples/dnn/object_detection.py +++ b/samples/dnn/object_detection.py @@ -141,9 +141,6 @@ def postprocess(frame, outs): # Network produces output blob with a shape NxC where N is a number of # detected objects and C is a number of classes + 4 where the first 4 # numbers are [center_x, center_y, width, height] - classIds = [] - confidences = [] - boxes = [] for out in outs: for detection in out: scores = detection[5:] @@ -163,9 +160,25 @@ def postprocess(frame, outs): print('Unknown output layer type: ' + lastLayer.type) exit() - indices = cv.dnn.NMSBoxes(boxes, confidences, confThreshold, nmsThreshold) + # NMS is used inside Region layer only on DNN_BACKEND_OPENCV for another backends we need NMS in sample + # or NMS is required if number of outputs > 1 + if len(outNames) > 1 or lastLayer.type == 'Region' and args.backend != cv.dnn.DNN_BACKEND_OPENCV: + indices = [] + classIds = np.array(classIds) + boxes = np.array(boxes) + confidences = np.array(confidences) + unique_classes = set(classIds) + for cl in unique_classes: + class_indices = np.where(classIds == cl)[0] + conf = confidences[class_indices] + box = boxes[class_indices].tolist() + nms_indices = cv.dnn.NMSBoxes(box, conf, confThreshold, nmsThreshold) + nms_indices = nms_indices[:, 0] if len(nms_indices) else [] + indices.extend(class_indices[nms_indices]) + else: + indices = np.arange(0, len(classIds)) + for i in indices: - i = i[0] box = boxes[i] left = box[0] top = box[1] From 1bec7ca540541dc1e6dc7c9dc72303d0480365f7 Mon Sep 17 00:00:00 2001 From: Egor Pugin Date: Mon, 25 May 2020 23:25:18 +0300 Subject: [PATCH 04/13] Merge pull request #17352 from egorpugin:patch-2 * Fix integer overflow in 
parseOption(). Previous code does not work for values like 100000MB. * Fix warning during 32-bit build on inactive code path. * fix build without C++11 --- modules/core/src/system.cpp | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/modules/core/src/system.cpp b/modules/core/src/system.cpp index 3c0588c355..fcb9ea45ef 100644 --- a/modules/core/src/system.cpp +++ b/modules/core/src/system.cpp @@ -1961,7 +1961,11 @@ inline size_t parseOption(const std::string &value) } cv::String valueStr = value.substr(0, pos); cv::String suffixStr = value.substr(pos, value.length() - pos); - int v = atoi(valueStr.c_str()); +#ifdef CV_CXX11 + size_t v = (size_t)std::stoull(valueStr); +#else + size_t v = (size_t)atol(valueStr.c_str()); +#endif if (suffixStr.length() == 0) return v; else if (suffixStr == "MB" || suffixStr == "Mb" || suffixStr == "mb") From f0bef94a03cdbcc2e35d81fbec08723818dd8511 Mon Sep 17 00:00:00 2001 From: Alexander Alekhin Date: Tue, 26 May 2020 12:45:55 +0000 Subject: [PATCH 05/13] dnn: update network dump code, include ngraph serialization --- modules/dnn/src/dnn.cpp | 34 ++++++++++++++++++++++------------ modules/dnn/src/dnn_common.hpp | 34 ++++++++++++++++++++++++++++++++++ modules/dnn/src/ie_ngraph.cpp | 34 ++++++++++++++++++++++++++++++---- modules/dnn/src/ie_ngraph.hpp | 6 ++++-- modules/dnn/src/precomp.hpp | 9 +-------- 5 files changed, 91 insertions(+), 26 deletions(-) create mode 100644 modules/dnn/src/dnn_common.hpp diff --git a/modules/dnn/src/dnn.cpp b/modules/dnn/src/dnn.cpp index a671501749..0cc53926e8 100644 --- a/modules/dnn/src/dnn.cpp +++ b/modules/dnn/src/dnn.cpp @@ -1082,17 +1082,26 @@ static Ptr wrapMat(int backendId, int targetId, cv::Mat& m) static int g_networkId = 0; -struct Net::Impl +detail::NetImplBase::NetImplBase() + : networkId(CV_XADD(&g_networkId, 1)) + , networkDumpCounter(0) + , dumpLevel(DNN_NETWORK_DUMP) +{ + // nothing +} + +std::string detail::NetImplBase::getDumpFileNameBase() +{ + std::string 
dumpFileNameBase = cv::format("ocv_dnn_net_%05d_%02d", networkId, networkDumpCounter++); + return dumpFileNameBase; +} + +struct Net::Impl : public detail::NetImplBase { typedef std::map LayersShapesMap; typedef std::map MapIdToLayerData; - const int networkId; // network global identifier - int networkDumpCounter; // dump counter - Impl() - : networkId(CV_XADD(&g_networkId, 1)) - , networkDumpCounter(0) { //allocate fake net input layer netInputLayer = Ptr(new DataLayer()); @@ -1256,7 +1265,7 @@ struct Net::Impl { CV_TRACE_FUNCTION(); - if (DNN_NETWORK_DUMP > 0 && networkDumpCounter == 0) + if (dumpLevel && networkDumpCounter == 0) { dumpNetworkToFile(); } @@ -1339,7 +1348,7 @@ struct Net::Impl netWasAllocated = true; - if (DNN_NETWORK_DUMP > 0) + if (dumpLevel) { dumpNetworkToFile(); } @@ -2043,7 +2052,7 @@ struct Net::Impl } if (net.empty()) { - net = Ptr(new InfEngineNgraphNet()); + net = Ptr(new InfEngineNgraphNet(*this)); } if (!fused) { @@ -2087,7 +2096,7 @@ struct Net::Impl } } else { - net = Ptr(new InfEngineNgraphNet()); + net = Ptr(new InfEngineNgraphNet(*this)); } if (!fused) @@ -3126,7 +3135,8 @@ struct Net::Impl void dumpNetworkToFile() { #ifndef OPENCV_DNN_DISABLE_NETWORK_AUTO_DUMP - String dumpFileName = cv::format("ocv_dnn_net_%05d_%02d.dot", networkId, networkDumpCounter++); + string dumpFileNameBase = getDumpFileNameBase(); + string dumpFileName = dumpFileNameBase + ".dot"; try { string dumpStr = dump(); @@ -3185,7 +3195,7 @@ Net Net::Impl::createNetworkFromModelOptimizer(InferenceEngine::CNNNetwork& ieNe { auto fake_node = std::make_shared(ngraph::element::f32, ngraph::Shape{}); Ptr backendNodeNGraph(new InfEngineNgraphNode(fake_node)); - backendNodeNGraph->net = Ptr(new InfEngineNgraphNet(ieNet)); + backendNodeNGraph->net = Ptr(new InfEngineNgraphNet(*(cvNet.impl), ieNet)); backendNode = backendNodeNGraph; } else diff --git a/modules/dnn/src/dnn_common.hpp b/modules/dnn/src/dnn_common.hpp new file mode 100644 index 0000000000..cd6cea0c6b --- 
/dev/null +++ b/modules/dnn/src/dnn_common.hpp @@ -0,0 +1,34 @@ +// This file is part of OpenCV project. +// It is subject to the license terms in the LICENSE file found in the top-level directory +// of this distribution and at http://opencv.org/license.html. + +#ifndef __OPENCV_DNN_COMMON_HPP__ +#define __OPENCV_DNN_COMMON_HPP__ + +#include + +namespace cv { namespace dnn { +CV__DNN_EXPERIMENTAL_NS_BEGIN +#define IS_DNN_OPENCL_TARGET(id) (id == DNN_TARGET_OPENCL || id == DNN_TARGET_OPENCL_FP16) +Mutex& getInitializationMutex(); +void initializeLayerFactory(); + +namespace detail { + +struct NetImplBase +{ + const int networkId; // network global identifier + int networkDumpCounter; // dump counter + int dumpLevel; // level of information dumps (initialized through OPENCV_DNN_NETWORK_DUMP parameter) + + NetImplBase(); + + std::string getDumpFileNameBase(); +}; + +} // namespace detail + +CV__DNN_EXPERIMENTAL_NS_END +}} // namespace + +#endif // __OPENCV_DNN_COMMON_HPP__ diff --git a/modules/dnn/src/ie_ngraph.cpp b/modules/dnn/src/ie_ngraph.cpp index 05e8205251..cf94500a8c 100644 --- a/modules/dnn/src/ie_ngraph.cpp +++ b/modules/dnn/src/ie_ngraph.cpp @@ -6,6 +6,9 @@ // Third party copyrights are property of their respective owners. #include "precomp.hpp" + +#include + #include "ie_ngraph.hpp" #include @@ -22,6 +25,8 @@ namespace cv { namespace dnn { #ifdef HAVE_DNN_NGRAPH +static bool DNN_IE_SERIALIZE = utils::getConfigurationParameterBool("OPENCV_DNN_IE_SERIALIZE", false); + // For networks with input layer which has an empty name, IE generates a name id[some_number]. // OpenCV lets users use an empty input name and to prevent unexpected naming, // we can use some predefined name. 
@@ -295,13 +300,16 @@ void InfEngineNgraphNode::setName(const std::string& name) { node->set_friendly_name(name); } -InfEngineNgraphNet::InfEngineNgraphNet() +InfEngineNgraphNet::InfEngineNgraphNet(detail::NetImplBase& netImpl) + : netImpl_(netImpl) { hasNetOwner = false; device_name = "CPU"; } -InfEngineNgraphNet::InfEngineNgraphNet(InferenceEngine::CNNNetwork& net) : cnn(net) +InfEngineNgraphNet::InfEngineNgraphNet(detail::NetImplBase& netImpl, InferenceEngine::CNNNetwork& net) + : netImpl_(netImpl) + , cnn(net) { hasNetOwner = true; device_name = "CPU"; @@ -440,9 +448,27 @@ void InfEngineNgraphNet::init(Target targetId) ngraph_function->validate_nodes_and_infer_types(); } cnn = InferenceEngine::CNNNetwork(ngraph_function); -#ifdef _DEBUG // TODO - //cnn.serialize("/tmp/cnn.xml", "/tmp/cnn.bin"); + + if (DNN_IE_SERIALIZE) + { +#ifndef OPENCV_DNN_DISABLE_NETWORK_AUTO_DUMP + std::string dumpFileNameBase = netImpl_.getDumpFileNameBase(); + try + { + cnn.serialize(dumpFileNameBase + "_ngraph.xml", dumpFileNameBase + "_ngraph.bin"); + } + catch (const std::exception& e) + { + std::ofstream out((dumpFileNameBase + "_ngraph.error").c_str(), std::ios::out); + out << "Exception: " << e.what() << std::endl; + } + catch (...) 
+ { + std::ofstream out((dumpFileNameBase + "_ngraph.error").c_str(), std::ios::out); + out << "Can't dump: unknown exception" << std::endl; + } #endif + } } switch (targetId) diff --git a/modules/dnn/src/ie_ngraph.hpp b/modules/dnn/src/ie_ngraph.hpp index efbdafa7d9..7a8c4bef8d 100644 --- a/modules/dnn/src/ie_ngraph.hpp +++ b/modules/dnn/src/ie_ngraph.hpp @@ -34,8 +34,8 @@ class InfEngineNgraphNode; class InfEngineNgraphNet { public: - InfEngineNgraphNet(); - InfEngineNgraphNet(InferenceEngine::CNNNetwork& net); + InfEngineNgraphNet(detail::NetImplBase& netImpl); + InfEngineNgraphNet(detail::NetImplBase& netImpl, InferenceEngine::CNNNetwork& net); void addOutput(const std::string& name); @@ -55,6 +55,8 @@ public: void reset(); private: + detail::NetImplBase& netImpl_; + void release(); int getNumComponents(); void dfs(std::shared_ptr& node, std::vector>& comp, diff --git a/modules/dnn/src/precomp.hpp b/modules/dnn/src/precomp.hpp index f6230c4c6d..62f8714af1 100644 --- a/modules/dnn/src/precomp.hpp +++ b/modules/dnn/src/precomp.hpp @@ -61,11 +61,4 @@ #include #include - -namespace cv { namespace dnn { -CV__DNN_EXPERIMENTAL_NS_BEGIN -#define IS_DNN_OPENCL_TARGET(id) (id == DNN_TARGET_OPENCL || id == DNN_TARGET_OPENCL_FP16) -Mutex& getInitializationMutex(); -void initializeLayerFactory(); -CV__DNN_EXPERIMENTAL_NS_END -}} // namespace +#include "dnn_common.hpp" From b236f107923f4b6ce6a18532ef4983ee77fcc6e3 Mon Sep 17 00:00:00 2001 From: Liubov Batanina Date: Tue, 26 May 2020 16:59:50 +0300 Subject: [PATCH 06/13] Switch ngraph::op::v1::Multiply to v0 --- modules/dnn/src/layers/normalize_bbox_layer.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/modules/dnn/src/layers/normalize_bbox_layer.cpp b/modules/dnn/src/layers/normalize_bbox_layer.cpp index b546a96101..7ce7b37d1e 100644 --- a/modules/dnn/src/layers/normalize_bbox_layer.cpp +++ b/modules/dnn/src/layers/normalize_bbox_layer.cpp @@ -345,7 +345,7 @@ public: weight = std::make_shared( 
ngraph::element::f32, ngraph::Shape(shape), blobs[0].data); } - auto mul = std::make_shared(norm, weight, ngraph::op::AutoBroadcastType::NUMPY); + auto mul = std::make_shared(norm, weight, ngraph::op::AutoBroadcastType::NUMPY); return Ptr(new InfEngineNgraphNode(mul)); } #endif // HAVE_DNN_NGRAPH From ba3cf4760069b53b2388155e15b095e0897d9659 Mon Sep 17 00:00:00 2001 From: Liubov Batanina Date: Tue, 26 May 2020 22:01:47 +0300 Subject: [PATCH 07/13] Merge pull request #17386 from l-bat:tf_clamp_subgraph * Added ClipByValue subgraph * Return const nodes --- .../dnn/src/tensorflow/tf_graph_simplifier.cpp | 16 ++++++++++++++++ modules/dnn/test/test_tf_importer.cpp | 5 +++++ 2 files changed, 21 insertions(+) diff --git a/modules/dnn/src/tensorflow/tf_graph_simplifier.cpp b/modules/dnn/src/tensorflow/tf_graph_simplifier.cpp index 99b3d7ac2f..ef9e68a873 100644 --- a/modules/dnn/src/tensorflow/tf_graph_simplifier.cpp +++ b/modules/dnn/src/tensorflow/tf_graph_simplifier.cpp @@ -725,6 +725,21 @@ private: bool negativeScales; }; +class ClipByValueSubgraph : public TFSubgraph +{ +public: + ClipByValueSubgraph() + { + int input = addNodeToMatch(""); + int maxValue = addNodeToMatch("Const"); + int minimum = addNodeToMatch("Minimum", input, maxValue); + int minValue = addNodeToMatch("Const"); + addNodeToMatch("Maximum", minimum, minValue); + + setFusedNode("ClipByValue", input, minValue, maxValue); + } +}; + void simplifySubgraphs(tensorflow::GraphDef& net) { std::vector > subgraphs; @@ -749,6 +764,7 @@ void simplifySubgraphs(tensorflow::GraphDef& net) subgraphs.push_back(Ptr(new PReLUSubgraph(false))); subgraphs.push_back(Ptr(new FlattenProdSubgraph())); subgraphs.push_back(Ptr(new ResizeBilinearSubgraphDown())); + subgraphs.push_back(Ptr(new ClipByValueSubgraph())); for (int i = 0; i < net.node_size(); ++i) { diff --git a/modules/dnn/test/test_tf_importer.cpp b/modules/dnn/test/test_tf_importer.cpp index b20b2a58ff..fd5da62a93 100644 --- a/modules/dnn/test/test_tf_importer.cpp 
+++ b/modules/dnn/test/test_tf_importer.cpp @@ -977,6 +977,11 @@ TEST_P(Test_TensorFlow_layers, tf2_dense) runTensorFlowNet("tf2_dense"); } +TEST_P(Test_TensorFlow_layers, clip_by_value) +{ + runTensorFlowNet("clip_by_value"); +} + TEST_P(Test_TensorFlow_layers, tf2_prelu) { if (backend == DNN_BACKEND_INFERENCE_ENGINE_NN_BUILDER_2019) From fef6192bcab37c1d5746ce6efd68f6942a0a18ef Mon Sep 17 00:00:00 2001 From: Ningxin Hu Date: Wed, 27 May 2020 16:20:07 +0800 Subject: [PATCH 08/13] Merge pull request #17394 from huningxin:fix_segmentation_py * Fix window title of python segmentation example * Fix float text position of python segmentation examples --- samples/dnn/mask_rcnn.py | 2 +- samples/dnn/segmentation.py | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/samples/dnn/mask_rcnn.py b/samples/dnn/mask_rcnn.py index a67f19519e..352dfd191a 100644 --- a/samples/dnn/mask_rcnn.py +++ b/samples/dnn/mask_rcnn.py @@ -43,7 +43,7 @@ def showLegend(classes): for i in range(len(classes)): block = legend[i * blockHeight:(i + 1) * blockHeight] block[:,:] = colors[i] - cv.putText(block, classes[i], (0, blockHeight/2), cv.FONT_HERSHEY_SIMPLEX, 0.5, (255, 255, 255)) + cv.putText(block, classes[i], (0, blockHeight//2), cv.FONT_HERSHEY_SIMPLEX, 0.5, (255, 255, 255)) cv.namedWindow('Legend', cv.WINDOW_NORMAL) cv.imshow('Legend', legend) diff --git a/samples/dnn/segmentation.py b/samples/dnn/segmentation.py index a926ca27b3..1a228c63aa 100644 --- a/samples/dnn/segmentation.py +++ b/samples/dnn/segmentation.py @@ -65,7 +65,7 @@ def showLegend(classes): for i in range(len(classes)): block = legend[i * blockHeight:(i + 1) * blockHeight] block[:,:] = colors[i] - cv.putText(block, classes[i], (0, blockHeight/2), cv.FONT_HERSHEY_SIMPLEX, 0.5, (255, 255, 255)) + cv.putText(block, classes[i], (0, blockHeight//2), cv.FONT_HERSHEY_SIMPLEX, 0.5, (255, 255, 255)) cv.namedWindow('Legend', cv.WINDOW_NORMAL) cv.imshow('Legend', legend) @@ -76,7 +76,7 @@ net = 
cv.dnn.readNet(args.model, args.config, args.framework) net.setPreferableBackend(args.backend) net.setPreferableTarget(args.target) -winName = 'Deep learning image classification in OpenCV' +winName = 'Deep learning semantic segmentation in OpenCV' cv.namedWindow(winName, cv.WINDOW_NORMAL) cap = cv.VideoCapture(args.input if args.input else 0) From 2d11edd1031b8651cbedef40937ed58972eb05a9 Mon Sep 17 00:00:00 2001 From: Maksim Shabunin Date: Wed, 27 May 2020 17:52:17 +0300 Subject: [PATCH 09/13] videoio/MSMF: fixed audio stream handling --- modules/videoio/src/cap_msmf.cpp | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/modules/videoio/src/cap_msmf.cpp b/modules/videoio/src/cap_msmf.cpp index 119873beb0..15b1d2ade7 100644 --- a/modules/videoio/src/cap_msmf.cpp +++ b/modules/videoio/src/cap_msmf.cpp @@ -487,12 +487,14 @@ public: } } } - std::pair findBest(const MediaType& newType) + std::pair findBestVideoFormat(const MediaType& newType) { std::pair best; std::map::const_iterator i = formats.begin(); for (; i != formats.end(); ++i) { + if (i->second.majorType != MFMediaType_Video) + continue; if (newType.isEmpty()) // file input - choose first returned media type { best = *i; @@ -770,7 +772,12 @@ bool CvCapture_MSMF::configureOutput(MediaType newType, cv::uint32_t outFormat) { FormatStorage formats; formats.read(videoFileSource.Get()); - std::pair bestMatch = formats.findBest(newType); + std::pair bestMatch = formats.findBestVideoFormat(newType); + if (bestMatch.second.isEmpty()) + { + CV_LOG_DEBUG(NULL, "Can not find video stream with requested parameters"); + return false; + } dwStreamIndex = bestMatch.first.stream; nativeFormat = bestMatch.second; MediaType newFormat = nativeFormat; From 4a9904fe62db1b7829e14e6f398f6ed2cf632ec2 Mon Sep 17 00:00:00 2001 From: Alexander Smorkalov Date: Thu, 28 May 2020 13:59:31 +0300 Subject: [PATCH 10/13] Command name typo fix for formulas in documentation. 
--- doc/mymath.sty | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/mymath.sty b/doc/mymath.sty index 04aedf6302..738c7e9afc 100644 --- a/doc/mymath.sty +++ b/doc/mymath.sty @@ -28,7 +28,7 @@ #3 & \mbox{#4}\\ #5 & \mbox{#6}\\ \end{array} \right.} -\newcommand{\forkthree}[8]{ +\newcommand{\forkfour}[8]{ \left\{ \begin{array}{l l} #1 & \mbox{#2}\\ From 6b89154afde2c80e5ab7d67891c13ada8759eb5d Mon Sep 17 00:00:00 2001 From: Alexander Alekhin Date: Tue, 26 May 2020 16:20:32 +0000 Subject: [PATCH 11/13] dnn(test): add YOLOv4 tests --- modules/dnn/perf/perf_net.cpp | 18 +++- modules/dnn/test/test_darknet_importer.cpp | 109 ++++++++++++++++++--- 2 files changed, 113 insertions(+), 14 deletions(-) diff --git a/modules/dnn/perf/perf_net.cpp b/modules/dnn/perf/perf_net.cpp index d4c962e741..5573a440e3 100644 --- a/modules/dnn/perf/perf_net.cpp +++ b/modules/dnn/perf/perf_net.cpp @@ -197,9 +197,23 @@ PERF_TEST_P_(DNNTestNetwork, YOLOv3) if (backend == DNN_BACKEND_HALIDE) throw SkipTestException(""); Mat sample = imread(findDataFile("dnn/dog416.png")); + cvtColor(sample, sample, COLOR_BGR2RGB); Mat inp; - sample.convertTo(inp, CV_32FC3); - processNet("dnn/yolov3.weights", "dnn/yolov3.cfg", "", inp / 255); + sample.convertTo(inp, CV_32FC3, 1.0f / 255, 0); + processNet("dnn/yolov3.weights", "dnn/yolov3.cfg", "", inp); +} + +PERF_TEST_P_(DNNTestNetwork, YOLOv4) +{ + if (backend == DNN_BACKEND_HALIDE) + throw SkipTestException(""); + if (target == DNN_TARGET_MYRIAD) + throw SkipTestException(""); + Mat sample = imread(findDataFile("dnn/dog416.png")); + cvtColor(sample, sample, COLOR_BGR2RGB); + Mat inp; + sample.convertTo(inp, CV_32FC3, 1.0f / 255, 0); + processNet("dnn/yolov4.weights", "dnn/yolov4.cfg", "", inp); } PERF_TEST_P_(DNNTestNetwork, EAST_text_detection) diff --git a/modules/dnn/test/test_darknet_importer.cpp b/modules/dnn/test/test_darknet_importer.cpp index 244bc80157..607761ed2f 100644 --- a/modules/dnn/test/test_darknet_importer.cpp +++ 
b/modules/dnn/test/test_darknet_importer.cpp @@ -245,6 +245,13 @@ public: nms_boxes.push_back(box); nms_confidences.push_back(conf); nms_classIds.push_back(class_id); +#if 0 // use to update test reference data + std::cout << b << ", " << class_id << ", " << conf << "f, " + << box.x << "f, " << box.y << "f, " + << box.x + box.width << "f, " << box.y + box.height << "f," + << std::endl; +#endif + } normAssertDetections(refClassIds[b], refConfidences[b], refBoxes[b], nms_classIds, @@ -395,6 +402,9 @@ TEST_P(Test_Darknet_nets_async, Accuracy) std::string prefix = get<0>(GetParam()); + if (targetId == DNN_TARGET_MYRIAD && prefix == "yolov4") // NC_OUT_OF_MEMORY + applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_MYRIAD, CV_TEST_TAG_DNN_SKIP_IE_VERSION); + if (backendId != DNN_BACKEND_INFERENCE_ENGINE_NN_BUILDER_2019 && backendId != DNN_BACKEND_INFERENCE_ENGINE_NGRAPH) throw SkipTestException("No support for async forward"); @@ -439,7 +449,7 @@ TEST_P(Test_Darknet_nets_async, Accuracy) } INSTANTIATE_TEST_CASE_P(/**/, Test_Darknet_nets_async, Combine( - Values("yolo-voc", "tiny-yolo-voc", "yolov3"), + Values("yolo-voc", "tiny-yolo-voc", "yolov3", "yolov4"), dnnBackendsAndTargets() )); @@ -453,15 +463,21 @@ TEST_P(Test_Darknet_nets, YOLOv3) applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_MYRIAD, CV_TEST_TAG_DNN_SKIP_IE_NGRAPH); // batchId, classId, confidence, left, top, right, bottom - Mat ref = (Mat_(9, 7) << 0, 7, 0.952983f, 0.614622f, 0.150257f, 0.901369f, 0.289251f, // a truck - 0, 1, 0.987908f, 0.150913f, 0.221933f, 0.742255f, 0.74626f, // a bicycle - 0, 16, 0.998836f, 0.160024f, 0.389964f, 0.417885f, 0.943716f, // a dog (COCO) - 1, 9, 0.384801f, 0.659824f, 0.372389f, 0.673926f, 0.429412f, // a traffic light - 1, 9, 0.733283f, 0.376029f, 0.315694f, 0.401776f, 0.395165f, // a traffic light - 1, 9, 0.785352f, 0.665503f, 0.373543f, 0.688893f, 0.439245f, // a traffic light - 1, 0, 0.980052f, 0.195856f, 0.378454f, 0.258626f, 0.629258f, // a person - 1, 2, 0.989633f, 0.450719f, 0.463353f, 
0.496305f, 0.522258f, // a car - 1, 2, 0.997412f, 0.647584f, 0.459939f, 0.821038f, 0.663947f); // a car + const int N0 = 3; + const int N1 = 6; + static const float ref_[/* (N0 + N1) * 7 */] = { +0, 16, 0.998836f, 0.160024f, 0.389964f, 0.417885f, 0.943716f, +0, 1, 0.987908f, 0.150913f, 0.221933f, 0.742255f, 0.746261f, +0, 7, 0.952983f, 0.614621f, 0.150257f, 0.901368f, 0.289251f, + +1, 2, 0.997412f, 0.647584f, 0.459939f, 0.821037f, 0.663947f, +1, 2, 0.989633f, 0.450719f, 0.463353f, 0.496306f, 0.522258f, +1, 0, 0.980053f, 0.195856f, 0.378454f, 0.258626f, 0.629257f, +1, 9, 0.785341f, 0.665503f, 0.373543f, 0.688893f, 0.439244f, +1, 9, 0.733275f, 0.376029f, 0.315694f, 0.401776f, 0.395165f, +1, 9, 0.384815f, 0.659824f, 0.372389f, 0.673927f, 0.429412f, + }; + Mat ref(N0 + N1, 7, CV_32FC1, (void*)ref_); double scoreDiff = (target == DNN_TARGET_OPENCL_FP16 || target == DNN_TARGET_MYRIAD) ? 0.006 : 8e-5; double iouDiff = (target == DNN_TARGET_OPENCL_FP16 || target == DNN_TARGET_MYRIAD) ? 0.042 : 3e-4; @@ -480,8 +496,8 @@ TEST_P(Test_Darknet_nets, YOLOv3) #endif { - SCOPED_TRACE("batch size 1"); - testDarknetModel(config_file, weights_file, ref.rowRange(0, 3), scoreDiff, iouDiff); + SCOPED_TRACE("batch size 1"); + testDarknetModel(config_file, weights_file, ref.rowRange(0, N0), scoreDiff, iouDiff); } #if defined(INF_ENGINE_RELEASE) @@ -503,6 +519,75 @@ TEST_P(Test_Darknet_nets, YOLOv3) } } +TEST_P(Test_Darknet_nets, YOLOv4) +{ + applyTestTag(CV_TEST_TAG_LONG, (target == DNN_TARGET_CPU ? 
CV_TEST_TAG_MEMORY_1GB : CV_TEST_TAG_MEMORY_2GB)); + +#if defined(INF_ENGINE_RELEASE) + if (target == DNN_TARGET_MYRIAD) // NC_OUT_OF_MEMORY + applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_MYRIAD, CV_TEST_TAG_DNN_SKIP_IE_VERSION); +#endif + + // batchId, classId, confidence, left, top, right, bottom + const int N0 = 3; + const int N1 = 7; + static const float ref_[/* (N0 + N1) * 7 */] = { +0, 16, 0.992194f, 0.172375f, 0.402458f, 0.403918f, 0.932801f, +0, 1, 0.988326f, 0.166708f, 0.228236f, 0.737208f, 0.735803f, +0, 7, 0.94639f, 0.602523f, 0.130399f, 0.901623f, 0.298452f, + +1, 2, 0.99761f, 0.646556f, 0.45985f, 0.816041f, 0.659067f, +1, 0, 0.988913f, 0.201726f, 0.360282f, 0.266181f, 0.631728f, +1, 2, 0.98233f, 0.452007f, 0.462217f, 0.495612f, 0.521687f, +1, 9, 0.919195f, 0.374642f, 0.316524f, 0.398126f, 0.393714f, +1, 9, 0.856303f, 0.666842f, 0.372215f, 0.685539f, 0.44141f, +1, 9, 0.313516f, 0.656791f, 0.374734f, 0.671959f, 0.438371f, +1, 9, 0.256625f, 0.940232f, 0.326931f, 0.967586f, 0.374002f, + }; + Mat ref(N0 + N1, 7, CV_32FC1, (void*)ref_); + + double scoreDiff = (target == DNN_TARGET_OPENCL_FP16 || target == DNN_TARGET_MYRIAD) ? 0.006 : 8e-5; + double iouDiff = (target == DNN_TARGET_OPENCL_FP16 || target == DNN_TARGET_MYRIAD) ? 
0.042 : 3e-4; + + std::string config_file = "yolov4.cfg"; + std::string weights_file = "yolov4.weights"; + +#if defined(INF_ENGINE_RELEASE) + if ((backend == DNN_BACKEND_INFERENCE_ENGINE_NN_BUILDER_2019 || + backend == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH) && target == DNN_TARGET_MYRIAD && + getInferenceEngineVPUType() == CV_DNN_INFERENCE_ENGINE_VPU_TYPE_MYRIAD_X) + { + scoreDiff = 0.04; + iouDiff = 0.2; + } +#endif + + { + SCOPED_TRACE("batch size 1"); + testDarknetModel(config_file, weights_file, ref.rowRange(0, N0), scoreDiff, iouDiff); + } + + { + SCOPED_TRACE("batch size 2"); + +#if defined(INF_ENGINE_RELEASE) + if (backend == DNN_BACKEND_INFERENCE_ENGINE_NN_BUILDER_2019) + { + if (target == DNN_TARGET_OPENCL) + applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_OPENCL, CV_TEST_TAG_DNN_SKIP_IE_VERSION); + else if (target == DNN_TARGET_OPENCL_FP16 && INF_ENGINE_VER_MAJOR_LE(202010000)) + applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_OPENCL_FP16, CV_TEST_TAG_DNN_SKIP_IE_VERSION); + else if (target == DNN_TARGET_MYRIAD && + getInferenceEngineVPUType() == CV_DNN_INFERENCE_ENGINE_VPU_TYPE_MYRIAD_X) + applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_MYRIAD_X); + } +#endif + + testDarknetModel(config_file, weights_file, ref, scoreDiff, iouDiff); + } +} + + INSTANTIATE_TEST_CASE_P(/**/, Test_Darknet_nets, dnnBackendsAndTargets()); TEST_P(Test_Darknet_layers, shortcut) From d9bada98678c2525f9ac2a9d700bea1d82d0e554 Mon Sep 17 00:00:00 2001 From: Dmitry Kurtaev Date: Tue, 26 May 2020 10:51:26 +0300 Subject: [PATCH 12/13] dnn: EfficientDet --- modules/dnn/perf/perf_net.cpp | 11 + modules/dnn/src/tensorflow/tf_importer.cpp | 28 ++- modules/dnn/test/test_tf_importer.cpp | 33 +++ samples/dnn/tf_text_graph_common.py | 2 +- samples/dnn/tf_text_graph_efficientdet.py | 236 +++++++++++++++++++++ 5 files changed, 302 insertions(+), 8 deletions(-) create mode 100644 samples/dnn/tf_text_graph_efficientdet.py diff --git a/modules/dnn/perf/perf_net.cpp b/modules/dnn/perf/perf_net.cpp index d4c962e741..6357e9a7c7 100644 
--- a/modules/dnn/perf/perf_net.cpp +++ b/modules/dnn/perf/perf_net.cpp @@ -235,6 +235,17 @@ PERF_TEST_P_(DNNTestNetwork, Inception_v2_Faster_RCNN) Mat(cv::Size(800, 600), CV_32FC3)); } +PERF_TEST_P_(DNNTestNetwork, EfficientDet) +{ + if (backend == DNN_BACKEND_HALIDE || target != DNN_TARGET_CPU) + throw SkipTestException(""); + Mat sample = imread(findDataFile("dnn/dog416.png")); + resize(sample, sample, Size(512, 512)); + Mat inp; + sample.convertTo(inp, CV_32FC3, 1.0/255); + processNet("dnn/efficientdet-d0.pb", "dnn/efficientdet-d0.pbtxt", "", inp); +} + INSTANTIATE_TEST_CASE_P(/*nothing*/, DNNTestNetwork, dnnBackendsAndTargets()); } // namespace diff --git a/modules/dnn/src/tensorflow/tf_importer.cpp b/modules/dnn/src/tensorflow/tf_importer.cpp index e684b94e46..c005c99b58 100644 --- a/modules/dnn/src/tensorflow/tf_importer.cpp +++ b/modules/dnn/src/tensorflow/tf_importer.cpp @@ -1542,22 +1542,32 @@ void TFImporter::populateNet(Net dstNet) connect(layer_id, dstNet, parsePin(layer.input(0)), id, 0); } - else if (type == "Mul") + else if (type == "Mul" || type == "RealDiv") { - bool haveConst = false; - for(int ii = 0; !haveConst && ii < layer.input_size(); ++ii) + int constId = -1; + for(int ii = 0; ii < layer.input_size(); ++ii) { Pin input = parsePin(layer.input(ii)); - haveConst = value_id.find(input.name) != value_id.end(); + if (value_id.find(input.name) != value_id.end()) + { + constId = ii; + break; + } } - CV_Assert(!haveConst || layer.input_size() == 2); + CV_Assert((constId != -1) || (layer.input_size() == 2)); - if (haveConst) + if (constId != -1) { // Multiplication by constant. CV_Assert(layer.input_size() == 2); Mat scaleMat = getTensorContent(getConstBlob(layer, value_id)); CV_Assert(scaleMat.type() == CV_32FC1); + if (type == "RealDiv") + { + if (constId == 0) + CV_Error(Error::StsNotImplemented, "Division of constant over variable"); + scaleMat = 1.0f / scaleMat; + } int id; if (scaleMat.total() == 1) // is a scalar. 
@@ -1659,11 +1669,15 @@ void TFImporter::populateNet(Net dstNet) int id; if (equalInpShapes || netInputShapes.empty()) { - layerParams.set("operation", "prod"); + layerParams.set("operation", type == "RealDiv" ? "div" : "prod"); id = dstNet.addLayer(name, "Eltwise", layerParams); } else + { + if (type == "RealDiv") + CV_Error(Error::StsNotImplemented, "Division of non equal tensors"); id = dstNet.addLayer(name, "Scale", layerParams); + } layer_id[name] = id; diff --git a/modules/dnn/test/test_tf_importer.cpp b/modules/dnn/test/test_tf_importer.cpp index b20b2a58ff..d10e847e00 100644 --- a/modules/dnn/test/test_tf_importer.cpp +++ b/modules/dnn/test/test_tf_importer.cpp @@ -1123,4 +1123,37 @@ TEST_P(Test_TensorFlow_nets, Mask_RCNN) expectNoFallbacks(net); } +TEST_P(Test_TensorFlow_nets, EfficientDet) +{ + if (target != DNN_TARGET_CPU) + { + if (target == DNN_TARGET_OPENCL_FP16) applyTestTag(CV_TEST_TAG_DNN_SKIP_OPENCL_FP16); + if (target == DNN_TARGET_OPENCL) applyTestTag(CV_TEST_TAG_DNN_SKIP_OPENCL); + if (target == DNN_TARGET_MYRIAD) applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_MYRIAD); + } + checkBackend(); + std::string proto = findDataFile("dnn/efficientdet-d0.pbtxt"); + std::string model = findDataFile("dnn/efficientdet-d0.pb"); + + Net net = readNetFromTensorflow(model, proto); + Mat img = imread(findDataFile("dnn/dog416.png")); + Mat blob = blobFromImage(img, 1.0/255, Size(512, 512), Scalar(123.675, 116.28, 103.53)); + + net.setPreferableBackend(backend); + net.setPreferableTarget(target); + net.setInput(blob); + // Output has shape 1x1xNx7 where N - number of detections. + // Every detection is a vector of values [id, classId, confidence, left, top, right, bottom] + Mat out = net.forward(); + + // References are from test for TensorFlow model.
+ Mat ref = (Mat_(3, 7) << 0, 1, 0.8437444, 0.153996080160141, 0.20534580945968628, 0.7463544607162476, 0.7414066195487976, + 0, 17, 0.8245924, 0.16657517850399017, 0.3996818959712982, 0.4111558794975281, 0.9306337833404541, + 0, 7, 0.8039304, 0.6118435263633728, 0.13175517320632935, 0.9065558314323425, 0.2943994700908661); + double scoreDiff = (target == DNN_TARGET_OPENCL_FP16 || target == DNN_TARGET_MYRIAD) ? 4e-3 : 1e-5; + double iouDiff = (target == DNN_TARGET_OPENCL_FP16 || target == DNN_TARGET_MYRIAD) ? 2e-3 : 1e-4; + normAssertDetections(ref, out, "", 0.5, scoreDiff, iouDiff); + expectNoFallbacksFromIE(net); +} + } diff --git a/samples/dnn/tf_text_graph_common.py b/samples/dnn/tf_text_graph_common.py index 5aa1d30e39..ea24898873 100644 --- a/samples/dnn/tf_text_graph_common.py +++ b/samples/dnn/tf_text_graph_common.py @@ -269,7 +269,7 @@ def parseTextGraph(filePath): def removeIdentity(graph_def): identities = {} for node in graph_def.node: - if node.op == 'Identity': + if node.op == 'Identity' or node.op == 'IdentityN': identities[node.name] = node.input[0] graph_def.node.remove(node) diff --git a/samples/dnn/tf_text_graph_efficientdet.py b/samples/dnn/tf_text_graph_efficientdet.py new file mode 100644 index 0000000000..855691b2be --- /dev/null +++ b/samples/dnn/tf_text_graph_efficientdet.py @@ -0,0 +1,236 @@ +# This file is a part of OpenCV project. +# It is a subject to the license terms in the LICENSE file found in the top-level directory +# of this distribution and at http://opencv.org/license.html. +# +# Copyright (C) 2020, Intel Corporation, all rights reserved. +# Third party copyrights are property of their respective owners. +# +# Use this script to get the text graph representation (.pbtxt) of EfficientDet +# deep learning network trained in https://github.com/google/automl. +# Then you can import it with a binary frozen graph (.pb) using readNetFromTensorflow() function. 
+# See details and examples on the following wiki page: https://github.com/opencv/opencv/wiki/TensorFlow-Object-Detection-API +import argparse +import re +from math import sqrt +from tf_text_graph_common import * + + +class AnchorGenerator: + def __init__(self, min_level, aspect_ratios, num_scales, anchor_scale): + self.min_level = min_level + self.aspect_ratios = aspect_ratios + self.anchor_scale = anchor_scale + self.scales = [2**(float(s) / num_scales) for s in range(num_scales)] + + def get(self, layer_id): + widths = [] + heights = [] + for s in self.scales: + for a in self.aspect_ratios: + base_anchor_size = 2**(self.min_level + layer_id) * self.anchor_scale + heights.append(base_anchor_size * s * a[1]) + widths.append(base_anchor_size * s * a[0]) + return widths, heights + + +def createGraph(modelPath, outputPath, min_level, aspect_ratios, num_scales, + anchor_scale, num_classes, image_width, image_height): + print('Min level: %d' % min_level) + print('Anchor scale: %f' % anchor_scale) + print('Num scales: %d' % num_scales) + print('Aspect ratios: %s' % str(aspect_ratios)) + print('Number of classes: %d' % num_classes) + print('Input image size: %dx%d' % (image_width, image_height)) + + # Read the graph. 
+ _inpNames = ['image_arrays'] + outNames = ['detections'] + + writeTextGraph(modelPath, outputPath, outNames) + graph_def = parseTextGraph(outputPath) + + def getUnconnectedNodes(): + unconnected = [] + for node in graph_def.node: + if node.op == 'Const': + continue + unconnected.append(node.name) + for inp in node.input: + if inp in unconnected: + unconnected.remove(inp) + return unconnected + + + nodesToKeep = ['truediv'] # Keep preprocessing nodes + + removeIdentity(graph_def) + + scopesToKeep = ('image_arrays', 'efficientnet', 'resample_p6', 'resample_p7', + 'fpn_cells', 'class_net', 'box_net', 'Reshape', 'concat') + + addConstNode('scale_w', [2.0], graph_def) + addConstNode('scale_h', [2.0], graph_def) + nodesToKeep += ['scale_w', 'scale_h'] + + for node in graph_def.node: + if re.match('efficientnet-(.*)/blocks_\d+/se/mul_1', node.name): + node.input[0], node.input[1] = node.input[1], node.input[0] + + if re.match('fpn_cells/cell_\d+/fnode\d+/resample(.*)/nearest_upsampling/Reshape_1$', node.name): + node.op = 'ResizeNearestNeighbor' + node.input[1] = 'scale_w' + node.input.append('scale_h') + + for inpNode in graph_def.node: + if inpNode.name == node.name[:node.name.rfind('_')]: + node.input[0] = inpNode.input[0] + + if re.match('box_net/box-predict(_\d)*/separable_conv2d$', node.name): + node.addAttr('loc_pred_transposed', True) + + # Replace RealDiv to Mul with inversed scale for compatibility + if node.op == 'RealDiv': + for inpNode in graph_def.node: + if inpNode.name != node.input[1] or not 'value' in inpNode.attr: + continue + + tensor = inpNode.attr['value']['tensor'][0] + if not 'float_val' in tensor: + continue + scale = float(inpNode.attr['value']['tensor'][0]['float_val'][0]) + + addConstNode(inpNode.name + '/inv', [1.0 / scale], graph_def) + nodesToKeep.append(inpNode.name + '/inv') + node.input[1] = inpNode.name + '/inv' + node.op = 'Mul' + break + + + def to_remove(name, op): + if name in nodesToKeep: + return False + return op == 'Const' or 
not name.startswith(scopesToKeep) + + removeUnusedNodesAndAttrs(to_remove, graph_def) + + # Attach unconnected preprocessing + assert(graph_def.node[1].name == 'truediv' and graph_def.node[1].op == 'RealDiv') + graph_def.node[1].input.insert(0, 'image_arrays') + graph_def.node[2].input.insert(0, 'truediv') + + priors_generator = AnchorGenerator(min_level, aspect_ratios, num_scales, anchor_scale) + priorBoxes = [] + for i in range(5): + inpName = '' + for node in graph_def.node: + if node.name == 'Reshape_%d' % (i * 2 + 1): + inpName = node.input[0] + break + + priorBox = NodeDef() + priorBox.name = 'PriorBox_%d' % i + priorBox.op = 'PriorBox' + priorBox.input.append(inpName) + priorBox.input.append(graph_def.node[0].name) # image_tensor + + priorBox.addAttr('flip', False) + priorBox.addAttr('clip', False) + + widths, heights = priors_generator.get(i) + + priorBox.addAttr('width', widths) + priorBox.addAttr('height', heights) + priorBox.addAttr('variance', [1.0, 1.0, 1.0, 1.0]) + + graph_def.node.extend([priorBox]) + priorBoxes.append(priorBox.name) + + addConstNode('concat/axis_flatten', [-1], graph_def) + + def addConcatNode(name, inputs, axisNodeName): + concat = NodeDef() + concat.name = name + concat.op = 'ConcatV2' + for inp in inputs: + concat.input.append(inp) + concat.input.append(axisNodeName) + graph_def.node.extend([concat]) + + addConcatNode('PriorBox/concat', priorBoxes, 'concat/axis_flatten') + + sigmoid = NodeDef() + sigmoid.name = 'concat/sigmoid' + sigmoid.op = 'Sigmoid' + sigmoid.input.append('concat') + graph_def.node.extend([sigmoid]) + + addFlatten(sigmoid.name, sigmoid.name + '/Flatten', graph_def) + addFlatten('concat_1', 'concat_1/Flatten', graph_def) + + detectionOut = NodeDef() + detectionOut.name = 'detection_out' + detectionOut.op = 'DetectionOutput' + + detectionOut.input.append('concat_1/Flatten') + detectionOut.input.append(sigmoid.name + '/Flatten') + detectionOut.input.append('PriorBox/concat') + + 
detectionOut.addAttr('num_classes', num_classes) + detectionOut.addAttr('share_location', True) + detectionOut.addAttr('background_label_id', num_classes + 1) + detectionOut.addAttr('nms_threshold', 0.6) + detectionOut.addAttr('confidence_threshold', 0.2) + detectionOut.addAttr('top_k', 100) + detectionOut.addAttr('keep_top_k', 100) + detectionOut.addAttr('code_type', "CENTER_SIZE") + graph_def.node.extend([detectionOut]) + + graph_def.node[0].attr['shape'] = { + 'shape': { + 'dim': [ + {'size': -1}, + {'size': image_height}, + {'size': image_width}, + {'size': 3} + ] + } + } + + while True: + unconnectedNodes = getUnconnectedNodes() + unconnectedNodes.remove(detectionOut.name) + if not unconnectedNodes: + break + + for name in unconnectedNodes: + for i in range(len(graph_def.node)): + if graph_def.node[i].name == name: + del graph_def.node[i] + break + + # Save as text + graph_def.save(outputPath) + + +if __name__ == "__main__": + parser = argparse.ArgumentParser(description='Run this script to get a text graph of ' + 'SSD model from TensorFlow Object Detection API. 
' + 'Then pass it with .pb file to cv::dnn::readNetFromTensorflow function.') + parser.add_argument('--input', required=True, help='Path to frozen TensorFlow graph.') + parser.add_argument('--output', required=True, help='Path to output text graph.') + parser.add_argument('--min_level', default=3, type=int, help='Parameter from training config') + parser.add_argument('--num_scales', default=3, type=int, help='Parameter from training config') + parser.add_argument('--anchor_scale', default=4.0, type=float, help='Parameter from training config') + parser.add_argument('--aspect_ratios', default=[1.0, 1.0, 1.4, 0.7, 0.7, 1.4], + nargs='+', type=float, help='Parameter from training config') + parser.add_argument('--num_classes', default=90, type=int, help='Number of classes to detect') + parser.add_argument('--width', default=512, type=int, help='Network input width') + parser.add_argument('--height', default=512, type=int, help='Network input height') + args = parser.parse_args() + + ar = args.aspect_ratios + assert(len(ar) % 2 == 0) + ar = list(zip(ar[::2], ar[1::2])) + + createGraph(args.input, args.output, args.min_level, ar, args.num_scales, + args.anchor_scale, args.num_classes, args.width, args.height) From 80037dc6de944d869e26c8caa12d249cf2e93bc7 Mon Sep 17 00:00:00 2001 From: Vadim Pisarevsky Date: Thu, 28 May 2020 21:36:28 +0300 Subject: [PATCH 13/13] fixed fillPoly, the overloaded variant with InputArrayOfArrays parameter (single or multiple polygons) --- modules/imgproc/src/drawing.cpp | 7 +++-- modules/imgproc/test/test_drawing.cpp | 38 +++++++++++++++++++++++++++ 2 files changed, 43 insertions(+), 2 deletions(-) diff --git a/modules/imgproc/src/drawing.cpp b/modules/imgproc/src/drawing.cpp index e9b212d388..aa889d1dc8 100644 --- a/modules/imgproc/src/drawing.cpp +++ b/modules/imgproc/src/drawing.cpp @@ -931,6 +931,7 @@ void ellipse2Poly( Point2d center, Size2d axes, int angle, int delta, std::vector& pts ) { CV_INSTRUMENT_REGION(); + CV_Assert(0 < delta && 
delta <= 180); float alpha, beta; int i; @@ -2360,7 +2361,9 @@ void cv::fillPoly(InputOutputArray _img, InputArrayOfArrays pts, CV_INSTRUMENT_REGION(); Mat img = _img.getMat(); - int i, ncontours = (int)pts.total(); + bool manyContours = pts.kind() == _InputArray::STD_VECTOR_VECTOR || + pts.kind() == _InputArray::STD_VECTOR_MAT; + int i, ncontours = manyContours ? (int)pts.total() : 1; if( ncontours == 0 ) return; AutoBuffer _ptsptr(ncontours); @@ -2370,7 +2373,7 @@ void cv::fillPoly(InputOutputArray _img, InputArrayOfArrays pts, for( i = 0; i < ncontours; i++ ) { - Mat p = pts.getMat(i); + Mat p = pts.getMat(manyContours ? i : -1); CV_Assert(p.checkVector(2, CV_32S) >= 0); ptsptr[i] = p.ptr(); npts[i] = p.rows*p.cols*p.channels()/2; diff --git a/modules/imgproc/test/test_drawing.cpp b/modules/imgproc/test/test_drawing.cpp index 2796d35ba8..fab2631041 100644 --- a/modules/imgproc/test/test_drawing.cpp +++ b/modules/imgproc/test/test_drawing.cpp @@ -593,4 +593,42 @@ TEST(Drawing, regression_16308) EXPECT_NE(0, (int)img.at(99, 50)); } +TEST(Drawing, fillpoly_circle) +{ + Mat img_c(640, 480, CV_8UC3, Scalar::all(0)); + Mat img_fp = img_c.clone(), img_fcp = img_c.clone(), img_fp3 = img_c.clone(); + + Point center1(img_c.cols/2, img_c.rows/2); + Point center2(img_c.cols/10, img_c.rows*3/4); + Point center3 = Point(img_c.cols, img_c.rows) - center2; + int radius = img_c.rows/4; + int radius_small = img_c.cols/15; + Scalar color(0, 0, 255); + + circle(img_c, center1, radius, color, -1); + + // check that circle, fillConvexPoly and fillPoly + // give almost the same result when asked to draw a single circle + vector vtx; + ellipse2Poly(center1, Size(radius, radius), 0, 0, 360, 1, vtx); + fillConvexPoly(img_fcp, vtx, color); + fillPoly(img_fp, vtx, color); + double diff_fp = cv::norm(img_c, img_fp, NORM_L1)/(255*radius*2*CV_PI); + double diff_fcp = cv::norm(img_c, img_fcp, NORM_L1)/(255*radius*2*CV_PI); + EXPECT_LT(diff_fp, 1.); + EXPECT_LT(diff_fcp, 1.); + + // check that
fillPoly can draw 3 disjoint circles at once + circle(img_c, center2, radius_small, color, -1); + circle(img_c, center3, radius_small, color, -1); + + vector > vtx3(3); + vtx3[0] = vtx; + ellipse2Poly(center2, Size(radius_small, radius_small), 0, 0, 360, 1, vtx3[1]); + ellipse2Poly(center3, Size(radius_small, radius_small), 0, 0, 360, 1, vtx3[2]); + fillPoly(img_fp3, vtx3, color); + double diff_fp3 = cv::norm(img_c, img_fp3, NORM_L1)/(255*(radius+radius_small*2)*2*CV_PI); + EXPECT_LT(diff_fp3, 1.); +} + }} // namespace