Merge remote-tracking branch 'upstream/3.4' into merge-3.4

pull/17418/head
Alexander Alekhin 5 years ago
commit c3e8a82c9c
25 changed files:
  1. doc/mymath.sty (2 lines changed)
  2. modules/core/include/opencv2/core/eigen.hpp (106 lines changed)
  3. modules/core/src/system.cpp (2 lines changed)
  4. modules/core/test/test_mat.cpp (80 lines changed)
  5. modules/dnn/perf/perf_net.cpp (29 lines changed)
  6. modules/dnn/src/dnn.cpp (34 lines changed)
  7. modules/dnn/src/dnn_common.hpp (34 lines changed)
  8. modules/dnn/src/ie_ngraph.cpp (34 lines changed)
  9. modules/dnn/src/ie_ngraph.hpp (6 lines changed)
  10. modules/dnn/src/layers/normalize_bbox_layer.cpp (2 lines changed)
  11. modules/dnn/src/precomp.hpp (9 lines changed)
  12. modules/dnn/src/tensorflow/tf_graph_simplifier.cpp (16 lines changed)
  13. modules/dnn/src/tensorflow/tf_importer.cpp (28 lines changed)
  14. modules/dnn/test/test_darknet_importer.cpp (109 lines changed)
  15. modules/dnn/test/test_tf_importer.cpp (38 lines changed)
  16. modules/imgproc/include/opencv2/imgproc.hpp (2 lines changed)
  17. modules/imgproc/src/drawing.cpp (7 lines changed)
  18. modules/imgproc/test/test_drawing.cpp (38 lines changed)
  19. modules/videoio/src/cap_msmf.cpp (11 lines changed)
  20. samples/dnn/mask_rcnn.py (2 lines changed)
  21. samples/dnn/object_detection.cpp (56 lines changed)
  22. samples/dnn/object_detection.py (23 lines changed)
  23. samples/dnn/segmentation.py (4 lines changed)
  24. samples/dnn/tf_text_graph_common.py (2 lines changed)
  25. samples/dnn/tf_text_graph_efficientdet.py (236 lines changed)

@@ -28,7 +28,7 @@
#3 & \mbox{#4}\\
#5 & \mbox{#6}\\
\end{array} \right.}
\newcommand{\forkthree}[8]{
\newcommand{\forkfour}[8]{
\left\{
\begin{array}{l l}
#1 & \mbox{#2}\\

@@ -47,6 +47,11 @@
#include "opencv2/core.hpp"
#if EIGEN_WORLD_VERSION == 3 && EIGEN_MAJOR_VERSION >= 3
#include <unsupported/Eigen/CXX11/Tensor>
#define OPENCV_EIGEN_TENSOR_SUPPORT
#endif // EIGEN_WORLD_VERSION == 3 && EIGEN_MAJOR_VERSION >= 3
#if defined _MSC_VER && _MSC_VER >= 1200
#pragma warning( disable: 4714 ) //__forceinline is not inlined
#pragma warning( disable: 4127 ) //conditional expression is constant
@@ -59,6 +64,107 @@ namespace cv
//! @addtogroup core_eigen
//! @{
#ifdef OPENCV_EIGEN_TENSOR_SUPPORT
/** @brief Converts an Eigen::Tensor to a cv::Mat.
The method converts an Eigen::Tensor with shape (H x W x C) to a cv::Mat where:
H = number of rows
W = number of columns
C = number of channels
Usage:
\code
Eigen::Tensor<float, 3, Eigen::RowMajor> a_tensor(...);
// populate tensor with values
Mat a_mat;
eigen2cv(a_tensor, a_mat);
\endcode
*/
template <typename _Tp, int _layout> static inline
void eigen2cv( const Eigen::Tensor<_Tp, 3, _layout> &src, OutputArray dst )
{
if( !(_layout & Eigen::RowMajorBit) )
{
const std::array<int, 3> shuffle{2, 1, 0};
Eigen::Tensor<_Tp, 3, !_layout> row_major_tensor = src.swap_layout().shuffle(shuffle);
Mat _src(src.dimension(0), src.dimension(1), CV_MAKETYPE(DataType<_Tp>::type, src.dimension(2)), row_major_tensor.data());
_src.copyTo(dst);
}
else
{
Mat _src(src.dimension(0), src.dimension(1), CV_MAKETYPE(DataType<_Tp>::type, src.dimension(2)), (void *)src.data());
_src.copyTo(dst);
}
}
/** @brief Converts a cv::Mat to an Eigen::Tensor.
The method converts a cv::Mat to an Eigen Tensor with shape (H x W x C) where:
H = number of rows
W = number of columns
C = number of channels
Usage:
\code
Mat a_mat(...);
// populate Mat with values
Eigen::Tensor<float, 3, Eigen::RowMajor> a_tensor(...);
cv2eigen(a_mat, a_tensor);
\endcode
*/
template <typename _Tp, int _layout> static inline
void cv2eigen( const Mat &src, Eigen::Tensor<_Tp, 3, _layout> &dst )
{
if( !(_layout & Eigen::RowMajorBit) )
{
Eigen::Tensor<_Tp, 3, !_layout> row_major_tensor(src.rows, src.cols, src.channels());
Mat _dst(src.rows, src.cols, CV_MAKETYPE(DataType<_Tp>::type, src.channels()), row_major_tensor.data());
if (src.type() == _dst.type())
src.copyTo(_dst);
else
src.convertTo(_dst, _dst.type());
const std::array<int, 3> shuffle{2, 1, 0};
dst = row_major_tensor.swap_layout().shuffle(shuffle);
}
else
{
dst.resize(src.rows, src.cols, src.channels());
Mat _dst(src.rows, src.cols, CV_MAKETYPE(DataType<_Tp>::type, src.channels()), dst.data());
if (src.type() == _dst.type())
src.copyTo(_dst);
else
src.convertTo(_dst, _dst.type());
}
}
/** @brief Maps cv::Mat data to an Eigen::TensorMap.
The method wraps an existing Mat data array with an Eigen TensorMap of shape (H x W x C) where:
H = number of rows
W = number of columns
C = number of channels
Explicit instantiation of the return type is required.
@note Caller should be aware of the lifetime of the cv::Mat instance and take appropriate safety measures.
The cv::Mat instance will retain ownership of the data and the Eigen::TensorMap will lose access when the cv::Mat data is deallocated.
The example below initializes a cv::Mat and produces an Eigen::TensorMap:
\code
float arr[] = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11};
Mat a_mat(2, 2, CV_32FC3, arr);
Eigen::TensorMap<Eigen::Tensor<float, 3, Eigen::RowMajor>> a_tensormap = cv2eigen_tensormap<float>(a_mat);
\endcode
*/
template <typename _Tp> static inline
Eigen::TensorMap<Eigen::Tensor<_Tp, 3, Eigen::RowMajor>> cv2eigen_tensormap(const cv::InputArray &src)
{
Mat mat = src.getMat();
CV_CheckTypeEQ(mat.type(), CV_MAKETYPE(traits::Type<_Tp>::value, mat.channels()), "");
return Eigen::TensorMap<Eigen::Tensor<_Tp, 3, Eigen::RowMajor>>((_Tp *)mat.data, mat.rows, mat.cols, mat.channels());
}
#endif // OPENCV_EIGEN_TENSOR_SUPPORT
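The three conversions above compose into a full Mat/tensor round trip. A minimal sketch, assuming OpenCV is built against Eigen 3.3+ so that OPENCV_EIGEN_TENSOR_SUPPORT is defined:

#include <iostream>
#include <opencv2/core.hpp>
#include <opencv2/core/eigen.hpp>

int main()
{
    // 2x3 image with 3 float channels
    cv::Mat mat(2, 3, CV_32FC3, cv::Scalar(1, 2, 3));

    // Mat -> Tensor (copies data; handles row- and column-major layouts)
    Eigen::Tensor<float, 3, Eigen::RowMajor> tensor;
    cv::cv2eigen(mat, tensor);

    // Tensor -> Mat (copies data back)
    cv::Mat roundTrip;
    cv::eigen2cv(tensor, roundTrip);
    std::cout << cv::norm(mat, roundTrip, cv::NORM_INF) << std::endl; // prints 0

    // Mat -> TensorMap (zero-copy view; valid only while `mat` owns its data)
    auto view = cv::cv2eigen_tensormap<float>(mat);
    std::cout << view(0, 0, 2) << std::endl; // channel 2 of pixel (0,0): prints 3
    return 0;
}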
template<typename _Tp, int _rows, int _cols, int _options, int _maxRows, int _maxCols> static inline
void eigen2cv( const Eigen::Matrix<_Tp, _rows, _cols, _options, _maxRows, _maxCols>& src, OutputArray dst )
{

@@ -1888,7 +1888,7 @@ inline size_t parseOption(const std::string &value)
}
cv::String valueStr = value.substr(0, pos);
cv::String suffixStr = value.substr(pos, value.length() - pos);
int v = atoi(valueStr.c_str());
size_t v = (size_t)std::stoull(valueStr);
if (suffixStr.length() == 0)
return v;
else if (suffixStr == "MB" || suffixStr == "Mb" || suffixStr == "mb")
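The switch from atoi() to std::stoull() matters because these options hold byte counts that can exceed INT_MAX. A standalone sketch of the same suffix-aware pattern; parseSizeOption is a hypothetical name, and the KB/MB branches mirror the surrounding function:

#include <cstddef>
#include <stdexcept>
#include <string>

// Parses strings like "512", "16kb", or "6MB" into a byte count.
// atoi() would overflow above INT_MAX; std::stoull() keeps the full size_t range.
static size_t parseSizeOption(const std::string& value)
{
    size_t pos = value.find_first_not_of("0123456789");
    std::string valueStr = value.substr(0, pos);
    std::string suffixStr = (pos == std::string::npos) ? std::string() : value.substr(pos);
    size_t v = (size_t)std::stoull(valueStr);
    if (suffixStr.empty())
        return v;
    if (suffixStr == "MB" || suffixStr == "Mb" || suffixStr == "mb")
        return v * 1024 * 1024;
    if (suffixStr == "KB" || suffixStr == "Kb" || suffixStr == "kb")
        return v * 1024;
    throw std::invalid_argument("Unknown size suffix: " + suffixStr);
}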

@@ -2074,6 +2074,86 @@ TEST(Core_Eigen, eigen2cv_check_Mat_type)
}
#endif // HAVE_EIGEN
#ifdef OPENCV_EIGEN_TENSOR_SUPPORT
TEST(Core_Eigen, cv2eigen_check_tensor_conversion)
{
Mat A(2, 3, CV_32FC3);
float value = 0;
for(int row=0; row<A.rows; row++)
for(int col=0; col<A.cols; col++)
for(int ch=0; ch<A.channels(); ch++)
A.at<Vec3f>(row,col)[ch] = value++;
Eigen::Tensor<float, 3, Eigen::RowMajor> row_tensor;
cv2eigen(A, row_tensor);
float* mat_ptr = (float*)A.data;
float* tensor_ptr = row_tensor.data();
for (int i=0; i< row_tensor.size(); i++)
ASSERT_FLOAT_EQ(mat_ptr[i], tensor_ptr[i]);
Eigen::Tensor<float, 3, Eigen::ColMajor> col_tensor;
cv2eigen(A, col_tensor);
value = 0;
for(int row=0; row<A.rows; row++)
for(int col=0; col<A.cols; col++)
for(int ch=0; ch<A.channels(); ch++)
ASSERT_FLOAT_EQ(value++, col_tensor(row,col,ch));
}
#endif // OPENCV_EIGEN_TENSOR_SUPPORT
#ifdef OPENCV_EIGEN_TENSOR_SUPPORT
TEST(Core_Eigen, eigen2cv_check_tensor_conversion)
{
Eigen::Tensor<float, 3, Eigen::RowMajor> row_tensor(2,3,3);
Eigen::Tensor<float, 3, Eigen::ColMajor> col_tensor(2,3,3);
float value = 0;
for(int row=0; row<row_tensor.dimension(0); row++)
for(int col=0; col<row_tensor.dimension(1); col++)
for(int ch=0; ch<row_tensor.dimension(2); ch++)
{
row_tensor(row,col,ch) = value;
col_tensor(row,col,ch) = value;
value++;
}
Mat A;
eigen2cv(row_tensor, A);
float* tensor_ptr = row_tensor.data();
float* mat_ptr = (float*)A.data;
for (int i=0; i< row_tensor.size(); i++)
ASSERT_FLOAT_EQ(tensor_ptr[i], mat_ptr[i]);
Mat B;
eigen2cv(col_tensor, B);
value = 0;
for(int row=0; row<B.rows; row++)
for(int col=0; col<B.cols; col++)
for(int ch=0; ch<B.channels(); ch++)
ASSERT_FLOAT_EQ(value++, B.at<Vec3f>(row,col)[ch]);
}
#endif // OPENCV_EIGEN_TENSOR_SUPPORT
#ifdef OPENCV_EIGEN_TENSOR_SUPPORT
TEST(Core_Eigen, cv2eigen_tensormap_check_tensormap_access)
{
float arr[] = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11};
Mat a_mat(2, 2, CV_32FC3, arr);
Eigen::TensorMap<Eigen::Tensor<float, 3, Eigen::RowMajor>> a_tensor = cv2eigen_tensormap<float>(a_mat);
for(int i=0; i<a_mat.rows; i++) {
for (int j=0; j<a_mat.cols; j++) {
for (int ch=0; ch<a_mat.channels(); ch++) {
ASSERT_FLOAT_EQ(a_mat.at<Vec3f>(i,j)[ch], a_tensor(i,j,ch));
ASSERT_EQ(&a_mat.at<Vec3f>(i,j)[ch], &a_tensor(i,j,ch));
}
}
}
}
#endif // OPENCV_EIGEN_TENSOR_SUPPORT
TEST(Mat, regression_12943) // memory usage: ~4.5 Gb
{
applyTestTag(CV_TEST_TAG_MEMORY_6GB);

@@ -197,9 +197,23 @@ PERF_TEST_P_(DNNTestNetwork, YOLOv3)
if (backend == DNN_BACKEND_HALIDE)
throw SkipTestException("");
Mat sample = imread(findDataFile("dnn/dog416.png"));
cvtColor(sample, sample, COLOR_BGR2RGB);
Mat inp;
sample.convertTo(inp, CV_32FC3);
processNet("dnn/yolov3.weights", "dnn/yolov3.cfg", "", inp / 255);
sample.convertTo(inp, CV_32FC3, 1.0f / 255, 0);
processNet("dnn/yolov3.weights", "dnn/yolov3.cfg", "", inp);
}
PERF_TEST_P_(DNNTestNetwork, YOLOv4)
{
if (backend == DNN_BACKEND_HALIDE)
throw SkipTestException("");
if (target == DNN_TARGET_MYRIAD)
throw SkipTestException("");
Mat sample = imread(findDataFile("dnn/dog416.png"));
cvtColor(sample, sample, COLOR_BGR2RGB);
Mat inp;
sample.convertTo(inp, CV_32FC3, 1.0f / 255, 0);
processNet("dnn/yolov4.weights", "dnn/yolov4.cfg", "", inp);
}
PERF_TEST_P_(DNNTestNetwork, EAST_text_detection)
@@ -235,6 +249,17 @@ PERF_TEST_P_(DNNTestNetwork, Inception_v2_Faster_RCNN)
Mat(cv::Size(800, 600), CV_32FC3));
}
PERF_TEST_P_(DNNTestNetwork, EfficientDet)
{
if (backend == DNN_BACKEND_HALIDE || target != DNN_TARGET_CPU)
throw SkipTestException("");
Mat sample = imread(findDataFile("dnn/dog416.png"));
resize(sample, sample, Size(512, 512));
Mat inp;
sample.convertTo(inp, CV_32FC3, 1.0/255);
processNet("dnn/efficientdet-d0.pb", "dnn/efficientdet-d0.pbtxt", "", inp);
}
INSTANTIATE_TEST_CASE_P(/*nothing*/, DNNTestNetwork, dnnBackendsAndTargets());
} // namespace

@@ -1141,17 +1141,26 @@ static Ptr<BackendWrapper> wrapMat(int backendId, int targetId, cv::Mat& m)
static int g_networkId = 0;
struct Net::Impl
detail::NetImplBase::NetImplBase()
: networkId(CV_XADD(&g_networkId, 1))
, networkDumpCounter(0)
, dumpLevel(DNN_NETWORK_DUMP)
{
// nothing
}
std::string detail::NetImplBase::getDumpFileNameBase()
{
std::string dumpFileNameBase = cv::format("ocv_dnn_net_%05d_%02d", networkId, networkDumpCounter++);
return dumpFileNameBase;
}
struct Net::Impl : public detail::NetImplBase
{
typedef std::map<int, LayerShapes> LayersShapesMap;
typedef std::map<int, LayerData> MapIdToLayerData;
const int networkId; // network global identifier
int networkDumpCounter; // dump counter
Impl()
: networkId(CV_XADD(&g_networkId, 1))
, networkDumpCounter(0)
{
//allocate fake net input layer
netInputLayer = Ptr<DataLayer>(new DataLayer());
@@ -1366,7 +1375,7 @@ struct Net::Impl
{
CV_TRACE_FUNCTION();
if (DNN_NETWORK_DUMP > 0 && networkDumpCounter == 0)
if (dumpLevel && networkDumpCounter == 0)
{
dumpNetworkToFile();
}
@@ -1470,7 +1479,7 @@ struct Net::Impl
netWasAllocated = true;
if (DNN_NETWORK_DUMP > 0)
if (dumpLevel)
{
dumpNetworkToFile();
}
@@ -2178,7 +2187,7 @@ struct Net::Impl
}
if (net.empty()) {
net = Ptr<InfEngineNgraphNet>(new InfEngineNgraphNet());
net = Ptr<InfEngineNgraphNet>(new InfEngineNgraphNet(*this));
}
if (!fused) {
@@ -2222,7 +2231,7 @@ struct Net::Impl
}
}
else {
net = Ptr<InfEngineNgraphNet>(new InfEngineNgraphNet());
net = Ptr<InfEngineNgraphNet>(new InfEngineNgraphNet(*this));
}
if (!fused)
@@ -3406,7 +3415,8 @@ struct Net::Impl
void dumpNetworkToFile()
{
#ifndef OPENCV_DNN_DISABLE_NETWORK_AUTO_DUMP
String dumpFileName = cv::format("ocv_dnn_net_%05d_%02d.dot", networkId, networkDumpCounter++);
string dumpFileNameBase = getDumpFileNameBase();
string dumpFileName = dumpFileNameBase + ".dot";
try
{
string dumpStr = dump();
@@ -3465,7 +3475,7 @@ Net Net::Impl::createNetworkFromModelOptimizer(InferenceEngine::CNNNetwork& ieNe
{
auto fake_node = std::make_shared<ngraph::op::Parameter>(ngraph::element::f32, ngraph::Shape{});
Ptr<InfEngineNgraphNode> backendNodeNGraph(new InfEngineNgraphNode(fake_node));
backendNodeNGraph->net = Ptr<InfEngineNgraphNet>(new InfEngineNgraphNet(ieNet));
backendNodeNGraph->net = Ptr<InfEngineNgraphNet>(new InfEngineNgraphNet(*(cvNet.impl), ieNet));
backendNode = backendNodeNGraph;
}
else

@@ -0,0 +1,34 @@
// This file is part of OpenCV project.
// It is subject to the license terms in the LICENSE file found in the top-level directory
// of this distribution and at http://opencv.org/license.html.
#ifndef __OPENCV_DNN_COMMON_HPP__
#define __OPENCV_DNN_COMMON_HPP__
#include <opencv2/dnn.hpp>
namespace cv { namespace dnn {
CV__DNN_INLINE_NS_BEGIN
#define IS_DNN_OPENCL_TARGET(id) (id == DNN_TARGET_OPENCL || id == DNN_TARGET_OPENCL_FP16)
Mutex& getInitializationMutex();
void initializeLayerFactory();
namespace detail {
struct NetImplBase
{
const int networkId; // network global identifier
int networkDumpCounter; // dump counter
int dumpLevel; // level of information dumps (initialized through OPENCV_DNN_NETWORK_DUMP parameter)
NetImplBase();
std::string getDumpFileNameBase();
};
} // namespace detail
CV__DNN_INLINE_NS_END
}} // namespace
#endif // __OPENCV_DNN_COMMON_HPP__

@@ -6,6 +6,9 @@
// Third party copyrights are property of their respective owners.
#include "precomp.hpp"
#include <fstream>
#include "ie_ngraph.hpp"
#include <opencv2/dnn/shape_utils.hpp>
@@ -22,6 +25,8 @@ namespace cv { namespace dnn {
#ifdef HAVE_DNN_NGRAPH
static bool DNN_IE_SERIALIZE = utils::getConfigurationParameterBool("OPENCV_DNN_IE_SERIALIZE", false);
// For networks whose input layer has an empty name, IE generates a name like id[some_number].
// OpenCV lets users pass an empty input name; to prevent unexpected naming,
// we use a predefined name instead.
@@ -295,13 +300,16 @@ void InfEngineNgraphNode::setName(const std::string& name) {
node->set_friendly_name(name);
}
InfEngineNgraphNet::InfEngineNgraphNet()
InfEngineNgraphNet::InfEngineNgraphNet(detail::NetImplBase& netImpl)
: netImpl_(netImpl)
{
hasNetOwner = false;
device_name = "CPU";
}
InfEngineNgraphNet::InfEngineNgraphNet(InferenceEngine::CNNNetwork& net) : cnn(net)
InfEngineNgraphNet::InfEngineNgraphNet(detail::NetImplBase& netImpl, InferenceEngine::CNNNetwork& net)
: netImpl_(netImpl)
, cnn(net)
{
hasNetOwner = true;
device_name = "CPU";
@@ -440,9 +448,27 @@ void InfEngineNgraphNet::init(Target targetId)
ngraph_function->validate_nodes_and_infer_types();
}
cnn = InferenceEngine::CNNNetwork(ngraph_function);
#ifdef _DEBUG // TODO
//cnn.serialize("/tmp/cnn.xml", "/tmp/cnn.bin");
if (DNN_IE_SERIALIZE)
{
#ifndef OPENCV_DNN_DISABLE_NETWORK_AUTO_DUMP
std::string dumpFileNameBase = netImpl_.getDumpFileNameBase();
try
{
cnn.serialize(dumpFileNameBase + "_ngraph.xml", dumpFileNameBase + "_ngraph.bin");
}
catch (const std::exception& e)
{
std::ofstream out((dumpFileNameBase + "_ngraph.error").c_str(), std::ios::out);
out << "Exception: " << e.what() << std::endl;
}
catch (...)
{
std::ofstream out((dumpFileNameBase + "_ngraph.error").c_str(), std::ios::out);
out << "Can't dump: unknown exception" << std::endl;
}
#endif
}
}
switch (targetId)

@@ -34,8 +34,8 @@ class InfEngineNgraphNode;
class InfEngineNgraphNet
{
public:
InfEngineNgraphNet();
InfEngineNgraphNet(InferenceEngine::CNNNetwork& net);
InfEngineNgraphNet(detail::NetImplBase& netImpl);
InfEngineNgraphNet(detail::NetImplBase& netImpl, InferenceEngine::CNNNetwork& net);
void addOutput(const std::string& name);
@@ -55,6 +55,8 @@ public:
void reset();
private:
detail::NetImplBase& netImpl_;
void release();
int getNumComponents();
void dfs(std::shared_ptr<ngraph::Node>& node, std::vector<std::shared_ptr<ngraph::Node>>& comp,

@@ -354,7 +354,7 @@ public:
weight = std::make_shared<ngraph::op::Constant>(
ngraph::element::f32, ngraph::Shape(shape), blobs[0].data);
}
auto mul = std::make_shared<ngraph::op::v1::Multiply>(norm, weight, ngraph::op::AutoBroadcastType::NUMPY);
auto mul = std::make_shared<ngraph::op::v0::Multiply>(norm, weight, ngraph::op::AutoBroadcastType::NUMPY);
return Ptr<BackendNode>(new InfEngineNgraphNode(mul));
}
#endif // HAVE_DNN_NGRAPH

@@ -73,11 +73,4 @@
#include <opencv2/dnn.hpp>
#include <opencv2/dnn/all_layers.hpp>
namespace cv { namespace dnn {
CV__DNN_INLINE_NS_BEGIN
#define IS_DNN_OPENCL_TARGET(id) (id == DNN_TARGET_OPENCL || id == DNN_TARGET_OPENCL_FP16)
Mutex& getInitializationMutex();
void initializeLayerFactory();
CV__DNN_INLINE_NS_END
}} // namespace
#include "dnn_common.hpp"

@@ -725,6 +725,21 @@ private:
bool negativeScales;
};
class ClipByValueSubgraph : public TFSubgraph
{
public:
ClipByValueSubgraph()
{
int input = addNodeToMatch("");
int maxValue = addNodeToMatch("Const");
int minimum = addNodeToMatch("Minimum", input, maxValue);
int minValue = addNodeToMatch("Const");
addNodeToMatch("Maximum", minimum, minValue);
setFusedNode("ClipByValue", input, minValue, maxValue);
}
};
void simplifySubgraphs(tensorflow::GraphDef& net)
{
std::vector<Ptr<Subgraph> > subgraphs;
@@ -749,6 +764,7 @@ void simplifySubgraphs(tensorflow::GraphDef& net)
subgraphs.push_back(Ptr<Subgraph>(new PReLUSubgraph(false)));
subgraphs.push_back(Ptr<Subgraph>(new FlattenProdSubgraph()));
subgraphs.push_back(Ptr<Subgraph>(new ResizeBilinearSubgraphDown()));
subgraphs.push_back(Ptr<Subgraph>(new ClipByValueSubgraph()));
for (int i = 0; i < net.node_size(); ++i)
{

@@ -1542,22 +1542,32 @@ void TFImporter::populateNet(Net dstNet)
connect(layer_id, dstNet, parsePin(layer.input(0)), id, 0);
}
else if (type == "Mul")
else if (type == "Mul" || type == "RealDiv")
{
bool haveConst = false;
for(int ii = 0; !haveConst && ii < layer.input_size(); ++ii)
int constId = -1;
for(int ii = 0; ii < layer.input_size(); ++ii)
{
Pin input = parsePin(layer.input(ii));
haveConst = value_id.find(input.name) != value_id.end();
if (value_id.find(input.name) != value_id.end())
{
constId = ii;
break;
}
}
CV_Assert(!haveConst || layer.input_size() == 2);
CV_Assert((constId != -1) || (layer.input_size() == 2));
if (haveConst)
if (constId != -1)
{
// Multiplication by constant.
CV_Assert(layer.input_size() == 2);
Mat scaleMat = getTensorContent(getConstBlob(layer, value_id));
CV_Assert(scaleMat.type() == CV_32FC1);
if (type == "RealDiv")
{
if (constId == 0)
CV_Error(Error::StsNotImplemented, "Division of constant over variable");
scaleMat = 1.0f / scaleMat;
}
int id;
if (scaleMat.total() == 1) // is a scalar.
@@ -1659,11 +1669,15 @@ void TFImporter::populateNet(Net dstNet)
int id;
if (equalInpShapes || netInputShapes.empty())
{
layerParams.set("operation", "prod");
layerParams.set("operation", type == "RealDiv" ? "div" : "prod");
id = dstNet.addLayer(name, "Eltwise", layerParams);
}
else
{
if (type == "RealDiv")
CV_Error(Error::StsNotImplemented, "Division of non equal tensors");
id = dstNet.addLayer(name, "Scale", layerParams);
}
layer_id[name] = id;

@@ -245,6 +245,13 @@ public:
nms_boxes.push_back(box);
nms_confidences.push_back(conf);
nms_classIds.push_back(class_id);
#if 0 // use to update test reference data
std::cout << b << ", " << class_id << ", " << conf << "f, "
<< box.x << "f, " << box.y << "f, "
<< box.x + box.width << "f, " << box.y + box.height << "f,"
<< std::endl;
#endif
}
normAssertDetections(refClassIds[b], refConfidences[b], refBoxes[b], nms_classIds,
@@ -413,6 +420,9 @@ TEST_P(Test_Darknet_nets_async, Accuracy)
std::string prefix = get<0>(GetParam());
if (targetId == DNN_TARGET_MYRIAD && prefix == "yolov4") // NC_OUT_OF_MEMORY
applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_MYRIAD, CV_TEST_TAG_DNN_SKIP_IE_VERSION);
if (backendId != DNN_BACKEND_INFERENCE_ENGINE_NN_BUILDER_2019 && backendId != DNN_BACKEND_INFERENCE_ENGINE_NGRAPH)
throw SkipTestException("No support for async forward");
@@ -457,7 +467,7 @@ TEST_P(Test_Darknet_nets_async, Accuracy)
}
INSTANTIATE_TEST_CASE_P(/**/, Test_Darknet_nets_async, Combine(
Values("yolo-voc", "tiny-yolo-voc", "yolov3"),
Values("yolo-voc", "tiny-yolo-voc", "yolov3", "yolov4"),
dnnBackendsAndTargets()
));
@@ -471,15 +481,21 @@ TEST_P(Test_Darknet_nets, YOLOv3)
applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_MYRIAD, CV_TEST_TAG_DNN_SKIP_IE_NGRAPH);
// batchId, classId, confidence, left, top, right, bottom
Mat ref = (Mat_<float>(9, 7) << 0, 7, 0.952983f, 0.614622f, 0.150257f, 0.901369f, 0.289251f, // a truck
0, 1, 0.987908f, 0.150913f, 0.221933f, 0.742255f, 0.74626f, // a bicycle
0, 16, 0.998836f, 0.160024f, 0.389964f, 0.417885f, 0.943716f, // a dog (COCO)
1, 9, 0.384801f, 0.659824f, 0.372389f, 0.673926f, 0.429412f, // a traffic light
1, 9, 0.733283f, 0.376029f, 0.315694f, 0.401776f, 0.395165f, // a traffic light
1, 9, 0.785352f, 0.665503f, 0.373543f, 0.688893f, 0.439245f, // a traffic light
1, 0, 0.980052f, 0.195856f, 0.378454f, 0.258626f, 0.629258f, // a person
1, 2, 0.989633f, 0.450719f, 0.463353f, 0.496305f, 0.522258f, // a car
1, 2, 0.997412f, 0.647584f, 0.459939f, 0.821038f, 0.663947f); // a car
const int N0 = 3;
const int N1 = 6;
static const float ref_[/* (N0 + N1) * 7 */] = {
0, 16, 0.998836f, 0.160024f, 0.389964f, 0.417885f, 0.943716f,
0, 1, 0.987908f, 0.150913f, 0.221933f, 0.742255f, 0.746261f,
0, 7, 0.952983f, 0.614621f, 0.150257f, 0.901368f, 0.289251f,
1, 2, 0.997412f, 0.647584f, 0.459939f, 0.821037f, 0.663947f,
1, 2, 0.989633f, 0.450719f, 0.463353f, 0.496306f, 0.522258f,
1, 0, 0.980053f, 0.195856f, 0.378454f, 0.258626f, 0.629257f,
1, 9, 0.785341f, 0.665503f, 0.373543f, 0.688893f, 0.439244f,
1, 9, 0.733275f, 0.376029f, 0.315694f, 0.401776f, 0.395165f,
1, 9, 0.384815f, 0.659824f, 0.372389f, 0.673927f, 0.429412f,
};
Mat ref(N0 + N1, 7, CV_32FC1, (void*)ref_);
double scoreDiff = 8e-5, iouDiff = 3e-4;
if (target == DNN_TARGET_OPENCL_FP16 || target == DNN_TARGET_MYRIAD)
@@ -506,8 +522,8 @@ TEST_P(Test_Darknet_nets, YOLOv3)
#endif
{
SCOPED_TRACE("batch size 1");
testDarknetModel(config_file, weights_file, ref.rowRange(0, 3), scoreDiff, iouDiff);
SCOPED_TRACE("batch size 1");
testDarknetModel(config_file, weights_file, ref.rowRange(0, N0), scoreDiff, iouDiff);
}
#if defined(INF_ENGINE_RELEASE)
@@ -529,6 +545,75 @@ TEST_P(Test_Darknet_nets, YOLOv3)
}
}
TEST_P(Test_Darknet_nets, YOLOv4)
{
applyTestTag(CV_TEST_TAG_LONG, (target == DNN_TARGET_CPU ? CV_TEST_TAG_MEMORY_1GB : CV_TEST_TAG_MEMORY_2GB));
#if defined(INF_ENGINE_RELEASE)
if (target == DNN_TARGET_MYRIAD) // NC_OUT_OF_MEMORY
applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_MYRIAD, CV_TEST_TAG_DNN_SKIP_IE_VERSION);
#endif
// batchId, classId, confidence, left, top, right, bottom
const int N0 = 3;
const int N1 = 7;
static const float ref_[/* (N0 + N1) * 7 */] = {
0, 16, 0.992194f, 0.172375f, 0.402458f, 0.403918f, 0.932801f,
0, 1, 0.988326f, 0.166708f, 0.228236f, 0.737208f, 0.735803f,
0, 7, 0.94639f, 0.602523f, 0.130399f, 0.901623f, 0.298452f,
1, 2, 0.99761f, 0.646556f, 0.45985f, 0.816041f, 0.659067f,
1, 0, 0.988913f, 0.201726f, 0.360282f, 0.266181f, 0.631728f,
1, 2, 0.98233f, 0.452007f, 0.462217f, 0.495612f, 0.521687f,
1, 9, 0.919195f, 0.374642f, 0.316524f, 0.398126f, 0.393714f,
1, 9, 0.856303f, 0.666842f, 0.372215f, 0.685539f, 0.44141f,
1, 9, 0.313516f, 0.656791f, 0.374734f, 0.671959f, 0.438371f,
1, 9, 0.256625f, 0.940232f, 0.326931f, 0.967586f, 0.374002f,
};
Mat ref(N0 + N1, 7, CV_32FC1, (void*)ref_);
double scoreDiff = (target == DNN_TARGET_OPENCL_FP16 || target == DNN_TARGET_MYRIAD) ? 0.006 : 8e-5;
double iouDiff = (target == DNN_TARGET_OPENCL_FP16 || target == DNN_TARGET_MYRIAD) ? 0.042 : 3e-4;
std::string config_file = "yolov4.cfg";
std::string weights_file = "yolov4.weights";
#if defined(INF_ENGINE_RELEASE)
if ((backend == DNN_BACKEND_INFERENCE_ENGINE_NN_BUILDER_2019 ||
backend == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH) && target == DNN_TARGET_MYRIAD &&
getInferenceEngineVPUType() == CV_DNN_INFERENCE_ENGINE_VPU_TYPE_MYRIAD_X)
{
scoreDiff = 0.04;
iouDiff = 0.2;
}
#endif
{
SCOPED_TRACE("batch size 1");
testDarknetModel(config_file, weights_file, ref.rowRange(0, N0), scoreDiff, iouDiff);
}
{
SCOPED_TRACE("batch size 2");
#if defined(INF_ENGINE_RELEASE)
if (backend == DNN_BACKEND_INFERENCE_ENGINE_NN_BUILDER_2019)
{
if (target == DNN_TARGET_OPENCL)
applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_OPENCL, CV_TEST_TAG_DNN_SKIP_IE_VERSION);
else if (target == DNN_TARGET_OPENCL_FP16 && INF_ENGINE_VER_MAJOR_LE(202010000))
applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_OPENCL_FP16, CV_TEST_TAG_DNN_SKIP_IE_VERSION);
else if (target == DNN_TARGET_MYRIAD &&
getInferenceEngineVPUType() == CV_DNN_INFERENCE_ENGINE_VPU_TYPE_MYRIAD_X)
applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_MYRIAD_X);
}
#endif
testDarknetModel(config_file, weights_file, ref, scoreDiff, iouDiff);
}
}
INSTANTIATE_TEST_CASE_P(/**/, Test_Darknet_nets, dnnBackendsAndTargets());
TEST_P(Test_Darknet_layers, shortcut)

@@ -1056,6 +1056,11 @@ TEST_P(Test_TensorFlow_layers, tf2_dense)
runTensorFlowNet("tf2_dense");
}
TEST_P(Test_TensorFlow_layers, clip_by_value)
{
runTensorFlowNet("clip_by_value");
}
TEST_P(Test_TensorFlow_layers, tf2_prelu)
{
if (backend == DNN_BACKEND_INFERENCE_ENGINE_NN_BUILDER_2019)
@@ -1205,4 +1210,37 @@ TEST_P(Test_TensorFlow_nets, Mask_RCNN)
expectNoFallbacks(net);
}
TEST_P(Test_TensorFlow_nets, EfficientDet)
{
if (target != DNN_TARGET_CPU)
{
if (target == DNN_TARGET_OPENCL_FP16) applyTestTag(CV_TEST_TAG_DNN_SKIP_OPENCL_FP16);
if (target == DNN_TARGET_OPENCL) applyTestTag(CV_TEST_TAG_DNN_SKIP_OPENCL);
if (target == DNN_TARGET_MYRIAD) applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_MYRIAD);
}
checkBackend();
std::string proto = findDataFile("dnn/efficientdet-d0.pbtxt");
std::string model = findDataFile("dnn/efficientdet-d0.pb");
Net net = readNetFromTensorflow(model, proto);
Mat img = imread(findDataFile("dnn/dog416.png"));
Mat blob = blobFromImage(img, 1.0/255, Size(512, 512), Scalar(123.675, 116.28, 103.53));
net.setPreferableBackend(backend);
net.setPreferableTarget(target);
net.setInput(blob);
// Output has shape 1x1xNx7 where N is the number of detections.
// Every detection is a vector of values [id, classId, confidence, left, top, right, bottom]
// (see the parsing sketch after this test).
Mat out = net.forward();
// Reference values come from the corresponding TensorFlow model test.
Mat ref = (Mat_<float>(3, 7) << 0, 1, 0.8437444, 0.153996080160141, 0.20534580945968628, 0.7463544607162476, 0.7414066195487976,
0, 17, 0.8245924, 0.16657517850399017, 0.3996818959712982, 0.4111558794975281, 0.9306337833404541,
0, 7, 0.8039304, 0.6118435263633728, 0.13175517320632935, 0.9065558314323425, 0.2943994700908661);
double scoreDiff = (target == DNN_TARGET_OPENCL_FP16 || target == DNN_TARGET_MYRIAD) ? 4e-3 : 1e-5;
double iouDiff = (target == DNN_TARGET_OPENCL_FP16 || target == DNN_TARGET_MYRIAD) ? 2e-3 : 1e-4;
normAssertDetections(ref, out, "", 0.5, scoreDiff, iouDiff);
expectNoFallbacksFromIE(net);
}
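For reference, the 1x1xNx7 layout noted in the test above can be consumed as sketched below; dumpDetections and its confThreshold parameter are illustrative, not part of the patch:

#include <cstdio>
#include <opencv2/core.hpp>

// out: the 1x1xNx7 blob returned by net.forward(); box coordinates are relative (0..1)
static void dumpDetections(const cv::Mat& out, float confThreshold)
{
    const float* data = (const float*)out.data;
    for (size_t i = 0; i + 6 < out.total(); i += 7)
    {
        float confidence = data[i + 2];
        if (confidence < confThreshold)
            continue;
        int classId = (int)data[i + 1];
        std::printf("class=%d conf=%.3f box=[%.3f %.3f %.3f %.3f]\n",
                    classId, confidence,
                    data[i + 3], data[i + 4], data[i + 5], data[i + 6]);
    }
}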
}

@@ -1545,7 +1545,7 @@ The function smooths an image using the kernel:
\f[\texttt{K} = \frac{1}{\texttt{ksize.width*ksize.height}} \begin{bmatrix} 1 & 1 & 1 & \cdots & 1 & 1 \\ 1 & 1 & 1 & \cdots & 1 & 1 \\ \hdotsfor{6} \\ 1 & 1 & 1 & \cdots & 1 & 1 \\ \end{bmatrix}\f]
The call `blur(src, dst, ksize, anchor, borderType)` is equivalent to `boxFilter(src, dst, src.type(),
The call `blur(src, dst, ksize, anchor, borderType)` is equivalent to `boxFilter(src, dst, src.type(), ksize,
anchor, true, borderType)`.
@param src input image; it can have any number of channels, which are processed independently, but
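The corrected equivalence is easy to sanity-check; a small sketch assuming the default anchor and border arguments spelled out in the docs:

#include <opencv2/core.hpp>
#include <opencv2/imgproc.hpp>

int main()
{
    cv::Mat src(64, 64, CV_8UC3), a, b;
    cv::randu(src, cv::Scalar::all(0), cv::Scalar::all(255));
    cv::Size ksize(5, 5);
    cv::Point anchor(-1, -1);
    cv::blur(src, a, ksize, anchor, cv::BORDER_DEFAULT);
    cv::boxFilter(src, b, src.type(), ksize, anchor, /*normalize=*/true, cv::BORDER_DEFAULT);
    CV_Assert(cv::norm(a, b, cv::NORM_INF) == 0); // identical outputs
    return 0;
}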

@@ -949,6 +949,7 @@ void ellipse2Poly( Point2d center, Size2d axes, int angle,
int delta, std::vector<Point2d>& pts )
{
CV_INSTRUMENT_REGION();
CV_Assert(0 < delta && delta <= 180);
float alpha, beta;
int i;
@@ -2378,7 +2379,9 @@ void cv::fillPoly(InputOutputArray img, InputArrayOfArrays pts,
{
CV_INSTRUMENT_REGION();
int i, ncontours = (int)pts.total();
bool manyContours = pts.kind() == _InputArray::STD_VECTOR_VECTOR ||
pts.kind() == _InputArray::STD_VECTOR_MAT;
int i, ncontours = manyContours ? (int)pts.total() : 1;
if( ncontours == 0 )
return;
AutoBuffer<Point*> _ptsptr(ncontours);
@@ -2388,7 +2391,7 @@ void cv::fillPoly(InputOutputArray img, InputArrayOfArrays pts,
for( i = 0; i < ncontours; i++ )
{
Mat p = pts.getMat(i);
Mat p = pts.getMat(manyContours ? i : -1);
CV_Assert(p.checkVector(2, CV_32S) >= 0);
ptsptr[i] = p.ptr<Point>();
npts[i] = p.rows*p.cols*p.channels()/2;

@@ -641,4 +641,42 @@ TEST(Drawing, regression_16308)
EXPECT_NE(0, (int)img.at<uchar>(99, 50));
}
TEST(Drawing, fillpoly_circle)
{
Mat img_c(640, 480, CV_8UC3, Scalar::all(0));
Mat img_fp = img_c.clone(), img_fcp = img_c.clone(), img_fp3 = img_c.clone();
Point center1(img_c.cols/2, img_c.rows/2);
Point center2(img_c.cols/10, img_c.rows*3/4);
Point center3 = Point(img_c.cols, img_c.rows) - center2;
int radius = img_c.rows/4;
int radius_small = img_c.cols/15;
Scalar color(0, 0, 255);
circle(img_c, center1, radius, color, -1);
// check that circle, fillConvexPoly and fillPoly
// give almost the same result when asked to draw a single circle
vector<Point> vtx;
ellipse2Poly(center1, Size(radius, radius), 0, 0, 360, 1, vtx);
fillConvexPoly(img_fcp, vtx, color);
fillPoly(img_fp, vtx, color);
double diff_fp = cv::norm(img_c, img_fp, NORM_L1)/(255*radius*2*CV_PI);
double diff_fcp = cv::norm(img_c, img_fcp, NORM_L1)/(255*radius*2*CV_PI);
EXPECT_LT(diff_fp, 1.);
EXPECT_LT(diff_fcp, 1.);
// check that fillPoly can draw 3 disjoint circles at once
circle(img_c, center2, radius_small, color, -1);
circle(img_c, center3, radius_small, color, -1);
vector<vector<Point> > vtx3(3);
vtx3[0] = vtx;
ellipse2Poly(center2, Size(radius_small, radius_small), 0, 0, 360, 1, vtx3[1]);
ellipse2Poly(center3, Size(radius_small, radius_small), 0, 0, 360, 1, vtx3[2]);
fillPoly(img_fp3, vtx3, color);
double diff_fp3 = cv::norm(img_c, img_fp3, NORM_L1)/(255*(radius+radius_small*2)*2*CV_PI);
EXPECT_LT(diff_fp3, 1.);
}
}} // namespace

@@ -492,12 +492,14 @@ public:
}
}
}
std::pair<MediaID, MediaType> findBest(const MediaType& newType)
std::pair<MediaID, MediaType> findBestVideoFormat(const MediaType& newType)
{
std::pair<MediaID, MediaType> best;
std::map<MediaID, MediaType>::const_iterator i = formats.begin();
for (; i != formats.end(); ++i)
{
if (i->second.majorType != MFMediaType_Video)
continue;
if (newType.isEmpty()) // file input - choose first returned media type
{
best = *i;
@@ -775,7 +777,12 @@ bool CvCapture_MSMF::configureOutput(MediaType newType, cv::uint32_t outFormat)
{
FormatStorage formats;
formats.read(videoFileSource.Get());
std::pair<FormatStorage::MediaID, MediaType> bestMatch = formats.findBest(newType);
std::pair<FormatStorage::MediaID, MediaType> bestMatch = formats.findBestVideoFormat(newType);
if (bestMatch.second.isEmpty())
{
CV_LOG_DEBUG(NULL, "Can not find video stream with requested parameters");
return false;
}
dwStreamIndex = bestMatch.first.stream;
nativeFormat = bestMatch.second;
MediaType newFormat = nativeFormat;

@@ -43,7 +43,7 @@ def showLegend(classes):
for i in range(len(classes)):
block = legend[i * blockHeight:(i + 1) * blockHeight]
block[:,:] = colors[i]
cv.putText(block, classes[i], (0, blockHeight/2), cv.FONT_HERSHEY_SIMPLEX, 0.5, (255, 255, 255))
cv.putText(block, classes[i], (0, blockHeight//2), cv.FONT_HERSHEY_SIMPLEX, 0.5, (255, 255, 255))
cv.namedWindow('Legend', cv.WINDOW_NORMAL)
cv.imshow('Legend', legend)

@@ -45,7 +45,7 @@ std::vector<std::string> classes;
inline void preprocess(const Mat& frame, Net& net, Size inpSize, float scale,
const Scalar& mean, bool swapRB);
void postprocess(Mat& frame, const std::vector<Mat>& out, Net& net);
void postprocess(Mat& frame, const std::vector<Mat>& out, Net& net, int backend);
void drawPred(int classId, float conf, int left, int top, int right, int bottom, Mat& frame);
@@ -148,7 +148,8 @@ int main(int argc, char** argv)
// Load a model.
Net net = readNet(modelPath, configPath, parser.get<String>("framework"));
net.setPreferableBackend(parser.get<int>("backend"));
int backend = parser.get<int>("backend");
net.setPreferableBackend(backend);
net.setPreferableTarget(parser.get<int>("target"));
std::vector<String> outNames = net.getUnconnectedOutLayersNames();
@@ -245,7 +246,7 @@ int main(int argc, char** argv)
std::vector<Mat> outs = predictionsQueue.get();
Mat frame = processedFramesQueue.get();
postprocess(frame, outs, net);
postprocess(frame, outs, net, backend);
if (predictionsQueue.counter > 1)
{
@@ -285,7 +286,7 @@ int main(int argc, char** argv)
std::vector<Mat> outs;
net.forward(outs, outNames);
postprocess(frame, outs, net);
postprocess(frame, outs, net, backend);
// Put efficiency information.
std::vector<double> layersTimes;
@@ -319,7 +320,7 @@ inline void preprocess(const Mat& frame, Net& net, Size inpSize, float scale,
}
}
void postprocess(Mat& frame, const std::vector<Mat>& outs, Net& net)
void postprocess(Mat& frame, const std::vector<Mat>& outs, Net& net, int backend)
{
static std::vector<int> outLayers = net.getUnconnectedOutLayers();
static std::string outLayerType = net.getLayer(outLayers[0])->type;
@@ -396,11 +397,48 @@ void postprocess(Mat& frame, const std::vector<Mat>& outs, Net& net)
else
CV_Error(Error::StsNotImplemented, "Unknown output layer type: " + outLayerType);
std::vector<int> indices;
NMSBoxes(boxes, confidences, confThreshold, nmsThreshold, indices);
for (size_t i = 0; i < indices.size(); ++i)
// NMS is used inside the Region layer only on DNN_BACKEND_OPENCV; for other backends we need NMS in the sample.
// NMS is also required if the number of outputs > 1.
if (outLayers.size() > 1 || (outLayerType == "Region" && backend != DNN_BACKEND_OPENCV))
{
std::map<int, std::vector<size_t> > class2indices;
for (size_t i = 0; i < classIds.size(); i++)
{
if (confidences[i] >= confThreshold)
{
class2indices[classIds[i]].push_back(i);
}
}
std::vector<Rect> nmsBoxes;
std::vector<float> nmsConfidences;
std::vector<int> nmsClassIds;
for (std::map<int, std::vector<size_t> >::iterator it = class2indices.begin(); it != class2indices.end(); ++it)
{
std::vector<Rect> localBoxes;
std::vector<float> localConfidences;
std::vector<size_t> classIndices = it->second;
for (size_t i = 0; i < classIndices.size(); i++)
{
localBoxes.push_back(boxes[classIndices[i]]);
localConfidences.push_back(confidences[classIndices[i]]);
}
std::vector<int> nmsIndices;
NMSBoxes(localBoxes, localConfidences, confThreshold, nmsThreshold, nmsIndices);
for (size_t i = 0; i < nmsIndices.size(); i++)
{
size_t idx = nmsIndices[i];
nmsBoxes.push_back(localBoxes[idx]);
nmsConfidences.push_back(localConfidences[idx]);
nmsClassIds.push_back(it->first);
}
}
boxes = nmsBoxes;
classIds = nmsClassIds;
confidences = nmsConfidences;
}
for (size_t idx = 0; idx < boxes.size(); ++idx)
{
int idx = indices[i];
Rect box = boxes[idx];
drawPred(classIds[idx], confidences[idx], box.x, box.y,
box.x + box.width, box.y + box.height, frame);

@@ -141,9 +141,6 @@ def postprocess(frame, outs):
# Network produces an output blob with shape NxC where N is the number of
# detected objects and C is the number of classes + 4, where the first 4
# numbers are [center_x, center_y, width, height]
classIds = []
confidences = []
boxes = []
for out in outs:
for detection in out:
scores = detection[5:]
@@ -163,9 +160,25 @@ def postprocess(frame, outs):
print('Unknown output layer type: ' + lastLayer.type)
exit()
indices = cv.dnn.NMSBoxes(boxes, confidences, confThreshold, nmsThreshold)
# NMS is used inside the Region layer only on DNN_BACKEND_OPENCV; for other backends we need NMS in the sample.
# NMS is also required if the number of outputs > 1.
if len(outNames) > 1 or lastLayer.type == 'Region' and args.backend != cv.dnn.DNN_BACKEND_OPENCV:
indices = []
classIds = np.array(classIds)
boxes = np.array(boxes)
confidences = np.array(confidences)
unique_classes = set(classIds)
for cl in unique_classes:
class_indices = np.where(classIds == cl)[0]
conf = confidences[class_indices]
box = boxes[class_indices].tolist()
nms_indices = cv.dnn.NMSBoxes(box, conf, confThreshold, nmsThreshold)
nms_indices = nms_indices[:, 0] if len(nms_indices) else []
indices.extend(class_indices[nms_indices])
else:
indices = np.arange(0, len(classIds))
for i in indices:
i = i[0]
box = boxes[i]
left = box[0]
top = box[1]

@@ -65,7 +65,7 @@ def showLegend(classes):
for i in range(len(classes)):
block = legend[i * blockHeight:(i + 1) * blockHeight]
block[:,:] = colors[i]
cv.putText(block, classes[i], (0, blockHeight/2), cv.FONT_HERSHEY_SIMPLEX, 0.5, (255, 255, 255))
cv.putText(block, classes[i], (0, blockHeight//2), cv.FONT_HERSHEY_SIMPLEX, 0.5, (255, 255, 255))
cv.namedWindow('Legend', cv.WINDOW_NORMAL)
cv.imshow('Legend', legend)
@@ -76,7 +76,7 @@ net = cv.dnn.readNet(args.model, args.config, args.framework)
net.setPreferableBackend(args.backend)
net.setPreferableTarget(args.target)
winName = 'Deep learning image classification in OpenCV'
winName = 'Deep learning semantic segmentation in OpenCV'
cv.namedWindow(winName, cv.WINDOW_NORMAL)
cap = cv.VideoCapture(args.input if args.input else 0)

@@ -269,7 +269,7 @@ def parseTextGraph(filePath):
def removeIdentity(graph_def):
identities = {}
for node in graph_def.node:
if node.op == 'Identity':
if node.op == 'Identity' or node.op == 'IdentityN':
identities[node.name] = node.input[0]
graph_def.node.remove(node)

@@ -0,0 +1,236 @@
# This file is part of OpenCV project.
# It is subject to the license terms in the LICENSE file found in the top-level directory
# of this distribution and at http://opencv.org/license.html.
#
# Copyright (C) 2020, Intel Corporation, all rights reserved.
# Third party copyrights are property of their respective owners.
#
# Use this script to get the text graph representation (.pbtxt) of an EfficientDet
# deep learning network trained with https://github.com/google/automl.
# Then you can import it together with the binary frozen graph (.pb) using the readNetFromTensorflow() function.
# See details and examples on the following wiki page: https://github.com/opencv/opencv/wiki/TensorFlow-Object-Detection-API
import argparse
import re
from math import sqrt
from tf_text_graph_common import *
class AnchorGenerator:
def __init__(self, min_level, aspect_ratios, num_scales, anchor_scale):
self.min_level = min_level
self.aspect_ratios = aspect_ratios
self.anchor_scale = anchor_scale
self.scales = [2**(float(s) / num_scales) for s in range(num_scales)]
def get(self, layer_id):
widths = []
heights = []
for s in self.scales:
for a in self.aspect_ratios:
base_anchor_size = 2**(self.min_level + layer_id) * self.anchor_scale
heights.append(base_anchor_size * s * a[1])
widths.append(base_anchor_size * s * a[0])
return widths, heights
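A worked example of the math above, using the script's defaults (min_level=3, num_scales=3, anchor_scale=4.0, aspect ratio pairs (1.0, 1.0), (1.4, 0.7), (0.7, 1.4)): for layer_id=0 the base anchor size is 2**3 * 4.0 = 32 px and the scales are 2**0, 2**(1/3), 2**(2/3) ≈ 1.00, 1.26, 1.59, so the first scale alone contributes anchors of (width, height) = (32, 32), (44.8, 22.4) and (22.4, 44.8).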
def createGraph(modelPath, outputPath, min_level, aspect_ratios, num_scales,
anchor_scale, num_classes, image_width, image_height):
print('Min level: %d' % min_level)
print('Anchor scale: %f' % anchor_scale)
print('Num scales: %d' % num_scales)
print('Aspect ratios: %s' % str(aspect_ratios))
print('Number of classes: %d' % num_classes)
print('Input image size: %dx%d' % (image_width, image_height))
# Read the graph.
_inpNames = ['image_arrays']
outNames = ['detections']
writeTextGraph(modelPath, outputPath, outNames)
graph_def = parseTextGraph(outputPath)
def getUnconnectedNodes():
unconnected = []
for node in graph_def.node:
if node.op == 'Const':
continue
unconnected.append(node.name)
for inp in node.input:
if inp in unconnected:
unconnected.remove(inp)
return unconnected
nodesToKeep = ['truediv'] # Keep preprocessing nodes
removeIdentity(graph_def)
scopesToKeep = ('image_arrays', 'efficientnet', 'resample_p6', 'resample_p7',
'fpn_cells', 'class_net', 'box_net', 'Reshape', 'concat')
addConstNode('scale_w', [2.0], graph_def)
addConstNode('scale_h', [2.0], graph_def)
nodesToKeep += ['scale_w', 'scale_h']
for node in graph_def.node:
if re.match('efficientnet-(.*)/blocks_\d+/se/mul_1', node.name):
node.input[0], node.input[1] = node.input[1], node.input[0]
if re.match('fpn_cells/cell_\d+/fnode\d+/resample(.*)/nearest_upsampling/Reshape_1$', node.name):
node.op = 'ResizeNearestNeighbor'
node.input[1] = 'scale_w'
node.input.append('scale_h')
for inpNode in graph_def.node:
if inpNode.name == node.name[:node.name.rfind('_')]:
node.input[0] = inpNode.input[0]
if re.match('box_net/box-predict(_\d)*/separable_conv2d$', node.name):
node.addAttr('loc_pred_transposed', True)
# Replace RealDiv with Mul by the inverse scale for compatibility
if node.op == 'RealDiv':
for inpNode in graph_def.node:
if inpNode.name != node.input[1] or not 'value' in inpNode.attr:
continue
tensor = inpNode.attr['value']['tensor'][0]
if not 'float_val' in tensor:
continue
scale = float(inpNode.attr['value']['tensor'][0]['float_val'][0])
addConstNode(inpNode.name + '/inv', [1.0 / scale], graph_def)
nodesToKeep.append(inpNode.name + '/inv')
node.input[1] = inpNode.name + '/inv'
node.op = 'Mul'
break
def to_remove(name, op):
if name in nodesToKeep:
return False
return op == 'Const' or not name.startswith(scopesToKeep)
removeUnusedNodesAndAttrs(to_remove, graph_def)
# Attach unconnected preprocessing
assert(graph_def.node[1].name == 'truediv' and graph_def.node[1].op == 'RealDiv')
graph_def.node[1].input.insert(0, 'image_arrays')
graph_def.node[2].input.insert(0, 'truediv')
priors_generator = AnchorGenerator(min_level, aspect_ratios, num_scales, anchor_scale)
priorBoxes = []
for i in range(5):
inpName = ''
for node in graph_def.node:
if node.name == 'Reshape_%d' % (i * 2 + 1):
inpName = node.input[0]
break
priorBox = NodeDef()
priorBox.name = 'PriorBox_%d' % i
priorBox.op = 'PriorBox'
priorBox.input.append(inpName)
priorBox.input.append(graph_def.node[0].name) # image_tensor
priorBox.addAttr('flip', False)
priorBox.addAttr('clip', False)
widths, heights = priors_generator.get(i)
priorBox.addAttr('width', widths)
priorBox.addAttr('height', heights)
priorBox.addAttr('variance', [1.0, 1.0, 1.0, 1.0])
graph_def.node.extend([priorBox])
priorBoxes.append(priorBox.name)
addConstNode('concat/axis_flatten', [-1], graph_def)
def addConcatNode(name, inputs, axisNodeName):
concat = NodeDef()
concat.name = name
concat.op = 'ConcatV2'
for inp in inputs:
concat.input.append(inp)
concat.input.append(axisNodeName)
graph_def.node.extend([concat])
addConcatNode('PriorBox/concat', priorBoxes, 'concat/axis_flatten')
sigmoid = NodeDef()
sigmoid.name = 'concat/sigmoid'
sigmoid.op = 'Sigmoid'
sigmoid.input.append('concat')
graph_def.node.extend([sigmoid])
addFlatten(sigmoid.name, sigmoid.name + '/Flatten', graph_def)
addFlatten('concat_1', 'concat_1/Flatten', graph_def)
detectionOut = NodeDef()
detectionOut.name = 'detection_out'
detectionOut.op = 'DetectionOutput'
detectionOut.input.append('concat_1/Flatten')
detectionOut.input.append(sigmoid.name + '/Flatten')
detectionOut.input.append('PriorBox/concat')
detectionOut.addAttr('num_classes', num_classes)
detectionOut.addAttr('share_location', True)
detectionOut.addAttr('background_label_id', num_classes + 1)
detectionOut.addAttr('nms_threshold', 0.6)
detectionOut.addAttr('confidence_threshold', 0.2)
detectionOut.addAttr('top_k', 100)
detectionOut.addAttr('keep_top_k', 100)
detectionOut.addAttr('code_type', "CENTER_SIZE")
graph_def.node.extend([detectionOut])
graph_def.node[0].attr['shape'] = {
'shape': {
'dim': [
{'size': -1},
{'size': image_height},
{'size': image_width},
{'size': 3}
]
}
}
while True:
unconnectedNodes = getUnconnectedNodes()
unconnectedNodes.remove(detectionOut.name)
if not unconnectedNodes:
break
for name in unconnectedNodes:
for i in range(len(graph_def.node)):
if graph_def.node[i].name == name:
del graph_def.node[i]
break
# Save as text
graph_def.save(outputPath)
if __name__ == "__main__":
parser = argparse.ArgumentParser(description='Run this script to get a text graph of '
'an EfficientDet model trained with the AutoML framework. '
'Then pass it together with the .pb file to the cv::dnn::readNetFromTensorflow function.')
parser.add_argument('--input', required=True, help='Path to frozen TensorFlow graph.')
parser.add_argument('--output', required=True, help='Path to output text graph.')
parser.add_argument('--min_level', default=3, type=int, help='Parameter from training config')
parser.add_argument('--num_scales', default=3, type=int, help='Parameter from training config')
parser.add_argument('--anchor_scale', default=4.0, type=float, help='Parameter from training config')
parser.add_argument('--aspect_ratios', default=[1.0, 1.0, 1.4, 0.7, 0.7, 1.4],
nargs='+', type=float, help='Parameter from training config')
parser.add_argument('--num_classes', default=90, type=int, help='Number of classes to detect')
parser.add_argument('--width', default=512, type=int, help='Network input width')
parser.add_argument('--height', default=512, type=int, help='Network input height')
args = parser.parse_args()
ar = args.aspect_ratios
assert(len(ar) % 2 == 0)
ar = list(zip(ar[::2], ar[1::2]))
createGraph(args.input, args.output, args.min_level, ar, args.num_scales,
args.anchor_scale, args.num_classes, args.width, args.height)