Merge remote-tracking branch 'upstream/3.4' into merge-3.4

pull/20455/head
Alexander Alekhin 3 years ago
commit f4d6a3ec4e
Changed files:
1. cmake/OpenCVUtils.cmake (2)
2. doc/py_tutorials/py_imgproc/py_canny/py_canny.markdown (2)
3. doc/tutorials/core/mat_the_basic_image_container/mat_the_basic_image_container.markdown (18)
4. modules/dnn/src/layers/recurrent_layers.cpp (28)
5. modules/dnn/src/onnx/onnx_importer.cpp (11)
6. modules/dnn/src/tensorflow/tf_importer.cpp (127)
7. modules/dnn/test/test_layers.cpp (80)
8. modules/dnn/test/test_onnx_importer.cpp (10)
9. modules/dnn/test/test_tf_importer.cpp (10)
10. modules/js/src/make_umd.py (2)

@@ -1481,8 +1481,8 @@ function(ocv_target_link_libraries target)
       if(NOT LINK_PENDING STREQUAL "")
         __ocv_push_target_link_libraries(${LINK_MODE} ${LINK_PENDING})
         set(LINK_PENDING "")
-        set(LINK_MODE "${dep}")
       endif()
+      set(LINK_MODE "${dep}")
     else()
       if(BUILD_opencv_world)
         if(OPENCV_MODULE_${dep}_IS_PART_OF_WORLD)

@@ -74,7 +74,7 @@ Canny Edge Detection in OpenCV
 OpenCV puts all the above in single function, **cv.Canny()**. We will see how to use it. First
 argument is our input image. Second and third arguments are our minVal and maxVal respectively.
-Third argument is aperture_size. It is the size of Sobel kernel used for find image gradients. By
+Fourth argument is aperture_size. It is the size of Sobel kernel used for find image gradients. By
 default it is 3. Last argument is L2gradient which specifies the equation for finding gradient
 magnitude. If it is True, it uses the equation mentioned above which is more accurate, otherwise it
 uses this function: \f$Edge\_Gradient \; (G) = |G_x| + |G_y|\f$. By default, it is False.
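For reference, a minimal C++ sketch of the same call the tutorial describes (the parameter order matches the Python binding; the file name is only a placeholder):

```cpp
#include <opencv2/imgcodecs.hpp>
#include <opencv2/imgproc.hpp>

int main()
{
    cv::Mat img = cv::imread("messi5.jpg", cv::IMREAD_GRAYSCALE);
    cv::Mat edges;
    // minVal=100, maxVal=200, aperture_size=3 (Sobel kernel size), L2gradient=false
    cv::Canny(img, edges, 100, 200, 3, false);
    cv::imwrite("edges.png", edges);
    return 0;
}
```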

@@ -91,8 +91,8 @@ a new header with the new boundaries:
 Mat D (A, Rect(10, 10, 100, 100) ); // using a rectangle
 Mat E = A(Range::all(), Range(1,3)); // using row and column boundaries
 @endcode
-Now you may ask -- if the matrix itself may belong to multiple *Mat* objects who takes responsibility
-for cleaning it up when it's no longer needed. The short answer is: the last object that used it.
+Now you may ask -- if the matrix itself may belong to multiple *Mat* objects, who takes responsibility
+for cleaning it up when it's no longer needed? The short answer is: the last object that used it.
 This is handled by using a reference counting mechanism. Whenever somebody copies a header of a
 *Mat* object, a counter is increased for the matrix. Whenever a header is cleaned, this counter
 is decreased. When the counter reaches zero the matrix is freed. Sometimes you will want to copy
@@ -102,12 +102,12 @@ Mat F = A.clone();
 Mat G;
 A.copyTo(G);
 @endcode
-Now modifying *F* or *G* will not affect the matrix pointed by the *A*'s header. What you need to
+Now modifying *F* or *G* will not affect the matrix pointed to by the *A*'s header. What you need to
 remember from all this is that:
 - Output image allocation for OpenCV functions is automatic (unless specified otherwise).
 - You do not need to think about memory management with OpenCV's C++ interface.
-- The assignment operator and the copy constructor only copies the header.
+- The assignment operator and the copy constructor only copy the header.
 - The underlying matrix of an image may be copied using the @ref cv::Mat::clone() and @ref cv::Mat::copyTo()
   functions.
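Since the distinction between header copies and deep copies is central to this passage, here is a small self-contained sketch of the behaviour described above:

```cpp
#include <opencv2/core.hpp>
#include <iostream>

int main()
{
    cv::Mat A = cv::Mat::zeros(4, 4, CV_8UC1);
    cv::Mat B = A;          // copies only the header; A and B share the same matrix data
    cv::Mat F = A.clone();  // deep copy; F owns its own matrix data
    B.at<uchar>(0, 0) = 255;
    std::cout << (int)A.at<uchar>(0, 0) << std::endl; // 255 -- A sees the change made through B
    std::cout << (int)F.at<uchar>(0, 0) << std::endl; // 0   -- F is unaffected
    return 0;
}
```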
@@ -122,10 +122,10 @@ of these allows us to create many shades of gray.
 For *colorful* ways we have a lot more methods to choose from. Each of them breaks it down to three
 or four basic components and we can use the combination of these to create the others. The most
 popular one is RGB, mainly because this is also how our eye builds up colors. Its base colors are
-red, green and blue. To code the transparency of a color sometimes a fourth element: alpha (A) is
+red, green and blue. To code the transparency of a color sometimes a fourth element, alpha (A), is
 added.
-There are, however, many other color systems each with their own advantages:
+There are, however, many other color systems, each with their own advantages:
 - RGB is the most common as our eyes use something similar, however keep in mind that OpenCV standard display
   system composes colors using the BGR color space (red and blue channels are swapped places).
@@ -139,11 +139,11 @@ There are, however, many other color systems each with their own advantages:
 Each of the building components has its own valid domains. This leads to the data type used. How
 we store a component defines the control we have over its domain. The smallest data type possible is
 *char*, which means one byte or 8 bits. This may be unsigned (so can store values from 0 to 255) or
-signed (values from -127 to +127). Although in case of three components this already gives 16
-million possible colors to represent (like in case of RGB) we may acquire an even finer control by
+signed (values from -127 to +127). Although this width, in the case of three components (like RGB), already gives 16
+million possible colors to represent, we may acquire an even finer control by
 using the float (4 byte = 32 bit) or double (8 byte = 64 bit) data types for each component.
 Nevertheless, remember that increasing the size of a component also increases the size of the whole
-picture in the memory.
+picture in memory.
 Creating a Mat object explicitly
 ----------------------------------
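A quick sketch of the size/precision trade-off discussed in the hunk above, showing how the element type drives per-pixel memory use (image dimensions are arbitrary):

```cpp
#include <opencv2/core.hpp>

int main()
{
    // 8-bit unsigned, 3 channels: 256^3 (~16.7 million) representable colors, 3 bytes per pixel
    cv::Mat rgb8(480, 640, CV_8UC3);
    // 32-bit float per channel: finer control over each component, at 4x the memory per channel
    cv::Mat rgb32f(480, 640, CV_32FC3);
    CV_Assert(rgb8.elemSize() == 3);    // bytes per pixel
    CV_Assert(rgb32f.elemSize() == 12); // same pixel, four times larger
    return 0;
}
```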

@@ -112,19 +112,24 @@ public:
         const Mat& Wh = blobs[0];
         const Mat& Wx = blobs[1];
         const Mat& bias = blobs[2];
+        const Mat& hInternal = blobs[3];
+        const Mat& cInternal = blobs[4];
         CV_CheckEQ(Wh.dims, 2, "");
         CV_CheckEQ(Wx.dims, 2, "");
         CV_CheckEQ(Wh.rows, Wx.rows, "");
         CV_CheckEQ(Wh.rows, (1 + static_cast<int>(bidirectional))*4*Wh.cols, "");
         CV_CheckEQ(Wh.rows, (int)bias.total(), "");
+        CV_CheckEQ(hInternal.cols, Wh.cols, "");
+        CV_CheckEQ(hInternal.cols, cInternal.cols, "");
+        CV_CheckEQ(hInternal.rows, cInternal.rows, "");
         CV_Assert(Wh.type() == Wx.type() && Wx.type() == bias.type());
         // Peephole weights.
-        if (blobs.size() > 3)
+        if (blobs.size() > 5)
         {
-            CV_Assert(blobs.size() == 6);
+            CV_Assert(blobs.size() == 8);
             const int N = Wh.cols;
-            for (int i = 3; i < 6; ++i)
+            for (int i = 5; i < 8; ++i)
             {
                 CV_Assert(blobs[i].rows == N && blobs[i].cols == N);
                 CV_Assert(blobs[i].type() == bias.type());
@@ -181,7 +186,7 @@ public:
                          std::vector<MatShape> &outputs,
                          std::vector<MatShape> &internals) const CV_OVERRIDE
     {
-        CV_Assert((!usePeephole && blobs.size() == 3) || (usePeephole && blobs.size() == 6));
+        CV_Assert((!usePeephole && blobs.size() == 5) || (usePeephole && blobs.size() == 8));
         CV_Assert(inputs.size() == 1);
         const MatShape& inp0 = inputs[0];
@@ -228,7 +233,7 @@ public:
         std::vector<Mat> input;
         inputs_arr.getMatVector(input);
-        CV_Assert((!usePeephole && blobs.size() == 3) || (usePeephole && blobs.size() == 6));
+        CV_Assert((!usePeephole && blobs.size() == 5) || (usePeephole && blobs.size() == 8));
         CV_Assert(input.size() == 1);
         const Mat& inp0 = input[0];
@@ -284,13 +289,14 @@ public:
             const Mat &Wh = blobs[0].rowRange(i * blobs[0].rows / numDirs, (i + 1) * blobs[0].rows / numDirs);
             const Mat &Wx = blobs[1].rowRange(i * blobs[1].rows / numDirs, (i + 1) * blobs[1].rows / numDirs);
             const Mat &bias = blobs[2].colRange(i * blobs[2].cols / numDirs, (i + 1) * blobs[2].cols / numDirs);
+            const Mat &h_0 = blobs[3].rowRange(i * blobs[3].rows / numDirs, (i + 1) * blobs[3].rows / numDirs);
+            const Mat &c_0 = blobs[4].rowRange(i * blobs[4].rows / numDirs, (i + 1) * blobs[4].rows / numDirs);
             int numOut = Wh.size[1];
             Mat hInternal = internals[0], cInternal = internals[1],
                 dummyOnes = internals[2], gates = internals[3];
-            hInternal.setTo(0.);
-            cInternal.setTo(0.);
+            h_0.copyTo(hInternal);
+            c_0.copyTo(cInternal);
             dummyOnes.setTo(1.);
             int numSamplesTotal = numTimeStamps*numSamples;
@@ -331,8 +337,8 @@ public:
                 if (usePeephole)
                 {
                     Mat gatesIF = gates.colRange(0, 2*numOut);
-                    gemm(cInternal, blobs[3], 1, gateI, 1, gateI);
-                    gemm(cInternal, blobs[4], 1, gateF, 1, gateF);
+                    gemm(cInternal, blobs[5], 1, gateI, 1, gateI);
+                    gemm(cInternal, blobs[6], 1, gateF, 1, gateF);
                     sigmoid(gatesIF, gatesIF);
                 }
                 else
@@ -355,7 +361,7 @@ public:
                 }
                 if (usePeephole)
                 {
-                    gemm(cInternal, blobs[5], 1, gateO, 1, gateO);
+                    gemm(cInternal, blobs[7], 1, gateO, 1, gateO);
                     sigmoid(gateO, gateO);
                 }
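With this change the layer requires five mandatory blobs instead of three: the initial hidden and cell states are now passed as blobs[3] and blobs[4], and the optional peephole weights move to indices 5..7. A hypothetical construction sketch, with shapes following the checks in the hunk above (sizes are illustrative):

```cpp
#include <opencv2/dnn.hpp>

int main()
{
    const int numInp = 10, numOut = 3, numSamples = 2; // illustrative sizes
    cv::dnn::LayerParams lp;
    lp.blobs.resize(5);
    lp.blobs[0] = cv::Mat::ones(4 * numOut, numOut, CV_32F);   // Wh: recurrent weights
    lp.blobs[1] = cv::Mat::ones(4 * numOut, numInp, CV_32F);   // Wx: input weights
    lp.blobs[2] = cv::Mat::ones(4 * numOut, 1, CV_32F);        // bias, 4*numOut elements
    lp.blobs[3] = cv::Mat::zeros(numSamples, numOut, CV_32F);  // h_0: initial hidden state
    lp.blobs[4] = cv::Mat::zeros(numSamples, numOut, CV_32F);  // c_0: initial cell state
    // With use_peephole=true, three extra NxN diagonal matrices would go to blobs[5..7].
    cv::Ptr<cv::dnn::LSTMLayer> lstm = cv::dnn::LSTMLayer::create(lp);
    return 0;
}
```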

@@ -1048,8 +1048,9 @@ void ONNXImporter::handleNode(const opencv_onnx::NodeProto& node_proto_)
         Mat Wx = getBlob(node_proto, 1);
         Mat Wh = getBlob(node_proto, 2);
         Mat b = getBlob(node_proto, 3);
-        CV_CheckEQ(countNonZero(getBlob(node_proto, 5)), 0, "Unsupported non zero initial_h");
-        CV_CheckEQ(countNonZero(getBlob(node_proto, 6)), 0, "Unsupported non zero initial_c");
+        Mat h0 = getBlob(node_proto, 5);
+        Mat c0 = getBlob(node_proto, 6);
         b = b.reshape(1, b.size[0]);
         const int numHidden = lstmParams.get<int>("hidden_size");
@@ -1082,11 +1083,15 @@ void ONNXImporter::handleNode(const opencv_onnx::NodeProto& node_proto_)
         }
         Wx = Wx.reshape(1, Wx.size[0] * Wx.size[1]);
         Wh = Wh.reshape(1, Wh.size[0] * Wh.size[1]);
+        h0 = h0.reshape(1, h0.size[0] * h0.size[1]);
+        c0 = c0.reshape(1, c0.size[0] * c0.size[1]);
-        lstmParams.blobs.resize(3);
+        lstmParams.blobs.resize(5);
         lstmParams.blobs[0] = Wh;
         lstmParams.blobs[1] = Wx;
         lstmParams.blobs[2] = b;
+        lstmParams.blobs[3] = h0;
+        lstmParams.blobs[4] = c0;
         lstmParams.set("bidirectional", lstmParams.get<String>("direction", "") == "bidirectional");
         node_proto.set_output(0, lstmParams.name); // set different name so output shapes will be registered on that name

@@ -406,12 +406,53 @@ void setKSize(LayerParams &layerParams, const tensorflow::NodeDef &layer)
     }
 }
-void setPadding(LayerParams &layerParams, const tensorflow::NodeDef &layer)
+void setPadMode(LayerParams &layerParams, const tensorflow::NodeDef &layer)
 {
     if (hasLayerAttr(layer, "padding"))
         layerParams.set("pad_mode", getLayerAttr(layer, "padding").s());
 }
+bool getExplicitPadding(LayerParams &layerParams, const tensorflow::NodeDef &layer, int64_t (&pads)[8])
+{
+    if (!layerParams.has("pad_mode") ||
+        layerParams.get("pad_mode").getStringValue() != "EXPLICIT")
+    {
+        return false;
+    }
+    CV_Assert(hasLayerAttr(layer, "explicit_paddings"));
+    const tensorflow::AttrValue& protoPads = getLayerAttr(layer, "explicit_paddings");
+    if (protoPads.list().i_size() != 8)
+    {
+        CV_Error(Error::StsNotImplemented, "Unsupported asymmetric padding configuration.");
+    }
+    int n = sizeof(pads) / sizeof(pads[0]);
+    for (int i = 0; i < n; ++i)
+    {
+        pads[i] = protoPads.list().i(i);
+    }
+    if (getDataLayout(layer) != DATA_LAYOUT_NCHW)
+    {
+        CV_LOG_DEBUG(NULL, "DNN/TF: Data format " << getLayerAttr(layer, "data_format").s() << ", assuming NHWC.");
+        // Perhaps, we have NHWC padding dimensions order.
+        //  N  H  W  C
+        // 0 1 2 3 4 5 6 7
+        std::swap(pads[2], pads[6]);
+        std::swap(pads[3], pads[7]);
+        //  N  C  W  H
+        // 0 1 2 3 4 5 6 7
+        std::swap(pads[4], pads[6]);
+        std::swap(pads[5], pads[7]);
+        //  N  C  H  W
+        // 0 1 2 3 4 5 6 7
+    }
+    return true;
+}
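A tiny sketch (with made-up pad values) of the index swaps performed above, checking that an NHWC explicit_paddings list ends up in the NCHW order the Padding layer expects:

```cpp
#include <algorithm>
#include <cassert>
#include <cstdint>

int main()
{
    // Hypothetical NHWC pads: [N_before, N_after, H_before, H_after, W_before, W_after, C_before, C_after]
    int64_t pads[8] = {0, 0, 1, 2, 3, 4, 0, 0};
    // Same swap sequence as getExplicitPadding(): NHWC -> NCWH -> NCHW
    std::swap(pads[2], pads[6]);
    std::swap(pads[3], pads[7]);
    std::swap(pads[4], pads[6]);
    std::swap(pads[5], pads[7]);
    // Result: [N_before, N_after, C_before, C_after, H_before, H_after, W_before, W_after]
    const int64_t expected[8] = {0, 0, 0, 0, 1, 2, 3, 4};
    for (int i = 0; i < 8; ++i)
        assert(pads[i] == expected[i]);
    return 0;
}
```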
 Pin parsePin(const std::string &name)
 {
     Pin pin(name);
@@ -516,6 +557,7 @@ protected:
 private:
     void addPermuteLayer(const int* order, const std::string& permName, Pin& inpId);
+    void setPadding(LayerParams &layerParams, const tensorflow::NodeDef &layer, std::string& inputName, float value = 0.);
     friend class LayerHandler;
     typedef void (TFImporter::*TFImporterNodeParser)(tensorflow::GraphDef&, const tensorflow::NodeDef&, LayerParams&);
@@ -558,6 +600,31 @@ private:
     void parseCustomLayer (tensorflow::GraphDef& net, const tensorflow::NodeDef& layer, LayerParams& layerParams);
 };
+void TFImporter::setPadding(LayerParams &layerParams, const tensorflow::NodeDef &layer, std::string& inputName, float value)
+{
+    setPadMode(layerParams, layer);
+    int64_t pads[8];
+    if (!getExplicitPadding(layerParams, layer, pads))
+    {
+        return;
+    }
+    LayerParams padLp;
+    padLp.name = layer.name() + "/pad";
+    padLp.type = "Padding";
+    padLp.set("paddings", DictValue::arrayInt(pads, sizeof(pads) / sizeof(pads[0])));
+    padLp.set("value", value);
+    int id = dstNet.addLayer(padLp.name, padLp.type, padLp);
+    layer_id[padLp.name] = id;
+    connect(layer_id, dstNet, parsePin(inputName), id, 0);
+    inputName = padLp.name;
+    layerParams.set("pad_mode", "VALID");
+}
 class LayerHandler
 {
 public:
@@ -808,7 +875,7 @@ void TFImporter::parseConvolution(tensorflow::GraphDef& net, const tensorflow::N
     setStrides(layerParams, layer);
     if (!layerParams.has("pad_w") && !layerParams.has("pad_h"))
-        setPadding(layerParams, layer);
+        setPadding(layerParams, layer, input);
     // The final node of dilated convolution subgraph.
     next_layers = getNextLayers(net, name, "BatchToSpaceND");
@@ -1253,20 +1320,21 @@ void TFImporter::parseMaxPool(tensorflow::GraphDef& net, const tensorflow::NodeD
 {
     const std::string& name = layer.name();
     const int num_inputs = layer.input_size();
+    std::string inputName = layer.input(0);
     CV_CheckGT(num_inputs, 0, "");
     layerParams.set("pool", "max");
     setKSize(layerParams, layer);
     setStrides(layerParams, layer);
-    setPadding(layerParams, layer);
+    setPadding(layerParams, layer, inputName, -std::numeric_limits<float>::infinity());
     // Test_TensorFlow_nets.EAST_text_detection/1, NGRAPH/CPU
     layerParams.set("ceil_mode", false);
     int id = dstNet.addLayer(name, "Pooling", layerParams);
     layer_id[name] = id;
-    connectToAllBlobs(layer_id, dstNet, parsePin(layer.input(0)), id, num_inputs);
+    connectToAllBlobs(layer_id, dstNet, parsePin(inputName), id, num_inputs);
 }
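Note the pad value passed for max pooling above: the inserted Padding layer must fill the border with minus infinity rather than zero, so padded cells can never win the max even when all real activations are negative. A minimal illustration of why zero padding would not be safe here:

```cpp
#include <algorithm>
#include <cassert>
#include <limits>

int main()
{
    const float negInf = -std::numeric_limits<float>::infinity();
    assert(std::max(3.5f, negInf) == 3.5f);   // padding never wins against a real value
    assert(std::max(-7.0f, negInf) == -7.0f); // still true for negative activations
    assert(std::max(-7.0f, 0.0f) == 0.0f);    // zero padding would corrupt this result
    return 0;
}
```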
 void TFImporter::parseAvgPool(tensorflow::GraphDef& net, const tensorflow::NodeDef& layer, LayerParams& layerParams)
@@ -1279,7 +1347,7 @@ void TFImporter::parseAvgPool(tensorflow::GraphDef& net, const tensorflow::NodeD
     layerParams.set("ave_pool_padded_area", false);
     setKSize(layerParams, layer);
     setStrides(layerParams, layer);
-    setPadding(layerParams, layer);
+    setPadMode(layerParams, layer);
     int id = dstNet.addLayer(name, "Pooling", layerParams);
     layer_id[name] = id;
@@ -1694,7 +1762,7 @@ void TFImporter::parseConv2DBackpropInput(tensorflow::GraphDef& net, const tenso
     // input: "weights"
     // input: "input"
-    const std::string& name = layer.name();
+    std::string name = layer.name();
     const int num_inputs = layer.input_size();
     CV_CheckEQ(num_inputs, 3, "Expected output shape, weights and input nodes");
@@ -1725,7 +1793,21 @@ void TFImporter::parseConv2DBackpropInput(tensorflow::GraphDef& net, const tenso
     layerParams.set("num_output", kshape[1]);
     setStrides(layerParams, layer);
-    setPadding(layerParams, layer);
+    setPadMode(layerParams, layer);
+    int64_t pads[8];
+    bool explicit_pads = getExplicitPadding(layerParams, layer, pads);
+    int64_t begs[4] = {};
+    int64_t ends[4] = {-1, -1, -1, -1};
+    if (explicit_pads)
+    {
+        name += "/deconv";
+        layerParams.set("pad_mode", "VALID");
+        for (int i = 2; i < 4; ++i) // begins=[0, 0, a, b], ends=[-1, -1, c, d]
+        {
+            begs[i] = pads[2*i];
+            ends[i] = -1 - pads[2*i + 1];
+        }
+    }
     // For convolution layer, output shape computes as
     // o = 1 + (i - k + 2*p) / s
@@ -1742,8 +1824,9 @@ void TFImporter::parseConv2DBackpropInput(tensorflow::GraphDef& net, const tenso
     const int strideY = layerParams.get<int>("stride_h");
     const int strideX = layerParams.get<int>("stride_w");
     Mat outShape = getTensorContent(getConstBlob(layer, value_id, 0));
-    const int outH = outShape.at<int>(1);
-    const int outW = outShape.at<int>(2);
+    int shift = (getDataLayout(layer) == DATA_LAYOUT_NCHW);
+    const int outH = outShape.at<int>(1 + shift) + begs[2] - 1 - ends[2];
+    const int outW = outShape.at<int>(2 + shift) + begs[3] - 1 - ends[3];
     if (layerParams.get<String>("pad_mode") == "SAME")
     {
         layerParams.set("adj_w", (outW - 1) % strideX);
@@ -1759,6 +1842,16 @@ void TFImporter::parseConv2DBackpropInput(tensorflow::GraphDef& net, const tenso
     // one input only
     connect(layer_id, dstNet, parsePin(layer.input(2)), id, 0);
+    if (explicit_pads) // If we have explicit paddings, remove extra data
+    {
+        layerParams.set("begin", DictValue::arrayInt(begs, sizeof(begs) / sizeof(begs[0])));
+        layerParams.set("end", DictValue::arrayInt(ends, sizeof(ends) / sizeof(ends[0])));
+        int id = dstNet.addLayer(layer.name(), "Slice", layerParams);
+        layer_id[layer.name()] = id;
+        connect(layer_id, dstNet, parsePin(name), id, 0);
+    }
 }
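A small worked example (with made-up numbers) of the padding arithmetic in this function: when explicit pads are present, the deconvolution runs with VALID padding on an output enlarged by the total padding, and the appended Slice layer then trims it back to the size requested by the TensorFlow graph:

```cpp
#include <cstdint>
#include <iostream>

int main()
{
    // Hypothetical explicit pads for the H dimension of a Conv2DBackpropInput node:
    const int64_t padTop = 1, padBottom = 2;
    const int requestedOutH = 10;         // height requested by the TF graph
    // Same arithmetic as parseConv2DBackpropInput():
    const int64_t beg = padTop;           // begs[2] = pads[2*i]
    const int64_t end = -1 - padBottom;   // ends[2] = -1 - pads[2*i + 1]
    const int deconvOutH = requestedOutH + (int)beg - 1 - (int)end; // = requested + padTop + padBottom
    std::cout << deconvOutH << std::endl; // prints 13: the enlarged deconvolution output
    return 0;
}
```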
 void TFImporter::parseBlockLSTM(tensorflow::GraphDef& net, const tensorflow::NodeDef& layer, LayerParams& layerParams)
@@ -1766,8 +1859,8 @@ void TFImporter::parseBlockLSTM(tensorflow::GraphDef& net, const tensorflow::Nod
     // op: "BlockLSTM"
     // input: "lstm_block_wrapper/ToInt64/x" (ignore, number of time stamps)
     // input: "input"
-    // input: "lstm_block_wrapper/zeros" (ignore)
-    // input: "lstm_block_wrapper/zeros" (ignore)
+    // input: "lstm_block_wrapper/zeros"
+    // input: "lstm_block_wrapper/zeros"
     // input: "lstm_block_wrapper/kernel"
     // input: "lstm_block_wrapper/w_i_diag"
     // input: "lstm_block_wrapper/w_f_diag"
@@ -1793,9 +1886,11 @@ void TFImporter::parseBlockLSTM(tensorflow::GraphDef& net, const tensorflow::Nod
         }
     }
-    Mat W, Wh, Wx, b;
+    Mat W, Wh, Wx, b, cs_prev, h_prev;
     blobFromTensor(getConstBlob(layer, value_id, 4), W);
     blobFromTensor(getConstBlob(layer, value_id, 8), b);
+    blobFromTensor(getConstBlob(layer, value_id, 2), cs_prev);
+    blobFromTensor(getConstBlob(layer, value_id, 3), h_prev);
     const int outSize = W.cols / 4;
     // IGFO->IFOG
@@ -1811,10 +1906,12 @@ void TFImporter::parseBlockLSTM(tensorflow::GraphDef& net, const tensorflow::Nod
     Wx = W.rowRange(0, W.rows - outSize).t();
     Wh = W.rowRange(W.rows - outSize, W.rows).t();
-    layerParams.blobs.resize(3);
+    layerParams.blobs.resize(5);
     layerParams.blobs[0] = Wh;
     layerParams.blobs[1] = Wx;
     layerParams.blobs[2] = b;
+    layerParams.blobs[3] = h_prev;
+    layerParams.blobs[4] = cs_prev;
     if (hasLayerAttr(layer, "use_peephole"))
     {
@@ -1822,14 +1919,14 @@ void TFImporter::parseBlockLSTM(tensorflow::GraphDef& net, const tensorflow::Nod
         if (usePeephole)
         {
             layerParams.set("use_peephole", true);
-            layerParams.blobs.resize(6);
+            layerParams.blobs.resize(8);
             for (int i = 0; i < 3; ++i)
             {
                 Mat w;
                 blobFromTensor(getConstBlob(layer, value_id, 5 + i), w);
                 w = w.reshape(1, w.total()); // Single column.
                 w = Mat::diag(w); // Make a diagonal matrix.
-                layerParams.blobs[3 + i] = w;
+                layerParams.blobs[5 + i] = w;
             }
         }
     }

@@ -445,7 +445,7 @@ class Layer_LSTM_Test : public ::testing::Test
 {
 public:
     int numInp, numOut;
-    Mat Wh, Wx, b;
+    Mat Wh, Wx, b, h, c;
     Ptr<LSTMLayer> layer;
     std::vector<Mat> inputs, outputs;
@@ -460,12 +460,17 @@ public:
         Wh = Mat::ones(4 * numOut, numOut, CV_32F);
         Wx = Mat::ones(4 * numOut, numInp, CV_32F);
         b = Mat::ones(4 * numOut, 1, CV_32F);
+        h = Mat::ones(4, numOut, CV_32F);
+        c = Mat::ones(4, numOut, CV_32F);
         LayerParams lp;
-        lp.blobs.resize(3);
+        lp.blobs.resize(5);
         lp.blobs[0] = Wh;
         lp.blobs[1] = Wx;
         lp.blobs[2] = b;
+        lp.blobs[3] = h;
+        lp.blobs[4] = c;
         lp.set<bool>("produce_cell_output", produceCellOutput);
         lp.set<bool>("use_timestamp_dim", useTimestampDim);
@@ -513,10 +518,12 @@ TEST_F(Layer_LSTM_Test, get_set_test)
 TEST(Layer_LSTM_Test_Accuracy_with_, CaffeRecurrent)
 {
     LayerParams lp;
-    lp.blobs.resize(3);
+    lp.blobs.resize(5);
     lp.blobs[0] = blobFromNPY(_tf("lstm.prototxt.w_2.npy")); // Wh
     lp.blobs[1] = blobFromNPY(_tf("lstm.prototxt.w_0.npy")); // Wx
     lp.blobs[2] = blobFromNPY(_tf("lstm.prototxt.w_1.npy")); // bias
+    lp.blobs[3] = Mat::zeros(2, 17, CV_32F); // h_0
+    lp.blobs[4] = Mat::zeros(2, 17, CV_32F); // c_0
     Ptr<LSTMLayer> layer = LSTMLayer::create(lp);
     Mat inp = blobFromNPY(_tf("recurrent.input.npy"));
@@ -527,6 +534,68 @@ TEST(Layer_LSTM_Test_Accuracy_with_, CaffeRecurrent)
     normAssert(h_t_reference, outputs[0]);
 }
+TEST(Layer_LSTM_Test_Accuracy_with_, HiddenParams)
+{
+    Mat Wx = blobFromNPY(_tf("lstm.hidden.W.npy"));
+    Mat Wh = blobFromNPY(_tf("lstm.hidden.R.npy"));
+    Mat b = blobFromNPY(_tf("lstm.hidden.B.npy"));
+    Mat h0 = blobFromNPY(_tf("lstm.hidden.h0.npy"));
+    Mat c0 = blobFromNPY(_tf("lstm.hidden.c0.npy"));
+    const int numHidden = 3;
+    const int numDirs = Wx.size[0];
+    const int numFeatures = Wx.size[2];
+    b = b.reshape(1, b.size[0]);
+    Mat bx = b.colRange(0, b.cols / 2);
+    Mat bh = b.colRange(b.cols / 2, b.cols);
+    b = bx + bh;
+    // IFGO->IGFO
+    for (int k = 0; k < numDirs; ++k)
+    {
+        float* WxData = Wx.ptr<float>(k);
+        float* WhData = Wh.ptr<float>(k);
+        float* biasData = b.ptr<float>(k);
+        for (int j = 0; j < numHidden; ++j)
+        {
+            for (int i = 0; i < numFeatures; ++i)
+            {
+                std::swap(WxData[(numHidden + j) * numFeatures + i],
+                          WxData[(numHidden * 2 + j) * numFeatures + i]);
+            }
+            for (int i = 0; i < numHidden; ++i)
+            {
+                std::swap(WhData[(numHidden + j) * numHidden + i],
+                          WhData[(numHidden * 2 + j) * numHidden + i]);
+            }
+            std::swap(biasData[numHidden + j], biasData[numHidden * 2 + j]);
+        }
+    }
+    Wx = Wx.reshape(1, Wx.size[0] * Wx.size[1]);
+    Wh = Wh.reshape(1, Wh.size[0] * Wh.size[1]);
+    h0 = h0.reshape(1, h0.size[0] * h0.size[1]);
+    c0 = c0.reshape(1, c0.size[0] * c0.size[1]);
+    LayerParams lstmParams;
+    lstmParams.blobs.resize(5);
+    lstmParams.blobs[0] = Wh;
+    lstmParams.blobs[1] = Wx;
+    lstmParams.blobs[2] = b;
+    lstmParams.blobs[3] = h0;
+    lstmParams.blobs[4] = c0;
+    lstmParams.set("bidirectional", false);
+    Ptr<LSTMLayer> layer = LSTMLayer::create(lstmParams);
+    Mat inp = blobFromNPY(_tf("lstm.hidden.input.npy"));
+    std::vector<Mat> inputs(1, inp), outputs;
+    runLayer(layer, inputs, outputs);
+    Mat h_t_reference = blobFromNPY(_tf("lstm.hidden.output.npy"));
+    normAssert(h_t_reference, outputs[0]);
+}
 TEST(Layer_RNN_Test_Accuracy_with_, CaffeRecurrent)
 {
     Ptr<RNNLayer> layer = RNNLayer::create(LayerParams());
@@ -571,6 +640,9 @@ TEST(Layer_LSTM_Test_Accuracy_, Reverse)
     bias.at<float>(2, 0) = 1e10f; // Output gate - always output everything
     bias.at<float>(3, 0) = 0.f;  // Update signal
+    cv::Mat hInternal = cv::Mat::zeros(1, 1, CV_32FC1);
+    cv::Mat cInternal = cv::Mat::zeros(1, 1, CV_32FC1);
     LayerParams lp;
     lp.set("reverse", true);
     lp.set("use_timestamp_dim", true);
@@ -578,6 +650,8 @@ TEST(Layer_LSTM_Test_Accuracy_, Reverse)
     lp.blobs.push_back(Wh);
     lp.blobs.push_back(Wx);
     lp.blobs.push_back(bias);
+    lp.blobs.push_back(hInternal);
+    lp.blobs.push_back(cInternal);
     cv::Ptr<cv::dnn::LSTMLayer> layer = LSTMLayer::create(lp);
     std::vector<cv::Mat> outputs;

@@ -710,6 +710,16 @@ TEST_P(Test_ONNX_layers, LSTM_bidirectional)
     testONNXModels("lstm_bidirectional", npy, 0, 0, false, false);
 }
+TEST_P(Test_ONNX_layers, LSTM_hidden)
+{
+    testONNXModels("hidden_lstm", npy, 0, 0, false, false);
+}
+TEST_P(Test_ONNX_layers, LSTM_hidden_bidirectional)
+{
+    testONNXModels("hidden_lstm_bi", npy, 0, 0, false, false);
+}
 TEST_P(Test_ONNX_layers, Pad2d_Unfused)
 {
     testONNXModels("ReflectionPad2d");

@@ -203,6 +203,16 @@ TEST_P(Test_TensorFlow_layers, padding)
     runTensorFlowNet("keras_pad_concat");
 }
+TEST_P(Test_TensorFlow_layers, padding_asymmetric)
+{
+    runTensorFlowNet("conv2d_asymmetric_pads_nchw");
+    runTensorFlowNet("conv2d_asymmetric_pads_nhwc");
+    runTensorFlowNet("max_pool2d_asymmetric_pads_nchw");
+    runTensorFlowNet("max_pool2d_asymmetric_pads_nhwc");
+    runTensorFlowNet("conv2d_backprop_input_asymmetric_pads_nchw");
+    runTensorFlowNet("conv2d_backprop_input_asymmetric_pads_nhwc");
+}
 TEST_P(Test_TensorFlow_layers, padding_same)
 {
     // Reference output values are in range [0.0006, 2.798]

@@ -95,7 +95,7 @@ def make_umd(opencvjs, cvjs):
         root.cv = factory();
     } else if (typeof importScripts === 'function') {
         // Web worker
-        root.cv = factory;
+        root.cv = factory();
     } else {
         // Other shells, e.g. d8
         root.cv = factory();
