Merge remote-tracking branch 'upstream/3.4' into merge-3.4

pull/20455/head
Alexander Alekhin 3 years ago
commit f4d6a3ec4e
10 changed files:
  1. cmake/OpenCVUtils.cmake (2 lines changed)
  2. doc/py_tutorials/py_imgproc/py_canny/py_canny.markdown (2 lines changed)
  3. doc/tutorials/core/mat_the_basic_image_container/mat_the_basic_image_container.markdown (18 lines changed)
  4. modules/dnn/src/layers/recurrent_layers.cpp (28 lines changed)
  5. modules/dnn/src/onnx/onnx_importer.cpp (11 lines changed)
  6. modules/dnn/src/tensorflow/tf_importer.cpp (127 lines changed)
  7. modules/dnn/test/test_layers.cpp (80 lines changed)
  8. modules/dnn/test/test_onnx_importer.cpp (10 lines changed)
  9. modules/dnn/test/test_tf_importer.cpp (10 lines changed)
  10. modules/js/src/make_umd.py (2 lines changed)

@@ -1481,8 +1481,8 @@ function(ocv_target_link_libraries target)
if(NOT LINK_PENDING STREQUAL "")
__ocv_push_target_link_libraries(${LINK_MODE} ${LINK_PENDING})
set(LINK_PENDING "")
- set(LINK_MODE "${dep}")
endif()
+ set(LINK_MODE "${dep}")
else()
if(BUILD_opencv_world)
if(OPENCV_MODULE_${dep}_IS_PART_OF_WORLD)

@@ -74,7 +74,7 @@ Canny Edge Detection in OpenCV
OpenCV puts all the above in single function, **cv.Canny()**. We will see how to use it. First
argument is our input image. Second and third arguments are our minVal and maxVal respectively.
- Third argument is aperture_size. It is the size of Sobel kernel used for find image gradients. By
+ Fourth argument is aperture_size. It is the size of Sobel kernel used for find image gradients. By
default it is 3. Last argument is L2gradient which specifies the equation for finding gradient
magnitude. If it is True, it uses the equation mentioned above which is more accurate, otherwise it
uses this function: \f$Edge\_Gradient \; (G) = |G_x| + |G_y|\f$. By default, it is False.
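For quick reference, the call described above can be sketched as follows. This sketch uses the equivalent C++ API for consistency with the rest of this patch (the file name and thresholds are illustrative, not part of the change); `cv.Canny(img, 100, 200)` is the direct Python counterpart:
@code
#include <opencv2/imgcodecs.hpp>
#include <opencv2/imgproc.hpp>

int main()
{
    // Any grayscale input image will do; the name is illustrative.
    cv::Mat img = cv::imread("input.png", cv::IMREAD_GRAYSCALE);
    cv::Mat edges;
    // minVal = 100, maxVal = 200; aperture_size = 3 and L2gradient = false are the defaults.
    cv::Canny(img, edges, 100, 200, 3, false);
    cv::imwrite("edges.png", edges);
    return 0;
}
@endcode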

@@ -91,8 +91,8 @@ a new header with the new boundaries:
Mat D (A, Rect(10, 10, 100, 100) ); // using a rectangle
Mat E = A(Range::all(), Range(1,3)); // using row and column boundaries
@endcode
- Now you may ask -- if the matrix itself may belong to multiple *Mat* objects who takes responsibility
- for cleaning it up when it's no longer needed. The short answer is: the last object that used it.
+ Now you may ask -- if the matrix itself may belong to multiple *Mat* objects, who takes responsibility
+ for cleaning it up when it's no longer needed? The short answer is: the last object that used it.
This is handled by using a reference counting mechanism. Whenever somebody copies a header of a
*Mat* object, a counter is increased for the matrix. Whenever a header is cleaned, this counter
is decreased. When the counter reaches zero the matrix is freed. Sometimes you will want to copy
@@ -102,12 +102,12 @@ Mat F = A.clone();
Mat G;
A.copyTo(G);
@endcode
- Now modifying *F* or *G* will not affect the matrix pointed by the *A*'s header. What you need to
+ Now modifying *F* or *G* will not affect the matrix pointed to by the *A*'s header. What you need to
remember from all this is that:
- Output image allocation for OpenCV functions is automatic (unless specified otherwise).
- You do not need to think about memory management with OpenCV's C++ interface.
- - The assignment operator and the copy constructor only copies the header.
+ - The assignment operator and the copy constructor only copy the header.
- The underlying matrix of an image may be copied using the @ref cv::Mat::clone() and @ref cv::Mat::copyTo()
functions.
@@ -122,10 +122,10 @@ of these allows us to create many shades of gray.
For *colorful* ways we have a lot more methods to choose from. Each of them breaks it down to three
or four basic components and we can use the combination of these to create the others. The most
popular one is RGB, mainly because this is also how our eye builds up colors. Its base colors are
- red, green and blue. To code the transparency of a color sometimes a fourth element: alpha (A) is
+ red, green and blue. To code the transparency of a color sometimes a fourth element, alpha (A), is
added.
- There are, however, many other color systems each with their own advantages:
+ There are, however, many other color systems, each with their own advantages:
- RGB is the most common as our eyes use something similar, however keep in mind that OpenCV standard display
system composes colors using the BGR color space (red and blue channels are swapped places).
@@ -139,11 +139,11 @@ There are, however, many other color systems each with their own advantages:
Each of the building components has its own valid domains. This leads to the data type used. How
we store a component defines the control we have over its domain. The smallest data type possible is
*char*, which means one byte or 8 bits. This may be unsigned (so can store values from 0 to 255) or
- signed (values from -127 to +127). Although in case of three components this already gives 16
- million possible colors to represent (like in case of RGB) we may acquire an even finer control by
+ signed (values from -127 to +127). Although this width, in the case of three components (like RGB), already gives 16
+ million possible colors to represent, we may acquire an even finer control by
using the float (4 byte = 32 bit) or double (8 byte = 64 bit) data types for each component.
Nevertheless, remember that increasing the size of a component also increases the size of the whole
- picture in the memory.
+ picture in memory.
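As a small sketch of what the component type means for storage (the image dimensions here are illustrative; the element sizes follow directly from the types just discussed):
@code
#include <opencv2/core.hpp>
#include <iostream>

int main()
{
    cv::Mat rgb8(480, 640, CV_8UC3);   // unsigned char components: 3 bytes per pixel
    cv::Mat rgb32(480, 640, CV_32FC3); // float components: 12 bytes per pixel
    std::cout << rgb8.elemSize() << " vs " << rgb32.elemSize() << " bytes per pixel" << std::endl;
    return 0;
}
@endcode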
Creating a Mat object explicitly
----------------------------------

@@ -112,19 +112,24 @@ public:
const Mat& Wh = blobs[0];
const Mat& Wx = blobs[1];
const Mat& bias = blobs[2];
+ const Mat& hInternal = blobs[3];
+ const Mat& cInternal = blobs[4];
CV_CheckEQ(Wh.dims, 2, "");
CV_CheckEQ(Wx.dims, 2, "");
CV_CheckEQ(Wh.rows, Wx.rows, "");
CV_CheckEQ(Wh.rows, (1 + static_cast<int>(bidirectional))*4*Wh.cols, "");
CV_CheckEQ(Wh.rows, (int)bias.total(), "");
+ CV_CheckEQ(hInternal.cols, Wh.cols, "");
+ CV_CheckEQ(hInternal.cols, cInternal.cols, "");
+ CV_CheckEQ(hInternal.rows, cInternal.rows, "");
CV_Assert(Wh.type() == Wx.type() && Wx.type() == bias.type());
// Peephole weights.
- if (blobs.size() > 3)
+ if (blobs.size() > 5)
{
- CV_Assert(blobs.size() == 6);
+ CV_Assert(blobs.size() == 8);
const int N = Wh.cols;
- for (int i = 3; i < 6; ++i)
+ for (int i = 5; i < 8; ++i)
{
CV_Assert(blobs[i].rows == N && blobs[i].cols == N);
CV_Assert(blobs[i].type() == bias.type());
@@ -181,7 +186,7 @@ public:
std::vector<MatShape> &outputs,
std::vector<MatShape> &internals) const CV_OVERRIDE
{
- CV_Assert((!usePeephole && blobs.size() == 3) || (usePeephole && blobs.size() == 6));
+ CV_Assert((!usePeephole && blobs.size() == 5) || (usePeephole && blobs.size() == 8));
CV_Assert(inputs.size() == 1);
const MatShape& inp0 = inputs[0];
@@ -228,7 +233,7 @@ public:
std::vector<Mat> input;
inputs_arr.getMatVector(input);
- CV_Assert((!usePeephole && blobs.size() == 3) || (usePeephole && blobs.size() == 6));
+ CV_Assert((!usePeephole && blobs.size() == 5) || (usePeephole && blobs.size() == 8));
CV_Assert(input.size() == 1);
const Mat& inp0 = input[0];
@@ -284,13 +289,14 @@ public:
const Mat &Wh = blobs[0].rowRange(i * blobs[0].rows / numDirs, (i + 1) * blobs[0].rows / numDirs);
const Mat &Wx = blobs[1].rowRange(i * blobs[1].rows / numDirs, (i + 1) * blobs[1].rows / numDirs);
const Mat &bias = blobs[2].colRange(i * blobs[2].cols / numDirs, (i + 1) * blobs[2].cols / numDirs);
+ const Mat &h_0 = blobs[3].rowRange(i * blobs[3].rows / numDirs, (i + 1) * blobs[3].rows / numDirs);
+ const Mat &c_0 = blobs[4].rowRange(i * blobs[4].rows / numDirs, (i + 1) * blobs[4].rows / numDirs);
int numOut = Wh.size[1];
Mat hInternal = internals[0], cInternal = internals[1],
dummyOnes = internals[2], gates = internals[3];
- hInternal.setTo(0.);
- cInternal.setTo(0.);
+ h_0.copyTo(hInternal);
+ c_0.copyTo(cInternal);
dummyOnes.setTo(1.);
int numSamplesTotal = numTimeStamps*numSamples;
@@ -331,8 +337,8 @@ public:
if (usePeephole)
{
Mat gatesIF = gates.colRange(0, 2*numOut);
- gemm(cInternal, blobs[3], 1, gateI, 1, gateI);
- gemm(cInternal, blobs[4], 1, gateF, 1, gateF);
+ gemm(cInternal, blobs[5], 1, gateI, 1, gateI);
+ gemm(cInternal, blobs[6], 1, gateF, 1, gateF);
sigmoid(gatesIF, gatesIF);
}
else
@@ -355,7 +361,7 @@ public:
}
if (usePeephole)
{
- gemm(cInternal, blobs[5], 1, gateO, 1, gateO);
+ gemm(cInternal, blobs[7], 1, gateO, 1, gateO);
sigmoid(gateO, gateO);
}
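Taken together, after this change the layer expects five mandatory blobs (Wh, Wx, bias, h_0, c_0), and the optional peephole matrices move from indices 3..5 to 5..7. A minimal sketch of building a layer under the new layout; makeLSTM is a hypothetical helper, and the shapes simply mirror the checks above:

#include <opencv2/dnn.hpp>
using namespace cv;
using namespace cv::dnn;

Ptr<LSTMLayer> makeLSTM(int numInp, int numOut, int numSamples)
{
    LayerParams lp;
    lp.blobs.resize(5);
    lp.blobs[0] = Mat::ones(4 * numOut, numOut, CV_32F);  // Wh
    lp.blobs[1] = Mat::ones(4 * numOut, numInp, CV_32F);  // Wx
    lp.blobs[2] = Mat::ones(4 * numOut, 1, CV_32F);       // bias
    lp.blobs[3] = Mat::zeros(numSamples, numOut, CV_32F); // h_0; zeros reproduce the old implicit state
    lp.blobs[4] = Mat::zeros(numSamples, numOut, CV_32F); // c_0
    // Peephole weights, if used, now occupy blobs[5..7] (one NxN diagonal matrix per gate).
    return LSTMLayer::create(lp);
}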

@@ -1048,8 +1048,9 @@ void ONNXImporter::handleNode(const opencv_onnx::NodeProto& node_proto_)
Mat Wx = getBlob(node_proto, 1);
Mat Wh = getBlob(node_proto, 2);
Mat b = getBlob(node_proto, 3);
- CV_CheckEQ(countNonZero(getBlob(node_proto, 5)), 0, "Unsupported non zero initial_h");
- CV_CheckEQ(countNonZero(getBlob(node_proto, 6)), 0, "Unsupported non zero initial_c");
+ Mat h0 = getBlob(node_proto, 5);
+ Mat c0 = getBlob(node_proto, 6);
b = b.reshape(1, b.size[0]);
const int numHidden = lstmParams.get<int>("hidden_size");
@@ -1082,11 +1083,15 @@ void ONNXImporter::handleNode(const opencv_onnx::NodeProto& node_proto_)
}
Wx = Wx.reshape(1, Wx.size[0] * Wx.size[1]);
Wh = Wh.reshape(1, Wh.size[0] * Wh.size[1]);
+ h0 = h0.reshape(1, h0.size[0] * h0.size[1]);
+ c0 = c0.reshape(1, c0.size[0] * c0.size[1]);
- lstmParams.blobs.resize(3);
+ lstmParams.blobs.resize(5);
lstmParams.blobs[0] = Wh;
lstmParams.blobs[1] = Wx;
lstmParams.blobs[2] = b;
+ lstmParams.blobs[3] = h0;
+ lstmParams.blobs[4] = c0;
lstmParams.set("bidirectional", lstmParams.get<String>("direction", "") == "bidirectional");
node_proto.set_output(0, lstmParams.name); // set different name so output shapes will be registered on that name

@@ -406,12 +406,53 @@ void setKSize(LayerParams &layerParams, const tensorflow::NodeDef &layer)
}
}
- void setPadding(LayerParams &layerParams, const tensorflow::NodeDef &layer)
+ void setPadMode(LayerParams &layerParams, const tensorflow::NodeDef &layer)
{
if (hasLayerAttr(layer, "padding"))
layerParams.set("pad_mode", getLayerAttr(layer, "padding").s());
}
+ bool getExplicitPadding(LayerParams &layerParams, const tensorflow::NodeDef &layer, int64_t (&pads)[8])
+ {
+ if (!layerParams.has("pad_mode") ||
+ layerParams.get("pad_mode").getStringValue() != "EXPLICIT")
+ {
+ return false;
+ }
+ CV_Assert(hasLayerAttr(layer, "explicit_paddings"));
+ const tensorflow::AttrValue& protoPads = getLayerAttr(layer, "explicit_paddings");
+ if (protoPads.list().i_size() != 8)
+ {
+ CV_Error(Error::StsNotImplemented, "Unsupported asymmetric padding configuration.");
+ }
+ int n = sizeof(pads) / sizeof(pads[0]);
+ for (int i = 0; i < n; ++i)
+ {
+ pads[i] = protoPads.list().i(i);
+ }
+ if (getDataLayout(layer) != DATA_LAYOUT_NCHW)
+ {
+ CV_LOG_DEBUG(NULL, "DNN/TF: Data format " << getLayerAttr(layer, "data_format").s() << ", assuming NHWC.");
+ // Perhaps, we have NHWC padding dimensions order.
+ // N H W C
+ // 0 1 2 3 4 5 6 7
+ std::swap(pads[2], pads[6]);
+ std::swap(pads[3], pads[7]);
+ // N C W H
+ // 0 1 2 3 4 5 6 7
+ std::swap(pads[4], pads[6]);
+ std::swap(pads[5], pads[7]);
+ // N C H W
+ // 0 1 2 3 4 5 6 7
+ }
+ return true;
+ }
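The two swap pairs above permute the (before, after) padding pairs from NHWC order into NCHW order. A standalone sketch of the same index shuffle, with illustrative pad values:

#include <algorithm>
#include <cstdint>
#include <cstdio>

int main()
{
    // (before, after) pad pairs in NHWC order: N N H H W W C C
    int64_t pads[8] = {0, 0, 1, 2, 3, 4, 0, 0};
    std::swap(pads[2], pads[6]); // -> N N C C W W H H
    std::swap(pads[3], pads[7]);
    std::swap(pads[4], pads[6]); // -> N N C C H H W W
    std::swap(pads[5], pads[7]);
    for (int i = 0; i < 8; ++i)
        std::printf("%lld ", (long long)pads[i]); // prints: 0 0 0 0 1 2 3 4
    return 0;
}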
Pin parsePin(const std::string &name)
{
Pin pin(name);
@@ -516,6 +557,7 @@ protected:
private:
void addPermuteLayer(const int* order, const std::string& permName, Pin& inpId);
+ void setPadding(LayerParams &layerParams, const tensorflow::NodeDef &layer, std::string& inputName, float value = 0.);
friend class LayerHandler;
typedef void (TFImporter::*TFImporterNodeParser)(tensorflow::GraphDef&, const tensorflow::NodeDef&, LayerParams&);
@@ -558,6 +600,31 @@ private:
void parseCustomLayer (tensorflow::GraphDef& net, const tensorflow::NodeDef& layer, LayerParams& layerParams);
};
+ void TFImporter::setPadding(LayerParams &layerParams, const tensorflow::NodeDef &layer, std::string& inputName, float value)
+ {
+ setPadMode(layerParams, layer);
+ int64_t pads[8];
+ if (!getExplicitPadding(layerParams, layer, pads))
+ {
+ return;
+ }
+ LayerParams padLp;
+ padLp.name = layer.name() + "/pad";
+ padLp.type = "Padding";
+ padLp.set("paddings", DictValue::arrayInt(pads, sizeof(pads) / sizeof(pads[0])));
+ padLp.set("value", value);
+ int id = dstNet.addLayer(padLp.name, padLp.type, padLp);
+ layer_id[padLp.name] = id;
+ connect(layer_id, dstNet, parsePin(inputName), id, 0);
+ inputName = padLp.name;
+ layerParams.set("pad_mode", "VALID");
+ }
class LayerHandler
{
public:
@@ -808,7 +875,7 @@ void TFImporter::parseConvolution(tensorflow::GraphDef& net, const tensorflow::N
setStrides(layerParams, layer);
if (!layerParams.has("pad_w") && !layerParams.has("pad_h"))
- setPadding(layerParams, layer);
+ setPadding(layerParams, layer, input);
// The final node of dilated convolution subgraph.
next_layers = getNextLayers(net, name, "BatchToSpaceND");
@@ -1253,20 +1320,21 @@ void TFImporter::parseMaxPool(tensorflow::GraphDef& net, const tensorflow::NodeD
{
const std::string& name = layer.name();
const int num_inputs = layer.input_size();
+ std::string inputName = layer.input(0);
CV_CheckGT(num_inputs, 0, "");
layerParams.set("pool", "max");
setKSize(layerParams, layer);
setStrides(layerParams, layer);
- setPadding(layerParams, layer);
+ setPadding(layerParams, layer, inputName, -std::numeric_limits<float>::infinity());
// Test_TensorFlow_nets.EAST_text_detection/1, NGRAPH/CPU
layerParams.set("ceil_mode", false);
int id = dstNet.addLayer(name, "Pooling", layerParams);
layer_id[name] = id;
- connectToAllBlobs(layer_id, dstNet, parsePin(layer.input(0)), id, num_inputs);
+ connectToAllBlobs(layer_id, dstNet, parsePin(inputName), id, num_inputs);
}
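Note the fill value passed to setPadding here: for max pooling, the explicitly padded border must be -infinity so that padded cells can never win the maximum. A tiny standalone sketch of why a zero fill would be wrong (values are illustrative):

#include <algorithm>
#include <cstdio>
#include <limits>

int main()
{
    const float negInf = -std::numeric_limits<float>::infinity();
    // Two real values {-5, -7} plus one padded cell, max-pooled over all three:
    float zeroPadded[3] = {-5.f, -7.f, 0.f};    // zero padding: max is 0, a value that never existed
    float infPadded[3]  = {-5.f, -7.f, negInf}; // -inf padding: max is -5, the true maximum
    std::printf("%f\n", *std::max_element(zeroPadded, zeroPadded + 3));
    std::printf("%f\n", *std::max_element(infPadded, infPadded + 3));
    return 0;
}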
void TFImporter::parseAvgPool(tensorflow::GraphDef& net, const tensorflow::NodeDef& layer, LayerParams& layerParams)
@@ -1279,7 +1347,7 @@ void TFImporter::parseAvgPool(tensorflow::GraphDef& net, const tensorflow::NodeD
layerParams.set("ave_pool_padded_area", false);
setKSize(layerParams, layer);
setStrides(layerParams, layer);
- setPadding(layerParams, layer);
+ setPadMode(layerParams, layer);
int id = dstNet.addLayer(name, "Pooling", layerParams);
layer_id[name] = id;
@@ -1694,7 +1762,7 @@ void TFImporter::parseConv2DBackpropInput(tensorflow::GraphDef& net, const tenso
// input: "weights"
// input: "input"
- const std::string& name = layer.name();
+ std::string name = layer.name();
const int num_inputs = layer.input_size();
CV_CheckEQ(num_inputs, 3, "Expected output shape, weights and input nodes");
@@ -1725,7 +1793,21 @@ void TFImporter::parseConv2DBackpropInput(tensorflow::GraphDef& net, const tenso
layerParams.set("num_output", kshape[1]);
setStrides(layerParams, layer);
- setPadding(layerParams, layer);
+ setPadMode(layerParams, layer);
+ int64_t pads[8];
+ bool explicit_pads = getExplicitPadding(layerParams, layer, pads);
+ int64_t begs[4] = {};
+ int64_t ends[4] = {-1, -1, -1, -1};
+ if (explicit_pads)
+ {
+ name += "/deconv";
+ layerParams.set("pad_mode", "VALID");
+ for (int i = 2; i < 4; ++i) // begins=[0, 0, a, b], ends=[-1, -1, c, d]
+ {
+ begs[i] = pads[2*i];
+ ends[i] = -1 - pads[2*i + 1];
+ }
+ }
// For convolution layer, output shape computes as
// o = 1 + (i - k + 2*p) / s
@@ -1742,8 +1824,9 @@ void TFImporter::parseConv2DBackpropInput(tensorflow::GraphDef& net, const tenso
const int strideY = layerParams.get<int>("stride_h");
const int strideX = layerParams.get<int>("stride_w");
Mat outShape = getTensorContent(getConstBlob(layer, value_id, 0));
- const int outH = outShape.at<int>(1);
- const int outW = outShape.at<int>(2);
+ int shift = (getDataLayout(layer) == DATA_LAYOUT_NCHW);
+ const int outH = outShape.at<int>(1 + shift) + begs[2] - 1 - ends[2];
+ const int outW = outShape.at<int>(2 + shift) + begs[3] - 1 - ends[3];
if (layerParams.get<String>("pad_mode") == "SAME")
{
layerParams.set("adj_w", (outW - 1) % strideX);
@@ -1759,6 +1842,16 @@ void TFImporter::parseConv2DBackpropInput(tensorflow::GraphDef& net, const tenso
// one input only
connect(layer_id, dstNet, parsePin(layer.input(2)), id, 0);
+ if (explicit_pads) // If we have explicit paddings, remove extra data
+ {
+ layerParams.set("begin", DictValue::arrayInt(begs, sizeof(begs) / sizeof(begs[0])));
+ layerParams.set("end", DictValue::arrayInt(ends, sizeof(ends) / sizeof(ends[0])));
+ int id = dstNet.addLayer(layer.name(), "Slice", layerParams);
+ layer_id[layer.name()] = id;
+ connect(layer_id, dstNet, parsePin(name), id, 0);
+ }
}
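With explicit pads, the deconvolution now runs in VALID mode over an enlarged output, and the appended Slice layer crops the border back off. A worked sketch of the height arithmetic, with illustrative pad values (top = 1, bottom = 2):

// begs[2] = pads[4] = 1 and ends[2] = -1 - pads[5] = -3, so for a requested height of 64:
//     outH = 64 + begs[2] - 1 - ends[2] = 64 + 1 - 1 + 3 = 67 = 64 + top + bottom;
// the Slice layer then trims one row at the top and two at the bottom, restoring height 64.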
void TFImporter::parseBlockLSTM(tensorflow::GraphDef& net, const tensorflow::NodeDef& layer, LayerParams& layerParams)
@@ -1766,8 +1859,8 @@ void TFImporter::parseBlockLSTM(tensorflow::GraphDef& net, const tensorflow::Nod
// op: "BlockLSTM"
// input: "lstm_block_wrapper/ToInt64/x" (ignore, number of time stamps)
// input: "input"
// input: "lstm_block_wrapper/zeros" (ignore)
// input: "lstm_block_wrapper/zeros" (ignore)
// input: "lstm_block_wrapper/zeros"
// input: "lstm_block_wrapper/zeros"
// input: "lstm_block_wrapper/kernel"
// input: "lstm_block_wrapper/w_i_diag"
// input: "lstm_block_wrapper/w_f_diag"
@@ -1793,9 +1886,11 @@ void TFImporter::parseBlockLSTM(tensorflow::GraphDef& net, const tensorflow::Nod
}
}
- Mat W, Wh, Wx, b;
+ Mat W, Wh, Wx, b, cs_prev, h_prev;
blobFromTensor(getConstBlob(layer, value_id, 4), W);
blobFromTensor(getConstBlob(layer, value_id, 8), b);
+ blobFromTensor(getConstBlob(layer, value_id, 2), cs_prev);
+ blobFromTensor(getConstBlob(layer, value_id, 3), h_prev);
const int outSize = W.cols / 4;
// IGFO->IFOG
@@ -1811,10 +1906,12 @@ void TFImporter::parseBlockLSTM(tensorflow::GraphDef& net, const tensorflow::Nod
Wx = W.rowRange(0, W.rows - outSize).t();
Wh = W.rowRange(W.rows - outSize, W.rows).t();
- layerParams.blobs.resize(3);
+ layerParams.blobs.resize(5);
layerParams.blobs[0] = Wh;
layerParams.blobs[1] = Wx;
layerParams.blobs[2] = b;
+ layerParams.blobs[3] = h_prev;
+ layerParams.blobs[4] = cs_prev;
if (hasLayerAttr(layer, "use_peephole"))
{
@@ -1822,14 +1919,14 @@ void TFImporter::parseBlockLSTM(tensorflow::GraphDef& net, const tensorflow::Nod
if (usePeephole)
{
layerParams.set("use_peephole", true);
- layerParams.blobs.resize(6);
+ layerParams.blobs.resize(8);
for (int i = 0; i < 3; ++i)
{
Mat w;
blobFromTensor(getConstBlob(layer, value_id, 5 + i), w);
w = w.reshape(1, w.total()); // Single column.
w = Mat::diag(w); // Make a diagonal matrix.
- layerParams.blobs[3 + i] = w;
+ layerParams.blobs[5 + i] = w;
}
}
}

@@ -445,7 +445,7 @@ class Layer_LSTM_Test : public ::testing::Test
{
public:
int numInp, numOut;
- Mat Wh, Wx, b;
+ Mat Wh, Wx, b, h, c;
Ptr<LSTMLayer> layer;
std::vector<Mat> inputs, outputs;
@@ -460,12 +460,17 @@ public:
Wh = Mat::ones(4 * numOut, numOut, CV_32F);
Wx = Mat::ones(4 * numOut, numInp, CV_32F);
b = Mat::ones(4 * numOut, 1, CV_32F);
+ h = Mat::ones(4, numOut, CV_32F);
+ c = Mat::ones(4, numOut, CV_32F);
LayerParams lp;
- lp.blobs.resize(3);
+ lp.blobs.resize(5);
lp.blobs[0] = Wh;
lp.blobs[1] = Wx;
lp.blobs[2] = b;
+ lp.blobs[3] = h;
+ lp.blobs[4] = c;
lp.set<bool>("produce_cell_output", produceCellOutput);
lp.set<bool>("use_timestamp_dim", useTimestampDim);
@@ -513,10 +518,12 @@ TEST_F(Layer_LSTM_Test, get_set_test)
TEST(Layer_LSTM_Test_Accuracy_with_, CaffeRecurrent)
{
LayerParams lp;
- lp.blobs.resize(3);
+ lp.blobs.resize(5);
lp.blobs[0] = blobFromNPY(_tf("lstm.prototxt.w_2.npy")); // Wh
lp.blobs[1] = blobFromNPY(_tf("lstm.prototxt.w_0.npy")); // Wx
lp.blobs[2] = blobFromNPY(_tf("lstm.prototxt.w_1.npy")); // bias
+ lp.blobs[3] = Mat::zeros(2, 17, CV_32F); // h_0
+ lp.blobs[4] = Mat::zeros(2, 17, CV_32F); // c_0
Ptr<LSTMLayer> layer = LSTMLayer::create(lp);
Mat inp = blobFromNPY(_tf("recurrent.input.npy"));
@@ -527,6 +534,68 @@ TEST(Layer_LSTM_Test_Accuracy_with_, CaffeRecurrent)
normAssert(h_t_reference, outputs[0]);
}
+ TEST(Layer_LSTM_Test_Accuracy_with_, HiddenParams)
+ {
+ Mat Wx = blobFromNPY(_tf("lstm.hidden.W.npy"));
+ Mat Wh = blobFromNPY(_tf("lstm.hidden.R.npy"));
+ Mat b = blobFromNPY(_tf("lstm.hidden.B.npy"));
+ Mat h0 = blobFromNPY(_tf("lstm.hidden.h0.npy"));
+ Mat c0 = blobFromNPY(_tf("lstm.hidden.c0.npy"));
+ const int numHidden = 3;
+ const int numDirs = Wx.size[0];
+ const int numFeatures = Wx.size[2];
+ b = b.reshape(1, b.size[0]);
+ Mat bx = b.colRange(0, b.cols / 2);
+ Mat bh = b.colRange(b.cols / 2, b.cols);
+ b = bx + bh;
+ // IFGO->IGFO
+ for (int k = 0; k < numDirs; ++k)
+ {
+ float* WxData = Wx.ptr<float>(k);
+ float* WhData = Wh.ptr<float>(k);
+ float* biasData = b.ptr<float>(k);
+ for (int j = 0; j < numHidden; ++j)
+ {
+ for (int i = 0; i < numFeatures; ++i)
+ {
+ std::swap(WxData[(numHidden + j) * numFeatures + i],
+ WxData[(numHidden * 2 + j) * numFeatures + i]);
+ }
+ for (int i = 0; i < numHidden; ++i)
+ {
+ std::swap(WhData[(numHidden + j) * numHidden + i],
+ WhData[(numHidden * 2 + j) * numHidden + i]);
+ }
+ std::swap(biasData[numHidden + j], biasData[numHidden * 2 + j]);
+ }
+ }
+ Wx = Wx.reshape(1, Wx.size[0] * Wx.size[1]);
+ Wh = Wh.reshape(1, Wh.size[0] * Wh.size[1]);
+ h0 = h0.reshape(1, h0.size[0] * h0.size[1]);
+ c0 = c0.reshape(1, c0.size[0] * c0.size[1]);
+ LayerParams lstmParams;
+ lstmParams.blobs.resize(5);
+ lstmParams.blobs[0] = Wh;
+ lstmParams.blobs[1] = Wx;
+ lstmParams.blobs[2] = b;
+ lstmParams.blobs[3] = h0;
+ lstmParams.blobs[4] = c0;
+ lstmParams.set("bidirectional", false);
+ Ptr<LSTMLayer> layer = LSTMLayer::create(lstmParams);
+ Mat inp = blobFromNPY(_tf("lstm.hidden.input.npy"));
+ std::vector<Mat> inputs(1, inp), outputs;
+ runLayer(layer, inputs, outputs);
+ Mat h_t_reference = blobFromNPY(_tf("lstm.hidden.output.npy"));
+ normAssert(h_t_reference, outputs[0]);
+ }
TEST(Layer_RNN_Test_Accuracy_with_, CaffeRecurrent)
{
Ptr<RNNLayer> layer = RNNLayer::create(LayerParams());
@@ -571,6 +640,9 @@ TEST(Layer_LSTM_Test_Accuracy_, Reverse)
bias.at<float>(2, 0) = 1e10f; // Output gate - always output everything
bias.at<float>(3, 0) = 0.f; // Update signal
+ cv::Mat hInternal = cv::Mat::zeros(1, 1, CV_32FC1);
+ cv::Mat cInternal = cv::Mat::zeros(1, 1, CV_32FC1);
LayerParams lp;
lp.set("reverse", true);
lp.set("use_timestamp_dim", true);
@@ -578,6 +650,8 @@ TEST(Layer_LSTM_Test_Accuracy_, Reverse)
lp.blobs.push_back(Wh);
lp.blobs.push_back(Wx);
lp.blobs.push_back(bias);
+ lp.blobs.push_back(hInternal);
+ lp.blobs.push_back(cInternal);
cv::Ptr<cv::dnn::LSTMLayer> layer = LSTMLayer::create(lp);
std::vector<cv::Mat> outputs;

@@ -710,6 +710,16 @@ TEST_P(Test_ONNX_layers, LSTM_bidirectional)
testONNXModels("lstm_bidirectional", npy, 0, 0, false, false);
}
+ TEST_P(Test_ONNX_layers, LSTM_hidden)
+ {
+ testONNXModels("hidden_lstm", npy, 0, 0, false, false);
+ }
+ TEST_P(Test_ONNX_layers, LSTM_hidden_bidirectional)
+ {
+ testONNXModels("hidden_lstm_bi", npy, 0, 0, false, false);
+ }
TEST_P(Test_ONNX_layers, Pad2d_Unfused)
{
testONNXModels("ReflectionPad2d");

@@ -203,6 +203,16 @@ TEST_P(Test_TensorFlow_layers, padding)
runTensorFlowNet("keras_pad_concat");
}
+ TEST_P(Test_TensorFlow_layers, padding_asymmetric)
+ {
+ runTensorFlowNet("conv2d_asymmetric_pads_nchw");
+ runTensorFlowNet("conv2d_asymmetric_pads_nhwc");
+ runTensorFlowNet("max_pool2d_asymmetric_pads_nchw");
+ runTensorFlowNet("max_pool2d_asymmetric_pads_nhwc");
+ runTensorFlowNet("conv2d_backprop_input_asymmetric_pads_nchw");
+ runTensorFlowNet("conv2d_backprop_input_asymmetric_pads_nhwc");
+ }
TEST_P(Test_TensorFlow_layers, padding_same)
{
// Reference output values are in range [0.0006, 2.798]

@@ -95,7 +95,7 @@ def make_umd(opencvjs, cvjs):
root.cv = factory();
} else if (typeof importScripts === 'function') {
// Web worker
- root.cv = factory;
+ root.cv = factory();
} else {
// Other shells, e.g. d8
root.cv = factory();
