diff --git a/cmake/OpenCVUtils.cmake b/cmake/OpenCVUtils.cmake
index da0ee3b36b..932eb039b1 100644
--- a/cmake/OpenCVUtils.cmake
+++ b/cmake/OpenCVUtils.cmake
@@ -1481,8 +1481,8 @@ function(ocv_target_link_libraries target)
       if(NOT LINK_PENDING STREQUAL "")
         __ocv_push_target_link_libraries(${LINK_MODE} ${LINK_PENDING})
         set(LINK_PENDING "")
-        set(LINK_MODE "${dep}")
       endif()
+      set(LINK_MODE "${dep}")
     else()
       if(BUILD_opencv_world)
         if(OPENCV_MODULE_${dep}_IS_PART_OF_WORLD)
diff --git a/doc/py_tutorials/py_imgproc/py_canny/py_canny.markdown b/doc/py_tutorials/py_imgproc/py_canny/py_canny.markdown
index cbc2a72eec..d36e5784eb 100644
--- a/doc/py_tutorials/py_imgproc/py_canny/py_canny.markdown
+++ b/doc/py_tutorials/py_imgproc/py_canny/py_canny.markdown
@@ -74,7 +74,7 @@ Canny Edge Detection in OpenCV
 
 OpenCV puts all the above in single function, **cv.Canny()**. We will see how to use it. First
 argument is our input image. Second and third arguments are our minVal and maxVal respectively.
-Third argument is aperture_size. It is the size of Sobel kernel used for find image gradients. By
+Fourth argument is aperture_size. It is the size of the Sobel kernel used to find image gradients. By
 default it is 3. Last argument is L2gradient which specifies the equation for finding gradient
 magnitude. If it is True, it uses the equation mentioned above which is more accurate, otherwise it
 uses this function: \f$Edge\_Gradient \; (G) = |G_x| + |G_y|\f$. By default, it is False.
diff --git a/doc/tutorials/core/mat_the_basic_image_container/mat_the_basic_image_container.markdown b/doc/tutorials/core/mat_the_basic_image_container/mat_the_basic_image_container.markdown
index 4f6f2b8a88..c53296b3bf 100644
--- a/doc/tutorials/core/mat_the_basic_image_container/mat_the_basic_image_container.markdown
+++ b/doc/tutorials/core/mat_the_basic_image_container/mat_the_basic_image_container.markdown
@@ -91,8 +91,8 @@ a new header with the new boundaries:
 Mat D (A, Rect(10, 10, 100, 100) ); // using a rectangle
 Mat E = A(Range::all(), Range(1,3)); // using row and column boundaries
 @endcode
-Now you may ask -- if the matrix itself may belong to multiple *Mat* objects who takes responsibility
-for cleaning it up when it's no longer needed. The short answer is: the last object that used it.
+Now you may ask -- if the matrix itself may belong to multiple *Mat* objects, who takes responsibility
+for cleaning it up when it's no longer needed? The short answer is: the last object that used it.
 This is handled by using a reference counting mechanism. Whenever somebody copies a header of a
 *Mat* object, a counter is increased for the matrix. Whenever a header is cleaned, this counter
 is decreased. When the counter reaches zero the matrix is freed. Sometimes you will want to copy
@@ -102,12 +102,12 @@ Mat F = A.clone();
 Mat G;
 A.copyTo(G);
 @endcode
-Now modifying *F* or *G* will not affect the matrix pointed by the *A*'s header. What you need to
+Now modifying *F* or *G* will not affect the matrix pointed to by *A*'s header. What you need to
 remember from all this is that:
 
 -   Output image allocation for OpenCV functions is automatic (unless specified otherwise).
 -   You do not need to think about memory management with OpenCV's C++ interface.
--   The assignment operator and the copy constructor only copies the header.
+-   The assignment operator and the copy constructor only copy the header.
 -   The underlying matrix of an image may be copied using the @ref cv::Mat::clone() and @ref cv::Mat::copyTo()
     functions.
 
@@ -122,10 +122,10 @@ of these allows us to create many shades of gray.
 For *colorful* ways we have a lot more methods to choose from. Each of them breaks it down to three
 or four basic components and we can use the combination of these to create the others. The most
 popular one is RGB, mainly because this is also how our eye builds up colors. Its base colors are
-red, green and blue. To code the transparency of a color sometimes a fourth element: alpha (A) is
+red, green and blue. To code the transparency of a color sometimes a fourth element, alpha (A), is
 added.
 
-There are, however, many other color systems each with their own advantages:
+There are, however, many other color systems, each with their own advantages:
 
 -   RGB is the most common as our eyes use something similar, however keep in mind that OpenCV standard display
     system composes colors using the BGR color space (red and blue channels are swapped places).
@@ -139,11 +139,11 @@ There are, however, many other color systems each with their own advantages:
 Each of the building components has its own valid domains. This leads to the data type used. How
 we store a component defines the control we have over its domain. The smallest data type possible is
 *char*, which means one byte or 8 bits. This may be unsigned (so can store values from 0 to 255) or
-signed (values from -127 to +127). Although in case of three components this already gives 16
-million possible colors to represent (like in case of RGB) we may acquire an even finer control by
+signed (values from -128 to +127). Although this width, in the case of three components (like RGB), already gives 16
+million possible colors to represent, we may acquire an even finer control by
 using the float (4 byte = 32 bit) or double (8 byte = 64 bit) data types for each component.
 Nevertheless, remember that increasing the size of a component also increases the size of the whole
-picture in the memory.
+picture in memory.
 
 Creating a Mat object explicitly
 ----------------------------------
diff --git a/modules/dnn/src/layers/recurrent_layers.cpp b/modules/dnn/src/layers/recurrent_layers.cpp
index bdc46643fc..feae35dac0 100644
--- a/modules/dnn/src/layers/recurrent_layers.cpp
+++ b/modules/dnn/src/layers/recurrent_layers.cpp
@@ -112,19 +112,24 @@ public:
             const Mat& Wh = blobs[0];
             const Mat& Wx = blobs[1];
             const Mat& bias = blobs[2];
+            const Mat& hInternal = blobs[3];
+            const Mat& cInternal = blobs[4];
             CV_CheckEQ(Wh.dims, 2, "");
             CV_CheckEQ(Wx.dims, 2, "");
             CV_CheckEQ(Wh.rows, Wx.rows, "");
             CV_CheckEQ(Wh.rows, (1 + static_cast<int>(bidirectional))*4*Wh.cols, "");
             CV_CheckEQ(Wh.rows, (int)bias.total(), "");
+            CV_CheckEQ(hInternal.cols, Wh.cols, "");
+            CV_CheckEQ(hInternal.cols, cInternal.cols, "");
+            CV_CheckEQ(hInternal.rows, cInternal.rows, "");
             CV_Assert(Wh.type() == Wx.type() && Wx.type() == bias.type());
 
             // Peephole weights.
-            if (blobs.size() > 3)
+            if (blobs.size() > 5)
             {
-                CV_Assert(blobs.size() == 6);
+                CV_Assert(blobs.size() == 8);
                 const int N = Wh.cols;
-                for (int i = 3; i < 6; ++i)
+                for (int i = 5; i < 8; ++i)
                 {
                     CV_Assert(blobs[i].rows == N && blobs[i].cols == N);
                     CV_Assert(blobs[i].type() == bias.type());
@@ -181,7 +186,7 @@ public:
                          std::vector<MatShape> &outputs,
                          std::vector<MatShape> &internals) const CV_OVERRIDE
     {
-        CV_Assert((!usePeephole && blobs.size() == 3) || (usePeephole && blobs.size() == 6));
+        CV_Assert((!usePeephole && blobs.size() == 5) || (usePeephole && blobs.size() == 8));
         CV_Assert(inputs.size() == 1);
         const MatShape& inp0 = inputs[0];
 
@@ -228,7 +233,7 @@ public:
         std::vector<Mat> input;
         inputs_arr.getMatVector(input);
 
-        CV_Assert((!usePeephole && blobs.size() == 3) || (usePeephole && blobs.size() == 6));
+        CV_Assert((!usePeephole && blobs.size() == 5) || (usePeephole && blobs.size() == 8));
         CV_Assert(input.size() == 1);
         const Mat& inp0 = input[0];
 
@@ -284,13 +289,14 @@ public:
             const Mat &Wh = blobs[0].rowRange(i * blobs[0].rows / numDirs, (i + 1) * blobs[0].rows / numDirs);
             const Mat &Wx = blobs[1].rowRange(i * blobs[1].rows / numDirs, (i + 1) * blobs[1].rows / numDirs);
             const Mat &bias = blobs[2].colRange(i * blobs[2].cols / numDirs, (i + 1) * blobs[2].cols / numDirs);
+            const Mat &h_0 = blobs[3].rowRange(i * blobs[3].rows / numDirs, (i + 1) * blobs[3].rows / numDirs);
+            const Mat &c_0 = blobs[4].rowRange(i * blobs[4].rows / numDirs, (i + 1) * blobs[4].rows / numDirs);
 
             int numOut = Wh.size[1];
-
             Mat hInternal = internals[0], cInternal = internals[1],
                     dummyOnes = internals[2], gates = internals[3];
-            hInternal.setTo(0.);
-            cInternal.setTo(0.);
+            h_0.copyTo(hInternal);
+            c_0.copyTo(cInternal);
             dummyOnes.setTo(1.);
 
             int numSamplesTotal = numTimeStamps*numSamples;
@@ -331,8 +337,8 @@ public:
                 if (usePeephole)
                 {
                     Mat gatesIF = gates.colRange(0, 2*numOut);
-                    gemm(cInternal, blobs[3], 1, gateI, 1, gateI);
-                    gemm(cInternal, blobs[4], 1, gateF, 1, gateF);
+                    gemm(cInternal, blobs[5], 1, gateI, 1, gateI);
+                    gemm(cInternal, blobs[6], 1, gateF, 1, gateF);
                     sigmoid(gatesIF, gatesIF);
                 }
                 else
@@ -355,7 +361,7 @@ public:
                 }
                 if (usePeephole)
                 {
-                    gemm(cInternal, blobs[5], 1, gateO, 1, gateO);
+                    gemm(cInternal, blobs[7], 1, gateO, 1, gateO);
                     sigmoid(gateO, gateO);
                 }
 
diff --git a/modules/dnn/src/onnx/onnx_importer.cpp b/modules/dnn/src/onnx/onnx_importer.cpp
index 94acd8f7fd..d33fb68ac1 100644
--- a/modules/dnn/src/onnx/onnx_importer.cpp
+++ b/modules/dnn/src/onnx/onnx_importer.cpp
@@ -1048,8 +1048,9 @@ void ONNXImporter::handleNode(const opencv_onnx::NodeProto& node_proto_)
             Mat Wx = getBlob(node_proto, 1);
             Mat Wh = getBlob(node_proto, 2);
             Mat b = getBlob(node_proto, 3);
-            CV_CheckEQ(countNonZero(getBlob(node_proto, 5)), 0, "Unsupported non zero initial_h");
-            CV_CheckEQ(countNonZero(getBlob(node_proto, 6)), 0, "Unsupported non zero initial_c");
+            Mat h0 = getBlob(node_proto, 5);
+            Mat c0 = getBlob(node_proto, 6);
+
             b = b.reshape(1, b.size[0]);
 
             const int numHidden = lstmParams.get<int>("hidden_size");
@@ -1082,11 +1083,15 @@ void ONNXImporter::handleNode(const opencv_onnx::NodeProto& node_proto_)
             }
             Wx = Wx.reshape(1, Wx.size[0] * Wx.size[1]);
             Wh = Wh.reshape(1, Wh.size[0] * Wh.size[1]);
+            h0 = h0.reshape(1, h0.size[0] * h0.size[1]);
+            c0 = c0.reshape(1, c0.size[0] * c0.size[1]);
 
-            lstmParams.blobs.resize(3);
+            lstmParams.blobs.resize(5);
             lstmParams.blobs[0] = Wh;
             lstmParams.blobs[1] = Wx;
             lstmParams.blobs[2] = b;
+            lstmParams.blobs[3] = h0;
+            lstmParams.blobs[4] = c0;
             lstmParams.set("bidirectional", lstmParams.get<String>("direction", "") == "bidirectional");
 
             node_proto.set_output(0, lstmParams.name);  // set different name so output shapes will be registered on that name
diff --git a/modules/dnn/src/tensorflow/tf_importer.cpp b/modules/dnn/src/tensorflow/tf_importer.cpp
index 10670bfef9..4bd09adda0 100644
--- a/modules/dnn/src/tensorflow/tf_importer.cpp
+++ b/modules/dnn/src/tensorflow/tf_importer.cpp
@@ -406,12 +406,53 @@ void setKSize(LayerParams &layerParams, const tensorflow::NodeDef &layer)
     }
 }
 
-void setPadding(LayerParams &layerParams, const tensorflow::NodeDef &layer)
+void setPadMode(LayerParams &layerParams, const tensorflow::NodeDef &layer)  // copies the node's "padding" attribute, when present, into the "pad_mode" layer parameter
 {
     if (hasLayerAttr(layer, "padding"))
         layerParams.set("pad_mode", getLayerAttr(layer, "padding").s());
 }
 
+bool getExplicitPadding(LayerParams &layerParams, const tensorflow::NodeDef &layer, int64_t (&pads)[8])  // reads "explicit_paddings" into pads; returns false unless pad_mode is "EXPLICIT"
+{
+    if (!layerParams.has("pad_mode") ||
+        layerParams.get("pad_mode").getStringValue() != "EXPLICIT")
+    {
+        return false;  // pads is left untouched on this path
+    }
+
+    CV_Assert(hasLayerAttr(layer, "explicit_paddings"));
+
+    const tensorflow::AttrValue& protoPads = getLayerAttr(layer, "explicit_paddings");
+    if (protoPads.list().i_size() != 8)  // exactly 8 values are required (two per dimension, see the index tables below)
+    {
+        CV_Error(Error::StsNotImplemented, "Unsupported asymmetric padding configuration.");
+    }
+
+    int n = sizeof(pads) / sizeof(pads[0]);  // number of elements in pads, i.e. 8
+    for (int i = 0; i < n; ++i)
+    {
+        pads[i] = protoPads.list().i(i);
+    }
+
+    if (getDataLayout(layer) != DATA_LAYOUT_NCHW)  // any layout other than NCHW is treated as NHWC
+    {
+        CV_LOG_DEBUG(NULL, "DNN/TF:     Data format " << getLayerAttr(layer, "data_format").s() << ", assuming NHWC.");  // NOTE(review): reads "data_format" unconditionally — confirm the attribute always exists here
+        // Perhaps, we have NHWC padding dimensions order.
+        //  N    H    W    C
+        // 0 1  2 3  4 5  6 7
+        std::swap(pads[2], pads[6]);
+        std::swap(pads[3], pads[7]);
+        //  N    C    W    H
+        // 0 1  2 3  4 5  6 7
+        std::swap(pads[4], pads[6]);
+        std::swap(pads[5], pads[7]);
+        //  N    C    H    W
+        // 0 1  2 3  4 5  6 7
+    }
+
+    return true;  // pads now holds the padding pairs in N, C, H, W order
+}
+
 Pin parsePin(const std::string &name)
 {
     Pin pin(name);
@@ -516,6 +557,7 @@ protected:
 
 private:
     void addPermuteLayer(const int* order, const std::string& permName, Pin& inpId);
+    void setPadding(LayerParams &layerParams, const tensorflow::NodeDef &layer, std::string& inputName, float value = 0.);
 
     friend class LayerHandler;
     typedef void (TFImporter::*TFImporterNodeParser)(tensorflow::GraphDef&, const tensorflow::NodeDef&, LayerParams&);
@@ -558,6 +600,31 @@ private:
     void parseCustomLayer        (tensorflow::GraphDef& net, const tensorflow::NodeDef& layer, LayerParams& layerParams);
 };
 
+void TFImporter::setPadding(LayerParams &layerParams, const tensorflow::NodeDef &layer, std::string& inputName, float value)  // sets pad_mode; for EXPLICIT padding inserts a "Padding" layer and redirects inputName to it
+{
+    setPadMode(layerParams, layer);
+    int64_t pads[8];  // filled by getExplicitPadding only when it returns true
+
+    if (!getExplicitPadding(layerParams, layer, pads))
+    {
+        return;  // not EXPLICIT: pad_mode is set, no extra layer is added, inputName is unchanged
+    }
+
+    LayerParams padLp;
+    padLp.name = layer.name() + "/pad";
+    padLp.type = "Padding";
+    padLp.set("paddings", DictValue::arrayInt(pads, sizeof(pads) / sizeof(pads[0])));
+    padLp.set("value", value);  // fill value for the padded area (defaults to 0 in the declaration)
+
+    int id = dstNet.addLayer(padLp.name, padLp.type, padLp);
+    layer_id[padLp.name] = id;
+
+    connect(layer_id, dstNet, parsePin(inputName), id, 0);  // original input feeds the new padding layer
+    inputName = padLp.name;  // caller must connect its own layer to the padding layer from now on
+
+    layerParams.set("pad_mode", "VALID");  // padding is applied by the separate layer, so the consumer itself pads nothing
+}
+
 class LayerHandler
 {
 public:
@@ -808,7 +875,7 @@ void TFImporter::parseConvolution(tensorflow::GraphDef& net, const tensorflow::N
 
     setStrides(layerParams, layer);
     if (!layerParams.has("pad_w") && !layerParams.has("pad_h"))
-        setPadding(layerParams, layer);
+        setPadding(layerParams, layer, input);
 
     // The final node of dilated convolution subgraph.
     next_layers = getNextLayers(net, name, "BatchToSpaceND");
@@ -1253,20 +1320,21 @@ void TFImporter::parseMaxPool(tensorflow::GraphDef& net, const tensorflow::NodeD
 {
     const std::string& name = layer.name();
     const int num_inputs = layer.input_size();
 
     CV_CheckGT(num_inputs, 0, "");
+    std::string inputName = layer.input(0);  // mutable copy (setPadding may redirect it); read only after the input count is validated
     layerParams.set("pool", "max");
 
     setKSize(layerParams, layer);
     setStrides(layerParams, layer);
-    setPadding(layerParams, layer);
+    setPadding(layerParams, layer, inputName, -std::numeric_limits<float>::infinity());  // explicit padding is filled with -inf so it can never win the max
     // Test_TensorFlow_nets.EAST_text_detection/1, NGRAPH/CPU
     layerParams.set("ceil_mode", false);
 
     int id = dstNet.addLayer(name, "Pooling", layerParams);
     layer_id[name] = id;
 
-    connectToAllBlobs(layer_id, dstNet, parsePin(layer.input(0)), id, num_inputs);
+    connectToAllBlobs(layer_id, dstNet, parsePin(inputName), id, num_inputs);  // inputName is the padding layer when one was inserted
 }
 
 void TFImporter::parseAvgPool(tensorflow::GraphDef& net, const tensorflow::NodeDef& layer, LayerParams& layerParams)
@@ -1279,7 +1347,7 @@ void TFImporter::parseAvgPool(tensorflow::GraphDef& net, const tensorflow::NodeD
     layerParams.set("ave_pool_padded_area", false);
     setKSize(layerParams, layer);
     setStrides(layerParams, layer);
-    setPadding(layerParams, layer);
+    setPadMode(layerParams, layer);
 
     int id = dstNet.addLayer(name, "Pooling", layerParams);
     layer_id[name] = id;
@@ -1694,7 +1762,7 @@ void TFImporter::parseConv2DBackpropInput(tensorflow::GraphDef& net, const tenso
     // input: "weights"
     // input: "input"
 
-    const std::string& name = layer.name();
+    std::string name = layer.name();
     const int num_inputs = layer.input_size();
 
     CV_CheckEQ(num_inputs, 3, "Expected output shape, weights and input nodes");
@@ -1725,7 +1793,21 @@ void TFImporter::parseConv2DBackpropInput(tensorflow::GraphDef& net, const tenso
     layerParams.set("num_output", kshape[1]);
 
     setStrides(layerParams, layer);
-    setPadding(layerParams, layer);
+    setPadMode(layerParams, layer);
+    int64_t pads[8];
+    bool explicit_pads = getExplicitPadding(layerParams, layer, pads);
+    int64_t begs[4] = {};
+    int64_t ends[4] = {-1, -1, -1, -1};
+    if (explicit_pads)
+    {
+        name += "/deconv";
+        layerParams.set("pad_mode", "VALID");
+        for (int i = 2; i < 4; ++i) // begins=[0, 0, a, b], ends=[-1, -1, c, d]
+        {
+            begs[i] = pads[2*i];
+            ends[i] = -1 - pads[2*i + 1];
+        }
+    }
 
     // For convolution layer, output shape computes as
     // o = 1 + (i - k + 2*p) / s
@@ -1742,8 +1824,9 @@ void TFImporter::parseConv2DBackpropInput(tensorflow::GraphDef& net, const tenso
     const int strideY = layerParams.get<int>("stride_h");
     const int strideX = layerParams.get<int>("stride_w");
     Mat outShape = getTensorContent(getConstBlob(layer, value_id, 0));
-    const int outH = outShape.at<int>(1);
-    const int outW = outShape.at<int>(2);
+    int shift = (getDataLayout(layer) == DATA_LAYOUT_NCHW);
+    const int outH = outShape.at<int>(1 + shift) + begs[2] - 1 - ends[2];
+    const int outW = outShape.at<int>(2 + shift) + begs[3] - 1 - ends[3];
     if (layerParams.get<String>("pad_mode") == "SAME")
     {
         layerParams.set("adj_w", (outW - 1) % strideX);
@@ -1759,6 +1842,16 @@ void TFImporter::parseConv2DBackpropInput(tensorflow::GraphDef& net, const tenso
 
     // one input only
     connect(layer_id, dstNet, parsePin(layer.input(2)), id, 0);
+    if (explicit_pads) // If we have explicit paddings, remove extra data
+    {
+        layerParams.set("begin", DictValue::arrayInt(begs, sizeof(begs) / sizeof(begs[0])));
+        layerParams.set("end", DictValue::arrayInt(ends, sizeof(ends) / sizeof(ends[0])));
+
+        int id = dstNet.addLayer(layer.name(), "Slice", layerParams);
+        layer_id[layer.name()] = id;
+
+        connect(layer_id, dstNet, parsePin(name), id, 0);
+    }
 }
 
 void TFImporter::parseBlockLSTM(tensorflow::GraphDef& net, const tensorflow::NodeDef& layer, LayerParams& layerParams)
@@ -1766,8 +1859,8 @@ void TFImporter::parseBlockLSTM(tensorflow::GraphDef& net, const tensorflow::Nod
     // op: "BlockLSTM"
     // input: "lstm_block_wrapper/ToInt64/x"  (ignore, number of time stamps)
     // input: "input"
-    // input: "lstm_block_wrapper/zeros"      (ignore)
-    // input: "lstm_block_wrapper/zeros"      (ignore)
+    // input: "lstm_block_wrapper/zeros"
+    // input: "lstm_block_wrapper/zeros"
     // input: "lstm_block_wrapper/kernel"
     // input: "lstm_block_wrapper/w_i_diag"
     // input: "lstm_block_wrapper/w_f_diag"
@@ -1793,9 +1886,11 @@ void TFImporter::parseBlockLSTM(tensorflow::GraphDef& net, const tensorflow::Nod
         }
     }
 
-    Mat W, Wh, Wx, b;
+    Mat W, Wh, Wx, b, cs_prev, h_prev;
     blobFromTensor(getConstBlob(layer, value_id, 4), W);
     blobFromTensor(getConstBlob(layer, value_id, 8), b);
+    blobFromTensor(getConstBlob(layer, value_id, 2), cs_prev);
+    blobFromTensor(getConstBlob(layer, value_id, 3), h_prev);
     const int outSize = W.cols / 4;
 
     // IGFO->IFOG
@@ -1811,10 +1906,12 @@ void TFImporter::parseBlockLSTM(tensorflow::GraphDef& net, const tensorflow::Nod
     Wx = W.rowRange(0, W.rows - outSize).t();
     Wh = W.rowRange(W.rows - outSize, W.rows).t();
 
-    layerParams.blobs.resize(3);
+    layerParams.blobs.resize(5);
     layerParams.blobs[0] = Wh;
     layerParams.blobs[1] = Wx;
     layerParams.blobs[2] = b;
+    layerParams.blobs[3] = h_prev;
+    layerParams.blobs[4] = cs_prev;
 
     if (hasLayerAttr(layer, "use_peephole"))
     {
@@ -1822,14 +1919,14 @@ void TFImporter::parseBlockLSTM(tensorflow::GraphDef& net, const tensorflow::Nod
         if (usePeephole)
         {
             layerParams.set("use_peephole", true);
-            layerParams.blobs.resize(6);
+            layerParams.blobs.resize(8);
             for (int i = 0; i < 3; ++i)
             {
                 Mat w;
                 blobFromTensor(getConstBlob(layer, value_id, 5 + i), w);
                 w = w.reshape(1, w.total());  // Single column.
                 w = Mat::diag(w);  // Make a diagonal matrix.
-                layerParams.blobs[3 + i] = w;
+                layerParams.blobs[5 + i] = w;
             }
         }
     }
diff --git a/modules/dnn/test/test_layers.cpp b/modules/dnn/test/test_layers.cpp
index 20d3fb41eb..1383c59e28 100644
--- a/modules/dnn/test/test_layers.cpp
+++ b/modules/dnn/test/test_layers.cpp
@@ -445,7 +445,7 @@ class Layer_LSTM_Test : public ::testing::Test
 {
 public:
     int numInp, numOut;
-    Mat Wh, Wx, b;
+    Mat Wh, Wx, b, h, c;
     Ptr<LSTMLayer> layer;
     std::vector<Mat> inputs, outputs;
 
@@ -460,12 +460,17 @@ public:
         Wh = Mat::ones(4 * numOut, numOut, CV_32F);
         Wx = Mat::ones(4 * numOut, numInp, CV_32F);
         b  = Mat::ones(4 * numOut, 1, CV_32F);
+        h  = Mat::ones(4, numOut, CV_32F);
+        c  = Mat::ones(4, numOut, CV_32F);
 
         LayerParams lp;
-        lp.blobs.resize(3);
+        lp.blobs.resize(5);
         lp.blobs[0] = Wh;
         lp.blobs[1] = Wx;
         lp.blobs[2] = b;
+        lp.blobs[3] = h;
+        lp.blobs[4] = c;
+
         lp.set<bool>("produce_cell_output", produceCellOutput);
         lp.set<bool>("use_timestamp_dim", useTimestampDim);
 
@@ -513,10 +518,12 @@ TEST_F(Layer_LSTM_Test, get_set_test)
 TEST(Layer_LSTM_Test_Accuracy_with_, CaffeRecurrent)
 {
     LayerParams lp;
-    lp.blobs.resize(3);
+    lp.blobs.resize(5);
     lp.blobs[0] = blobFromNPY(_tf("lstm.prototxt.w_2.npy"));  // Wh
     lp.blobs[1] = blobFromNPY(_tf("lstm.prototxt.w_0.npy"));  // Wx
     lp.blobs[2] = blobFromNPY(_tf("lstm.prototxt.w_1.npy"));  // bias
+    lp.blobs[3] = Mat::zeros(2, 17, CV_32F);                     // h_0
+    lp.blobs[4] = Mat::zeros(2, 17, CV_32F);                     // c_0
     Ptr<LSTMLayer> layer = LSTMLayer::create(lp);
 
     Mat inp = blobFromNPY(_tf("recurrent.input.npy"));
@@ -527,6 +534,68 @@ TEST(Layer_LSTM_Test_Accuracy_with_, CaffeRecurrent)
     normAssert(h_t_reference, outputs[0]);
 }
 
+TEST(Layer_LSTM_Test_Accuracy_with_, HiddenParams)  // LSTM output check with the initial h/c state supplied through blobs[3] and blobs[4]
+{
+    Mat Wx = blobFromNPY(_tf("lstm.hidden.W.npy"));
+    Mat Wh = blobFromNPY(_tf("lstm.hidden.R.npy"));
+    Mat b = blobFromNPY(_tf("lstm.hidden.B.npy"));
+    Mat h0 = blobFromNPY(_tf("lstm.hidden.h0.npy"));
+    Mat c0 = blobFromNPY(_tf("lstm.hidden.c0.npy"));
+
+    const int numHidden = 3;  // presumably the hidden size of the stored weights — TODO confirm against the .npy files
+    const int numDirs = Wx.size[0];
+    const int numFeatures = Wx.size[2];
+
+    b = b.reshape(1, b.size[0]);
+    Mat bx = b.colRange(0, b.cols / 2);
+    Mat bh = b.colRange(b.cols / 2, b.cols);
+    b = bx + bh;  // the two halves of the bias are summed into a single bias
+
+    // IFGO->IGFO: swap the second and third numHidden-sized gate blocks of Wx, Wh and the bias
+    for (int k = 0; k < numDirs; ++k)
+    {
+        float* WxData = Wx.ptr<float>(k);
+        float* WhData = Wh.ptr<float>(k);
+        float* biasData = b.ptr<float>(k);
+        for (int j = 0; j < numHidden; ++j)
+        {
+            for (int i = 0; i < numFeatures; ++i)
+            {
+                std::swap(WxData[(numHidden + j) * numFeatures + i],
+                          WxData[(numHidden * 2 + j) * numFeatures + i]);
+            }
+            for (int i = 0; i < numHidden; ++i)
+            {
+                std::swap(WhData[(numHidden + j) * numHidden + i],
+                          WhData[(numHidden * 2 + j) * numHidden + i]);
+            }
+            std::swap(biasData[numHidden + j], biasData[numHidden * 2 + j]);
+        }
+    }
+
+    Wx = Wx.reshape(1, Wx.size[0] * Wx.size[1]);  // merge the first two dimensions: rows = size[0] * size[1]
+    Wh = Wh.reshape(1, Wh.size[0] * Wh.size[1]);
+    h0 = h0.reshape(1, h0.size[0] * h0.size[1]);
+    c0 = c0.reshape(1, c0.size[0] * c0.size[1]);
+
+    LayerParams lstmParams;
+    lstmParams.blobs.resize(5);  // blob order: Wh, Wx, bias, h0, c0
+    lstmParams.blobs[0] = Wh;
+    lstmParams.blobs[1] = Wx;
+    lstmParams.blobs[2] = b;
+    lstmParams.blobs[3] = h0;
+    lstmParams.blobs[4] = c0;
+    lstmParams.set("bidirectional", false);
+    Ptr<LSTMLayer> layer = LSTMLayer::create(lstmParams);
+
+    Mat inp = blobFromNPY(_tf("lstm.hidden.input.npy"));
+    std::vector<Mat> inputs(1, inp), outputs;
+    runLayer(layer, inputs, outputs);
+
+    Mat h_t_reference = blobFromNPY(_tf("lstm.hidden.output.npy"));
+    normAssert(h_t_reference, outputs[0]);  // first layer output must match the stored reference
+}
+
 TEST(Layer_RNN_Test_Accuracy_with_, CaffeRecurrent)
 {
     Ptr<RNNLayer> layer = RNNLayer::create(LayerParams());
@@ -571,6 +640,9 @@ TEST(Layer_LSTM_Test_Accuracy_, Reverse)
     bias.at<float>(2, 0) = 1e10f;  // Output gate - always output everything
     bias.at<float>(3, 0) = 0.f;  // Update signal
 
+    cv::Mat hInternal = cv::Mat::zeros(1, 1, CV_32FC1);
+    cv::Mat cInternal = cv::Mat::zeros(1, 1, CV_32FC1);
+
     LayerParams lp;
     lp.set("reverse", true);
     lp.set("use_timestamp_dim", true);
@@ -578,6 +650,8 @@ TEST(Layer_LSTM_Test_Accuracy_, Reverse)
     lp.blobs.push_back(Wh);
     lp.blobs.push_back(Wx);
     lp.blobs.push_back(bias);
+    lp.blobs.push_back(hInternal);
+    lp.blobs.push_back(cInternal);
 
     cv::Ptr<cv::dnn::LSTMLayer> layer = LSTMLayer::create(lp);
     std::vector<cv::Mat> outputs;
diff --git a/modules/dnn/test/test_onnx_importer.cpp b/modules/dnn/test/test_onnx_importer.cpp
index 69aac79cb5..8bfd864955 100644
--- a/modules/dnn/test/test_onnx_importer.cpp
+++ b/modules/dnn/test/test_onnx_importer.cpp
@@ -710,6 +710,16 @@ TEST_P(Test_ONNX_layers, LSTM_bidirectional)
     testONNXModels("lstm_bidirectional", npy, 0, 0, false, false);
 }
 
+TEST_P(Test_ONNX_layers, LSTM_hidden)  // runs the "hidden_lstm" model; presumably an LSTM with initial h/c state — TODO confirm against the test data
+{
+    testONNXModels("hidden_lstm", npy, 0, 0, false, false);
+}
+
+TEST_P(Test_ONNX_layers, LSTM_hidden_bidirectional)  // runs the "hidden_lstm_bi" model; presumably the bidirectional variant — TODO confirm against the test data
+{
+    testONNXModels("hidden_lstm_bi", npy, 0, 0, false, false);
+}
+
 TEST_P(Test_ONNX_layers, Pad2d_Unfused)
 {
     testONNXModels("ReflectionPad2d");
diff --git a/modules/dnn/test/test_tf_importer.cpp b/modules/dnn/test/test_tf_importer.cpp
index 35751b4824..53cc05bfc3 100644
--- a/modules/dnn/test/test_tf_importer.cpp
+++ b/modules/dnn/test/test_tf_importer.cpp
@@ -203,6 +203,16 @@ TEST_P(Test_TensorFlow_layers, padding)
     runTensorFlowNet("keras_pad_concat");
 }
 
+TEST_P(Test_TensorFlow_layers, padding_asymmetric)  // judging by the model names: conv2d, max_pool2d and conv2d_backprop_input, each in NCHW and NHWC
+{
+    runTensorFlowNet("conv2d_asymmetric_pads_nchw");
+    runTensorFlowNet("conv2d_asymmetric_pads_nhwc");
+    runTensorFlowNet("max_pool2d_asymmetric_pads_nchw");
+    runTensorFlowNet("max_pool2d_asymmetric_pads_nhwc");
+    runTensorFlowNet("conv2d_backprop_input_asymmetric_pads_nchw");
+    runTensorFlowNet("conv2d_backprop_input_asymmetric_pads_nhwc");
+}
+
 TEST_P(Test_TensorFlow_layers, padding_same)
 {
     // Reference output values are in range [0.0006, 2.798]
diff --git a/modules/js/src/make_umd.py b/modules/js/src/make_umd.py
index bed6ee9bcc..1096a8eb31 100644
--- a/modules/js/src/make_umd.py
+++ b/modules/js/src/make_umd.py
@@ -95,7 +95,7 @@ def make_umd(opencvjs, cvjs):
     root.cv = factory();
   } else if (typeof importScripts === 'function') {
     // Web worker
-    root.cv = factory;
+    root.cv = factory();
   } else {
     // Other shells, e.g. d8
     root.cv = factory();