diff --git a/modules/dnn/cmake/OpenCVFindCBLAS.cmake b/modules/dnn/cmake/OpenCVFindCBLAS.cmake
index 5d2d0bc00..266c2bc35 100644
--- a/modules/dnn/cmake/OpenCVFindCBLAS.cmake
+++ b/modules/dnn/cmake/OpenCVFindCBLAS.cmake
@@ -16,7 +16,7 @@ if(${the_module}_WITH_BLAS)
   endif()
   if(NOT HAVE_BLAS)
     include(cmake/OpenCVFindMKL.cmake)
-    if(MKL_FOUND AND FALSE)
+    if(MKL_FOUND)
       set(BLAS_INCLUDE_DIR ${MKL_INCLUDE_DIRS})
       set(BLAS_LIBRARIES ${MKL_LIBRARIES} )
       set(BLAS_CBLAS_H "mkl_cblas.h" )
diff --git a/modules/dnn/include/opencv2/dnn/all_layers.hpp b/modules/dnn/include/opencv2/dnn/all_layers.hpp
index 3a4f3b1a2..67f71e8b0 100644
--- a/modules/dnn/include/opencv2/dnn/all_layers.hpp
+++ b/modules/dnn/include/opencv2/dnn/all_layers.hpp
@@ -98,12 +98,12 @@ namespace dnn
         g_t &= tanh &(W_{xg} x_t + W_{hg} h_{t-1} + b_g),  \\
     @f}
     where @f$W_{x?}@f$, @f$W_{h?}@f$ and @f$b_{?}@f$ are learned weights represented as matrices:
-    @f$W_{x?} \in R^{N_c \times N_x}@f$, @f$W_h? \in R^{N_c \times N_h}@f$, @f$b_? \in R^{N_c}@f$.
+    @f$W_{x?} \in R^{N_h \times N_x}@f$, @f$W_{h?} \in R^{N_h \times N_h}@f$, @f$b_? \in R^{N_h}@f$.

     For simplicity and performance purposes we use @f$ W_x = [W_{xi}; W_{xf}; W_{xo}, W_{xg}] @f$
-    (i.e. @f$W_x@f$ is vertical contacentaion of @f$ W_{x?} @f$), @f$ W_x \in R^{4N_c \times N_x} @f$.
-    The same for @f$ W_h = [W_{hi}; W_{hf}; W_{ho}, W_{hg}], W_h \in R^{4N_c \times N_h} @f$
-    and for @f$ b = [b_i; b_f, b_o, b_g]@f$, @f$b \in R^{4N_c} @f$.
+    (i.e. @f$W_x@f$ is vertical concatenation of @f$ W_{x?} @f$), @f$ W_x \in R^{4N_h \times N_x} @f$.
+    The same for @f$ W_h = [W_{hi}; W_{hf}; W_{ho}, W_{hg}], W_h \in R^{4N_h \times N_h} @f$
+    and for @f$ b = [b_i; b_f, b_o, b_g]@f$, @f$b \in R^{4N_h} @f$.

     @param Wh is matrix defining how previous output is transformed to internal gates (i.e. according to abovemtioned notation is @f$ W_h @f$)
     @param Wx is matrix defining how current input is transformed to internal gates (i.e. according to abovemtioned notation is @f$ W_x @f$)
@@ -111,6 +111,12 @@ namespace dnn
      */
     virtual void setWeights(const Blob &Wh, const Blob &Wx, const Blob &b) = 0;

+    /** @brief Specifies shape of output blob which will be [[`T`], `N`] + @p outTailShape.
+      * @details If this parameter is empty or unset then @p outTailShape = [`Wh`.size(0)] will be used,
+      * where `Wh` is parameter from setWeights().
+      */
+    virtual void setOutShape(const BlobShape &outTailShape = BlobShape::empty()) = 0;
+
     /** @brief Set @f$ h_{t-1} @f$ value that will be used in next forward() calls.
       * @details By-default @f$ h_{t-1} @f$ is inited by zeros and updated after each forward() call.
      */
@@ -145,12 +151,16 @@ namespace dnn
     * @param output contains computed outputs: @f$h_t@f$ (and @f$c_t@f$ if setProduceCellOutput() flag was set to true).
     *
     * If setUseTimstampsDim() is set to true then @p input[0] should has at least two dimensions with the following shape: [`T`, `N`, `[data dims]`],
-    * where `T` specifies number of timpestamps, `N` is number of independent streams (i.e. x_{t_0 + t}^{stream} is @p input[0][t, stream, ...]).
+    * where `T` specifies number of timestamps, `N` is number of independent streams (i.e. @f$ x_{t_0 + t}^{stream} @f$ is stored inside @p input[0][t, stream, ...]).
     *
     * If setUseTimstampsDim() is set to fase then @p input[0] should contain single timestamp, its shape should has form [`N`, `[data dims]`] with at least one dimension.
-    * (i.e. x_{t}^{stream} = @p input[0][stream, ...]).
+    * (i.e. @f$ x_{t}^{stream} @f$ is stored inside @p input[0][stream, ...]).
     */
     void forward(std::vector<Blob*> &input, std::vector<Blob> &output);
+
+    int inputNameToIndex(String inputName);
+
+    int outputNameToIndex(String outputName);
 };

 //! Classical recurrent layer
diff --git a/modules/dnn/include/opencv2/dnn/blob.hpp b/modules/dnn/include/opencv2/dnn/blob.hpp
index 01c88c24e..72f644d8c 100644
--- a/modules/dnn/include/opencv2/dnn/blob.hpp
+++ b/modules/dnn/include/opencv2/dnn/blob.hpp
@@ -44,6 +44,7 @@
 #include
 #include
 #include
+#include

 namespace cv
 {
@@ -56,7 +57,7 @@ namespace dnn
     struct BlobShape
     {
         BlobShape();                                        //!< Creates [1, 1, 1, 1] shape @todo Make more clearer behavior.
-        BlobShape(int s0);                                  //!< Creates 1-dim shape [@p s0]
+        explicit BlobShape(int s0);                         //!< Creates 1-dim shape [@p s0]
         BlobShape(int s0, int s1);                          //!< @overload
         BlobShape(int s0, int s1, int s2);                  //!< @overload
         BlobShape(int num, int cn, int rows, int cols);     //!< Creates 4-dim shape [@p num, @p cn, @p rows, @p cols]
@@ -96,24 +97,35 @@ namespace dnn
          */
         int xsize(int axis) const;

+        /** @brief Converts @p axis index to canonical format (where 0 <= @p axis < dims()). */
+        int canonicalAxis(int axis) const;
+
         /** @brief Returns the product of all sizes of axes. */
-        ptrdiff_t total();
+        ptrdiff_t total() const;
+
+        /** @brief Computes the product of sizes of axes among the specified axes range [@p startAxis; @p endAxis).
+          * @details Negative axis indexing can be used. @sa Blob::total(int,int)
+          */
+        ptrdiff_t total(int startAxis, int endAxis = INT_MAX) const;
+
+        /** @brief Constructs new shape from axes in range [@p startAxis; @p endAxis).
+          * @details Negative axis indexing can be used. @sa Blob::total(int,int)
+          */
+        BlobShape slice(int startAxis, int endAxis = INT_MAX) const;

         /** @brief Returns pointer to the first element of continuous size array. */
         const int *ptr() const;

-        /** @brief Checks equality of two shapes. */
-        bool equal(const BlobShape &other) const;
+        bool equal(const BlobShape &other) const;   //!< Checks equality of two shapes.
+        bool operator== (const BlobShape &r) const; //!< @sa equal()

-        bool operator== (const BlobShape &r) const;
+        BlobShape operator+ (const BlobShape &r) const; //!< Concatenates two shapes.

-        /** @brief Contacenates two shapes */
-        BlobShape operator+ (const BlobShape &r) const;
+        static BlobShape like(const Mat &m);  //!< Returns shape of passed Mat.
+        static BlobShape like(const UMat &m); //!< Returns shape of passed UMat.

-        /** @brief Returns shape of passed Mat. */
-        static BlobShape like(const Mat &m);
-        /** @brief Returns shape of passed Mat. */
-        static BlobShape like(const UMat &m);
+        static BlobShape empty(); //!< Returns empty shape [].
+        bool isEmpty() const;     //!< Returns true if shape is empty (i.e. []).

 #ifdef CV_CXX_MOVE_SEMANTICS
         //TBD
@@ -183,7 +195,7 @@ namespace dnn
          */
         size_t total(int startAxis = 0, int endAxis = INT_MAX) const;

-        /** @brief Converts @p axis index to canonical format (where 0 <= axis < dims()). */
+        /** @brief Converts @p axis index to canonical format (where 0 <= @p axis < dims()). */
         int canonicalAxis(int axis) const;

         /** @brief Returns shape of the blob. */
diff --git a/modules/dnn/include/opencv2/dnn/blob.inl.hpp b/modules/dnn/include/opencv2/dnn/blob.inl.hpp
index b01283ddc..6e18716f5 100644
--- a/modules/dnn/include/opencv2/dnn/blob.inl.hpp
+++ b/modules/dnn/include/opencv2/dnn/blob.inl.hpp
@@ -150,7 +150,13 @@ inline int &BlobShape::operator[] (int axis)
 {
     return sz[(axis < 0) ? axis + dims() : axis];
 }
-inline ptrdiff_t BlobShape::total()
+inline int BlobShape::canonicalAxis(int axis) const
+{
+    CV_Assert(-dims() <= axis && axis < dims());
+    return (axis < 0) ? axis + dims() : axis;
+}
+
+inline ptrdiff_t BlobShape::total() const
 {
     if (dims() == 0)
         return 0;
@@ -161,6 +167,42 @@
     return res;
 }

+inline ptrdiff_t BlobShape::total(int startAxis, int endAxis) const
+{
+    if (isEmpty())
+        return 0;
+
+    if (endAxis == INT_MAX)
+        endAxis = dims();
+    else if (endAxis < 0)
+        endAxis += dims();
+    startAxis = (startAxis < 0) ? startAxis + dims() : startAxis;
+    CV_Assert(0 <= startAxis && startAxis <= endAxis && endAxis <= dims());
+
+    ptrdiff_t res = 1;
+    for (int i = startAxis; i < endAxis; i++)
+        res *= sz[i];
+    return res;
+}
+
+inline BlobShape BlobShape::slice(int startAxis, int endAxis) const
+{
+    if (isEmpty())
+        return BlobShape::empty();
+
+    if (endAxis == INT_MAX)
+        endAxis = dims();
+    else if (endAxis < 0)
+        endAxis += dims();
+    startAxis = (startAxis < 0) ? startAxis + dims() : startAxis;
+    CV_Assert(0 <= startAxis && startAxis <= endAxis && endAxis <= dims());
+
+    BlobShape res(endAxis - startAxis, (const int*)NULL);
+    for (int i = startAxis; i < endAxis; i++)
+        res[i - startAxis] = sz[i];
+    return res;
+}
+
 inline const int *BlobShape::ptr() const
 {
     return sz;
@@ -195,6 +237,16 @@ inline BlobShape BlobShape::like(const UMat &m)
     return BlobShape(m.dims, (const int*)m.size);
 }

+inline BlobShape BlobShape::empty()
+{
+    return BlobShape(0, (const int*)NULL);
+}
+
+inline bool BlobShape::isEmpty() const
+{
+    return dims() == 0;
+}
+
 CV_EXPORTS std::ostream &operator<< (std::ostream &stream, const BlobShape &shape);

 /////////////////////////////////////////////////////////////////////
diff --git a/modules/dnn/src/layers/recurrent_layers.cpp b/modules/dnn/src/layers/recurrent_layers.cpp
index f5a3f01fa..9ef93abef 100644
--- a/modules/dnn/src/layers/recurrent_layers.cpp
+++ b/modules/dnn/src/layers/recurrent_layers.cpp
@@ -69,7 +69,7 @@ static void tanh(const Mat &src, Mat &dst)
     else if (src.type() == CV_64F)
         tanh<double>(src, dst);
     else
-        CV_Error(Error::StsUnsupportedFormat, "Functions supports only floating point types");
+        CV_Error(Error::StsUnsupportedFormat, "Function supports only floating point types");
 }

 static void sigmoid(const Mat &src, Mat &dst)
@@ -86,6 +86,10 @@ class LSTMLayerImpl : public LSTMLayer
     int dtype;
     bool allocated;

+    BlobShape outTailShape;              //shape of single output sample
+    BlobShape outTsMatShape, outTsShape; //shape of N output samples
+    BlobShape outResShape;               //shape of T timestamps and N output samples
+
     bool useTimestampDim;
     bool produceCellOutput;
@@ -97,6 +101,7 @@ public:
         useTimestampDim = true;
         produceCellOutput = false;
         allocated = false;
+        outTailShape = BlobShape::empty();
     }

     void setUseTimstampsDim(bool use)
@@ -113,14 +118,20 @@ public:
     void setC(const Blob &C)
     {
-        CV_Assert(!allocated || C.total() == cInternal.total());
-        C.matRefConst().copyTo(cInternal);
+        CV_Assert(cInternal.empty() || C.total() == cInternal.total());
+        if (!cInternal.empty())
+            C.reshaped(BlobShape::like(cInternal)).matRefConst().copyTo(cInternal);
+        else
+            C.matRefConst().copyTo(cInternal);
     }

     void setH(const Blob &H)
     {
-        CV_Assert(!allocated || H.total() == hInternal.total());
-        H.matRefConst().copyTo(hInternal);
+        CV_Assert(hInternal.empty() || H.total() == hInternal.total());
+        if (!hInternal.empty())
+            H.reshaped(BlobShape::like(hInternal)).matRefConst().copyTo(hInternal);
+        else
+            H.matRefConst().copyTo(hInternal);
     }

     Blob getC() const
@@ -128,8 +139,8 @@ public:
         CV_Assert(!cInternal.empty());

         //TODO: add convinient Mat -> Blob constructor
-        Blob res;
-        res.fill(BlobShape::like(cInternal), cInternal.type(), cInternal.data);
+        Blob res(outTsShape, cInternal.type());
+        res.fill(res.shape(), res.type(), cInternal.data);
         return res;
     }
@@ -137,11 +148,17 @@
     {
         CV_Assert(!hInternal.empty());

-        Blob res;
-        res.fill(BlobShape::like(hInternal), hInternal.type(), hInternal.data);
+        Blob res(outTsShape, hInternal.type());
+        res.fill(res.shape(), res.type(), hInternal.data);
         return res;
     }

+    void setOutShape(const BlobShape &outTailShape_)
+    {
+        CV_Assert(!allocated || outTailShape_.total() == outTailShape.total());
+        outTailShape = outTailShape_;
+    }
+
     void setWeights(const Blob &Wh, const Blob &Wx, const Blob &bias)
     {
         CV_Assert(Wh.dims() == 2 && Wx.dims() == 2);
@@ -160,31 +177,64 @@ public:
     void allocate(const std::vector<Blob*> &input, std::vector<Blob> &output)
     {
         CV_Assert(blobs.size() == 3);
-        Blob &Wh = blobs[0], &Wx = blobs[1];
+        CV_Assert(input.size() == 1);

+        Blob &Wh = blobs[0], &Wx = blobs[1];
         numOut = Wh.size(1);
         numInp = Wx.size(1);

-        CV_Assert(input.size() == 1);
-        CV_Assert(input[0]->dims() > 2 && (int)input[0]->total(2) == numInp);
+        if (!outTailShape.isEmpty())
+            CV_Assert(outTailShape.total() == numOut);
+        else
+            outTailShape = BlobShape(numOut);

-        numTimeStamps = input[0]->size(0);
-        numSamples = input[0]->size(1);
-        dtype = input[0]->type();
+        if (useTimestampDim)
+        {
+            CV_Assert(input[0]->dims() >= 2 && (int)input[0]->total(2) == numInp);
+            numTimeStamps = input[0]->size(0);
+            numSamples = input[0]->size(1);
+            outResShape = BlobShape(numTimeStamps, numSamples) + outTailShape;
+        }
+        else
+        {
+            CV_Assert(input[0]->dims() >= 1 && (int)input[0]->total(1) == numInp);
+            numTimeStamps = 1;
+            numSamples = input[0]->size(0);
+            outResShape = BlobShape(numSamples) + outTailShape;
+        }
+        outTsMatShape = BlobShape(numSamples, numOut);
+        outTsShape = BlobShape(numSamples) + outTailShape;

+        dtype = input[0]->type();
         CV_Assert(dtype == CV_32F || dtype == CV_64F);
         CV_Assert(Wh.type() == dtype);

-        BlobShape outShape(numTimeStamps, numSamples, numOut);
-        output.resize(2);
-        output[0].create(outShape, dtype);
-        output[1].create(outShape, dtype);
+        output.resize( (produceCellOutput) ? 2 : 1 );
+        output[0].create(outResShape, dtype);
+        if (produceCellOutput)
+            output[1].create(outResShape, dtype);

-        hInternal.create(numSamples, numOut, dtype);
-        hInternal.setTo(0);
+        if (hInternal.empty())
+        {
+            hInternal.create(outTsMatShape.dims(), outTsMatShape.ptr(), dtype);
+            hInternal.setTo(0);
+        }
+        else
+        {
+            CV_Assert((int)hInternal.total() == numSamples*numOut);
+            hInternal = hInternal.reshape(1, outTsMatShape.dims(), outTsMatShape.ptr());
+        }

-        cInternal.create(numSamples, numOut, dtype);
-        cInternal.setTo(0);
+        if (cInternal.empty())
+        {
+            cInternal.create(outTsMatShape.dims(), outTsMatShape.ptr(), dtype);
+            cInternal.setTo(0);
+        }
+        else
+        {
+            CV_Assert((int)cInternal.total() == numSamples*numOut);
+            cInternal = cInternal.reshape(1, outTsMatShape.dims(), outTsMatShape.ptr());
+        }

         gates.create(numSamples, 4*numOut, dtype);
@@ -252,6 +302,22 @@ void LSTMLayer::forward(std::vector<Blob*>&, std::vector<Blob>&)
     CV_Error(Error::StsInternal, "This function should be unreached");
 }

+int LSTMLayer::inputNameToIndex(String inputName)
+{
+    if (inputName.toLowerCase() == "x")
+        return 0;
+    return -1;
+}
+
+int LSTMLayer::outputNameToIndex(String outputName)
+{
+    if (outputName.toLowerCase() == "h")
+        return 0;
+    else if (outputName.toLowerCase() == "c")
+        return 1;
+    return -1;
+}
+
 class RNNLayerImpl : public RNNLayer
 {
diff --git a/modules/dnn/test/test_layers.cpp b/modules/dnn/test/test_layers.cpp
index 994ce7b70..68f472be2 100644
--- a/modules/dnn/test/test_layers.cpp
+++ b/modules/dnn/test/test_layers.cpp
@@ -181,78 +181,81 @@ enum RunLayerMode
 {
     ALLOC_ONLY = 1,
     FORWARD_ONLY = 2,
-    ALLOC_AND_FORWARD = 3
+    ALLOC_AND_FORWARD = ALLOC_ONLY | FORWARD_ONLY
 };

-void runLayer(Ptr<Layer> layer, std::vector<Blob> &inpBlobs, std::vector<Blob> &outBlobs, int mode=ALLOC_AND_FORWARD)
+typedef Ptr<std::vector<Blob*> > PtrToVecPtrBlob;
+
+PtrToVecPtrBlob
+runLayer(Ptr<Layer> layer, std::vector<Blob> &inpBlobs, std::vector<Blob> &outBlobs, int mode=ALLOC_AND_FORWARD)
 {
-    std::vector<Blob*> inpPtrs(inpBlobs.size());
+    PtrToVecPtrBlob inpPtrs( new std::vector<Blob*>() );
+    inpPtrs->reserve(inpBlobs.size());
     for (size_t i = 0; i < inpBlobs.size(); i++)
-        inpPtrs[i] = &inpBlobs[i];
+        inpPtrs->push_back(&inpBlobs[i]);
+
+    if (mode & ALLOC_ONLY) layer->allocate(*inpPtrs, outBlobs);
+    if (mode & FORWARD_ONLY) layer->forward(*inpPtrs, outBlobs);

-    if (mode & ALLOC_ONLY) layer->allocate(inpPtrs, outBlobs);
-    if (mode & FORWARD_ONLY) layer->forward(inpPtrs, outBlobs);
+    return inpPtrs;
 }

 class Layer_LSTM_Test : public ::testing::Test
 {
 public:
-    int Nx, Nc;
+    int numInp, numOut;
     Blob Wh, Wx, b;
     Ptr<LSTMLayer> layer;
-
     std::vector<Blob> inputs, outputs;
-    std::vector<Blob*> inputsPtr;

-    Layer_LSTM_Test(int _Nx = 31, int _Nc = 100)
+    Layer_LSTM_Test() {}
+
+    void init(const BlobShape &inpShape_, const BlobShape &outShape_)
     {
-        Nx = _Nx;
-        Nc = _Nc;
+        numInp = inpShape_.total();
+        numOut = outShape_.total();

-        Wh = Blob(BlobShape(4 * Nc, Nc));
-        Wx = Blob(BlobShape(4 * Nc, Nx));
-        b  = Blob(BlobShape(4 * Nc, 1));
+        Wh = Blob(BlobShape(4 * numOut, numOut));
+        Wx = Blob(BlobShape(4 * numOut, numInp));
+        b  = Blob(BlobShape(4 * numOut, 1));

         layer = LSTMLayer::create();
         layer->setWeights(Wh, Wx, b);
-    }
-
-    void allocateAndForward()
-    {
-        inputsPtr.clear();
-        for (size_t i = 0; i < inputs.size(); i++)
-            inputsPtr.push_back(&inputs[i]);
-
-        layer->allocate(inputsPtr, outputs);
-        layer->forward(inputsPtr, outputs);
+        layer->setOutShape(outShape_);
     }
 };

-TEST_F(Layer_LSTM_Test, BasicTest_1)
+TEST_F(Layer_LSTM_Test, get_set_test)
 {
-    inputs.push_back(Blob(BlobShape(1, 2, 3, Nx)));
-    allocateAndForward();
+    BlobShape TN(4);
+    BlobShape inpShape(5, 3, 2), inpResShape = TN + inpShape;
+    BlobShape outShape(3, 1, 2), outResShape = TN + outShape;

-    EXPECT_EQ(outputs.size(), 2);
-    EXPECT_EQ(outputs[0].shape(), BlobShape(1, 2, 3, Nc));
-    EXPECT_EQ(outputs[1].shape(), BlobShape(1, 2, 3, Nc));
-}
+    init(inpShape, outShape);
+    layer->setProduceCellOutput(true);
+    layer->setUseTimstampsDim(false);
+    layer->setOutShape(outShape);

-TEST_F(Layer_LSTM_Test, BasicTest_2)
-{
-    inputs.push_back(Blob(BlobShape(1, 2, 3, Nx)));
-    inputs.push_back(Blob(BlobShape(1, 2, 3, Nc)));
-    inputs.push_back(Blob(BlobShape(1, 2, 3, Nc)));
-    allocateAndForward();
+    layer->setC(Blob(outResShape));
+    layer->setH(Blob(outResShape));

-    EXPECT_EQ(outputs.size(), 2);
-    EXPECT_EQ(outputs[0].shape(), BlobShape(1, 2, 3, Nc));
-    EXPECT_EQ(outputs[1].shape(), BlobShape(1, 2, 3, Nc));
+    inputs.push_back(Blob(inpResShape));
+    runLayer(layer, inputs, outputs);
+
+    EXPECT_EQ(2, outputs.size());
+    EXPECT_EQ(outResShape, outputs[0].shape());
+    EXPECT_EQ(outResShape, outputs[1].shape());
+
+    EXPECT_EQ(outResShape, layer->getC().shape());
+    EXPECT_EQ(outResShape, layer->getH().shape());
+
+    EXPECT_EQ(0, layer->inputNameToIndex("x"));
+    EXPECT_EQ(0, layer->outputNameToIndex("h"));
+    EXPECT_EQ(1, layer->outputNameToIndex("c"));
 }

 TEST(Layer_LSTM_Test_Accuracy_Reference_with_, CaffeRecurrent)
 {
-
     Ptr<LSTMLayer> layer = LSTMLayer::create();

     Blob Wx = blobFromNPY(_tf("lstm.prototxt.w_0.npy"));
@@ -262,13 +265,11 @@ TEST(Layer_LSTM_Test_Accuracy_Reference_with_, CaffeRecurrent)

     Blob inp = blobFromNPY(_tf("blob.npy"));
     std::vector<Blob> inputs(1, inp), outputs;
-    runLayer(layer, inputs, outputs, ALLOC_ONLY | FORWARD_ONLY);
+    runLayer(layer, inputs, outputs);

     Blob &h_t_gathered = outputs[0];
     Blob h_t_reference = blobFromNPY(_tf("lstm.prototxt.h_1.npy"));

-    //h_t_gathered.reshape(h_t_reference.shape());
-
     normAssert(h_t_reference, h_t_gathered);
 }
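Usage sketch (not part of the patch). The snippet below is a minimal illustration of how the API introduced above is meant to be driven from user code: setOutShape() fixes the tail shape of each output sample, and inputNameToIndex()/outputNameToIndex() map the blob names "x", "h" and "c" to input/output indices. It assumes the contrib dnn module is available through <opencv2/dnn.hpp>; the sizes are illustrative and the weight blobs are left unfilled, whereas real code would load trained values into Wh, Wx and b.

#include <opencv2/dnn.hpp>
#include <vector>

int main()
{
    using namespace cv;
    using namespace cv::dnn;

    const int numInp = 31, numOut = 100;

    // Weights follow the layout documented in all_layers.hpp:
    // Wh is [4*numOut x numOut], Wx is [4*numOut x numInp], b is [4*numOut x 1].
    Blob Wh(BlobShape(4 * numOut, numOut));
    Blob Wx(BlobShape(4 * numOut, numInp));
    Blob b (BlobShape(4 * numOut, 1));

    Ptr<LSTMLayer> lstm = LSTMLayer::create();
    lstm->setWeights(Wh, Wx, b);

    // New in this patch: each output sample gets this tail shape;
    // when it is left unset, [Wh.size(0)] is used instead.
    lstm->setOutShape(BlobShape(numOut));

    // Input stores T timestamps of N independent streams: [T, N, numInp].
    const int T = 5, N = 2;
    Blob input(BlobShape(T, N, numInp));

    std::vector<Blob*> inputs(1, &input);
    std::vector<Blob> outputs;
    lstm->allocate(inputs, outputs);
    lstm->forward(inputs, outputs);   // outputs[0] has shape [T, N, numOut]

    // New name-to-index helpers: "x" -> input 0, "h" -> output 0, "c" -> output 1.
    CV_Assert(lstm->inputNameToIndex("x") == 0);
    CV_Assert(lstm->outputNameToIndex("h") == 0 && lstm->outputNameToIndex("c") == 1);
    return 0;
}

The same calls are exercised by the get_set_test added to test_layers.cpp above.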