diff --git a/modules/dnn/include/opencv2/dnn/all_layers.hpp b/modules/dnn/include/opencv2/dnn/all_layers.hpp
index 67f71e8b0..4d722b615 100644
--- a/modules/dnn/include/opencv2/dnn/all_layers.hpp
+++ b/modules/dnn/include/opencv2/dnn/all_layers.hpp
@@ -172,28 +172,35 @@ namespace dnn
 
     /** Setups learned weights.
 
-    Recurrent-layer behavior on each step is defined by current input x_t, previous state h_t and learned weights as follows:
+    Recurrent-layer behavior on each step is defined by current input @f$ x_t @f$, previous state @f$ h_t @f$ and learned weights as follows:
     @f{eqnarray*}{
     h_t &= tanh&(W_{hh} h_{t-1} + W_{xh} x_t + b_h),  \\
     o_t &= tanh&(W_{ho} h_t + b_o),
     @f}
 
-    @param Whh is @f$ W_{hh} @f$ matrix
     @param Wxh is @f$ W_{xh} @f$ matrix
     @param bh is @f$ b_{h} @f$ vector
+    @param Whh is @f$ W_{hh} @f$ matrix
     @param Who is @f$ W_{ho} @f$ matrix
     @param bo is @f$ b_{o} @f$ vector
     */
-    virtual void setWeights(const Blob &Whh, const Blob &Wxh, const Blob &bh, const Blob &Who, const Blob &bo) = 0;
+    virtual void setWeights(const Blob &Wxh, const Blob &bh, const Blob &Whh, const Blob &Who, const Blob &bo) = 0;
+
+    /** @brief If this flag is set to true then the layer will produce @f$ h_t @f$ as its second output.
+     *  @details Shape of the second output is the same as the shape of the first output.
+     */
+    virtual void setProduceHiddenOutput(bool produce = false) = 0;
 
     /** Accepts two inputs @f$x_t@f$ and @f$h_{t-1}@f$ and compute two outputs @f$o_t@f$ and @f$h_t@f$.
 
-    @param input could contain inputs @f$x_t@f$ and @f$h_{t-1}@f$.
-    @param output should contain outputs @f$o_t@f$ and @f$h_t@f$.
+    @param input should contain packed input @f$x_t@f$.
+    @param output should contain output @f$o_t@f$ (and @f$h_t@f$ if setProduceHiddenOutput() is set to true).
+
+    @p input[0] should have shape [`T`, `N`, `data_dims`], where `T` and `N` are the number of timestamps and the number of independent samples of @f$x_t@f$ respectively.
 
-    The first input @f$x_t@f$ is required whereas @f$h_{t-1}@f$ is optional.
-    If the second input @f$h_{t-1}@f$ isn't specified a layer will use internal @f$h_{t-1}@f$ from the previous calls, at the first call @f$h_{t-1}@f$ will be filled by zeros.
+    @p output[0] will have shape [`T`, `N`, @f$N_o@f$], where @f$N_o@f$ is the number of rows in the @f$ W_{ho} @f$ matrix.
+    If setProduceHiddenOutput() is set to true then @p output[1] will contain a Blob with shape [`T`, `N`, @f$N_h@f$], where @f$N_h@f$ is the number of rows in the @f$ W_{hh} @f$ matrix.
     */
     void forward(std::vector<Blob*> &input, std::vector<Blob> &output);
 };
diff --git a/modules/dnn/src/layers/op_blas.cpp b/modules/dnn/src/layers/op_blas.cpp
index 90a3c7a18..8b09750d5 100644
--- a/modules/dnn/src/layers/op_blas.cpp
+++ b/modules/dnn/src/layers/op_blas.cpp
@@ -40,6 +40,7 @@ void gemmCPU(const Mat &A, const Mat &B, double alpha, Mat &C, double beta, int
     CV_Assert(A.isContinuous() && B.isContinuous() && C.isContinuous());
     CV_Assert(A.type() == CV_32F || A.type() == CV_64F);
     CV_Assert(A.type() == B.type() && B.type() == C.type());
+    CV_Assert(A.data != C.data && B.data != C.data);
 
     if (C.type() == CV_32F)
     {
diff --git a/modules/dnn/src/layers/recurrent_layers.cpp b/modules/dnn/src/layers/recurrent_layers.cpp
index 9ef93abef..0cd68e2b5 100644
--- a/modules/dnn/src/layers/recurrent_layers.cpp
+++ b/modules/dnn/src/layers/recurrent_layers.cpp
@@ -321,20 +321,28 @@ int LSTMLayer::outputNameToIndex(String outputName)
 
 class RNNLayerImpl : public RNNLayer
 {
-    int nX, nH, nO, nSamples;
+    int numX, numH, numO;
+    int numSamples, numTimestamps, numSamplesTotal;
     int dtype;
     Mat Whh, Wxh, bh;
    Mat Who, bo;
-    Mat hPrevInternal, dummyBiasOnes;
+    Mat hCurr, hPrev, dummyBiasOnes;
+    bool produceH;
 
public:
 
     RNNLayerImpl()
     {
         type = "RNN";
+        produceH = false;
     }
 
-    void setWeights(const Blob &W_hh, const Blob &W_xh, const Blob &b_h, const Blob &W_ho, const Blob &b_o)
+    void setProduceHiddenOutput(bool produce = false)
+    {
+        produceH = produce;
+    }
+
+    void setWeights(const Blob &W_xh, const Blob &b_h, const Blob &W_hh, const Blob &W_ho, const Blob &b_o)
     {
         CV_Assert(W_hh.dims() == 2 && W_xh.dims() == 2);
         CV_Assert(W_hh.size(0) == W_xh.size(0) && W_hh.size(0) == W_hh.size(1) && (int)b_h.total() == W_xh.size(0));
@@ -342,9 +350,9 @@ public:
         CV_Assert(W_ho.size(1) == W_hh.size(1));
 
         blobs.resize(5);
-        blobs[0] = W_hh;
-        blobs[1] = W_xh;
-        blobs[2] = b_h;
+        blobs[0] = W_xh;
+        blobs[1] = b_h;
+        blobs[2] = W_hh;
         blobs[3] = W_ho;
         blobs[4] = b_o;
     }
 
@@ -353,72 +361,68 @@ public:
     void allocate(const std::vector<Blob*> &input, std::vector<Blob> &output)
     {
         CV_Assert(input.size() >= 1 && input.size() <= 2);
 
-        Whh = blobs[0].matRefConst();
-        Wxh = blobs[1].matRefConst();
-        bh = blobs[2].matRefConst();
+        Wxh = blobs[0].matRefConst();
+        bh = blobs[1].matRefConst();
+        Whh = blobs[2].matRefConst();
         Who = blobs[3].matRefConst();
         bo = blobs[4].matRefConst();
 
-        nH = Wxh.rows;
-        nX = Wxh.cols;
-        nO = Who.rows;
+        numH = Wxh.rows;
+        numX = Wxh.cols;
+        numO = Who.rows;
 
-        CV_Assert(input[0]->size(-1) == Wxh.cols);
-        nSamples = input[0]->total(0, input[0]->dims() - 1);
-        BlobShape xShape = input[0]->shape();
-        BlobShape hShape = xShape;
-        BlobShape oShape = xShape;
-        hShape[-1] = nH;
-        oShape[-1] = nO;
-
-        if (input.size() == 2)
-        {
-            CV_Assert(input[1]->shape() == hShape);
-        }
-        else
-        {
-            hPrevInternal.create(nSamples, nH, input[0]->type());
-            hPrevInternal.setTo(0);
-        }
+        CV_Assert(input[0]->dims() >= 2);
+        CV_Assert((int)input[0]->total(2) == numX);
+        CV_Assert(input[0]->type() == CV_32F || input[0]->type() == CV_64F);
+        dtype = input[0]->type();
+        numTimestamps = input[0]->size(0);
+        numSamples = input[0]->size(1);
+        numSamplesTotal = numTimestamps * numSamples;
 
-        output.resize(2);
-        output[0].create(oShape, input[0]->type());
-        output[1].create(hShape, input[0]->type());
+        hCurr.create(numSamples, numH, dtype);
+        hPrev.create(numSamples, numH, dtype);
+        hPrev.setTo(0);
 
-        dummyBiasOnes.create(nSamples, 1, bh.type());
+        dummyBiasOnes.create(numSamples, 1, dtype);
         dummyBiasOnes.setTo(1);
 
-        bh = bh.reshape(1, 1); //is 1 x nH mat
-        bo = bo.reshape(1, 1); //is 1 x nO mat
+        bh = bh.reshape(1, 1); //is 1 x numH Mat
+        bo = bo.reshape(1, 1); //is 1 x numO Mat
+
+        reshapeOutput(output);
+    }
+
+    void reshapeOutput(std::vector<Blob> &output)
+    {
+        output.resize((produceH) ? 2 : 1);
+        output[0].create(BlobShape(numTimestamps, numSamples, numO), dtype);
+        if (produceH)
+            output[1].create(BlobShape(numTimestamps, numSamples, numH), dtype);
     }
 
     void forward(std::vector<Blob*> &input, std::vector<Blob> &output)
     {
-        Mat xCurr = input[0]->matRefConst();
-        Mat hPrev = (input.size() >= 2) ? input[1]->matRefConst() : hPrevInternal;
-        Mat oCurr = output[0].matRef();
-        Mat hCurr = output[1].matRef();
-
-        //TODO: Check types
-
-        int xsz[] = {nSamples, nX};
-        int hsz[] = {nSamples, nH};
-        int osz[] = {nSamples, nO};
-        if (xCurr.dims != 2) xCurr = xCurr.reshape(1, 2, xsz);
-        if (hPrev.dims != 2) hPrev = hPrev.reshape(1, 2, hsz);
-        if (oCurr.dims != 2) oCurr = oCurr.reshape(1, 2, osz);
-        if (hCurr.dims != 2) hCurr = hCurr.reshape(1, 2, hsz);
-
-        gemmCPU(hPrev, Whh, 1, hCurr, 0, GEMM_2_T); // W_{hh} * h_{prev}
-        gemmCPU(xCurr, Wxh, 1, hCurr, 1, GEMM_2_T); //+W_{xh} * x_{curr}
-        gemmCPU(dummyBiasOnes, bh, 1, hCurr, 1); //+bh
-        tanh(hCurr, hCurr);
-
-        gemmCPU(hPrev, Who, 1, oCurr, 0, GEMM_2_T); // W_{ho} * h_{prev}
-        gemmCPU(dummyBiasOnes, bo, 1, oCurr, 1); //+b_o
-        tanh(oCurr, oCurr);
-
-        if (input.size() < 2) //save h_{prev}
-            hCurr.copyTo(hPrevInternal);
+        Mat xTs = input[0]->reshaped(BlobShape(numSamplesTotal, numX)).matRefConst();
+        Mat oTs = output[0].reshaped(BlobShape(numSamplesTotal, numO)).matRef();
+        Mat hTs = (produceH) ? output[1].reshaped(BlobShape(numSamplesTotal, numH)).matRef() : Mat();
+
+        for (int ts = 0; ts < numTimestamps; ts++)
+        {
+            Range curRowRange = Range(ts * numSamples, (ts + 1) * numSamples);
+            Mat xCurr = xTs.rowRange(curRowRange);
+
+            gemmCPU(hPrev, Whh, 1, hCurr, 0, GEMM_2_T); // W_{hh} * h_{prev}
+            gemmCPU(xCurr, Wxh, 1, hCurr, 1, GEMM_2_T); //+W_{xh} * x_{curr}
+            gemmCPU(dummyBiasOnes, bh, 1, hCurr, 1); //+bh
+            tanh(hCurr, hPrev); //hPrev now holds h_{curr} for this timestamp and the next iteration
+
+            Mat oCurr = oTs.rowRange(curRowRange);
+            gemmCPU(hPrev, Who, 1, oCurr, 0, GEMM_2_T); // W_{ho} * h_{curr}
+            gemmCPU(dummyBiasOnes, bo, 1, oCurr, 1); //+b_o
+            tanh(oCurr, oCurr);
+
+            if (produceH)
+                hPrev.copyTo(hTs.rowRange(curRowRange));
+        }
     }
 };
diff --git a/modules/dnn/test/test_layers.cpp b/modules/dnn/test/test_layers.cpp
index 68f472be2..5e9171265 100644
--- a/modules/dnn/test/test_layers.cpp
+++ b/modules/dnn/test/test_layers.cpp
@@ -254,7 +254,7 @@ TEST_F(Layer_LSTM_Test, get_set_test)
     EXPECT_EQ(1, layer->outputNameToIndex("c"));
 }
 
-TEST(Layer_LSTM_Test_Accuracy_Reference_with_, CaffeRecurrent)
+TEST(Layer_LSTM_Test_Accuracy_with_, CaffeRecurrent)
 {
     Ptr<LSTMLayer> layer = LSTMLayer::create();
 
@@ -263,73 +263,70 @@ TEST(Layer_LSTM_Test_Accuracy_Reference_with_, CaffeRecurrent)
     Blob b = blobFromNPY(_tf("lstm.prototxt.w_1.npy"));
     layer->setWeights(Wh, Wx, b);
 
-    Blob inp = blobFromNPY(_tf("blob.npy"));
+    Blob inp = blobFromNPY(_tf("recurrent.input.npy"));
     std::vector<Blob> inputs(1, inp), outputs;
     runLayer(layer, inputs, outputs);
 
-    Blob &h_t_gathered = outputs[0];
     Blob h_t_reference = blobFromNPY(_tf("lstm.prototxt.h_1.npy"));
+    normAssert(h_t_reference, outputs[0]);
+}
+
+TEST(Layer_RNN_Test_Accuracy_with_, CaffeRecurrent)
+{
+    Ptr<RNNLayer> layer = RNNLayer::create();
+
+    layer->setWeights(
+        blobFromNPY(_tf("rnn.prototxt.w_0.npy")),
+        blobFromNPY(_tf("rnn.prototxt.w_1.npy")),
+        blobFromNPY(_tf("rnn.prototxt.w_2.npy")),
+        blobFromNPY(_tf("rnn.prototxt.w_3.npy")),
+        blobFromNPY(_tf("rnn.prototxt.w_4.npy")) );
 
-    normAssert(h_t_reference, h_t_gathered);
+    std::vector<Blob> output, input(1, blobFromNPY(_tf("recurrent.input.npy")));
blobFromNPY(_tf("recurrent.input.npy"))); + runLayer(layer, input, output); + + Blob h_ref = blobFromNPY(_tf("rnn.prototxt.h_1.npy")); + normAssert(h_ref, output[0]); } class Layer_RNN_Test : public ::testing::Test { public: - int Nx, Nh, No; + int nX, nH, nO, nT, nS; Blob Whh, Wxh, bh, Who, bo; Ptr layer; std::vector inputs, outputs; - std::vector inputsPtr; - Layer_RNN_Test(int _Nx = 31, int _Nh = 64, int _No = 100) + Layer_RNN_Test() { - Nx = _Nx; - Nh = _Nh; - No = _No; - - Whh = Blob(BlobShape(Nh, Nh)); - Wxh = Blob(BlobShape(Nh, Nx)); - bh = Blob(BlobShape(Nh, 1)); - Who = Blob(BlobShape(No, Nh)); - bo = Blob(BlobShape(No, 1)); + nT = 3; + nS = 5; + nX = 31; + nH = 64; + nO = 100; + + Whh = Blob(BlobShape(nH, nH)); + Wxh = Blob(BlobShape(nH, nX)); + bh = Blob(BlobShape(nH, 1)); + Who = Blob(BlobShape(nO, nH)); + bo = Blob(BlobShape(nO, 1)); layer = RNNLayer::create(); - layer->setWeights(Whh, Wxh, bh, Who, bo); - } - - void allocateAndForward() - { - inputsPtr.clear(); - for (size_t i = 0; i < inputs.size(); i++) - inputsPtr.push_back(&inputs[i]); - - layer->allocate(inputsPtr, outputs); - layer->forward(inputsPtr, outputs); + layer->setProduceHiddenOutput(true); + layer->setWeights(Wxh, bh, Whh, Who, bo); } }; -TEST_F(Layer_RNN_Test, BasicTest_1) -{ - inputs.push_back(Blob(BlobShape(1, 2, 3, Nx))); - allocateAndForward(); - - EXPECT_EQ(outputs.size(), 2); - EXPECT_EQ(outputs[0].shape(), BlobShape(1, 2, 3, No)); - EXPECT_EQ(outputs[1].shape(), BlobShape(1, 2, 3, Nh)); -} - -TEST_F(Layer_RNN_Test, BasicTest_2) +TEST_F(Layer_RNN_Test, get_set_test) { - inputs.push_back(Blob(BlobShape(1, 2, 3, Nx))); - inputs.push_back(Blob(BlobShape(1, 2, 3, Nh))); - allocateAndForward(); + inputs.push_back(Blob(BlobShape(nT, nS, 1, nX))); + runLayer(layer, inputs, outputs); EXPECT_EQ(outputs.size(), 2); - EXPECT_EQ(outputs[0].shape(), BlobShape(1, 2, 3, No)); - EXPECT_EQ(outputs[1].shape(), BlobShape(1, 2, 3, Nh)); + EXPECT_EQ(outputs[0].shape(), BlobShape(nT, nS, nO)); + EXPECT_EQ(outputs[1].shape(), BlobShape(nT, nS, nH)); } }
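Usage sketch (illustrative only, not part of the patch): the snippet below mirrors the updated test fixture and shows the reordered setWeights() call, the new setProduceHiddenOutput() flag, and one packed [`T`, `N`, nX] input driven through the Layer allocate()/forward() interface used above. It assumes the experimental dnn Blob/BlobShape API from this module; the function name is a placeholder and the weight blobs are left uninitialized, as in the test.

    #include <opencv2/dnn.hpp>
    #include <opencv2/dnn/all_layers.hpp>
    #include <vector>
    using namespace cv::dnn;

    void rnnUsageSketch() // illustrative name
    {
        const int T = 3, N = 5, nX = 31, nH = 64, nO = 100;

        Ptr<RNNLayer> rnn = RNNLayer::create();
        rnn->setProduceHiddenOutput(true);        // request h_t as outputs[1]
        rnn->setWeights(Blob(BlobShape(nH, nX)),  // Wxh
                        Blob(BlobShape(nH, 1)),   // bh
                        Blob(BlobShape(nH, nH)),  // Whh
                        Blob(BlobShape(nO, nH)),  // Who
                        Blob(BlobShape(nO, 1)));  // bo

        // One packed input of shape [T, N, nX]; output blobs are shaped by allocate().
        std::vector<Blob> inputs(1, Blob(BlobShape(T, N, nX))), outputs;
        std::vector<Blob*> inputsPtr(1, &inputs[0]);

        rnn->allocate(inputsPtr, outputs); // outputs[0]: [T, N, nO], outputs[1]: [T, N, nH]
        rnn->forward(inputsPtr, outputs);
    }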