dnn: Allow LSTM layer to operate in reverse direction

This is useful for bidirectional LSTMs.
Andrew Ryrie 6 years ago
parent 3289a0aff9
commit b88435fdc2
  1. 15
  2. 49

@ -92,6 +92,7 @@ class LSTMLayerImpl CV_FINAL : public LSTMLayer
bool produceCellOutput;
float forgetBias, cellClip;
bool useCellClip, usePeephole;
bool reverse; // If true, go in negative direction along the time axis
@ -133,6 +134,7 @@ public:
cellClip = params.get<float>("cell_clip", 0.0f);
useCellClip = params.get<bool>("use_cell_clip", false);
usePeephole = params.get<bool>("use_peephole", false);
reverse = params.get<bool>("reverse", false);
allocated = false;
@ -288,7 +290,18 @@ public:
Mat hOutTs = output[0].reshape(1, numSamplesTotal);
Mat cOutTs = produceCellOutput ? output[1].reshape(1, numSamplesTotal) : Mat();
for (int ts = 0; ts < numTimeStamps; ts++)
int tsStart, tsEnd, tsInc;
if (reverse) {
tsStart = numTimeStamps - 1;
tsEnd = -1;
tsInc = -1;
else {
tsStart = 0;
tsEnd = numTimeStamps;
tsInc = 1;
for (int ts = tsStart; ts != tsEnd; ts += tsInc)
Range curRowRange(ts*numSamples, (ts + 1)*numSamples);
Mat xCurr = xTs.rowRange(curRowRange);

@ -489,6 +489,55 @@ TEST(Layer_RNN_Test_Accuracy_with_, CaffeRecurrent)
normAssert(h_ref, output[0]);
TEST(Layer_LSTM_Test_Accuracy_, Reverse)
{
    // This handcrafted setup makes the layer compute (approximately) a running
    // sum of the input, assuming the inputs are suitably small. Because the
    // layer is configured with reverse=true, the sum is accumulated from the
    // last time step towards the first, so output[t] ~= sum of input[t..end].
    cv::Mat input(2, 1, CV_32FC1);
    input.at<float>(0, 0) = 1e-5f;
    input.at<float>(1, 0) = 2e-5f;

    // Input-to-hidden weights: only the update signal sees the input.
    cv::Mat Wx(4, 1, CV_32FC1);
    Wx.at<float>(0, 0) = 0.f;  // Input gate
    Wx.at<float>(1, 0) = 0.f;  // Forget gate
    Wx.at<float>(2, 0) = 0.f;  // Output gate
    Wx.at<float>(3, 0) = 1.f;  // Update signal

    // Hidden-to-hidden weights: the previous output is ignored entirely.
    cv::Mat Wh(4, 1, CV_32FC1);
    Wh.at<float>(0, 0) = 0.f;  // Input gate
    Wh.at<float>(1, 0) = 0.f;  // Forget gate
    Wh.at<float>(2, 0) = 0.f;  // Output gate
    Wh.at<float>(3, 0) = 0.f;  // Update signal

    // Huge biases saturate the three gates so that they are always open.
    cv::Mat bias(4, 1, CV_32FC1);
    bias.at<float>(0, 0) = 1e10f;  // Input gate - always allows input to c
    bias.at<float>(1, 0) = 1e10f;  // Forget gate - never forget anything on c
    bias.at<float>(2, 0) = 1e10f;  // Output gate - always output everything
    bias.at<float>(3, 0) = 0.f;    // Update signal

    LayerParams lp;
    lp.set("reverse", true);
    lp.set("use_timestamp_dim", true);
    // The weights must be handed to the layer through its blobs, in the order
    // Wh, Wx, bias; without them the handcrafted setup above is never used.
    lp.blobs.clear();
    lp.blobs.push_back(Wh);
    lp.blobs.push_back(Wx);
    lp.blobs.push_back(bias);

    cv::Ptr<cv::dnn::LSTMLayer> layer = LSTMLayer::create(lp);
    std::vector<cv::Mat> outputs;
    // Feed the prepared sequence; an empty input list would not exercise the layer.
    std::vector<cv::Mat> inputs(1, input);
    runLayer(layer, inputs, outputs);

    ASSERT_EQ(1, outputs.size());
    cv::Mat out = outputs[0];
    ASSERT_EQ(3, out.dims);
    ASSERT_EQ(shape(2, 1, 1), shape(out));
    float* data = reinterpret_cast<float*>(out.data);
    // Time step 0 is processed last in reverse mode, so it holds the full sum.
    EXPECT_NEAR(std::tanh(1e-5f) + std::tanh(2e-5f), data[0], 1e-10);
    // Time step 1 is processed first and only sees its own input.
    EXPECT_NEAR(std::tanh(2e-5f), data[1], 1e-10);
}
class Layer_RNN_Test : public ::testing::Test
