diff --git a/modules/core/include/opencv2/core/matx.hpp b/modules/core/include/opencv2/core/matx.hpp
index 3c92e3a21d..162ce6e7f8 100644
--- a/modules/core/include/opencv2/core/matx.hpp
+++ b/modules/core/include/opencv2/core/matx.hpp
@@ -372,6 +372,14 @@ public:
     Vec(const Vec<_Tp, cn>& v);
     static Vec all(_Tp alpha);
+    static Vec ones();
+    static Vec randn(_Tp a, _Tp b);
+    static Vec randu(_Tp a, _Tp b);
+    static Vec zeros();
+#ifdef CV_CXX11
+    static Vec diag(_Tp alpha) = delete;
+    static Vec eye() = delete;
+#endif
 
     //! per-element multiplication
     Vec mul(const Vec<_Tp, cn>& v) const;
@@ -1053,6 +1061,18 @@ Vec<_Tp, cn> Vec<_Tp, cn>::all(_Tp alpha)
     return v;
 }
 
+template<typename _Tp, int cn> inline
+Vec<_Tp, cn> Vec<_Tp, cn>::ones()
+{
+    return Vec::all(1);
+}
+
+template<typename _Tp, int cn> inline
+Vec<_Tp, cn> Vec<_Tp, cn>::zeros()
+{
+    return Vec::all(0);
+}
+
 template<typename _Tp, int cn> inline
 Vec<_Tp, cn> Vec<_Tp, cn>::mul(const Vec<_Tp, cn>& v) const
 {
diff --git a/modules/core/include/opencv2/core/operations.hpp b/modules/core/include/opencv2/core/operations.hpp
index bde28c49b2..43a9eb8603 100644
--- a/modules/core/include/opencv2/core/operations.hpp
+++ b/modules/core/include/opencv2/core/operations.hpp
@@ -230,6 +230,22 @@ Matx<_Tp,m,n> Matx<_Tp,m,n>::randn(_Tp a, _Tp b)
     return M;
 }
 
+template<typename _Tp, int cn> inline
+Vec<_Tp, cn> Vec<_Tp, cn>::randu(_Tp a, _Tp b)
+{
+    Vec<_Tp,cn> V;
+    cv::randu(V, Scalar(a), Scalar(b));
+    return V;
+}
+
+template<typename _Tp, int cn> inline
+Vec<_Tp, cn> Vec<_Tp, cn>::randn(_Tp a, _Tp b)
+{
+    Vec<_Tp,cn> V;
+    cv::randn(V, Scalar(a), Scalar(b));
+    return V;
+}
+
 template<typename _Tp, int m, int n> inline
 Matx<_Tp, n, m> Matx<_Tp, m, n>::inv(int method, bool *p_is_ok /*= NULL*/) const
 {
diff --git a/modules/core/include/opencv2/core/vsx_utils.hpp b/modules/core/include/opencv2/core/vsx_utils.hpp
index 5cbc066784..79a1074d59 100644
--- a/modules/core/include/opencv2/core/vsx_utils.hpp
+++ b/modules/core/include/opencv2/core/vsx_utils.hpp
@@ -324,6 +324,7 @@ VSX_IMPL_1RG(vec_udword2, vec_float4, xvcvspuxds, vec_ctulo)
 #define VSX_IMPL_CONVERT(rt, rg, fnm) \
 VSX_FINLINE(rt) fnm(const rg& a) { return __builtin_convertvector(a, rt); }
 
+#ifndef vec_permi
 #if __clang_major__ < 5
 // implement vec_permi in a dirty way
 # define VSX_IMPL_CLANG_4_PERMI(Tvec) \
@@ -351,12 +352,14 @@ VSX_FINLINE(rt) fnm(const rg& a) { return __builtin_convertvector(a, rt); }
 // vec_xxpermdi is missing little-endian supports in clang 4 just like gcc4
 # define vec_permi(a, b, c) vec_xxpermdi(b, a, (3 ^ (((c) & 1) << 1 | (c) >> 1)))
 #endif // __clang_major__ < 5
+#endif
 
 // shift left double by word immediate
 #ifndef vec_sldw
 # define vec_sldw vec_xxsldwi
 #endif
 
+#if __clang_major__ < 13
 // Implement vec_rsqrt since clang only supports vec_rsqrte
 #ifndef vec_rsqrt
 VSX_FINLINE(vec_float4) vec_rsqrt(const vec_float4& a)
@@ -380,6 +383,7 @@ VSX_FINLINE(vec_udword2) vec_promote(unsigned long long a, int b)
     ret[b & 1] = a;
     return ret;
 }
+#endif
 
 // vec_popcnt should return unsigned but clang has different thought just like gcc in vec_vpopcnt
 #define VSX_IMPL_POPCNTU(Tvec, Tvec2, ucast) \
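Editor's example (not part of the patch): a minimal sketch of how the Vec factories added in matx.hpp/operations.hpp above would be used once this patch is applied; the values and vector type are arbitrary.

    #include <opencv2/core.hpp>
    #include <iostream>

    int main()
    {
        cv::Vec3f o = cv::Vec3f::ones();            // (1, 1, 1)
        cv::Vec3f z = cv::Vec3f::zeros();           // (0, 0, 0)
        cv::Vec3f u = cv::Vec3f::randu(0.f, 1.f);   // uniform samples in [0, 1)
        cv::Vec3f n = cv::Vec3f::randn(0.f, 1.f);   // normal samples, mean 0, stddev 1
        // Matx::diag() and Matx::eye() make no sense for a one-column Vec, which is
        // why the CV_CXX11 block above deletes them and turns misuse into a compile error.
        std::cout << o[0] << " " << z[0] << " " << u[0] << " " << n[0] << std::endl;
        return 0;
    }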
diff --git a/modules/core/src/mathfuncs.cpp b/modules/core/src/mathfuncs.cpp
index 8e9a07da19..056be63a71 100644
--- a/modules/core/src/mathfuncs.cpp
+++ b/modules/core/src/mathfuncs.cpp
@@ -270,6 +270,9 @@ void cartToPolar( InputArray src1, InputArray src2,
 {
     CV_INSTRUMENT_REGION();
 
+    CV_Assert(src1.getObj() != dst1.getObj() && src1.getObj() != dst2.getObj() &&
+              src2.getObj() != dst1.getObj() && src2.getObj() != dst2.getObj());
+
     CV_OCL_RUN(dst1.isUMat() && dst2.isUMat(),
                ocl_cartToPolar(src1, src2, dst1, dst2, angleInDegrees))
 
@@ -564,6 +567,9 @@ void polarToCart( InputArray src1, InputArray src2,
 {
     CV_INSTRUMENT_REGION();
 
+    CV_Assert(src1.getObj() != dst1.getObj() && src1.getObj() != dst2.getObj() &&
+              src2.getObj() != dst1.getObj() && src2.getObj() != dst2.getObj());
+
     int type = src2.type(), depth = CV_MAT_DEPTH(type), cn = CV_MAT_CN(type);
     CV_Assert((depth == CV_32F || depth == CV_64F) && (src1.empty() || src1.type() == type));
diff --git a/modules/core/test/test_arithm.cpp b/modules/core/test/test_arithm.cpp
index 06d295f694..6d50b5a8f7 100644
--- a/modules/core/test/test_arithm.cpp
+++ b/modules/core/test/test_arithm.cpp
@@ -2620,5 +2620,36 @@ TEST(Core_Magnitude, regression_19506)
     }
 }
 
+TEST(Core_CartPolar, inplace)
+{
+    RNG& rng = TS::ptr()->get_rng();
+    cv::Mat1d A[2] = {cv::Mat1d(10, 10), cv::Mat1d(10, 10)};
+    cv::Mat1d B[2], C[2];
+    cv::UMat uA[2];
+
+    for(int i = 0; i < 2; ++i)
+    {
+        cvtest::randUni(rng, A[i], Scalar::all(-1000), Scalar::all(1000));
+        A[i].copyTo(uA[i]);
+    }
+
+    // Reverse
+    cv::cartToPolar(A[0], A[1], B[0], B[1], false);
+    cv::polarToCart(B[0], B[1], C[0], C[1], false);
+    EXPECT_MAT_NEAR(A[0], C[0], 2);
+    EXPECT_MAT_NEAR(A[1], C[1], 2);
+
+    // Inplace
+    EXPECT_THROW(cv::polarToCart(B[0], B[1], B[0], B[1], false), cv::Exception);
+    EXPECT_THROW(cv::polarToCart(B[0], B[1], B[1], B[0], false), cv::Exception);
+    EXPECT_THROW(cv::cartToPolar(A[0], A[1], A[0], A[1], false), cv::Exception);
+    EXPECT_THROW(cv::cartToPolar(A[0], A[1], A[1], A[0], false), cv::Exception);
+    // Inplace OCL
+    EXPECT_THROW(cv::polarToCart(uA[0], uA[1], uA[0], uA[1]), cv::Exception);
+    EXPECT_THROW(cv::polarToCart(uA[0], uA[1], uA[1], uA[0]), cv::Exception);
+    EXPECT_THROW(cv::cartToPolar(uA[0], uA[1], uA[0], uA[1]), cv::Exception);
+    EXPECT_THROW(cv::cartToPolar(uA[0], uA[1], uA[1], uA[0]), cv::Exception);
+
+}
 }} // namespace
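Editor's example (not part of the patch): with the new assertions, passing an input as an output to cartToPolar/polarToCart throws instead of silently reading already-overwritten data. A minimal sketch of the intended call pattern; sizes and values are arbitrary.

    #include <opencv2/core.hpp>

    void polar_demo()
    {
        cv::Mat x(10, 10, CV_32F, cv::Scalar(1)), y(10, 10, CV_32F, cv::Scalar(2));
        cv::Mat mag, ang;
        cv::cartToPolar(x, y, mag, ang);   // fine: outputs are distinct Mats
        // cv::cartToPolar(x, y, x, y);    // would now throw cv::Exception instead of
        //                                 // corrupting the inputs mid-computation
    }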
diff --git a/modules/core/test/test_mat.cpp b/modules/core/test/test_mat.cpp
index e32d9782a4..6c885a4dce 100644
--- a/modules/core/test/test_mat.cpp
+++ b/modules/core/test/test_mat.cpp
@@ -2371,6 +2371,18 @@ TEST(Mat, ptrVecni_20044)
     EXPECT_EQ(int(6), *(ci));
 }
 
+
+TEST(Mat, VecMatx_4650)
+{
+    // Makes sure the following compiles.
+    cv::Vec3b a;
+    a = cv::Vec3b::ones();
+    a = cv::Vec3b::zeros();
+    a = cv::Vec3b::randn(0, 10);
+    a = cv::Vec3b::randu(0, 10);
+}
+
+
 TEST(Mat, reverse_iterator_19967)
 {
     // empty iterator (#16855)
@@ -2449,4 +2461,5 @@ TEST(Mat, reverse_iterator_19967)
 }
 
+
 }} // namespace
diff --git a/modules/dnn/src/layers/recurrent_layers.cpp b/modules/dnn/src/layers/recurrent_layers.cpp
index dbb3e2700a..19e32e2b61 100644
--- a/modules/dnn/src/layers/recurrent_layers.cpp
+++ b/modules/dnn/src/layers/recurrent_layers.cpp
@@ -103,7 +103,7 @@ static ActivationFunction get_activation_function(const String& activation) {
 class LSTMLayerImpl CV_FINAL : public LSTMLayer
 {
-    int numTimeStamps, numSamples;
+    int numTimeStamps, numSamples, numHidden;
     bool allocated;
 
     MatShape outTailShape;  //shape of single output sample
@@ -127,6 +127,10 @@ class LSTMLayerImpl CV_FINAL : public LSTMLayer
     bool useAVX2;
 #endif
 
+    // CUDA needs input blobs to be rearranged in a specific way, but some transformations
+    // in ONNXImporter are destructive, so we keep a copy.
+    std::vector<Mat> originalBlobs;
+
 public:
 
     LSTMLayerImpl(const LayerParams& params)
@@ -140,6 +144,13 @@ public:
     {
         setParamsFrom(params);
 
+        if (params.get<bool>("is_onnx", false))
+        {
+            // collect copies of onnx blobs
+            originalBlobs.insert(originalBlobs.begin(), blobs.begin(), blobs.begin() + 3);
+            blobs.erase(blobs.begin(), blobs.begin() + 3);
+        }
+
         bidirectional = params.get<bool>("bidirectional", false);
         if (!blobs.empty())
         {
@@ -181,6 +192,7 @@ public:
         useCellClip = params.get<bool>("use_cell_clip", false);
         usePeephole = params.get<bool>("use_peephole", false);
         reverse = params.get<bool>("reverse", false);
+        numHidden = params.get<int>("hidden_size", 1);
         CV_Assert(!reverse || !bidirectional);
 
         // read activations
@@ -269,8 +281,21 @@ public:
         outResShape.insert(outResShape.end(), outTailShape_.begin(), outTailShape_.end());
         outResShape.back() *= (1 + static_cast<int>(bidirectional));
 
-        size_t noutputs = produceCellOutput ? 2 : 1;
-        outputs.assign(noutputs, outResShape);
+        outputs.assign(1, outResShape);
+        if (produceCellOutput)
+        {
+            // the producer is ONNX, so CellState is different
+            if (!originalBlobs.empty())
+            {
+                int shp[] = {(1 + static_cast<int>(bidirectional)), _numSamples, numHidden};
+                MatShape newShape(shp, shp + sizeof(shp)/sizeof(shp[0]));
+                outputs.push_back(newShape);
+            }
+            else
+            {
+                outputs.push_back(outResShape);
+            }
+        }
 
         internals.assign(1, shape(_numSamples, _numOut)); // hInternal
         internals.push_back(shape(_numSamples, _numOut)); // cInternal
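// Editor's note (not part of the patch): the cell-state branch above follows the
// ONNX LSTM output layout, which differs from the layer's flattened Y output:
//   Y   : [seq_length, num_directions, batch_size, hidden_size]
//   Y_h : [num_directions, batch_size, hidden_size]
//   Y_c : [num_directions, batch_size, hidden_size]
// For a hypothetical bidirectional layer with batch 4 and hidden_size 3,
// getMemoryShapes() now reports {2, 4, 3} for the cell output instead of
// reusing the {seq, 4, 2*3} shape it reports for Y.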
@@ -335,14 +360,39 @@ public:
         outputs_arr.getMatVector(output);
         internals_arr.getMatVector(internals);
 
+        Mat cOut = produceCellOutput ? output[0].clone() : Mat();
+        const bool needYcTransform = !originalBlobs.empty(); // if the producer is onnx
         const int numDirs = 1 + static_cast<int>(bidirectional);
         for (int i = 0; i < numDirs; ++i)
         {
-            const Mat &Wh = blobs[0].rowRange(i * blobs[0].rows / numDirs, (i + 1) * blobs[0].rows / numDirs);
-            const Mat &Wx = blobs[1].rowRange(i * blobs[1].rows / numDirs, (i + 1) * blobs[1].rows / numDirs);
-            const Mat &bias = blobs[2].colRange(i * blobs[2].cols / numDirs, (i + 1) * blobs[2].cols / numDirs);
-            const Mat &h_0 = blobs[3].rowRange(i * blobs[3].rows / numDirs, (i + 1) * blobs[3].rows / numDirs);
-            const Mat &c_0 = blobs[4].rowRange(i * blobs[4].rows / numDirs, (i + 1) * blobs[4].rows / numDirs);
+            Mat Wh = blobs[0];
+            Mat Wx = blobs[1];
+            Mat bias = blobs[2];
+            Mat h_0 = blobs[3];
+            Mat c_0 = blobs[4];
+            Mat pI, pF, pO;
+
+            Wh = Wh.rowRange(i * Wh.rows / numDirs, (i + 1) * Wh.rows / numDirs);
+            Wx = Wx.rowRange(i * Wx.rows / numDirs, (i + 1) * Wx.rows / numDirs);
+            bias = bias.colRange(i * bias.cols / numDirs, (i + 1) * bias.cols / numDirs);
+            h_0 = h_0.rowRange(i * h_0.rows / numDirs, (i + 1) * h_0.rows / numDirs);
+            c_0 = c_0.rowRange(i * c_0.rows / numDirs, (i + 1) * c_0.rows / numDirs);
+
+            if (usePeephole)
+            {
+                pI = blobs[5];
+                pF = blobs[6];
+                pO = blobs[7];
+
+                pI = pI.rowRange(i * pI.rows / numDirs, (i + 1) * pI.rows / numDirs);
+                pI = pI.colRange(i * pI.cols / numDirs, (i + 1) * pI.cols / numDirs);
+
+                pF = pF.rowRange(i * pF.rows / numDirs, (i + 1) * pF.rows / numDirs);
+                pF = pF.colRange(i * pF.cols / numDirs, (i + 1) * pF.cols / numDirs);
+
+                pO = pO.rowRange(i * pO.rows / numDirs, (i + 1) * pO.rows / numDirs);
+                pO = pO.colRange(i * pO.cols / numDirs, (i + 1) * pO.cols / numDirs);
+            }
 
             int numOut = Wh.size[1];
             Mat hInternal = internals[0], cInternal = internals[1],
@@ -356,7 +406,12 @@ public:
             Mat hOutTs = output[0].reshape(1, numSamplesTotal);
             hOutTs = hOutTs.colRange(i * hOutTs.cols / numDirs, (i + 1) * hOutTs.cols / numDirs);
-            Mat cOutTs = produceCellOutput ? output[1].reshape(1, numSamplesTotal) : Mat();
+            Mat cOutTs;
+            if (produceCellOutput)
+            {
+                cOutTs = cOut.reshape(1, numSamplesTotal);
+                cOutTs = cOutTs.colRange(i * cOutTs.cols / numDirs, (i + 1) * cOutTs.cols / numDirs);
+            }
 
 #if CV_TRY_AVX2 || CV_TRY_AVX
             bool canUseAvx = gates.isContinuous() && bias.isContinuous()
@@ -471,8 +526,8 @@ public:
                 if (usePeephole)
                 {
                     Mat gatesIF = gates.colRange(0, 2*numOut);
-                    gemm(cInternal, blobs[5], 1, gateI, 1, gateI);
-                    gemm(cInternal, blobs[6], 1, gateF, 1, gateF);
+                    gemm(cInternal, pI, 1, gateI, 1, gateI);
+                    gemm(cInternal, pF, 1, gateF, 1, gateF);
                     f_activation(gatesIF, gatesIF);
                 }
                 else
@@ -495,7 +550,7 @@ public:
                 }
                 if (usePeephole)
                 {
-                    gemm(cInternal, blobs[7], 1, gateO, 1, gateO);
+                    gemm(cInternal, pO, 1, gateO, 1, gateO);
                     f_activation(gateO, gateO);
                 }
 
@@ -509,6 +564,78 @@ public:
                     cInternal.copyTo(cOutTs.rowRange(curRowRange));
             }
         }
+
+        if (needYcTransform && produceCellOutput)
+        {
+            fixCellState(cOut, numDirs);
+        }
+        if (produceCellOutput)
+        {
+            cOut.copyTo(output[1]);
+        }
+    }
+
+    void fixCellState(Mat& cOut, int numDirs)
+    {
+        // seq, batch, dirs, hidden
+        int shp[] = {0, numSamples, numDirs, numHidden};
+        cOut = cOut.reshape(1, sizeof(shp)/sizeof(shp[0]), shp);
+
+        // permute to {0, 2, 1, 3};
+        std::vector<int> newShape = shape(cOut);
+        std::swap(newShape[1], newShape[2]);
+        cv::Mat newCellState(newShape, CV_32FC1);
+        const float* src = cOut.ptr<float>();
+        float* dst = newCellState.ptr<float>();
+        size_t sj = newCellState.size[3];
+        size_t sk = newCellState.size[2] * sj;
+        size_t si = newCellState.size[1] * sk;
+        for (size_t i = 0; i < newCellState.size[0]; i++)
+        {
+            for (size_t j = 0; j < newCellState.size[2]; j++)
+            {
+                for (size_t k = 0; k < newCellState.size[1]; k++)
+                {
+                    std::memcpy(dst, src, sizeof(float) * newCellState.size[3]);
+                    src += cOut.size[3];
+                    dst += sk;
+                }
+                dst = dst + sj - si;
+            }
+            dst = dst + si - sk;
+        }
+
+        cOut = newCellState;
+
+        if (numDirs == 1)
+        {
+            // Slice: Yh = Y[-1, :, :, :]
+            Range ranges[] = {cv::Range(cOut.size[0] - 1, cOut.size[0]), cv::Range::all(), cv::Range::all(), cv::Range::all()};
+            cOut = cOut(ranges);
+            // Reshape: 1x1xBxH -> 1xBxH
+            int shp[] = {1, numSamples, numHidden};
+            cOut = cOut.reshape(1, sizeof(shp)/sizeof(shp[0]), shp);
+        }
+        else
+        {
+            // Slice: SxDxBxH -> last sequence, first direction
+            Range ranges1[] = {cv::Range(cOut.size[0] - 1, cOut.size[0]), cv::Range(0, 1), cv::Range::all(), cv::Range::all()};
+            Mat part1 = cOut(ranges1);
+
+            // Slice: SxDxBxH -> first sequence, last direction
+            Range ranges2[] = {cv::Range(0, 1), cv::Range(cOut.size[1] - 1, cOut.size[1]), cv::Range::all(), cv::Range::all()};
+            Mat part2 = cOut(ranges2);
+
+            int shp[] = {1, part1.size[2] * part1.size[3]};
+            part1 = part1.reshape(1, sizeof(shp)/sizeof(shp[0]), shp);
+            part2 = part2.reshape(1, sizeof(shp)/sizeof(shp[0]), shp);
+
+            vconcat(part1, part2, cOut);
+
+            // Reshape: 1x2xBxH -> 2xBxH
+            int finalShape[] = {2, numSamples, numHidden};
+            cOut = cOut.reshape(1, sizeof(finalShape)/sizeof(finalShape[0]), finalShape);
+        }
     }
 };
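Editor's example (not part of the patch): the strided copy in fixCellState() performs the same (S, B, D, H) to (S, D, B, H) axis swap that the naive index formulas below describe. A standalone sketch with hypothetical sizes that can be used to convince oneself the pointer arithmetic is a transpose of the middle two axes.

    #include <cstring>
    #include <vector>
    #include <cassert>

    int main()
    {
        const int S = 2, B = 3, D = 2, H = 4;                 // hypothetical sizes
        std::vector<float> src(S * B * D * H), dst(src.size());
        for (size_t v = 0; v < src.size(); ++v) src[v] = float(v);

        // Copy rows of H floats while swapping the batch and direction axes,
        // the same reordering the strided loop above performs.
        for (int s = 0; s < S; ++s)
            for (int d = 0; d < D; ++d)
                for (int b = 0; b < B; ++b)
                    std::memcpy(&dst[((s * D + d) * B + b) * H],
                                &src[((s * B + b) * D + d) * H],
                                H * sizeof(float));

        // Spot-check one row: element (s=0, b=2, d=1) lands at (s=0, d=1, b=2).
        assert(dst[((0 * D + 1) * B + 2) * H] == src[((0 * B + 2) * D + 1) * H]);
        return 0;
    }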
diff --git a/modules/dnn/src/net_impl.cpp b/modules/dnn/src/net_impl.cpp
index 24f9c8cb2c..315de13213 100644
--- a/modules/dnn/src/net_impl.cpp
+++ b/modules/dnn/src/net_impl.cpp
@@ -1130,8 +1130,12 @@ void Net::Impl::getLayerShapesRecursively(int id, LayersShapesMap& inOutShapes)
             if (it == inOutShapes.end() || it->second.out.empty())
             {
                 getLayerShapesRecursively(layerId, inOutShapes);
+                it = inOutShapes.find(layerId);
+                CV_Assert(it != inOutShapes.end());
             }
-            const MatShape& shape = inOutShapes[layerId].out[inputLayerIds[i].oid];
+            const int out_port = inputLayerIds[i].oid;
+            CV_CheckLT(out_port, (int)it->second.out.size(), "");
+            const MatShape& shape = it->second.out[out_port];
             layerShapes.in.push_back(shape);
         }
     }
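Editor's example (not part of the patch): the repeated find() above matters because the first lookup may have returned end(); only after the recursive call has filled the map is there an entry to dereference. A compact stand-alone illustration with std::map; names and values are hypothetical.

    #include <map>
    #include <vector>
    #include <cassert>

    int main()
    {
        std::map<int, std::vector<int>> shapes;      // stand-in for inOutShapes
        auto it = shapes.find(42);
        if (it == shapes.end())
        {
            shapes[42] = {1, 3, 224, 224};           // what the recursive call would add
            it = shapes.find(42);                    // must re-acquire: the old iterator was end()
        }
        assert(it != shapes.end() && it->second.size() == 4);
        return 0;
    }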
diff --git a/modules/dnn/src/onnx/onnx_importer.cpp b/modules/dnn/src/onnx/onnx_importer.cpp
index 5713c025be..b43bb5a390 100644
--- a/modules/dnn/src/onnx/onnx_importer.cpp
+++ b/modules/dnn/src/onnx/onnx_importer.cpp
@@ -79,6 +79,14 @@ class ONNXImporter
     void expandMid(const std::string& prefix, opencv_onnx::NodeProto& node_proto, const std::string& input, size_t n);
     void addNegation(const LayerParams& layerParams, opencv_onnx::NodeProto& node_proto, int input_id);
+    void lstm_extractConsts(LayerParams& layerParams, const opencv_onnx::NodeProto& lstm_proto, size_t idx, int* blobShape_, int size);
+    void lstm_add_reshape(const std::string& input_name, const std::string& output_name, int* layerShape, size_t n);
+    std::string lstm_add_slice(int index, const std::string& input_name, int* begin, int* end, size_t n);
+    std::string lstm_fix_dims(LayerParams& layerParams, const opencv_onnx::NodeProto& lstm_proto,
+                              int batch_size, int num_directions, int hidden_size, bool need_y, const std::string& y_name,
+                              const int index);
+    void lstm_add_transform(int num_directions, int batch_size, int hidden_size,
+                            int index, const std::string& input_name, const std::string& output_name);
 public:
     ONNXImporter(Net& net, const char *onnxFile);
     ONNXImporter(Net& net, const char* buffer, size_t sizeBuffer);
@@ -1555,45 +1563,24 @@ void ONNXImporter::parseConstant(LayerParams& layerParams, const opencv_onnx::No
     addConstant(node_proto.output(0), layerParams.blobs[0]);
 }
 
-void ONNXImporter::parseLSTM(LayerParams& layerParams, const opencv_onnx::NodeProto& node_proto_)
+void transformBlobs(std::vector<Mat>& blobs)
 {
-    opencv_onnx::NodeProto node_proto = node_proto_;
-    const std::string output_name = node_proto.output(0);
-    LayerParams lstmParams = layerParams;
-    lstmParams.name += "/lstm";
-
-    // https://pytorch.org/docs/stable/nn.html#lstm
-    CV_Assert(node_proto.input_size() >= 7);
-    Mat Wx = getBlob(node_proto, 1);
-    Mat Wh = getBlob(node_proto, 2);
-    Mat b = getBlob(node_proto, 3);
+    Mat Wx = blobs[0];
+    Mat Wh = blobs[1];
+    Mat b = blobs[2];
+    std::vector<Mat> cudaWorkaround;
+    cudaWorkaround.push_back(Wx.clone());
+    cudaWorkaround.push_back(Wh.clone());
+    cudaWorkaround.push_back(b.clone());
 
-    const int numHidden = lstmParams.get<int>("hidden_size");
+    const int numHidden = Wh.size[2];
     const int numDirs = Wx.size[0];  // Is 1 for forward only and 2 for bidirectional LSTM.
     const int numFeatures = Wx.size[2];
 
-    // Following checks are deduced from the IFGO->IGFO loop below
-    // Wx is numDirs X numHidden*3 X numFeatures
-    // Wh is numDirs X numHidden*3 X numHidden
-    CV_CheckLE(numHidden * 3, Wx.size[1], "Wx should have beat least 3x hidden_size in dimension 1");
-    CV_CheckLE(numHidden * 3, Wh.size[1], "Wh should have be at least 3x hidden_size in dimension 1");
-    CV_CheckLE(numHidden, Wh.size[2], "Wh should have be at least hidden_size in dimension 2");
-
-    Mat h0, c0;
-    if (!node_proto.input(5).empty()) {
-        h0 = getBlob(node_proto, 5);
-        h0 = h0.reshape(1, h0.size[0] * h0.size[1]);
-    } else {
-        // initial_h attribute can be empty in case of keras2onnx producer. fill it with zeros
-        h0 = Mat::zeros(numDirs * numFeatures, numHidden, CV_32FC1);
-    }
-    if (!node_proto.input(6).empty()) {
-        c0 = getBlob(node_proto, 6);
-        c0 = c0.reshape(1, c0.size[0] * c0.size[1]);
-    } else {
-        // initial_c attribute can be empty in case of keras2onnx producer. fill it with zeros
-        c0 = Mat::zeros(numDirs * numFeatures, numHidden, CV_32FC1);
-    }
+    Mat h0 = blobs[3];
+    h0 = h0.reshape(1, h0.size[0] * h0.size[1]);
+    Mat c0 = blobs[4];
+    c0 = c0.reshape(1, c0.size[0] * c0.size[1]);
 
     b = b.reshape(1, b.size[0]);
     Mat bx = b.colRange(0, b.cols / 2);
@@ -1627,31 +1614,245 @@ void ONNXImporter::parseLSTM(LayerParams& layerParams, const opencv_onnx::NodePr
     Wx = Wx.reshape(1, Wx.size[0] * Wx.size[1]);
     Wh = Wh.reshape(1, Wh.size[0] * Wh.size[1]);
+    blobs[0] = Wh;
+    blobs[1] = Wx;
+    blobs[2] = b.reshape(1, 1);
+    blobs[3] = h0;
+    blobs[4] = c0;
 
-    lstmParams.blobs.resize(5);
-    lstmParams.blobs[0] = Wh;
-    lstmParams.blobs[1] = Wx;
-    lstmParams.blobs[2] = b;
-    lstmParams.blobs[3] = h0;
-    lstmParams.blobs[4] = c0;
+    if (blobs.size() == 5) {
+        // so that future patch removing copies can leave all indexing as is
+        blobs.insert(blobs.begin(), cudaWorkaround.begin(), cudaWorkaround.end());
+        return;
+    }
 
-    // read direction attribute
-    lstmParams.set("reverse", lstmParams.get<String>("direction", "") == "reverse");
-    lstmParams.set("bidirectional", lstmParams.get<String>("direction", "") == "bidirectional");
+    Mat P = blobs[5];
+    blobs[5] = P.colRange(0, numHidden);
+    blobs[5] = blobs[5].clone().reshape(1, blobs[5].total());  // Single column.
+    blobs[5] = Mat::diag(blobs[5]);
 
-    node_proto.set_output(0, lstmParams.name);  // set different name so output shapes will be registered on that name
-    addLayer(lstmParams, node_proto);
+    blobs.push_back(P.colRange(numHidden, 2 * numHidden));
+    blobs[6] = blobs[6].clone().reshape(1, blobs[6].total());  // Single column.
+    blobs[6] = Mat::diag(blobs[6]);
 
-    MatShape lstmShape = outShapes[node_proto.output(0)];
+    blobs.push_back(P.colRange(2 * numHidden, 3 * numHidden));
+    blobs[7] = blobs[7].clone().reshape(1, blobs[7].total());  // Single column.
+    blobs[7] = Mat::diag(blobs[7]);
 
-    // Add fake 1 as it is done in ONNX
-    lstmShape.insert(lstmShape.begin() + 1, 1);
+    // so that future patch removing copies can leave all indexing as is
+    blobs.insert(blobs.begin(), cudaWorkaround.begin(), cudaWorkaround.end());
+}
 
-    layerParams.type = "Reshape";
-    layerParams.set("dim", DictValue::arrayInt(&lstmShape[0], lstmShape.size()));
-    node_proto.set_input(0, lstmParams.name);  // redirect input to LSTM
-    node_proto.set_output(0, output_name);  // keep origin LSTM's name
-    addLayer(layerParams, node_proto);
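// Editor's note (not part of the patch): turning each peephole vector into a
// diagonal matrix above lets LSTMLayerImpl apply it with a plain gemm. A minimal
// sketch of the equivalence, with made-up numbers:
//
//   cv::Mat p = (cv::Mat_<float>(3, 1) << 1.f, 2.f, 3.f);  // peephole weights
//   cv::Mat c = cv::Mat::ones(2, 3, CV_32F);               // cell state, batch x hidden
//   cv::Mat viaGemm = c * cv::Mat::diag(p);                // column j scaled by p(j)
//   cv::Mat viaMul  = c.mul(cv::repeat(p.t(), 2, 1));      // same result element-wise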
+void ONNXImporter::lstm_extractConsts(LayerParams& layerParams, const opencv_onnx::NodeProto& lstm_proto, size_t idx, int* blobShape_, int size)
+{
+    MatShape blobShape(blobShape_, blobShape_ + size);
+    Mat blob;
+    if (idx < lstm_proto.input_size() && !lstm_proto.input(idx).empty())
+    {
+        blob = getBlob(lstm_proto, idx);
+        CV_Assert(shape(blob) == blobShape);
+    }
+    else
+    {
+        blob = Mat(blobShape, CV_32FC1, 0.);
+    }
+    layerParams.blobs.push_back(blob);
+};
+
+void ONNXImporter::lstm_add_reshape(const std::string& input_name, const std::string& output_name, int* layerShape, size_t n)
+{
+    LayerParams reshapeLp;
+    reshapeLp.name = cv::format("%s/reshape", input_name.c_str());
+    reshapeLp.type = "Reshape";
+    CV_Assert(layer_id.find(reshapeLp.name) == layer_id.end());
+
+    reshapeLp.set("dim", DictValue::arrayInt(layerShape, n));
+
+    opencv_onnx::NodeProto reshape_proto;
+    reshape_proto.add_input(input_name);
+    reshape_proto.add_output(output_name);
+    addLayer(reshapeLp, reshape_proto);
+};
+
+std::string ONNXImporter::lstm_add_slice(int index, const std::string& input_name, int* begin, int* end, size_t n)
+{
+    LayerParams sliceLP;
+    sliceLP.name = cv::format("%s/slice_%d", input_name.c_str(), index);
+    sliceLP.type = "Slice";
+    CV_Assert(layer_id.find(sliceLP.name) == layer_id.end());
+
+    sliceLP.set("begin", DictValue::arrayInt(begin, n));
+    sliceLP.set("end", DictValue::arrayInt(end, n));
+    sliceLP.set("axis", 0);
+
+    opencv_onnx::NodeProto slice_proto;
+    slice_proto.add_input(input_name);
+    slice_proto.add_output(sliceLP.name);
+    addLayer(sliceLP, slice_proto);
+
+    return slice_proto.output(0);
+};
+
+std::string ONNXImporter::lstm_fix_dims(LayerParams& layerParams, const opencv_onnx::NodeProto& lstm_proto,
+                                        int batch_size, int num_directions, int hidden_size, bool need_y, const std::string& y_name,
+                                        const int index)
+{
+    std::string reshape_output = cv::format("%s/reshape_%d", layerParams.name.c_str(), index);
+
+    // reshape from Seq, Batch, Dirs*Hidden to Seq, Batch, Dirs, Hidden
+    // to not confuse reshape with dynamic first dimension, zero means 'leave unchanged'
+    int layerShape[] = {0, batch_size, num_directions, hidden_size};
+    lstm_add_reshape(lstm_proto.output(index), reshape_output, layerShape, sizeof(layerShape) / sizeof(layerShape[0]));
+
+    // permute from Seq, Batch, Dirs, Hidden to Seq, Dirs, Batch, Hidden
+    LayerParams permuteLP;
+    permuteLP.name = reshape_output + "/permute";
+    permuteLP.type = "Permute";
+    CV_Assert(layer_id.find(permuteLP.name) == layer_id.end());
+
+    int order[] = {0, 2, 1, 3};
+    permuteLP.set("order", DictValue::arrayInt(order, 4));
+
+    opencv_onnx::NodeProto permute_proto;
+    permute_proto.add_input(reshape_output);
+    permute_proto.add_output((need_y && index == 0) ? y_name : static_cast<std::string>(permuteLP.name));
+    addLayer(permuteLP, permute_proto);
+
+    return permute_proto.output(0);
+};
+
+void ONNXImporter::lstm_add_transform(int num_directions, int batch_size, int hidden_size,
+                                      int index, const std::string& input_name, const std::string& output_name)
+{
+    if (num_directions == 1)
+    {
+        // Slice: Yh = Y[-1, :, :, :]
+        int begin[] = {-1}, end[] = {INT_MAX};
+        std::string slice_output = lstm_add_slice(index, input_name, begin, end, sizeof(begin) / sizeof(begin[0]));
+
+        // Reshape: 1x1xBxH -> 1xBxH
+        int layerShape[] = {1, batch_size, hidden_size};
+        lstm_add_reshape(slice_output, output_name, layerShape, sizeof(layerShape) / sizeof(layerShape[0]));
+    }
+    else
+    {
+        // Slice: SxDxBxH -> last sequence, first direction
+        int begin0[] = {-1, 0}, end0[] = {INT_MAX, 1};
+        std::string slice_0 = lstm_add_slice(0, input_name, begin0, end0, sizeof(begin0) / sizeof(begin0[0]));
+
+        // Slice: SxDxBxH -> first sequence, last direction
+        int begin1[] = {0, -1}, end1[] = {1, INT_MAX};
+        std::string slice_1 = lstm_add_slice(1, input_name, begin1, end1, sizeof(begin1) / sizeof(begin1[0]));
+
+        LayerParams concatLP;
+        concatLP.name = cv::format("%s/concat", input_name.c_str());
+        concatLP.type = "Concat";
+        CV_Assert(layer_id.find(concatLP.name) == layer_id.end());
+
+        concatLP.set("axis", 1); // 1x1xBxH -> 1x2xBxH
+
+        opencv_onnx::NodeProto concat_proto;
+        concat_proto.add_input(slice_0);
+        concat_proto.add_input(slice_1);
+        concat_proto.add_output(concatLP.name);
+        addLayer(concatLP, concat_proto);
+
+        // Reshape: 1x2xBxH -> 2xBxH
+        int layerShape[] = {2, batch_size, hidden_size};
+        lstm_add_reshape(concat_proto.output(0), output_name, layerShape, sizeof(layerShape) / sizeof(layerShape[0]));
+    }
+};
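// Editor's note (not part of the patch): in the Slice layers created above,
// begin = {-1} with end = {INT_MAX} selects the last entry along axis 0, i.e.
// the same elements as Y[-1, :, :, :] in ONNX/numpy notation (this mirrors the
// comment in lstm_add_transform). On a plain cv::Mat the equivalent would be:
//
//   cv::Mat Y(6, 4, CV_32F);            // hypothetical: 6 time steps, 4 values each
//   cv::Mat last = Y.row(Y.rows - 1);   // the range such a Slice extracts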
+
+void ONNXImporter::parseLSTM(LayerParams& layerParams, const opencv_onnx::NodeProto& node_proto_)
+{
+    opencv_onnx::NodeProto lstm_proto = node_proto_;
+    layerParams.name += "/lstm";
+
+    // https://github.com/onnx/onnx/blob/main/docs/Operators.md#LSTM
+    CV_Assert(lstm_proto.input_size() >= 3);
+    for (size_t i = 1; i < 3; ++i)
+    {
+        const std::string& name = lstm_proto.input(i);
+        CV_Assert(!name.empty() && constBlobs.count(name) == 1);
+    }
+
+    IterShape_t shapeIt = outShapes.find(lstm_proto.input(0));
+    CV_Assert(shapeIt != outShapes.end());
+    const MatShape x_shape = shapeIt->second;
+
+    const int seq_length = x_shape[0];
+    const int batch_size = x_shape[1];
+    const int input_size = x_shape[2];
+    const int hidden_size = layerParams.get<int>("hidden_size");
+    const int num_directions = constBlobs[lstm_proto.input(1)].size[0];
+
+    int w_size[] = {num_directions, 4*hidden_size, input_size};
+    lstm_extractConsts(layerParams, lstm_proto, 1, w_size, sizeof(w_size) / sizeof(w_size[0])); // W
+
+    int r_size[] = {num_directions, 4*hidden_size, hidden_size};
+    lstm_extractConsts(layerParams, lstm_proto, 2, r_size, sizeof(r_size) / sizeof(r_size[0])); // R
+
+    int b_size[] = {num_directions, 8*hidden_size};
+    lstm_extractConsts(layerParams, lstm_proto, 3, b_size, sizeof(b_size) / sizeof(b_size[0])); // B
+
+    if (4 < lstm_proto.input_size() && !lstm_proto.input(4).empty())
+    {
+        Mat blob = getBlob(lstm_proto, 4);
+        CV_Assert(blob.total() == batch_size);
+        for (MatIterator_<int32_t> it = blob.begin<int32_t>(); it != blob.end<int32_t>(); ++it)
+        {
+            CV_Assert(*it == seq_length);
+        }
+    }
+
+    int h_size[] = {num_directions, batch_size, hidden_size};
+    lstm_extractConsts(layerParams, lstm_proto, 5, h_size, sizeof(h_size) / sizeof(h_size[0])); // initial_h
+
+    int c_size[] = {num_directions, batch_size, hidden_size};
+    lstm_extractConsts(layerParams, lstm_proto, 6, c_size, sizeof(c_size) / sizeof(c_size[0])); // initial_c
+
+    if (lstm_proto.input_size() > 7 && !lstm_proto.input(7).empty())
+    {
+        layerParams.set("use_peephole", true);
+        int p_size[] = {num_directions, 3 * hidden_size};
+        lstm_extractConsts(layerParams, lstm_proto, 7, p_size, sizeof(p_size) / sizeof(p_size[0])); // P
+    }
+
+    transformBlobs(layerParams.blobs);
+
+    layerParams.set("is_onnx", true);
+    layerParams.set("reverse", layerParams.get<String>("direction", "") == "reverse");
+    layerParams.set("bidirectional", layerParams.get<String>("direction", "") == "bidirectional");
+
+    bool need_yc = lstm_proto.output_size() > 2 && !lstm_proto.output(2).empty();
+    bool need_yh = lstm_proto.output_size() > 1 && !lstm_proto.output(1).empty();
+    bool need_y = lstm_proto.output_size() > 0 && !lstm_proto.output(0).empty();
+
+    const std::string y_name = need_y ? lstm_proto.output(0) : "";
+    const std::string yh_name = need_yh ? lstm_proto.output(1) : "";
+    const std::string yc_name = need_yc ? lstm_proto.output(2) : "";
+
+    layerParams.set("produce_cell_output", need_yc);
+
+    lstm_proto.clear_output();
+    if (need_y || need_yh)
+    {
+        // give random names to LSTMLayer's outputs because every output needs postprocessing
+        lstm_proto.add_output(cv::format("%s_y", layerParams.name.c_str()));
+    }
+    if (need_yc)
+    {
+        lstm_proto.add_output(yc_name);
+    }
+
+    addLayer(layerParams, lstm_proto);
+
+    std::string y_output = lstm_fix_dims(layerParams, lstm_proto, batch_size, num_directions, hidden_size, need_y,
+                                         y_name, 0);
+    if (need_yh)
+    {
+        lstm_add_transform(num_directions, batch_size, hidden_size, 0, y_output, yh_name);
+    }
 }
 
 void ONNXImporter::parseGRU(LayerParams& layerParams, const opencv_onnx::NodeProto& node_proto_)
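Editor's example (not part of the patch): how a client would pick up the extra LSTM outputs that parseLSTM now wires back to the original ONNX tensor names. A hedged sketch; the file name and the output names "Y", "Y_h", "Y_c" are hypothetical and depend on the model.

    #include <opencv2/dnn.hpp>
    #include <vector>

    int main()
    {
        cv::dnn::Net net = cv::dnn::readNetFromONNX("lstm_model.onnx");   // hypothetical model

        int inShape[] = {5, 1, 10};                  // seq_length x batch x input_size
        cv::Mat x(3, inShape, CV_32F, cv::Scalar(0.1f));
        net.setInput(x);

        std::vector<cv::Mat> outs;
        std::vector<cv::String> names = {"Y", "Y_h", "Y_c"};   // names as they appear in the graph
        net.forward(outs, names);                    // the cell state output is now populated too
        return 0;
    }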
diff --git a/modules/dnn/test/test_onnx_importer.cpp b/modules/dnn/test/test_onnx_importer.cpp
index 4918c72d10..be493c87d0 100644
--- a/modules/dnn/test/test_onnx_importer.cpp
+++ b/modules/dnn/test/test_onnx_importer.cpp
@@ -1032,12 +1032,14 @@ TEST_P(Test_ONNX_layers, LSTM_Activations)
     testONNXModels("lstm_cntk_tanh", pb, 0, 0, false, false);
 }
 
-TEST_P(Test_ONNX_layers, LSTM)
+// disabled due to poor handling of 1-d mats
+TEST_P(Test_ONNX_layers, DISABLED_LSTM)
 {
     testONNXModels("lstm", npy, 0, 0, false, false);
 }
 
-TEST_P(Test_ONNX_layers, LSTM_bidirectional)
+// disabled due to poor handling of 1-d mats
+TEST_P(Test_ONNX_layers, DISABLED_LSTM_bidirectional)
 {
     testONNXModels("lstm_bidirectional", npy, 0, 0, false, false);
 }
@@ -1062,6 +1064,13 @@ TEST_P(Test_ONNX_layers, GRU_bidirectional)
     testONNXModels("gru_bi", npy, 0, 0, false, false);
 }
 
+TEST_P(Test_ONNX_layers, LSTM_cell)
+{
+    testONNXModels("lstm_cell_forward", npy, 0, 0, false, false);
+    testONNXModels("lstm_cell_bidirectional", npy, 0, 0, false, false);
+    testONNXModels("lstm_cell_with_peepholes", npy, 0, 0, false, false);
+}
+
 TEST_P(Test_ONNX_layers, Pad2d_Unfused)
 {
     testONNXModels("ReflectionPad2d");
diff --git a/modules/ts/include/opencv2/ts/ocl_test.hpp b/modules/ts/include/opencv2/ts/ocl_test.hpp
index 11572e9f48..717eb7b14c 100644
--- a/modules/ts/include/opencv2/ts/ocl_test.hpp
+++ b/modules/ts/include/opencv2/ts/ocl_test.hpp
@@ -89,7 +89,7 @@ extern int test_loop_times;
 #define EXPECT_MAT_NORM(mat, eps) \
 do \
 { \
-    EXPECT_LE(TestUtils::checkNorm1(mat), eps) \
+    EXPECT_LE(cvtest::ocl::TestUtils::checkNorm1(mat), eps) \
 } while ((void)0, 0)
 
 #undef EXPECT_MAT_NEAR
@@ -98,7 +98,7 @@
 do \
 { \
     ASSERT_EQ(mat1.type(), mat2.type()); \
     ASSERT_EQ(mat1.size(), mat2.size()); \
-    EXPECT_LE(TestUtils::checkNorm2(mat1, mat2), eps) \
+    EXPECT_LE(cvtest::ocl::TestUtils::checkNorm2(mat1, mat2), eps) \
         << "Size: " << mat1.size() << std::endl; \
 } while ((void)0, 0)
 
@@ -107,7 +107,7 @@
 do \
 { \
     ASSERT_EQ((mat1).type(), (mat2).type()); \
     ASSERT_EQ((mat1).size(), (mat2).size()); \
-    EXPECT_LE(TestUtils::checkNormRelative((mat1), (mat2)), eps) \
+    EXPECT_LE(cvtest::ocl::TestUtils::checkNormRelative((mat1), (mat2)), eps) \
         << "Size: " << (mat1).size() << std::endl; \
 } while ((void)0, 0)
 
@@ -146,7 +146,7 @@
 do \
 { \
     ASSERT_EQ(name ## _roi.type(), u ## name ## _roi.type()); \
     ASSERT_EQ(name ## _roi.size(), u ## name ## _roi.size()); \
-    EXPECT_LE(TestUtils::checkNorm2(name ## _roi, u ## name ## _roi), eps) \
+    EXPECT_LE(cvtest::ocl::TestUtils::checkNorm2(name ## _roi, u ## name ## _roi), eps) \
         << "Size: " << name ## _roi.size() << std::endl; \
     Point _offset; \
     Size _wholeSize; \
@@ -155,7 +155,7 @@
     _mask(Rect(_offset, name ## _roi.size())).setTo(Scalar::all(0)); \
     ASSERT_EQ(name.type(), u ## name.type()); \
     ASSERT_EQ(name.size(), u ## name.size()); \
-    EXPECT_LE(TestUtils::checkNorm2(name, u ## name, _mask), eps) \
+    EXPECT_LE(cvtest::ocl::TestUtils::checkNorm2(name, u ## name, _mask), eps) \
         << "Size: " << name ## _roi.size() << std::endl; \
 } while ((void)0, 0)
 
@@ -183,7 +183,7 @@
 do \
 { \
     ASSERT_EQ(name ## _roi.type(), u ## name ## _roi.type()); \
     ASSERT_EQ(name ## _roi.size(), u ## name ## _roi.size()); \
-    EXPECT_LE(TestUtils::checkNormRelativeSparse(name ## _roi, u ## name ## _roi), eps) \
+    EXPECT_LE(cvtest::ocl::TestUtils::checkNormRelativeSparse(name ## _roi, u ## name ## _roi), eps) \
         << "Size: " << name ## _roi.size() << std::endl; \
     Point _offset; \
     Size _wholeSize; \
@@ -192,7 +192,7 @@
     _mask(Rect(_offset, name ## _roi.size())).setTo(Scalar::all(0)); \
     ASSERT_EQ(name.type(), u ## name.type()); \
     ASSERT_EQ(name.size(), u ## name.size()); \
-    EXPECT_LE(TestUtils::checkNormRelativeSparse(name, u ## name, _mask), eps) \
+    EXPECT_LE(cvtest::ocl::TestUtils::checkNormRelativeSparse(name, u ## name, _mask), eps) \
        << "Size: " << name ## _roi.size() << std::endl; \
 } while ((void)0, 0)
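Editor's example (not part of the patch): the unqualified TestUtils:: in the old macros only resolved inside code that derives from, or lives in the namespace of, cvtest::ocl::TestUtils. With the fully qualified names the macros also work in an ordinary test body. A hedged sketch; the test name is hypothetical.

    #include "opencv2/ts/ocl_test.hpp"

    namespace opencv_test { namespace {

    TEST(MyModule_Compare, mats_are_equal)      // plain test, not derived from TestUtils
    {
        cv::Mat a = cv::Mat::eye(3, 3, CV_32F), b = a.clone();
        EXPECT_MAT_NEAR(a, b, 0);               // expands to cvtest::ocl::TestUtils::checkNorm2(a, b)
    }

    }} // namespace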