diff --git a/modules/dnn/src/layers/fully_connected_layer.cpp b/modules/dnn/src/layers/fully_connected_layer.cpp
index 2114d42e5b..3f3b62b106 100644
--- a/modules/dnn/src/layers/fully_connected_layer.cpp
+++ b/modules/dnn/src/layers/fully_connected_layer.cpp
@@ -75,32 +75,34 @@ public:
     FullyConnectedLayerImpl(const LayerParams& params)
     {
         setParamsFrom(params);
-        CV_Assert(1 <= blobs.size() && blobs.size() <= 2);
-
-        int numOutput = params.get<int>("num_output");
-        int innerSize = (int)blobs[0].total() / numOutput;
         bias = params.get<bool>("bias_term", true);
         axis = params.get<int>("axis", 1);
+        if (!blobs.empty())
+        {
+            CV_Assert(1 <= blobs.size() && blobs.size() <= 2);
+            int numOutput = params.get<int>("num_output");
+            int innerSize = (int)blobs[0].total() / numOutput;
 
-        CV_Assert(blobs[0].dims >= 2 && (size_t)(innerSize * numOutput) == blobs[0].total());
-        CV_Assert(!bias || (blobs.size() == 2 && (size_t)numOutput == blobs[1].total()));
+            CV_Assert(blobs[0].dims >= 2 && (size_t)(innerSize * numOutput) == blobs[0].total());
+            CV_Assert(!bias || (blobs.size() == 2 && (size_t)numOutput == blobs[1].total()));
 
-        weightsMat = blobs[0] = blobs[0].reshape(1, numOutput);
-        int vecsize = weightsMat.cols;
-        if( vecsize % VEC_ALIGN != 0 )
-        {
-            int vecsize_aligned = (int)alignSize(vecsize, VEC_ALIGN);
-            Mat weightsBuf(weightsMat.rows, vecsize_aligned, weightsMat.type());
-            Mat wpadding = weightsBuf.colRange(vecsize, vecsize_aligned);
-            wpadding.setTo(Scalar::all(0.));
-            weightsMat = weightsBuf.colRange(0, vecsize);
-            blobs[0].copyTo(weightsMat);
-        }
+            weightsMat = blobs[0] = blobs[0].reshape(1, numOutput);
+            int vecsize = weightsMat.cols;
+            if (vecsize % VEC_ALIGN != 0)
+            {
+                int vecsize_aligned = (int)alignSize(vecsize, VEC_ALIGN);
+                Mat weightsBuf(weightsMat.rows, vecsize_aligned, weightsMat.type());
+                Mat wpadding = weightsBuf.colRange(vecsize, vecsize_aligned);
+                wpadding.setTo(Scalar::all(0.));
+                weightsMat = weightsBuf.colRange(0, vecsize);
+                blobs[0].copyTo(weightsMat);
+            }
 
-        if (bias)
-            biasMat = blobs[1] = blobs[1].reshape(1, 1);
-        else
-            biasMat = Mat::zeros(1, numOutput, weightsMat.type());
+            if (bias)
+                biasMat = blobs[1] = blobs[1].reshape(1, 1);
+            else
+                biasMat = Mat::zeros(1, numOutput, weightsMat.type());
+        }
     }
 
     bool getMemoryShapes(const std::vector<MatShape> &inputs,
@@ -108,20 +110,35 @@ public:
                          std::vector<MatShape> &outputs,
                          std::vector<MatShape> &) const CV_OVERRIDE
     {
-        CV_Assert(inputs.size() == 1);
-        CV_Assert(1 <= blobs.size() && blobs.size() <= 2);
-        CV_Assert(blobs[0].dims == 2);
+        int numOutput, cAxis;
+        if (blobs.empty())
+        {
+            CV_CheckEQ(inputs.size(), (size_t)2, "");
+            numOutput = inputs[1].back();
+            cAxis = inputs[0].size() - 1;
+            CV_CheckEQ(numOutput, inputs[0][cAxis - 1], "");
+            int dims = inputs[0].size();
+            CV_CheckEQ(inputs[1].size(), (size_t)dims, "");
+            CV_CheckGE(dims, 2, "");
+            for (int i = 0; i < dims - 2; i++)
+                CV_CheckEQ(inputs[0][i], inputs[1][i], "");
+            CV_CheckEQ(inputs[0].back(), inputs[1][dims - 2], "");
+        }
+        else
+        {
+            CV_CheckEQ(inputs.size(), (size_t)1, "");
+            CV_CheckEQ(blobs[0].dims, 2, "");
+            numOutput = blobs[0].size[0];
+            CV_Assert(!bias || (size_t)numOutput == blobs[1].total());
+            cAxis = clamp(axis, inputs[0]);
+        }
 
-        int cAxis = clamp(axis, inputs[0]);
-        int numOutput = blobs[0].size[0];
         MatShape outShape(cAxis + 1);
         for (int i = 0; i < cAxis; ++i)
             outShape[i] = inputs[0][i];
         outShape.back() = numOutput;
 
-        outputs.resize(inputs.size(), outShape);
-
-        CV_Assert(!bias || (size_t)numOutput == blobs[1].total());
+        outputs.resize(1, outShape);
         return false;
     }
 
@@ -129,7 +146,8 @@ public:
     {
         return backendId == DNN_BACKEND_OPENCV ||
                (backendId == DNN_BACKEND_HALIDE && haveHalide() && axis == 1) ||
-               ((backendId == DNN_BACKEND_INFERENCE_ENGINE_NN_BUILDER_2019 || backendId == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH) && haveInfEngine() && axis == 1);
+               (((backendId == DNN_BACKEND_INFERENCE_ENGINE_NN_BUILDER_2019 && !blobs.empty()) ||
+                 backendId == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH) && axis == 1);
     }
 
     virtual bool setActivation(const Ptr<ActivationLayer>& layer) CV_OVERRIDE
@@ -288,6 +306,51 @@ public:
         inps.getUMatVector(inputs);
         outs.getUMatVector(outputs);
 
+        if (inputs.size() == 2)
+        {
+            int dims = outputs[0].dims;
+            int m = inputs[0].size[dims - 2];
+            int n = inputs[0].size[dims - 1];
+            int k = inputs[1].size[dims - 1];
+            int rows = inputs[0].total() / (m * n);
+
+            MatShape sh_A = shape(rows, m * n);
+            MatShape sh_B = shape(rows, n * k);
+            MatShape sh_C = shape(rows, m * k);
+            UMat inp = inputs[0].reshape(1, sh_A.size(), &sh_A[0]);
+            UMat weight = inputs[1].reshape(1, sh_B.size(), &sh_B[0]);
+            UMat out = outputs[0].reshape(1, sh_C.size(), &sh_C[0]);
+
+            UMat A, B, C, A_fp32, B_fp32, C_fp32;
+            for (int i = 0; i < rows; ++i)
+            {
+                A = inp.row(i).reshape(1, m);
+                B = weight.row(i).reshape(1, n);
+                C = out.row(i).reshape(1, m);
+
+                if (use_half)
+                {
+                    convertFp16(A, A_fp32);
+                    convertFp16(B, B_fp32);
+                    convertFp16(C, C_fp32);
+                }
+                else
+                {
+                    A_fp32 = A;
+                    B_fp32 = B;
+                    C_fp32 = C;
+                }
+                cv::gemm(A_fp32, B_fp32, 1, noArray(), 0, C_fp32);
+                if (use_half)
+                {
+                    convertFp16(A_fp32, A);
+                    convertFp16(B_fp32, B);
+                    convertFp16(C_fp32, C);
+                }
+            }
+            return true;
+        }
+
         int axisCan = clamp(axis, inputs[0].dims);
         int numOutput = blobs[0].size[0];
         int innerSize = blobs[0].size[1];
@@ -407,16 +470,42 @@ public:
         inputs_arr.getMatVector(input);
         outputs_arr.getMatVector(output);
 
-        int axisCan = clamp(axis, input[0].dims);
-        int outerSize = input[0].total(0, axisCan);
-
-        for (size_t i = 0; i < input.size(); i++)
+        if (!blobs.empty())
         {
-            Mat srcMat = input[i].reshape(1, outerSize);
-            Mat dstMat = output[i].reshape(1, outerSize);
+            int axisCan = clamp(axis, input[0].dims);
+            int outerSize = input[0].total(0, axisCan);
 
-            const int nstripes = getNumThreads();
-            FullyConnected::run(srcMat, weightsMat, biasMat, dstMat, activ.get(), nstripes);
+            for (size_t i = 0; i < input.size(); i++)
+            {
+                Mat srcMat = input[i].reshape(1, outerSize);
+                Mat dstMat = output[i].reshape(1, outerSize);
+
+                const int nstripes = getNumThreads();
+                FullyConnected::run(srcMat, weightsMat, biasMat, dstMat, activ.get(), nstripes);
+            }
+        }
+        else
+        {
+            float* inpData = input[0].ptr<float>();
+            float* weightData = input[1].ptr<float>();
+            float* outData = output[0].ptr<float>();
+
+            int dims = output[0].dims;
+            int numSlice = output[0].total() / output[0].total(dims - 2);
+            int m = input[0].size[dims - 2];
+            int n = input[0].size[dims - 1];
+            int k = input[1].size[dims - 1];
+            for (int i = 0; i < numSlice; i++)
+            {
+                Mat inpSlice(m, n, CV_32F, inpData);
+                Mat weightSlice(n, k, CV_32F, weightData);
+                Mat outSlice(m, k, CV_32F, outData);
+
+                outSlice = inpSlice * weightSlice;
+                inpData += inpSlice.total();
+                weightData += weightSlice.total();
+                outData += outSlice.total();
+            }
         }
     }
 
@@ -467,20 +556,28 @@ public:
                                         const std::vector<Ptr<BackendNode> >& nodes) CV_OVERRIDE
     {
         auto& ieInpNode = nodes[0].dynamicCast<InfEngineNgraphNode>()->node;
-        int batch = ieInpNode->get_shape()[0];
+        std::shared_ptr<ngraph::Node> matmul;
 
-        std::vector<size_t> data = {(size_t)batch, (size_t)blobs[0].size[1]};
-        auto new_shape = std::make_shared<ngraph::op::Constant>(ngraph::element::i64, ngraph::Shape{2}, data.data());
-        auto inp = std::make_shared<ngraph::op::v1::Reshape>(ieInpNode, new_shape, true);
+        if (nodes.size() == 2)
+        {
+            auto& inp2 = nodes[1].dynamicCast<InfEngineNgraphNode>()->node;
+            matmul = std::make_shared<ngraph::op::MatMul>(ieInpNode, inp2, false, false);
+        }
+        else
+        {
+            std::vector<size_t> data = {(size_t)ieInpNode->get_shape()[0], (size_t)blobs[0].size[1]};
+            auto new_shape = std::make_shared<ngraph::op::Constant>(ngraph::element::i64, ngraph::Shape{2}, data.data());
+            auto inp = std::make_shared<ngraph::op::v1::Reshape>(ieInpNode, new_shape, true);
+
+            std::vector<size_t> weight_shape{(size_t)blobs[0].size[0], (size_t)blobs[0].size[1]};
+            auto ieWeights = std::make_shared<ngraph::op::Constant>(ngraph::element::f32, weight_shape, blobs[0].data);
+            matmul = std::make_shared<ngraph::op::MatMul>(inp, ieWeights, false, true);
+        }
 
-        std::vector<size_t> weight_shape{(size_t)blobs[0].size[0], (size_t)blobs[0].size[1]};
-        auto ieWeights = std::make_shared<ngraph::op::Constant>(ngraph::element::f32, weight_shape, blobs[0].data);
-        auto matmul = std::make_shared<ngraph::op::MatMul>(inp, ieWeights, false, true);
         if (bias) {
             auto bias_node = std::make_shared<ngraph::op::Constant>(ngraph::element::f32,
                                               ngraph::Shape{(size_t)blobs[1].size[1]}, blobs[1].data);
-            auto fc = std::make_shared<ngraph::op::v1::Add>(matmul, bias_node, ngraph::op::AutoBroadcastType::NUMPY);
-            return Ptr<BackendNode>(new InfEngineNgraphNode(fc));
+            matmul = std::make_shared<ngraph::op::v1::Add>(matmul, bias_node, ngraph::op::AutoBroadcastType::NUMPY);
         }
         return Ptr<BackendNode>(new InfEngineNgraphNode(matmul));
     }
diff --git a/modules/dnn/src/onnx/onnx_graph_simplifier.cpp b/modules/dnn/src/onnx/onnx_graph_simplifier.cpp
index fe96927840..ff474224cc 100644
--- a/modules/dnn/src/onnx/onnx_graph_simplifier.cpp
+++ b/modules/dnn/src/onnx/onnx_graph_simplifier.cpp
@@ -154,6 +154,73 @@ private:
     int axis;
 };
 
+class NormalizeSubgraph1 : public Subgraph
+{
+public:
+    NormalizeSubgraph1() : axis(1)
+    {
+        input = addNodeToMatch("");
+        norm = addNodeToMatch("ReduceL2", input);
+        addNodeToMatch("Div", input, norm);
+        setFusedNode("Normalize", input);
+    }
+
+    virtual bool match(const Ptr<ImportGraphWrapper>& net, int nodeId,
+                       std::vector<int>& matchedNodesIds,
+                       std::vector<int>& targetNodesIds) CV_OVERRIDE
+    {
+        if (Subgraph::match(net, nodeId, matchedNodesIds, targetNodesIds))
+        {
+            Ptr<ImportNodeWrapper> norm = net->getNode(matchedNodesIds[0]);
+            opencv_onnx::NodeProto* node = norm.dynamicCast<ONNXNodeWrapper>()->node;
+
+            for (int i = 0; i < node->attribute_size(); i++)
+            {
+                opencv_onnx::AttributeProto attr = node->attribute(i);
+                if (attr.name() != "axes")
+                    continue;
+                if (attr.ints_size() != 1)
+                    CV_Error(Error::StsNotImplemented, format("Unexpected number of axes: %d", attr.ints_size()));
+                axis = attr.ints(0);
+                return true;
+            }
+            CV_Error(Error::StsNotImplemented, "Missed axes attribute");
+        }
+        return false;
+    }
+
+    virtual void finalize(const Ptr<ImportGraphWrapper>&,
+                          const Ptr<ImportNodeWrapper>& fusedNode,
+                          std::vector<Ptr<ImportNodeWrapper> >&) CV_OVERRIDE
+    {
+        opencv_onnx::NodeProto* node = fusedNode.dynamicCast<ONNXNodeWrapper>()->node;
+        opencv_onnx::AttributeProto* axis_attr = node->add_attribute();
+        axis_attr->set_name("axis");
+        axis_attr->set_i(axis);
+
+        opencv_onnx::AttributeProto* end_axis_attr = node->add_attribute();
+        end_axis_attr->set_name("end_axis");
+        end_axis_attr->set_i(axis);
+    }
+
+protected:
+    int input, norm;
+    int axis;
+};
+
+
+class NormalizeSubgraph2 : public NormalizeSubgraph1
+{
+public:
+    NormalizeSubgraph2() : NormalizeSubgraph1()
+    {
+        int clip = addNodeToMatch("Clip", norm);
+        int shape = addNodeToMatch("Shape", input);
+        int expand = addNodeToMatch("Expand", clip, shape);
+        addNodeToMatch("Div", input, expand);
+    }
+};
+
 class GatherCastSubgraph : public Subgraph
 {
 public:
@@ -299,6 +366,8 @@ void simplifySubgraphs(opencv_onnx::GraphProto& net)
     subgraphs.push_back(makePtr());
     subgraphs.push_back(makePtr());
     subgraphs.push_back(makePtr());
+    subgraphs.push_back(makePtr<NormalizeSubgraph1>());
+    subgraphs.push_back(makePtr<NormalizeSubgraph2>());
 
     simplifySubgraphs(Ptr<ImportGraphWrapper>(new ONNXGraphWrapper(net)), subgraphs);
 }
diff --git a/modules/dnn/src/onnx/onnx_importer.cpp b/modules/dnn/src/onnx/onnx_importer.cpp
index 6a6f485047..ec2d2c4c25 100644
--- a/modules/dnn/src/onnx/onnx_importer.cpp
+++ b/modules/dnn/src/onnx/onnx_importer.cpp
@@ -391,19 +391,71 @@ void ONNXImporter::populateNet(Net dstNet)
                     CV_Error(Error::StsNotImplemented, "Unsupported mode of ReduceMean operation.");
 
                 MatShape inpShape = outShapes[node_proto.input(0)];
-                if (inpShape.size() != 4 && inpShape.size() != 5)
-                    CV_Error(Error::StsNotImplemented, "Unsupported input shape of reduce_mean operation.");
-
                 DictValue axes = layerParams.get("axes");
-                CV_Assert(axes.size() <= inpShape.size() - 2);
-                std::vector<int> kernel_size(inpShape.size() - 2, 1);
-                for (int i = 0; i < axes.size(); i++) {
-                    int axis = axes.get<int>(i);
-                    CV_Assert_N(axis >= 2 + i, axis < inpShape.size());
-                    kernel_size[axis - 2] = inpShape[axis];
+                if (inpShape.size() == 3 && axes.size() <= 2)
+                {
+                    int axis = axes.get<int>(0);
+                    CV_CheckNE(axis, 0, "");
+                    outShapes[layerParams.name] = inpShape;
+                    outShapes[layerParams.name][axis] = 1;
+
+                    LayerParams reshapeLp;
+                    reshapeLp.name = layerParams.name + "/reshape";
+                    reshapeLp.type = "Reshape";
+                    CV_Assert(layer_id.find(reshapeLp.name) == layer_id.end());
+                    reshapeLp.set("axis", 0);
+                    reshapeLp.set("num_axes", 1);
+                    int newShape[] = {1, -1};
+                    reshapeLp.set("dim", DictValue::arrayInt(&newShape[0], 2));
+
+                    opencv_onnx::NodeProto proto;
+                    proto.add_input(node_proto.input(0));
+                    proto.add_output(reshapeLp.name);
+                    addLayer(dstNet, reshapeLp, proto, layer_id, outShapes);
+
+                    LayerParams avgLp;
+                    avgLp.name = layerParams.name + "/avg";
+                    avgLp.type = "Pooling";
+                    CV_Assert(layer_id.find(avgLp.name) == layer_id.end());
+                    avgLp.set("pool", "ave");
+                    if (axes.size() == 2)
+                    {
+                        CV_CheckEQ(axes.get<int>(0), 1, "Unsupported ReduceMean mode");
+                        CV_CheckEQ(axes.get<int>(1), 2, "Unsupported ReduceMean mode");
+                        avgLp.set("global_pooling", true);
+                        outShapes[layerParams.name][axes.get<int>(1)] = 1;
+                    }
+                    else
+                    {
+                        avgLp.set(axis == 2 ? "global_pooling_w" : "global_pooling_h", true);
+                        avgLp.set(axis == 2 ? "kernel_h" : "kernel_w", 1);
"kernel_h" : "kernel_w", 1); + } + + node_proto.set_input(0, reshapeLp.name); + node_proto.set_output(0, avgLp.name); + addLayer(dstNet, avgLp, node_proto, layer_id, outShapes); + + layerParams.type = "Flatten"; + layerParams.set("axis", 0); + layerParams.set("end_axis", 1); + + node_proto.set_input(0, avgLp.name); + node_proto.set_output(0, layerParams.name); } + else + { + if (inpShape.size() != 4 && inpShape.size() != 5) + CV_Error(Error::StsNotImplemented, "Unsupported input shape of reduce_mean operation."); - layerParams.set("kernel_size", DictValue::arrayInt(&kernel_size[0], kernel_size.size())); + CV_Assert(axes.size() <= inpShape.size() - 2); + std::vector kernel_size(inpShape.size() - 2, 1); + for (int i = 0; i < axes.size(); i++) { + int axis = axes.get(i); + CV_Assert_N(axis >= 2 + i, axis < inpShape.size()); + kernel_size[axis - 2] = inpShape[axis]; + } + layerParams.set("kernel_size", DictValue::arrayInt(&kernel_size[0], kernel_size.size())); + } } } else if (layer_type == "Slice") @@ -825,10 +877,14 @@ void ONNXImporter::populateNet(Net dstNet) { CV_Assert(node_proto.input_size() == 2); layerParams.type = "InnerProduct"; - Mat blob = getBlob(node_proto, constBlobs, 1); - layerParams.blobs.push_back(blob.t()); layerParams.set("bias_term", false); - layerParams.set("num_output", layerParams.blobs[0].size[0]); + + if (constBlobs.find(node_proto.input(1)) != constBlobs.end()) + { + Mat blob = getBlob(node_proto, constBlobs, 1); + layerParams.blobs.push_back(blob.t()); + layerParams.set("num_output", layerParams.blobs[0].size[0]); + } } else if (layer_type == "Mul" || layer_type == "Div") { @@ -977,22 +1033,6 @@ void ONNXImporter::populateNet(Net dstNet) continue; } } - else if (layer_type == "ReduceL2") - { - CV_Assert_N(node_proto.input_size() == 1, layerParams.has("axes")); - CV_Assert(graph_proto.node_size() > li + 1 && graph_proto.node(li + 1).op_type() == "Div"); - ++li; - node_proto = graph_proto.node(li); - layerParams.name = node_proto.output(0); - layerParams.type = "Normalize"; - - DictValue axes_dict = layerParams.get("axes"); - if (axes_dict.size() != 1) - CV_Error(Error::StsNotImplemented, "Multidimensional reduceL2"); - int axis = axes_dict.getIntValue(0); - layerParams.set("axis",axis); - layerParams.set("end_axis", axis); - } else if (layer_type == "Squeeze") { CV_Assert_N(node_proto.input_size() == 1, layerParams.has("axes")); @@ -1080,6 +1120,78 @@ void ONNXImporter::populateNet(Net dstNet) layerParams.type = "Reshape"; layerParams.set("dim", DictValue::arrayInt(&outShape[0], outShape.size())); } + else if (layer_type == "Expand") + { + CV_CheckEQ(node_proto.input_size(), 2, ""); + CV_Assert(constBlobs.find(node_proto.input(1)) != constBlobs.end()); + Mat newShapeMat = getBlob(node_proto, constBlobs, 1); + MatShape targetShape(newShapeMat.ptr(), newShapeMat.ptr() + newShapeMat.total()); + + shapeIt = outShapes.find(node_proto.input(0)); + CV_Assert(shapeIt != outShapes.end()); + MatShape inpShape = shapeIt->second; + CV_CheckEQ(inpShape.size(), targetShape.size(), "Unsupported Expand op with different dims"); + + std::vector broadcast_axes; + for (int i = 0; i < targetShape.size(); i++) + { + if (targetShape[i] != inpShape[i]) + { + if (inpShape[i] == 1) + broadcast_axes.push_back(i); + else + CV_Error(Error::StsError, format("Could not be broadcast by axis: %d", i)); + } + } + + if (broadcast_axes.size() == 2 && + broadcast_axes[0] == broadcast_axes[1] - 1 && broadcast_axes[1] == inpShape.size() - 1) + { + LayerParams constParams; + constParams.name = 
layerParams.name + "/const"; + CV_Assert(layer_id.find(constParams.name) == layer_id.end()); + constParams.type = "Const"; + + Mat inp = Mat::ones(newShapeMat.total(), newShapeMat.ptr(), CV_32F); + constParams.blobs.push_back(inp); + + opencv_onnx::NodeProto proto; + proto.add_output(constParams.name); + addLayer(dstNet, constParams, proto, layer_id, outShapes); + + layerParams.type = "Scale"; + layerParams.set("bias_term", false); + node_proto.set_input(0, constParams.name); + node_proto.set_input(1, shapeIt->first); + } + else if (broadcast_axes.size() == 1 && broadcast_axes[0] <= 1) + { + String base_name = layerParams.name + "/copy_"; + std::vector input_names; + for (int j = 0; j < targetShape[broadcast_axes[0]]; j++) + { + std::ostringstream ss; + ss << j; + LayerParams copyLP; + copyLP.name = base_name + ss.str(); + copyLP.type = "Identity"; + CV_Assert(layer_id.find(copyLP.name) == layer_id.end()); + input_names.push_back(copyLP.name); + + node_proto.set_output(0, copyLP.name); + addLayer(dstNet, copyLP, node_proto, layer_id, outShapes); + } + node_proto.clear_input(); + for (int i = 0; i < input_names.size(); i++) + { + node_proto.add_input(input_names[i]); + } + layerParams.set("axis", broadcast_axes[0]); + layerParams.type = "Concat"; + } + else + CV_Error(Error::StsNotImplemented, "Unsupported Expand op"); + } else if (layer_type == "Reshape") { CV_Assert(node_proto.input_size() == 2 || layerParams.has("shape")); diff --git a/modules/dnn/test/test_onnx_importer.cpp b/modules/dnn/test/test_onnx_importer.cpp index 67067dec12..9743f5d2a1 100644 --- a/modules/dnn/test/test_onnx_importer.cpp +++ b/modules/dnn/test/test_onnx_importer.cpp @@ -179,6 +179,8 @@ TEST_P(Test_ONNX_layers, Shape) TEST_P(Test_ONNX_layers, ReduceMean) { testONNXModels("reduce_mean"); + testONNXModels("reduce_mean_axis1"); + testONNXModels("reduce_mean_axis2"); } TEST_P(Test_ONNX_layers, ReduceMean3D) @@ -308,6 +310,30 @@ TEST_P(Test_ONNX_layers, Multiplication) testONNXModels("mul"); } +TEST_P(Test_ONNX_layers, MatMul) +{ + if (backend == DNN_BACKEND_INFERENCE_ENGINE_NN_BUILDER_2019) + applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_NN_BUILDER); + + testONNXModels("matmul_2d"); + testONNXModels("matmul_3d"); + testONNXModels("matmul_4d"); +} + +TEST_P(Test_ONNX_layers, Expand) +{ + testONNXModels("expand_batch"); + testONNXModels("expand_channels"); +} + +TEST_P(Test_ONNX_layers, ExpandHW) +{ + // ngraph::op::v1::Multiply bug + if (backend == DNN_BACKEND_INFERENCE_ENGINE_NN_BUILDER_2019 || backend == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH) + applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_NN_BUILDER, CV_TEST_TAG_DNN_SKIP_IE_NGRAPH); + testONNXModels("expand_hw"); +} + TEST_P(Test_ONNX_layers, Constant) { #if defined(INF_ENGINE_RELEASE) && INF_ENGINE_VER_MAJOR_EQ(2018050000) @@ -413,6 +439,7 @@ TEST_P(Test_ONNX_layers, Squeeze) TEST_P(Test_ONNX_layers, ReduceL2) { testONNXModels("reduceL2"); + testONNXModels("reduceL2_subgraph"); } TEST_P(Test_ONNX_layers, Split)