From 4aef9b1c93020f28626005ee5f3b9936ecd24f7e Mon Sep 17 00:00:00 2001 From: fengyuentau Date: Mon, 19 Sep 2022 18:38:03 +0800 Subject: [PATCH] dnn: support yolov7 (not simplified) --- modules/dnn/src/onnx/onnx_importer.cpp | 86 +++++++++++++++++++++++-- modules/dnn/test/test_onnx_importer.cpp | 84 ++++++++++++++++++++++++ 2 files changed, 163 insertions(+), 7 deletions(-) diff --git a/modules/dnn/src/onnx/onnx_importer.cpp b/modules/dnn/src/onnx/onnx_importer.cpp index 259fd29eeb..7aed220a86 100644 --- a/modules/dnn/src/onnx/onnx_importer.cpp +++ b/modules/dnn/src/onnx/onnx_importer.cpp @@ -180,6 +180,7 @@ private: void parseCumSum (LayerParams& layerParams, const opencv_onnx::NodeProto& node_proto); void parseElementWise (LayerParams& layerParams, const opencv_onnx::NodeProto& node_proto); void parseDepthToSpace (LayerParams& layerParams, const opencv_onnx::NodeProto& node_proto); + void parseRange (LayerParams& layerParams, const opencv_onnx::NodeProto& node_proto); void parseSimpleLayers (LayerParams& layerParams, const opencv_onnx::NodeProto& node_proto); // Domain: com.microsoft @@ -2427,9 +2428,6 @@ void ONNXImporter::parseExpand(LayerParams& layerParams, const opencv_onnx::Node if (!haveVariables) { - if (broadcast_axes.size() > 1) - CV_Error(Error::StsNotImplemented, "Expand op doesn't support multiple axes for constant input"); - if (broadcast_axes.empty()) { addConstant(output_name, getBlob(node_proto, 0)); @@ -2437,10 +2435,15 @@ void ONNXImporter::parseExpand(LayerParams& layerParams, const opencv_onnx::Node } Mat input = getBlob(node_proto, 0); - input = input.reshape(0, total(inpShape, 0, broadcast_axes[0])); - Mat output = cv::repeat(input, 1, targetShape[broadcast_axes[0]]); - output = output.reshape(0, targetShape); - addConstant(output_name, output); + MatShape subTargetShape = inpShape; + for (auto broadcast_axis : broadcast_axes) + { + subTargetShape[broadcast_axis] = targetShape[broadcast_axis]; + input = input.reshape(0, total(inpShape, 0, broadcast_axis)); + Mat output = cv::repeat(input, 1, subTargetShape[broadcast_axis]); + input = output.reshape(0, subTargetShape); + } + addConstant(output_name, input); return; } @@ -2497,6 +2500,12 @@ void ONNXImporter::parseReshape(LayerParams& layerParams, const opencv_onnx::Nod std::vector inputs(1, getBlob(node_proto, 0)), outputs; runLayer(layerParams, inputs, outputs); addConstant(node_proto.output(0), outputs[0]); + if (constBlobsExtraInfo.find(node_proto.input(0)) != constBlobsExtraInfo.end()) + { + const int real_ndims_input0 = getBlobExtraInfo(node_proto, 0).real_ndims; + if (real_ndims_input0 == 1 && blob.total() == 1 && blob.at() == -1) // 1D tensor as input0 (data), and shape is -1 + constBlobsExtraInfo.insert(std::make_pair(node_proto.output(0), TensorInfo(1))); + } return; } } @@ -2548,7 +2557,14 @@ void ONNXImporter::parseShape(LayerParams& layerParams, const opencv_onnx::NodeP CV_Assert(shapeIt != outShapes.end()); const MatShape& inpShape = shapeIt->second; + bool isInput1D = false; + if (constBlobsExtraInfo.find(node_proto.input(0)) != constBlobsExtraInfo.end()) + if (getBlobExtraInfo(node_proto, 0).real_ndims == 1) + isInput1D = true; + int dims = static_cast(inpShape.size()); + if (isInput1D) + dims = 1; Mat shapeMat(dims, 1, CV_32S); bool isDynamicShape = false; for (int j = 0; j < dims; ++j) @@ -3080,8 +3096,63 @@ void ONNXImporter::parseDepthToSpace(LayerParams& layerParams, const opencv_onnx addLayer(layerParams, node_proto); } +// Currently we only support range with all constant inputs +void ONNXImporter::parseRange(LayerParams& layerParams, const opencv_onnx::NodeProto& node_proto) +{ + CV_Assert(node_proto.input_size() == 3); // 0 - start, 1 - limit, 2 - delta + layerParams.type = "Range"; + + std::vector const_id; + for (int i = 0; i < node_proto.input_size(); i++) + if (layer_id.find(node_proto.input(i)) == layer_id.end()) + const_id.push_back(i); + + // only supports the case which all inputs are constant + CV_Assert(const_id.size() == 3); + + Mat startMat = getBlob(node_proto, 0); + CV_Assert(startMat.type() == CV_32SC1); + int start = startMat.at(0); + + Mat limitMat = getBlob(node_proto, 1); + CV_Assert(limitMat.type() == CV_32SC1); + int limit = limitMat.at(0); + + Mat deltaMat = getBlob(node_proto, 2); + CV_Assert(deltaMat.type() == CV_32SC1); + int delta = deltaMat.at(0); + + int number_of_elements = std::max(int(std::ceil((limit - start) / delta)), 0); + Mat r(number_of_elements, 1, CV_32SC1); + for (int i = 0; i < number_of_elements; i++) + { + r.at(i) = start + (i * delta); + } + addConstant(node_proto.output(0), r); + constBlobsExtraInfo.insert(std::make_pair(node_proto.output(0), TensorInfo(1))); +} + void ONNXImporter::parseSimpleLayers(LayerParams& layerParams, const opencv_onnx::NodeProto& node_proto) { + bool is_all_input_const = true; + for (int i = 0; i < node_proto.input_size(); i++) + { + if (layer_id.find(node_proto.input(i)) != layer_id.end()) + { + is_all_input_const = false; + break; + } + } + if (is_all_input_const && node_proto.output_size() == 1) + { + std::vector input, output; + for (int i = 0; i < node_proto.input_size(); i++) + input.push_back(getBlob(node_proto, i)); + runLayer(layerParams, input, output); + addConstant(node_proto.output(0), output[0]); + return; + } + for (int j = 0; j < node_proto.input_size(); j++) { if (layer_id.find(node_proto.input(j)) == layer_id.end()) layerParams.blobs.push_back(getBlob(node_proto, j)); @@ -3685,6 +3756,7 @@ void ONNXImporter::buildDispatchMap_ONNX_AI(int opset_version) dispatch["Equal"] = dispatch["Greater"] = dispatch["Less"] = dispatch["Pow"] = dispatch["Add"] = dispatch["Sub"] = dispatch["Mul"] = dispatch["Div"] = &ONNXImporter::parseElementWise; dispatch["Sum"] = dispatch["Min"] = dispatch["Max"] = &ONNXImporter::parseElementWise; + dispatch["Range"] = &ONNXImporter::parseRange; std::vector simpleLayers{"Acos", "Acosh", "Asin", "Asinh", "Atan", "Atanh", "Ceil", "Celu", "Cos", "Cosh", "Dropout", "Erf", "Exp", "Floor", "HardSigmoid", "HardSwish", diff --git a/modules/dnn/test/test_onnx_importer.cpp b/modules/dnn/test/test_onnx_importer.cpp index 554c4b7e66..d956f67490 100644 --- a/modules/dnn/test/test_onnx_importer.cpp +++ b/modules/dnn/test/test_onnx_importer.cpp @@ -2330,6 +2330,90 @@ TEST_P(Test_ONNX_layers, CumSum) testONNXModels("cumsum_3d_dim_2"); } +// This test is mainly to test: +// 1. identity node with constant input +// 2. limited support to range operator (all inputs are constant) +// 3. parseExpand with multiple broadcast axes +// 4. 1D mat dimension issue with the output of range operator +TEST_P(Test_ONNX_layers, YOLOv7) +{ + std::string weightPath = _tf("models/yolov7_not_simplified.onnx"); + std::string imgPath = _tf("../dog_orig_size.png"); + + Size targetSize{640, 640}; + float conf_threshold = 0.3; + float iou_threshold = 0.5; + + // Reference, which is collected with input size of 640x640 + std::vector refClassIds{1, 16, 7}; + std::vector refScores{0.9614331f, 0.9589417f, 0.8679074f}; + // [x1, y1, x2, y2] x 3 + std::vector refBoxes{Rect2d(105.973236f, 150.16716f, 472.59012f, 466.48834f), + Rect2d(109.97953f, 246.17862f, 259.83676f, 600.76624f), + Rect2d(385.96185f, 83.02809f, 576.07355f, 189.82793f)}; + + Mat img = imread(imgPath); + Mat inp = blobFromImage(img, 1/255.0, targetSize, Scalar(0, 0, 0), true, false); + + Net net = readNet(weightPath); + + net.setInput(inp); + std::vector outs; + net.forward(outs, net.getUnconnectedOutLayersNames()); + + Mat preds = outs[3].reshape(1, outs[3].size[1]); // [1, 25200, 85] + + // Retrieve + std::vector classIds; + std::vector confidences; + std::vector boxes; + // each row is [cx, cy, w, h, conf_obj, conf_class1, ..., conf_class80] + for (int i = 0; i < preds.rows; ++i) + { + // filter out non objects + float obj_conf = preds.row(i).at(4); + if (obj_conf < conf_threshold) + continue; + + // get class id and conf + Mat scores = preds.row(i).colRange(5, preds.cols); + double conf; + Point maxLoc; + minMaxLoc(scores, 0, &conf, 0, &maxLoc); + conf *= obj_conf; + if (conf < conf_threshold) + continue; + + // get bbox coords + float* det = preds.ptr(i); + double cx = det[0]; + double cy = det[1]; + double w = det[2]; + double h = det[3]; + // [x1, y1, x2, y2] + boxes.push_back(Rect2d(cx - 0.5 * w, cy - 0.5 * h, + cx + 0.5 * w, cy + 0.5 * h)); + classIds.push_back(maxLoc.x); + confidences.push_back(conf); + } + + // NMS + std::vector keep_idx; + NMSBoxes(boxes, confidences, conf_threshold, iou_threshold, keep_idx); + + std::vector keep_classIds; + std::vector keep_confidences; + std::vector keep_boxes; + for (auto i : keep_idx) + { + keep_classIds.push_back(classIds[i]); + keep_confidences.push_back(confidences[i]); + keep_boxes.push_back(boxes[i]); + } + + normAssertDetections(refClassIds, refScores, refBoxes, keep_classIds, keep_confidences, keep_boxes); +} + INSTANTIATE_TEST_CASE_P(/**/, Test_ONNX_nets, dnnBackendsAndTargets()); }} // namespace