diff --git a/modules/dnn/src/darknet/darknet_io.cpp b/modules/dnn/src/darknet/darknet_io.cpp
index 4915538ff7..99715df829 100644
--- a/modules/dnn/src/darknet/darknet_io.cpp
+++ b/modules/dnn/src/darknet/darknet_io.cpp
@@ -470,7 +470,7 @@ namespace cv {
                     fused_layer_names.push_back(last_layer);
                 }
 
-                void setYolo(int classes, const std::vector<int>& mask, const std::vector<float>& anchors, float thresh, float nms_threshold, float scale_x_y)
+                void setYolo(int classes, const std::vector<int>& mask, const std::vector<float>& anchors, float thresh, float nms_threshold, float scale_x_y, int new_coords)
                 {
                     cv::dnn::LayerParams region_param;
                     region_param.name = "Region-name";
@@ -484,6 +484,7 @@ namespace cv {
                     region_param.set<float>("thresh", thresh);
                     region_param.set<float>("nms_threshold", nms_threshold);
                     region_param.set<float>("scale_x_y", scale_x_y);
+                    region_param.set<int>("new_coords", new_coords);
 
                     std::vector<float> usedAnchors(numAnchors * 2);
                     for (int i = 0; i < numAnchors; ++i)
@@ -882,6 +883,7 @@ namespace cv {
                         float thresh = getParam<float>(layer_params, "thresh", 0.2);
                         float nms_threshold = getParam<float>(layer_params, "nms_threshold", 0.0);
                         float scale_x_y = getParam<float>(layer_params, "scale_x_y", 1.0);
+                        int new_coords = getParam<int>(layer_params, "new_coords", 0);
 
                         std::string anchors_values = getParam<std::string>(layer_params, "anchors", std::string());
                         CV_Assert(!anchors_values.empty());
@@ -894,7 +896,7 @@ namespace cv {
                         CV_Assert(classes > 0 && num_of_anchors > 0 && (num_of_anchors * 2) == anchors_vec.size());
 
                         setParams.setPermute(false);
-                        setParams.setYolo(classes, mask_vec, anchors_vec, thresh, nms_threshold, scale_x_y);
+                        setParams.setYolo(classes, mask_vec, anchors_vec, thresh, nms_threshold, scale_x_y, new_coords);
                     }
                     else {
                         CV_Error(cv::Error::StsParseError, "Unknown layer type: " + layer_type);
diff --git a/modules/dnn/src/layers/region_layer.cpp b/modules/dnn/src/layers/region_layer.cpp
index 4a8cb724d6..578b0d7dec 100644
--- a/modules/dnn/src/layers/region_layer.cpp
+++ b/modules/dnn/src/layers/region_layer.cpp
@@ -64,6 +64,7 @@ class RegionLayerImpl CV_FINAL : public RegionLayer
 public:
     int coords, classes, anchors, classfix;
     float thresh, nmsThreshold, scale_x_y;
+    int new_coords;
     bool useSoftmax, useLogistic;
 #ifdef HAVE_OPENCL
     UMat blob_umat;
@@ -83,6 +84,7 @@ public:
         useLogistic = params.get<bool>("logistic", false);
         nmsThreshold = params.get<float>("nms_threshold", 0.4);
         scale_x_y = params.get<float>("scale_x_y", 1.0); // Yolov4
+        new_coords = params.get<int>("new_coords", 0); // Yolov4x-mish
 
         CV_Assert(nmsThreshold >= 0.);
         CV_Assert(coords == 4);
@@ -113,7 +115,7 @@ public:
     {
 #ifdef HAVE_DNN_NGRAPH
         if (backendId == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH)
-            return INF_ENGINE_VER_MAJOR_GE(INF_ENGINE_RELEASE_2020_2) && preferableTarget != DNN_TARGET_MYRIAD;
+            return INF_ENGINE_VER_MAJOR_GE(INF_ENGINE_RELEASE_2020_2) && preferableTarget != DNN_TARGET_MYRIAD && new_coords == 0;
 #endif
         return backendId == DNN_BACKEND_OPENCV;
     }
@@ -259,26 +261,28 @@ public:
             const float *srcData = inpBlob.ptr<float>();
             float *dstData = outBlob.ptr<float>();
 
-            // logistic activation for t0, for each grid cell (X x Y x Anchor-index)
-            for (int i = 0; i < batch_size*rows*cols*anchors; ++i) {
-                int index = cell_size*i;
-                float x = srcData[index + 4];
-                dstData[index + 4] = logistic_activate(x); // logistic activation
-            }
-
-            if (useSoftmax) { // Yolo v2
+            if (new_coords == 0) {
+                // logistic activation for t0, for each grid cell (X x Y x Anchor-index)
                 for (int i = 0; i < batch_size*rows*cols*anchors; ++i) {
                     int index = cell_size*i;
-                    softmax_activate(srcData + index + 5, classes, 1, dstData + index + 5);
+                    float x = srcData[index + 4];
+                    dstData[index + 4] = logistic_activate(x); // logistic activation
                 }
-            }
-            else if (useLogistic) { // Yolo v3
-                for (int i = 0; i < batch_size*rows*cols*anchors; ++i){
-                    int index = cell_size*i;
-                    const float* input = srcData + index + 5;
-                    float* output = dstData + index + 5;
-                    for (int c = 0; c < classes; ++c)
-                        output[c] = logistic_activate(input[c]);
+
+                if (useSoftmax) { // Yolo v2
+                    for (int i = 0; i < batch_size*rows*cols*anchors; ++i) {
+                        int index = cell_size*i;
+                        softmax_activate(srcData + index + 5, classes, 1, dstData + index + 5);
+                    }
+                }
+                else if (useLogistic) { // Yolo v3
+                    for (int i = 0; i < batch_size*rows*cols*anchors; ++i){
+                        int index = cell_size*i;
+                        const float* input = srcData + index + 5;
+                        float* output = dstData + index + 5;
+                        for (int c = 0; c < classes; ++c)
+                            output[c] = logistic_activate(input[c]);
+                    }
                 }
             }
             for (int b = 0; b < batch_size; ++b)
@@ -290,20 +294,49 @@ public:
                             int index = (y*cols + x)*anchors + a; // index for each grid-cell & anchor
                             int p_index = index_sample_offset + index * cell_size + 4;
                             float scale = dstData[p_index];
-                            if (classfix == -1 && scale < .5) scale = 0; // if(t0 < 0.5) t0 = 0;
+                            if (classfix == -1 && scale < .5)
+                            {
+                                scale = 0; // if(t0 < 0.5) t0 = 0;
+                            }
                             int box_index = index_sample_offset + index * cell_size;
 
-                            float x_tmp = (logistic_activate(srcData[box_index + 0]) - 0.5f) * scale_x_y + 0.5f;
-                            float y_tmp = (logistic_activate(srcData[box_index + 1]) - 0.5f) * scale_x_y + 0.5f;
-                            dstData[box_index + 0] = (x + x_tmp) / cols;
-                            dstData[box_index + 1] = (y + y_tmp) / rows;
-                            dstData[box_index + 2] = exp(srcData[box_index + 2]) * biasData[2 * a] / wNorm;
-                            dstData[box_index + 3] = exp(srcData[box_index + 3]) * biasData[2 * a + 1] / hNorm;
-
-                            int class_index = index_sample_offset + index * cell_size + 5;
-                            for (int j = 0; j < classes; ++j) {
-                                float prob = scale*dstData[class_index + j]; // prob = IoU(box, object) = t0 * class-probability
-                                dstData[class_index + j] = (prob > thresh) ? prob : 0; // if (IoU < threshold) IoU = 0;
+                            if (new_coords == 1) {
+                                float x_tmp = (srcData[box_index + 0] - 0.5f) * scale_x_y + 0.5f;
+                                float y_tmp = (srcData[box_index + 1] - 0.5f) * scale_x_y + 0.5f;
+                                dstData[box_index + 0] = (x + x_tmp) / cols;
+                                dstData[box_index + 1] = (y + y_tmp) / rows;
+                                dstData[box_index + 2] = (srcData[box_index + 2]) * (srcData[box_index + 2]) * 4 * biasData[2 * a] / wNorm;
+                                dstData[box_index + 3] = (srcData[box_index + 3]) * (srcData[box_index + 3]) * 4 * biasData[2 * a + 1] / hNorm;
+                                // The objectness activation loop is gated on new_coords == 0, so on this path
+                                // nothing else in this hunk writes dstData[p_index]; copy the raw value through.
+                                dstData[p_index] = srcData[p_index];
+
+                                scale = srcData[p_index];
+                                if (classfix == -1 && scale < thresh)
+                                {
+                                    scale = 0; // if(t0 < thresh) t0 = 0;
+                                }
+
+                                int class_index = index_sample_offset + index * cell_size + 5;
+                                for (int j = 0; j < classes; ++j) {
+                                    float prob = scale*srcData[class_index + j]; // prob = IoU(box, object) = t0 * class-probability
+                                    dstData[class_index + j] = (prob > thresh) ? prob : 0; // if (IoU < threshold) IoU = 0;
+                                }
+                            }
+                            else
+                            {
+                                float x_tmp = (logistic_activate(srcData[box_index + 0]) - 0.5f) * scale_x_y + 0.5f;
+                                float y_tmp = (logistic_activate(srcData[box_index + 1]) - 0.5f) * scale_x_y + 0.5f;
+                                dstData[box_index + 0] = (x + x_tmp) / cols;
+                                dstData[box_index + 1] = (y + y_tmp) / rows;
+                                dstData[box_index + 2] = exp(srcData[box_index + 2]) * biasData[2 * a] / wNorm;
+                                dstData[box_index + 3] = exp(srcData[box_index + 3]) * biasData[2 * a + 1] / hNorm;
+
+                                int class_index = index_sample_offset + index * cell_size + 5;
+                                for (int j = 0; j < classes; ++j) {
+                                    float prob = scale*dstData[class_index + j]; // prob = IoU(box, object) = t0 * class-probability
+                                    dstData[class_index + j] = (prob > thresh) ? prob : 0; // if (IoU < threshold) IoU = 0;
+                                }
                             }
                         }
             if (nmsThreshold > 0) {
diff --git a/modules/dnn/test/test_darknet_importer.cpp b/modules/dnn/test/test_darknet_importer.cpp
index 9983d99ef5..ea700573e6 100644
--- a/modules/dnn/test/test_darknet_importer.cpp
+++ b/modules/dnn/test/test_darknet_importer.cpp
@@ -681,6 +681,78 @@ TEST_P(Test_Darknet_nets, YOLOv4_tiny)
 #endif
 }
 
+TEST_P(Test_Darknet_nets, YOLOv4x_mish)
+{
+    applyTestTag(CV_TEST_TAG_LONG, (target == DNN_TARGET_CPU ? CV_TEST_TAG_MEMORY_1GB : CV_TEST_TAG_MEMORY_2GB));
+
+#if defined(INF_ENGINE_RELEASE) && INF_ENGINE_VER_MAJOR_EQ(2020040000) // nGraph compilation failure
+    if (backend == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH && target == DNN_TARGET_OPENCL)
+        applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_OPENCL, CV_TEST_TAG_DNN_SKIP_IE_VERSION);
+    if (backend == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH && target == DNN_TARGET_OPENCL_FP16)
+        applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_OPENCL_FP16, CV_TEST_TAG_DNN_SKIP_IE_VERSION);
+#endif
+#if defined(INF_ENGINE_RELEASE)
+    if (target == DNN_TARGET_MYRIAD) // NC_OUT_OF_MEMORY
+        applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_MYRIAD, CV_TEST_TAG_DNN_SKIP_IE_VERSION);
+#endif
+
+    // batchId, classId, confidence, left, top, right, bottom
+    const int N0 = 3;
+    const int N1 = 5;
+    static const float ref_[/* (N0 + N1) * 7 */] = {
+0, 16, 0.925536f, 0.17188f, 0.386832f, 0.406138f, 0.941696f,
+0, 1, 0.912028f, 0.162125f, 0.208863f, 0.741316f, 0.729332f,
+0, 7, 0.841018f, 0.608953f, 0.128653f, 0.900692f, 0.295657f,
+
+1, 2, 0.925697f, 0.650438f, 0.458118f, 0.813927f, 0.661775f,
+1, 0, 0.882156f, 0.203644f, 0.365763f, 0.265473f, 0.632195f,
+1, 2, 0.848857f, 0.451044f, 0.462997f, 0.496629f, 0.522719f,
+1, 9, 0.736015f, 0.374503f, 0.316029f, 0.399358f, 0.392883f,
+1, 9, 0.727129f, 0.662469f, 0.373687f, 0.687877f, 0.441335f,
+    };
+    Mat ref(N0 + N1, 7, CV_32FC1, (void*)ref_);
+
+    double scoreDiff = (target == DNN_TARGET_OPENCL_FP16 || target == DNN_TARGET_MYRIAD) ? 0.006 : 8e-5;
+    double iouDiff = (target == DNN_TARGET_OPENCL_FP16 || target == DNN_TARGET_MYRIAD) ? 0.042 : 3e-4;
+
+    std::string config_file = "yolov4x-mish.cfg";
+    std::string weights_file = "yolov4x-mish.weights";
+
+#if defined(INF_ENGINE_RELEASE)
+    if ((backend == DNN_BACKEND_INFERENCE_ENGINE_NN_BUILDER_2019 ||
+         backend == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH) && target == DNN_TARGET_MYRIAD &&
+        getInferenceEngineVPUType() == CV_DNN_INFERENCE_ENGINE_VPU_TYPE_MYRIAD_X)
+    {
+        scoreDiff = 0.04;
+        iouDiff = 0.2;
+    }
+#endif
+
+    {
+        SCOPED_TRACE("batch size 1");
+        testDarknetModel(config_file, weights_file, ref.rowRange(0, N0), scoreDiff, iouDiff);
+    }
+
+    {
+        SCOPED_TRACE("batch size 2");
+
+#if defined(INF_ENGINE_RELEASE)
+        if (backend == DNN_BACKEND_INFERENCE_ENGINE_NN_BUILDER_2019)
+        {
+            if (target == DNN_TARGET_OPENCL)
+                applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_OPENCL, CV_TEST_TAG_DNN_SKIP_IE_VERSION);
+            else if (target == DNN_TARGET_OPENCL_FP16 && INF_ENGINE_VER_MAJOR_LE(202010000))
+                applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_OPENCL_FP16, CV_TEST_TAG_DNN_SKIP_IE_VERSION);
+            else if (target == DNN_TARGET_MYRIAD &&
+                     getInferenceEngineVPUType() == CV_DNN_INFERENCE_ENGINE_VPU_TYPE_MYRIAD_X)
+                applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_MYRIAD_X);
+        }
+#endif
+
+        testDarknetModel(config_file, weights_file, ref, scoreDiff, iouDiff);
+    }
+}
+
 INSTANTIATE_TEST_CASE_P(/**/, Test_Darknet_nets, dnnBackendsAndTargets());