Reviewer summary (text before the first "diff --git" line is outside every hunk).
  * face_detect.cpp: removes generatePriors() and the "loc"/"conf"/"iou" outputs.
    detect() now zero-pads the input on the bottom/right up to a multiple of 32
    (padWithDivisor) and postProcess() decodes the "cls_*", "obj_*", "bbox_*" and
    "kps_*" outputs per grid cell for strides 8, 16 and 32.
  * test_face.cpp: both tests load yunet-202303.onnx; matchThreshold goes 0.9 -> 0.7,
    l2disThreshold goes 5 -> 15, and a break is added after the landmark loop.
NOTE(review): this copy was rebuilt from a whitespace-collapsed paste. Line breaks,
  indentation, blank lines and the template arguments (std::vector<...>, Mat::at<float>,
  Ptr<...>, std::map<...>) are reconstructed; blank lines were placed so that each hunk
  body agrees with the counts in its @@ header. Confirm against the upstream commit
  before applying.
NOTE(review): postProcess() reads output_blobs[i + strides.size() * 3]; no check that
  forward() produced 12 blobs is visible in these hunks. Worth confirming.

diff --git a/modules/objdetect/src/face_detect.cpp b/modules/objdetect/src/face_detect.cpp
index 10259a32e6..17c982d92f 100644
--- a/modules/objdetect/src/face_detect.cpp
+++ b/modules/objdetect/src/face_detect.cpp
@@ -6,6 +6,7 @@
 
 #include "opencv2/imgproc.hpp"
 #include "opencv2/core.hpp"
+
 #ifdef HAVE_OPENCV_DNN
 #include "opencv2/dnn.hpp"
 #endif
@@ -27,6 +28,8 @@ public:
                        int top_k,
                        int backend_id,
                        int target_id)
+                       :divisor(32),
+                       strides({8, 16, 32})
     {
         net = dnn::readNet(model, config);
         CV_Assert(!net.empty());
@@ -37,18 +40,20 @@ public:
         inputW = input_size.width;
         inputH = input_size.height;
 
+        padW = (int((inputW - 1) / divisor) + 1) * divisor;
+        padH = (int((inputH - 1) / divisor) + 1) * divisor;
+
         scoreThreshold = score_threshold;
         nmsThreshold = nms_threshold;
         topK = top_k;
-
-        generatePriors();
     }
 
     void setInputSize(const Size& input_size) override
     {
         inputW = input_size.width;
         inputH = input_size.height;
-        generatePriors();
+        padW = ((inputW - 1) / divisor + 1) * divisor;
+        padH = ((inputH - 1) / divisor + 1) * divisor;
     }
 
     Size getInputSize() override
@@ -97,12 +102,14 @@ public:
             return 0;
         }
         CV_CheckEQ(input_image.size(), Size(inputW, inputH), "Size does not match. Call setInputSize(size) if input size does not match the preset size");
+        // Pad input_image with divisor 32
+        Mat pad_image = padWithDivisor(input_image);
 
         // Build blob from input image
-        Mat input_blob = dnn::blobFromImage(input_image);
+        Mat input_blob = dnn::blobFromImage(pad_image);
 
         // Forward
-        std::vector<String> output_names = { "loc", "conf", "iou" };
+        std::vector<String> output_names = { "cls_8", "cls_16", "cls_32", "obj_8", "obj_16", "obj_32", "bbox_8", "bbox_16", "bbox_32", "kps_8", "kps_16", "kps_32" };
         std::vector<Mat> output_blobs;
         net.setInput(input_blob);
         net.forward(output_blobs, output_names);
@@ -113,126 +120,70 @@ public:
         return 1;
     }
 private:
-    void generatePriors()
-    {
-        // Calculate shapes of different scales according to the shape of input image
-        Size feature_map_2nd = {
-            int(int((inputW+1)/2)/2), int(int((inputH+1)/2)/2)
-        };
-        Size feature_map_3rd = {
-            int(feature_map_2nd.width/2), int(feature_map_2nd.height/2)
-        };
-        Size feature_map_4th = {
-            int(feature_map_3rd.width/2), int(feature_map_3rd.height/2)
-        };
-        Size feature_map_5th = {
-            int(feature_map_4th.width/2), int(feature_map_4th.height/2)
-        };
-        Size feature_map_6th = {
-            int(feature_map_5th.width/2), int(feature_map_5th.height/2)
-        };
-
-        std::vector<Size> feature_map_sizes;
-        feature_map_sizes.push_back(feature_map_3rd);
-        feature_map_sizes.push_back(feature_map_4th);
-        feature_map_sizes.push_back(feature_map_5th);
-        feature_map_sizes.push_back(feature_map_6th);
-
-        // Fixed params for generating priors
-        const std::vector<std::vector<float>> min_sizes = {
-            {10.0f, 16.0f, 24.0f},
-            {32.0f, 48.0f},
-            {64.0f, 96.0f},
-            {128.0f, 192.0f, 256.0f}
-        };
-        CV_Assert(min_sizes.size() == feature_map_sizes.size()); // just to keep vectors in sync
-        const std::vector<int> steps = { 8, 16, 32, 64 };
-
-        // Generate priors
-        priors.clear();
-        for (size_t i = 0; i < feature_map_sizes.size(); ++i)
-        {
-            Size feature_map_size = feature_map_sizes[i];
-            std::vector<float> min_size = min_sizes[i];
-
-            for (int _h = 0; _h < feature_map_size.height; ++_h)
-            {
-                for (int _w = 0; _w < feature_map_size.width; ++_w)
-                {
-                    for (size_t j = 0; j < min_size.size(); ++j)
-                    {
-                        float s_kx = min_size[j] / inputW;
-                        float s_ky = min_size[j] / inputH;
-
-                        float cx = (_w + 0.5f) * steps[i] / inputW;
-                        float cy = (_h + 0.5f) * steps[i] / inputH;
-
-                        Rect2f prior = { cx, cy, s_kx, s_ky };
-                        priors.push_back(prior);
-                    }
-                }
-            }
-        }
-    }
-
     Mat postProcess(const std::vector<Mat>& output_blobs)
     {
-        // Extract from output_blobs
-        Mat loc = output_blobs[0];
-        Mat conf = output_blobs[1];
-        Mat iou = output_blobs[2];
-
-        // Decode from deltas and priors
-        const std::vector<float> variance = {0.1f, 0.2f};
-        float* loc_v = (float*)(loc.data);
-        float* conf_v = (float*)(conf.data);
-        float* iou_v = (float*)(iou.data);
         Mat faces;
-        // (tl_x, tl_y, w, h, re_x, re_y, le_x, le_y, nt_x, nt_y, rcm_x, rcm_y, lcm_x, lcm_y, score)
-        // 'tl': top left point of the bounding box
-        // 're': right eye, 'le': left eye
-        // 'nt': nose tip
-        // 'rcm': right corner of mouth, 'lcm': left corner of mouth
-        Mat face(1, 15, CV_32FC1);
-        for (size_t i = 0; i < priors.size(); ++i) {
-            // Get score
-            float clsScore = conf_v[i*2+1];
-            float iouScore = iou_v[i];
-            // Clamp
-            if (iouScore < 0.f) {
-                iouScore = 0.f;
-            }
-            else if (iouScore > 1.f) {
-                iouScore = 1.f;
+        for (size_t i = 0; i < strides.size(); ++i) {
+            int cols = int(padW / strides[i]);
+            int rows = int(padH / strides[i]);
+
+            // Extract from output_blobs
+            Mat cls = output_blobs[i];
+            Mat obj = output_blobs[i + strides.size() * 1];
+            Mat bbox = output_blobs[i + strides.size() * 2];
+            Mat kps = output_blobs[i + strides.size() * 3];
+
+            // Decode from predictions
+            float* cls_v = (float*)(cls.data);
+            float* obj_v = (float*)(obj.data);
+            float* bbox_v = (float*)(bbox.data);
+            float* kps_v = (float*)(kps.data);
+
+            // (tl_x, tl_y, w, h, re_x, re_y, le_x, le_y, nt_x, nt_y, rcm_x, rcm_y, lcm_x, lcm_y, score)
+            // 'tl': top left point of the bounding box
+            // 're': right eye, 'le': left eye
+            // 'nt': nose tip
+            // 'rcm': right corner of mouth, 'lcm': left corner of mouth
+            Mat face(1, 15, CV_32FC1);
+
+            for(int r = 0; r < rows; ++r) {
+                for(int c = 0; c < cols; ++c) {
+                    size_t idx = r * cols + c;
+
+                    // Get score
+                    float cls_score = cls_v[idx];
+                    float obj_score = obj_v[idx];
+
+                    // Clamp
+                    cls_score = MIN(cls_score, 1.f);
+                    cls_score = MAX(cls_score, 0.f);
+                    obj_score = MIN(obj_score, 1.f);
+                    obj_score = MAX(obj_score, 0.f);
+                    float score = std::sqrt(cls_score * obj_score);
+                    face.at<float>(0, 14) = score;
+
+                    // Get bounding box
+                    float cx = ((c + bbox_v[idx * 4 + 0]) * strides[i]);
+                    float cy = ((r + bbox_v[idx * 4 + 1]) * strides[i]);
+                    float w = exp(bbox_v[idx * 4 + 2]) * strides[i];
+                    float h = exp(bbox_v[idx * 4 + 3]) * strides[i];
+
+                    float x1 = cx - w / 2.f;
+                    float y1 = cy - h / 2.f;
+
+                    face.at<float>(0, 0) = x1;
+                    face.at<float>(0, 1) = y1;
+                    face.at<float>(0, 2) = w;
+                    face.at<float>(0, 3) = h;
+
+                    // Get landmarks
+                    for(int n = 0; n < 5; ++n) {
+                        face.at<float>(0, 4 + 2 * n) = (kps_v[idx * 10 + 2 * n] + c) * strides[i];
+                        face.at<float>(0, 4 + 2 * n + 1) = (kps_v[idx * 10 + 2 * n + 1]+ r) * strides[i];
+                    }
+                    faces.push_back(face);
+                }
             }
-            float score = std::sqrt(clsScore * iouScore);
-            face.at<float>(0, 14) = score;
-
-            // Get bounding box
-            float cx = (priors[i].x + loc_v[i*14+0] * variance[0] * priors[i].width) * inputW;
-            float cy = (priors[i].y + loc_v[i*14+1] * variance[0] * priors[i].height) * inputH;
-            float w = priors[i].width * exp(loc_v[i*14+2] * variance[0]) * inputW;
-            float h = priors[i].height * exp(loc_v[i*14+3] * variance[1]) * inputH;
-            float x1 = cx - w / 2;
-            float y1 = cy - h / 2;
-            face.at<float>(0, 0) = x1;
-            face.at<float>(0, 1) = y1;
-            face.at<float>(0, 2) = w;
-            face.at<float>(0, 3) = h;
-
-            // Get landmarks
-            face.at<float>(0, 4) = (priors[i].x + loc_v[i*14+ 4] * variance[0] * priors[i].width) * inputW; // right eye, x
-            face.at<float>(0, 5) = (priors[i].y + loc_v[i*14+ 5] * variance[0] * priors[i].height) * inputH; // right eye, y
-            face.at<float>(0, 6) = (priors[i].x + loc_v[i*14+ 6] * variance[0] * priors[i].width) * inputW; // left eye, x
-            face.at<float>(0, 7) = (priors[i].y + loc_v[i*14+ 7] * variance[0] * priors[i].height) * inputH; // left eye, y
-            face.at<float>(0, 8) = (priors[i].x + loc_v[i*14+ 8] * variance[0] * priors[i].width) * inputW; // nose tip, x
-            face.at<float>(0, 9) = (priors[i].y + loc_v[i*14+ 9] * variance[0] * priors[i].height) * inputH; // nose tip, y
-            face.at<float>(0, 10) = (priors[i].x + loc_v[i*14+10] * variance[0] * priors[i].width) * inputW; // right corner of mouth, x
-            face.at<float>(0, 11) = (priors[i].y + loc_v[i*14+11] * variance[0] * priors[i].height) * inputH; // right corner of mouth, y
-            face.at<float>(0, 12) = (priors[i].x + loc_v[i*14+12] * variance[0] * priors[i].width) * inputW; // left corner of mouth, x
-            face.at<float>(0, 13) = (priors[i].y + loc_v[i*14+13] * variance[0] * priors[i].height) * inputH; // left corner of mouth, y
-
-            faces.push_back(face);
         }
 
         if (faces.rows > 1)
@@ -265,16 +216,27 @@ private:
             return faces;
         }
     }
+
+    Mat padWithDivisor(InputArray& input_image)
+    {
+        int bottom = padH - inputH;
+        int right = padW - inputW;
+        Mat pad_image;
+        copyMakeBorder(input_image, pad_image, 0, bottom, 0, right, BORDER_CONSTANT, 0);
+        return pad_image;
+    }
 private:
     dnn::Net net;
 
     int inputW;
     int inputH;
+    int padW;
+    int padH;
+    const int divisor;
+    int topK;
     float scoreThreshold;
     float nmsThreshold;
-    int topK;
-
-    std::vector<Rect2f> priors;
+    const std::vector<int> strides;
 };
 #endif
 
diff --git a/modules/objdetect/test/test_face.cpp b/modules/objdetect/test/test_face.cpp
index d33032fa2f..e55401c061 100644
--- a/modules/objdetect/test/test_face.cpp
+++ b/modules/objdetect/test/test_face.cpp
@@ -65,20 +65,16 @@ TEST(Objdetect_face_detection, regression)
 {
     // Pre-set params
     float scoreThreshold = 0.7f;
-    float matchThreshold = 0.9f;
-    float l2disThreshold = 5.0f;
+    float matchThreshold = 0.7f;
+    float l2disThreshold = 15.0f;
     int numLM = 5;
     int numCoords = 4 + 2 * numLM;
 
     // Load ground truth labels
     std::map<std::string, Mat> gt = blobFromTXT(findDataFile("dnn_face/detection/cascades_labels.txt"), numCoords);
-    // for (auto item: gt)
-    // {
-    //     std::cout << item.first << " " << item.second.size() << std::endl;
-    // }
 
     // Initialize detector
-    std::string model = findDataFile("dnn/onnx/models/yunet-202202.onnx", false);
+    std::string model = findDataFile("dnn/onnx/models/yunet-202303.onnx", false);
     Ptr<FaceDetectorYN> faceDetector = FaceDetectorYN::create(model, "", Size(300, 300));
     faceDetector->setScoreThreshold(0.7f);
 
@@ -137,6 +133,7 @@ TEST(Objdetect_face_detection, regression)
                         lmMatched[lmIdx] = true;
                     }
                 }
+                break;
             }
             EXPECT_TRUE(boxMatched) << "In image " << item.first << ", cannot match resBox " << resBox << " with any ground truth.";
             if (boxMatched)
@@ -178,7 +175,7 @@ TEST(Objdetect_face_recognition, regression)
     }
 
     // Initialize detector
-    std::string detect_model = findDataFile("dnn/onnx/models/yunet-202202.onnx", false);
+    std::string detect_model = findDataFile("dnn/onnx/models/yunet-202303.onnx", false);
     Ptr<FaceDetectorYN> faceDetector = FaceDetectorYN::create(detect_model, "", Size(150, 150), score_thresh, nms_thresh);
 
     std::string recog_model = findDataFile("dnn/onnx/models/face_recognizer_fast.onnx", false);