#include #include #include #include #include using namespace cv; using namespace std; static void visualize(Mat& input, int frame, Mat& faces, double fps, int thickness = 2) { std::string fpsString = cv::format("FPS : %.2f", (float)fps); if (frame >= 0) cout << "Frame " << frame << ", "; cout << "FPS: " << fpsString << endl; for (int i = 0; i < faces.rows; i++) { // Print results cout << "Face " << i << ", top-left coordinates: (" << faces.at(i, 0) << ", " << faces.at(i, 1) << "), " << "box width: " << faces.at(i, 2) << ", box height: " << faces.at(i, 3) << ", " << "score: " << cv::format("%.2f", faces.at(i, 14)) << endl; // Draw bounding box rectangle(input, Rect2i(int(faces.at(i, 0)), int(faces.at(i, 1)), int(faces.at(i, 2)), int(faces.at(i, 3))), Scalar(0, 255, 0), thickness); // Draw landmarks circle(input, Point2i(int(faces.at(i, 4)), int(faces.at(i, 5))), 2, Scalar(255, 0, 0), thickness); circle(input, Point2i(int(faces.at(i, 6)), int(faces.at(i, 7))), 2, Scalar(0, 0, 255), thickness); circle(input, Point2i(int(faces.at(i, 8)), int(faces.at(i, 9))), 2, Scalar(0, 255, 0), thickness); circle(input, Point2i(int(faces.at(i, 10)), int(faces.at(i, 11))), 2, Scalar(255, 0, 255), thickness); circle(input, Point2i(int(faces.at(i, 12)), int(faces.at(i, 13))), 2, Scalar(0, 255, 255), thickness); } putText(input, fpsString, Point(0, 15), FONT_HERSHEY_SIMPLEX, 0.5, Scalar(0, 255, 0), 2); } int main(int argc, char** argv) { CommandLineParser parser(argc, argv, "{help h | | Print this message}" "{image1 i1 | | Path to the input image1. Omit for detecting through VideoCapture}" "{image2 i2 | | Path to the input image2. When image1 and image2 parameters given then the program try to find a face on both images and runs face recognition algorithm}" "{video v | 0 | Path to the input video}" "{scale sc | 1.0 | Scale factor used to resize input video frames}" "{fd_model fd | yunet.onnx | Path to the model. Download yunet.onnx in https://github.com/ShiqiYu/libfacedetection.train/tree/master/tasks/task1/onnx }" "{fr_model fr | face_recognizer_fast.onnx | Path to the face recognition model. Download the model at https://drive.google.com/file/d/1ClK9WiB492c5OZFKveF3XiHCejoOxINW/view}" "{score_threshold | 0.9 | Filter out faces of score < score_threshold}" "{nms_threshold | 0.3 | Suppress bounding boxes of iou >= nms_threshold}" "{top_k | 5000 | Keep top_k bounding boxes before NMS}" "{save s | false | Set true to save results. This flag is invalid when using camera}" ); if (parser.has("help")) { parser.printMessage(); return 0; } String fd_modelPath = parser.get("fd_model"); String fr_modelPath = parser.get("fr_model"); float scoreThreshold = parser.get("score_threshold"); float nmsThreshold = parser.get("nms_threshold"); int topK = parser.get("top_k"); bool save = parser.get("save"); double cosine_similar_thresh = 0.363; double l2norm_similar_thresh = 1.128; //! [initialize_FaceDetectorYN] // Initialize FaceDetectorYN Ptr detector = FaceDetectorYN::create(fd_modelPath, "", Size(320, 320), scoreThreshold, nmsThreshold, topK); //! [initialize_FaceDetectorYN] TickMeter tm; // If input is an image if (parser.has("image1")) { String input1 = parser.get("image1"); Mat image1 = imread(samples::findFile(input1)); if (image1.empty()) { std::cerr << "Cannot read image: " << input1 << std::endl; return 2; } tm.start(); //! [inference] // Set input size before inference detector->setInputSize(image1.size()); Mat faces1; detector->detect(image1, faces1); if (faces1.rows < 1) { std::cerr << "Cannot find a face in " << input1 << std::endl; return 1; } //! [inference] tm.stop(); // Draw results on the input image visualize(image1, -1, faces1, tm.getFPS()); // Save results if save is true if (save) { cout << "Saving result.jpg...\n"; imwrite("result.jpg", image1); } // Visualize results imshow("image1", image1); pollKey(); // handle UI events to show content if (parser.has("image2")) { String input2 = parser.get("image2"); Mat image2 = imread(samples::findFile(input2)); if (image2.empty()) { std::cerr << "Cannot read image2: " << input2 << std::endl; return 2; } tm.reset(); tm.start(); detector->setInputSize(image2.size()); Mat faces2; detector->detect(image2, faces2); if (faces2.rows < 1) { std::cerr << "Cannot find a face in " << input2 << std::endl; return 1; } tm.stop(); visualize(image2, -1, faces2, tm.getFPS()); if (save) { cout << "Saving result2.jpg...\n"; imwrite("result2.jpg", image2); } imshow("image2", image2); pollKey(); //! [initialize_FaceRecognizerSF] // Initialize FaceRecognizerSF Ptr faceRecognizer = FaceRecognizerSF::create(fr_modelPath, ""); //! [initialize_FaceRecognizerSF] //! [facerecognizer] // Aligning and cropping facial image through the first face of faces detected. Mat aligned_face1, aligned_face2; faceRecognizer->alignCrop(image1, faces1.row(0), aligned_face1); faceRecognizer->alignCrop(image2, faces2.row(0), aligned_face2); // Run feature extraction with given aligned_face Mat feature1, feature2; faceRecognizer->feature(aligned_face1, feature1); feature1 = feature1.clone(); faceRecognizer->feature(aligned_face2, feature2); feature2 = feature2.clone(); //! [facerecognizer] //! [match] double cos_score = faceRecognizer->match(feature1, feature2, FaceRecognizerSF::DisType::FR_COSINE); double L2_score = faceRecognizer->match(feature1, feature2, FaceRecognizerSF::DisType::FR_NORM_L2); //! [match] if (cos_score >= cosine_similar_thresh) { std::cout << "They have the same identity;"; } else { std::cout << "They have different identities;"; } std::cout << " Cosine Similarity: " << cos_score << ", threshold: " << cosine_similar_thresh << ". (higher value means higher similarity, max 1.0)\n"; if (L2_score <= l2norm_similar_thresh) { std::cout << "They have the same identity;"; } else { std::cout << "They have different identities."; } std::cout << " NormL2 Distance: " << L2_score << ", threshold: " << l2norm_similar_thresh << ". (lower value means higher similarity, min 0.0)\n"; } cout << "Press any key to exit..." << endl; waitKey(0); } else { int frameWidth, frameHeight; float scale = parser.get("scale"); VideoCapture capture; std::string video = parser.get("video"); if (video.size() == 1 && isdigit(video[0])) capture.open(parser.get("video")); else capture.open(samples::findFileOrKeep(video)); // keep GStreamer pipelines if (capture.isOpened()) { frameWidth = int(capture.get(CAP_PROP_FRAME_WIDTH) * scale); frameHeight = int(capture.get(CAP_PROP_FRAME_HEIGHT) * scale); cout << "Video " << video << ": width=" << frameWidth << ", height=" << frameHeight << endl; } else { cout << "Could not initialize video capturing: " << video << "\n"; return 1; } detector->setInputSize(Size(frameWidth, frameHeight)); cout << "Press 'SPACE' to save frame, any other key to exit..." << endl; int nFrame = 0; for (;;) { // Get frame Mat frame; if (!capture.read(frame)) { cerr << "Can't grab frame! Stop\n"; break; } resize(frame, frame, Size(frameWidth, frameHeight)); // Inference Mat faces; tm.start(); detector->detect(frame, faces); tm.stop(); Mat result = frame.clone(); // Draw results on the input image visualize(result, nFrame, faces, tm.getFPS()); // Visualize results imshow("Live", result); int key = waitKey(1); bool saveFrame = save; if (key == ' ') { saveFrame = true; key = 0; // handled } if (saveFrame) { std::string frame_name = cv::format("frame_%05d.png", nFrame); std::string result_name = cv::format("result_%05d.jpg", nFrame); cout << "Saving '" << frame_name << "' and '" << result_name << "' ...\n"; imwrite(frame_name, frame); imwrite(result_name, result); } ++nFrame; if (key > 0) break; } cout << "Processed " << nFrame << " frames" << endl; } cout << "Done." << endl; return 0; }