#include <opencv2/dnn.hpp>
#include <opencv2/imgproc.hpp>
#include <opencv2/highgui.hpp>
#include <opencv2/objdetect.hpp>
#include <iostream>

using namespace cv;
using namespace std;
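
// Each row of the Mat produced by FaceDetectorYN::detect() describes one detected face as
// 15 floats: [0..3] bounding box x, y, width, height; [4..13] five landmark (x, y) pairs
// (right eye, left eye, nose tip, right and left mouth corner, following the FaceDetectorYN
// documentation); [14] the detection score. visualize() below relies on exactly this layout.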
static void visualize(Mat& input, int frame, Mat& faces, double fps, int thickness = 2)
{
    std::string fpsString = cv::format("FPS : %.2f", (float)fps);
    if (frame >= 0)
        cout << "Frame " << frame << ", ";
    cout << fpsString << endl;
    for (int i = 0; i < faces.rows; i++)
    {
        // Print results
        cout << "Face " << i
             << ", top-left coordinates: (" << faces.at<float>(i, 0) << ", " << faces.at<float>(i, 1) << "), "
             << "box width: " << faces.at<float>(i, 2) << ", box height: " << faces.at<float>(i, 3) << ", "
             << "score: " << cv::format("%.2f", faces.at<float>(i, 14))
             << endl;
        // Draw bounding box
        rectangle(input, Rect2i(int(faces.at<float>(i, 0)), int(faces.at<float>(i, 1)), int(faces.at<float>(i, 2)), int(faces.at<float>(i, 3))), Scalar(0, 255, 0), thickness);
        // Draw landmarks
        circle(input, Point2i(int(faces.at<float>(i, 4)), int(faces.at<float>(i, 5))), 2, Scalar(255, 0, 0), thickness);
        circle(input, Point2i(int(faces.at<float>(i, 6)), int(faces.at<float>(i, 7))), 2, Scalar(0, 0, 255), thickness);
        circle(input, Point2i(int(faces.at<float>(i, 8)), int(faces.at<float>(i, 9))), 2, Scalar(0, 255, 0), thickness);
        circle(input, Point2i(int(faces.at<float>(i, 10)), int(faces.at<float>(i, 11))), 2, Scalar(255, 0, 255), thickness);
        circle(input, Point2i(int(faces.at<float>(i, 12)), int(faces.at<float>(i, 13))), 2, Scalar(0, 255, 255), thickness);
    }
    putText(input, fpsString, Point(0, 15), FONT_HERSHEY_SIMPLEX, 0.5, Scalar(0, 255, 0), 2);
}
int main(int argc, char** argv)
{
    CommandLineParser parser(argc, argv,
        "{help  h           |            | Print this message}"
        "{image1 i1         |            | Path to the input image1. Omit for detecting through VideoCapture}"
        "{image2 i2         |            | Path to the input image2. When both image1 and image2 are given, the program tries to find a face on each image and runs the face recognition algorithm}"
        "{video v           | 0          | Path to the input video}"
        "{scale sc          | 1.0        | Scale factor used to resize input video frames}"
        "{fd_model fd       | face_detection_yunet_2021dec.onnx | Path to the face detection model. Download yunet.onnx at https://github.com/opencv/opencv_zoo/tree/master/models/face_detection_yunet}"
        "{fr_model fr       | face_recognition_sface_2021dec.onnx | Path to the face recognition model. Download the model at https://github.com/opencv/opencv_zoo/tree/master/models/face_recognition_sface}"
        "{score_threshold   | 0.9        | Filter out faces with score < score_threshold}"
        "{nms_threshold     | 0.3        | Suppress bounding boxes with IoU >= nms_threshold}"
        "{top_k             | 5000       | Keep top_k bounding boxes before NMS}"
        "{save s            | false      | Set true to save results. This flag is invalid when using camera}"
    );
    if (parser.has("help"))
    {
        parser.printMessage();
        return 0;
    }

    String fd_modelPath = parser.get<String>("fd_model");
    String fr_modelPath = parser.get<String>("fr_model");

    float scoreThreshold = parser.get<float>("score_threshold");
    float nmsThreshold = parser.get<float>("nms_threshold");
    int topK = parser.get<int>("top_k");

    bool save = parser.get<bool>("save");
    float scale = parser.get<float>("scale");

    double cosine_similar_thresh = 0.363;
    double l2norm_similar_thresh = 1.128;
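    // Note: 0.363 (cosine similarity) and 1.128 (L2 distance) are the decision thresholds
    // commonly recommended for the SFace model. They are model-specific operating points,
    // so tune them on your own data if you need a different false-accept / false-reject
    // trade-off.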
    //! [initialize_FaceDetectorYN]
    // Initialize FaceDetectorYN: model path, config path (empty for ONNX models),
    // input size (updated later via setInputSize), score threshold, NMS threshold and top_k.
    Ptr<FaceDetectorYN> detector = FaceDetectorYN::create(fd_modelPath, "", Size(320, 320), scoreThreshold, nmsThreshold, topK);
    //! [initialize_FaceDetectorYN]

    TickMeter tm;
    // If input is an image
    if (parser.has("image1"))
    {
        String input1 = parser.get<String>("image1");
        Mat image1 = imread(samples::findFile(input1));
        if (image1.empty())
        {
            std::cerr << "Cannot read image: " << input1 << std::endl;
            return 2;
        }

        int imageWidth = int(image1.cols * scale);
        int imageHeight = int(image1.rows * scale);
        resize(image1, image1, Size(imageWidth, imageHeight));

        tm.start();

        //! [inference]
        // Set input size before inference
        detector->setInputSize(image1.size());

        Mat faces1;
        detector->detect(image1, faces1);
        if (faces1.rows < 1)
        {
            std::cerr << "Cannot find a face in " << input1 << std::endl;
            return 1;
        }
        //! [inference]

        tm.stop();

        // Draw results on the input image
        visualize(image1, -1, faces1, tm.getFPS());

        // Save results if save is true
        if (save)
        {
            cout << "Saving result.jpg...\n";
            imwrite("result.jpg", image1);
        }

        // Visualize results
        imshow("image1", image1);
        pollKey();  // handle UI events to show content
if ( parser . has ( " image2 " ) )
{
String input2 = parser . get < String > ( " image2 " ) ;
Mat image2 = imread ( samples : : findFile ( input2 ) ) ;
if ( image2 . empty ( ) )
{
std : : cerr < < " Cannot read image2: " < < input2 < < std : : endl ;
return 2 ;
}
tm . reset ( ) ;
tm . start ( ) ;
detector - > setInputSize ( image2 . size ( ) ) ;
Mat faces2 ;
detector - > detect ( image2 , faces2 ) ;
if ( faces2 . rows < 1 )
{
std : : cerr < < " Cannot find a face in " < < input2 < < std : : endl ;
return 1 ;
}
tm . stop ( ) ;
visualize ( image2 , - 1 , faces2 , tm . getFPS ( ) ) ;
if ( save )
{
cout < < " Saving result2.jpg... \n " ;
imwrite ( " result2.jpg " , image2 ) ;
}
imshow ( " image2 " , image2 ) ;
pollKey ( ) ;
//! [initialize_FaceRecognizerSF]
// Initialize FaceRecognizerSF
Ptr < FaceRecognizerSF > faceRecognizer = FaceRecognizerSF : : create ( fr_modelPath , " " ) ;
//! [initialize_FaceRecognizerSF]
            //! [facerecognizer]
            // Align and crop the facial image using the first face of each detection result
            Mat aligned_face1, aligned_face2;
            faceRecognizer->alignCrop(image1, faces1.row(0), aligned_face1);
            faceRecognizer->alignCrop(image2, faces2.row(0), aligned_face2);

            // Run feature extraction on the aligned faces
            Mat feature1, feature2;
            faceRecognizer->feature(aligned_face1, feature1);
            feature1 = feature1.clone();  // clone so the result is not overwritten by the next feature() call
            faceRecognizer->feature(aligned_face2, feature2);
            feature2 = feature2.clone();
            //! [facerecognizer]
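            // feature1 and feature2 are single-row float feature vectors (128-dimensional
            // for the SFace model used here); match() below compares these two vectors.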
            //! [match]
            double cos_score = faceRecognizer->match(feature1, feature2, FaceRecognizerSF::DisType::FR_COSINE);
            double L2_score = faceRecognizer->match(feature1, feature2, FaceRecognizerSF::DisType::FR_NORM_L2);
            //! [match]
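            // FR_COSINE returns a similarity (higher means more alike), while FR_NORM_L2
            // returns a distance (lower means more alike), which is why the two checks
            // below compare against their thresholds in opposite directions.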
            if (cos_score >= cosine_similar_thresh)
            {
                std::cout << "They have the same identity; ";
            }
            else
            {
                std::cout << "They have different identities; ";
            }
            std::cout << "Cosine Similarity: " << cos_score << ", threshold: " << cosine_similar_thresh << ". (higher value means higher similarity, max 1.0)\n";

            if (L2_score <= l2norm_similar_thresh)
            {
                std::cout << "They have the same identity; ";
            }
            else
            {
                std::cout << "They have different identities; ";
            }
            std::cout << "NormL2 Distance: " << L2_score << ", threshold: " << l2norm_similar_thresh << ". (lower value means higher similarity, min 0.0)\n";
        }
        cout << "Press any key to exit..." << endl;
        waitKey(0);
    }
    else
    {
        int frameWidth, frameHeight;
        VideoCapture capture;
        std::string video = parser.get<string>("video");
        if (video.size() == 1 && isdigit(video[0]))
            capture.open(parser.get<int>("video"));  // a single digit is treated as a camera index
        else
            capture.open(samples::findFileOrKeep(video));  // keep GStreamer pipelines
        if (capture.isOpened())
        {
            frameWidth = int(capture.get(CAP_PROP_FRAME_WIDTH) * scale);
            frameHeight = int(capture.get(CAP_PROP_FRAME_HEIGHT) * scale);
            cout << "Video " << video
                 << ": width=" << frameWidth
                 << ", height=" << frameHeight
                 << endl;
        }
        else
        {
            cout << "Could not initialize video capturing: " << video << "\n";
            return 1;
        }

        // The detector expects frames of exactly this size, so set it once before the loop
        detector->setInputSize(Size(frameWidth, frameHeight));

        cout << "Press 'SPACE' to save frame, any other key to exit..." << endl;
        int nFrame = 0;
        for (;;)
        {
            // Get frame
            Mat frame;
            if (!capture.read(frame))
            {
                cerr << "Can't grab frame! Stop\n";
                break;
            }

            resize(frame, frame, Size(frameWidth, frameHeight));

            // Inference
            Mat faces;
            tm.start();
            detector->detect(frame, faces);
            tm.stop();

            Mat result = frame.clone();
            // Draw results on the input image
            visualize(result, nFrame, faces, tm.getFPS());

            // Visualize results
            imshow("Live", result);

            int key = waitKey(1);
            bool saveFrame = save;
            if (key == ' ')
            {
                saveFrame = true;
                key = 0;  // handled
            }

            if (saveFrame)
            {
                std::string frame_name = cv::format("frame_%05d.png", nFrame);
                std::string result_name = cv::format("result_%05d.jpg", nFrame);
                cout << "Saving '" << frame_name << "' and '" << result_name << "' ...\n";
                imwrite(frame_name, frame);
                imwrite(result_name, result);
            }

            ++nFrame;

            if (key > 0)
                break;
        }
        cout << "Processed " << nFrame << " frames" << endl;
    }
    cout << "Done." << endl;
    return 0;
}