// Open Source Computer Vision Library
// https://opencv.org/
// This file is part of OpenCV project. |
|
// It is subject to the license terms in the LICENSE file found in the top-level directory |
|
// of this distribution and at http://opencv.org/license.html. |
|
|
|
#include "precomp.hpp" |
|
|
|
#include "opencv2/imgproc.hpp" |
|
#include "opencv2/core.hpp" |
|
|
|
#ifdef HAVE_OPENCV_DNN |
|
#include "opencv2/dnn.hpp" |
|
#endif |
|
|
|
#include <algorithm> |
|
|
|
namespace cv |
|
{ |
|
|
|
#ifdef HAVE_OPENCV_DNN |
|
class FaceDetectorYNImpl : public FaceDetectorYN |
|
{ |
|
public: |
|
FaceDetectorYNImpl(const String& model, |
|
const String& config, |
|
const Size& input_size, |
|
float score_threshold, |
|
float nms_threshold, |
|
int top_k, |
|
int backend_id, |
|
int target_id) |
|
:divisor(32), |
|
strides({8, 16, 32}) |
|
{ |
|
net = dnn::readNet(model, config); |
|
CV_Assert(!net.empty()); |
|
|
|
net.setPreferableBackend(backend_id); |
|
net.setPreferableTarget(target_id); |
|
|
|
inputW = input_size.width; |
|
inputH = input_size.height; |
|
|
|
padW = (int((inputW - 1) / divisor) + 1) * divisor; |
|
padH = (int((inputH - 1) / divisor) + 1) * divisor; |
|
|
|
scoreThreshold = score_threshold; |
|
nmsThreshold = nms_threshold; |
|
topK = top_k; |
|
} |
|
|
|
FaceDetectorYNImpl(const String& framework, |
|
const std::vector<uchar>& bufferModel, |
|
const std::vector<uchar>& bufferConfig, |
|
const Size& input_size, |
|
float score_threshold, |
|
float nms_threshold, |
|
int top_k, |
|
int backend_id, |
|
int target_id) |
|
:divisor(32), |
|
strides({8, 16, 32}) |
|
{ |
|
net = dnn::readNet(framework, bufferModel, bufferConfig); |
|
CV_Assert(!net.empty()); |
|
|
|
net.setPreferableBackend(backend_id); |
|
net.setPreferableTarget(target_id); |
|
|
|
inputW = input_size.width; |
|
inputH = input_size.height; |
|
|
|
padW = (int((inputW - 1) / divisor) + 1) * divisor; |
|
padH = (int((inputH - 1) / divisor) + 1) * divisor; |
|
|
|
scoreThreshold = score_threshold; |
|
nmsThreshold = nms_threshold; |
|
topK = top_k; |
|
} |
|
|
|
void setInputSize(const Size& input_size) override |
|
{ |
|
inputW = input_size.width; |
|
inputH = input_size.height; |
|
padW = ((inputW - 1) / divisor + 1) * divisor; |
|
padH = ((inputH - 1) / divisor + 1) * divisor; |
|
} |
|
|
|
Size getInputSize() override |
|
{ |
|
Size input_size; |
|
input_size.width = inputW; |
|
input_size.height = inputH; |
|
return input_size; |
|
} |
|
|
|
void setScoreThreshold(float score_threshold) override |
|
{ |
|
scoreThreshold = score_threshold; |
|
} |
|
|
|
float getScoreThreshold() override |
|
{ |
|
return scoreThreshold; |
|
} |
|
|
|
void setNMSThreshold(float nms_threshold) override |
|
{ |
|
nmsThreshold = nms_threshold; |
|
} |
|
|
|
float getNMSThreshold() override |
|
{ |
|
return nmsThreshold; |
|
} |
|
|
|
void setTopK(int top_k) override |
|
{ |
|
topK = top_k; |
|
} |
|
|
|
int getTopK() override |
|
{ |
|
return topK; |
|
} |
|
|
|
int detect(InputArray input_image, OutputArray faces) override |
|
{ |
|
// TODO: more checkings should be done? |
|
if (input_image.empty()) |
|
{ |
|
return 0; |
|
} |
|
CV_CheckEQ(input_image.size(), Size(inputW, inputH), "Size does not match. Call setInputSize(size) if input size does not match the preset size"); |
|
|
|
Mat input_blob; |
|
if(input_image.kind() == _InputArray::UMAT) { |
|
// Pad input_image with divisor 32 |
|
UMat pad_image; |
|
padWithDivisor(input_image, pad_image); |
|
// Build blob from input image |
|
input_blob = dnn::blobFromImage(pad_image); |
|
} else { |
|
// Pad input_image with divisor 32 |
|
Mat pad_image; |
|
padWithDivisor(input_image, pad_image); |
|
// Build blob from input image |
|
input_blob = dnn::blobFromImage(pad_image); |
|
} |
|
// Forward |
|
std::vector<String> output_names = { "cls_8", "cls_16", "cls_32", "obj_8", "obj_16", "obj_32", "bbox_8", "bbox_16", "bbox_32", "kps_8", "kps_16", "kps_32" }; |
|
std::vector<Mat> output_blobs; |
|
net.setInput(input_blob); |
|
net.forward(output_blobs, output_names); |
|
|
|
// Post process |
|
Mat results = postProcess(output_blobs); |
|
results.convertTo(faces, CV_32FC1); |
|
return 1; |
|
} |
|
private: |
|
Mat postProcess(const std::vector<Mat>& output_blobs) |
|
{ |
|
Mat faces; |
|
for (size_t i = 0; i < strides.size(); ++i) { |
|
int cols = int(padW / strides[i]); |
|
int rows = int(padH / strides[i]); |
|
|
|
// Extract from output_blobs |
|
Mat cls = output_blobs[i]; |
|
Mat obj = output_blobs[i + strides.size() * 1]; |
|
Mat bbox = output_blobs[i + strides.size() * 2]; |
|
Mat kps = output_blobs[i + strides.size() * 3]; |
|
|
|
// Decode from predictions |
|
float* cls_v = (float*)(cls.data); |
|
float* obj_v = (float*)(obj.data); |
|
float* bbox_v = (float*)(bbox.data); |
|
float* kps_v = (float*)(kps.data); |
|
|
|
// (tl_x, tl_y, w, h, re_x, re_y, le_x, le_y, nt_x, nt_y, rcm_x, rcm_y, lcm_x, lcm_y, score) |
|
// 'tl': top left point of the bounding box |
|
// 're': right eye, 'le': left eye |
|
// 'nt': nose tip |
|
// 'rcm': right corner of mouth, 'lcm': left corner of mouth |
|
Mat face(1, 15, CV_32FC1); |
|
|
|
for(int r = 0; r < rows; ++r) { |
|
for(int c = 0; c < cols; ++c) { |
|
size_t idx = r * cols + c; |
|
|
|
// Get score |
|
float cls_score = cls_v[idx]; |
|
float obj_score = obj_v[idx]; |
|
|
|
// Clamp |
|
cls_score = MIN(cls_score, 1.f); |
|
cls_score = MAX(cls_score, 0.f); |
|
obj_score = MIN(obj_score, 1.f); |
|
obj_score = MAX(obj_score, 0.f); |
|
float score = std::sqrt(cls_score * obj_score); |
|
face.at<float>(0, 14) = score; |
|
|
|
// Checking if the score meets the threshold before adding the face |
|
if (score < scoreThreshold) |
|
continue; |
|
// Get bounding box |
|
float cx = ((c + bbox_v[idx * 4 + 0]) * strides[i]); |
|
float cy = ((r + bbox_v[idx * 4 + 1]) * strides[i]); |
|
float w = exp(bbox_v[idx * 4 + 2]) * strides[i]; |
|
float h = exp(bbox_v[idx * 4 + 3]) * strides[i]; |
|
|
|
float x1 = cx - w / 2.f; |
|
float y1 = cy - h / 2.f; |
|
|
|
face.at<float>(0, 0) = x1; |
|
face.at<float>(0, 1) = y1; |
|
face.at<float>(0, 2) = w; |
|
face.at<float>(0, 3) = h; |
|
|
|
// Get landmarks |
|
for(int n = 0; n < 5; ++n) { |
|
face.at<float>(0, 4 + 2 * n) = (kps_v[idx * 10 + 2 * n] + c) * strides[i]; |
|
face.at<float>(0, 4 + 2 * n + 1) = (kps_v[idx * 10 + 2 * n + 1]+ r) * strides[i]; |
|
} |
|
faces.push_back(face); |
|
} |
|
} |
|
} |
|
|
|
if (faces.rows > 1) |
|
{ |
|
// Retrieve boxes and scores |
|
std::vector<Rect2i> faceBoxes; |
|
std::vector<float> faceScores; |
|
for (int rIdx = 0; rIdx < faces.rows; rIdx++) |
|
{ |
|
faceBoxes.push_back(Rect2i(int(faces.at<float>(rIdx, 0)), |
|
int(faces.at<float>(rIdx, 1)), |
|
int(faces.at<float>(rIdx, 2)), |
|
int(faces.at<float>(rIdx, 3)))); |
|
faceScores.push_back(faces.at<float>(rIdx, 14)); |
|
} |
|
|
|
std::vector<int> keepIdx; |
|
dnn::NMSBoxes(faceBoxes, faceScores, scoreThreshold, nmsThreshold, keepIdx, 1.f, topK); |
|
|
|
// Get NMS results |
|
Mat nms_faces; |
|
for (int idx: keepIdx) |
|
{ |
|
nms_faces.push_back(faces.row(idx)); |
|
} |
|
return nms_faces; |
|
} |
|
else |
|
{ |
|
return faces; |
|
} |
|
} |
|
|
|
void padWithDivisor(InputArray input_image, OutputArray pad_image) |
|
{ |
|
int bottom = padH - inputH; |
|
int right = padW - inputW; |
|
copyMakeBorder(input_image, pad_image, 0, bottom, 0, right, BORDER_CONSTANT, 0); |
|
} |
|
private: |
|
dnn::Net net; |
|
|
|
int inputW; |
|
int inputH; |
|
int padW; |
|
int padH; |
|
const int divisor; |
|
int topK; |
|
float scoreThreshold; |
|
float nmsThreshold; |
|
const std::vector<int> strides; |
|
}; |
|
#endif |
|
|
|
/**
 * Factory for a face detector whose network is read from files.
 *
 * When the library is built with the 'dnn' module, every argument is forwarded
 * unchanged to the FaceDetectorYNImpl constructor. Without the 'dnn' module the
 * call raises cv::Error::StsNotImplemented.
 */
Ptr<FaceDetectorYN> FaceDetectorYN::create(const String& model,
                                           const String& config,
                                           const Size& input_size,
                                           const float score_threshold,
                                           const float nms_threshold,
                                           const int top_k,
                                           const int backend_id,
                                           const int target_id)
{
#ifndef HAVE_OPENCV_DNN
    // No DNN support compiled in: silence unused-parameter warnings, then fail.
    CV_UNUSED(model);
    CV_UNUSED(config);
    CV_UNUSED(input_size);
    CV_UNUSED(score_threshold);
    CV_UNUSED(nms_threshold);
    CV_UNUSED(top_k);
    CV_UNUSED(backend_id);
    CV_UNUSED(target_id);
    CV_Error(cv::Error::StsNotImplemented, "cv::FaceDetectorYN requires enabled 'dnn' module.");
#else
    return makePtr<FaceDetectorYNImpl>(model,
                                       config,
                                       input_size,
                                       score_threshold,
                                       nms_threshold,
                                       top_k,
                                       backend_id,
                                       target_id);
#endif
}
|
|
|
/**
 * Factory for a face detector whose network is read from in-memory buffers.
 *
 * When the library is built with the 'dnn' module, every argument is forwarded
 * unchanged to the FaceDetectorYNImpl constructor. Without the 'dnn' module the
 * call raises cv::Error::StsNotImplemented.
 */
Ptr<FaceDetectorYN> FaceDetectorYN::create(const String& framework,
                                           const std::vector<uchar>& bufferModel,
                                           const std::vector<uchar>& bufferConfig,
                                           const Size& input_size,
                                           const float score_threshold,
                                           const float nms_threshold,
                                           const int top_k,
                                           const int backend_id,
                                           const int target_id)
{
#ifdef HAVE_OPENCV_DNN
    return makePtr<FaceDetectorYNImpl>(framework, bufferModel, bufferConfig, input_size, score_threshold, nms_threshold, top_k, backend_id, target_id);
#else
    // Every parameter is unused in this branch, including `framework`.
    CV_UNUSED(framework); CV_UNUSED(bufferModel); CV_UNUSED(bufferConfig); CV_UNUSED(input_size); CV_UNUSED(score_threshold); CV_UNUSED(nms_threshold); CV_UNUSED(top_k); CV_UNUSED(backend_id); CV_UNUSED(target_id);
    CV_Error(cv::Error::StsNotImplemented, "cv::FaceDetectorYN requires enabled 'dnn' module.");
#endif
}
|
|
|
} // namespace cv
|
|
|