|
|
|
@ -6,6 +6,7 @@ |
|
|
|
|
|
|
|
|
|
#include "opencv2/imgproc.hpp" |
|
|
|
|
#include "opencv2/core.hpp" |
|
|
|
|
|
|
|
|
|
#ifdef HAVE_OPENCV_DNN |
|
|
|
|
#include "opencv2/dnn.hpp" |
|
|
|
|
#endif |
|
|
|
@ -27,6 +28,8 @@ public: |
|
|
|
|
int top_k, |
|
|
|
|
int backend_id, |
|
|
|
|
int target_id) |
|
|
|
|
:divisor(32), |
|
|
|
|
strides({8, 16, 32}) |
|
|
|
|
{ |
|
|
|
|
net = dnn::readNet(model, config); |
|
|
|
|
CV_Assert(!net.empty()); |
|
|
|
@ -37,18 +40,20 @@ public: |
|
|
|
|
inputW = input_size.width; |
|
|
|
|
inputH = input_size.height; |
|
|
|
|
|
|
|
|
|
padW = (int((inputW - 1) / divisor) + 1) * divisor; |
|
|
|
|
padH = (int((inputH - 1) / divisor) + 1) * divisor; |
|
|
|
|
|
|
|
|
|
scoreThreshold = score_threshold; |
|
|
|
|
nmsThreshold = nms_threshold; |
|
|
|
|
topK = top_k; |
|
|
|
|
|
|
|
|
|
generatePriors(); |
|
|
|
|
} |
|
|
|
|
|
|
|
|
|
// Updates the expected input size: stores the new width/height in
// inputW/inputH, calls generatePriors(), then recomputes padW/padH.
// NOTE(review): this listing looks like a rendered diff whose +/- markers were
// stripped, so the generatePriors() call below may be a removed line shown next
// to its replacement (the padW/padH updates) — confirm against the real file.
void setInputSize(const Size& input_size) override |
|
|
|
|
{ |
|
|
|
|
inputW = input_size.width; |
|
|
|
|
inputH = input_size.height; |
|
|
|
|
generatePriors(); |
|
|
|
|
// Integer round-up: for x >= 1, ((x - 1) / divisor + 1) * divisor is the
// smallest multiple of divisor that is >= x.
padW = ((inputW - 1) / divisor + 1) * divisor; |
|
|
|
|
padH = ((inputH - 1) / divisor + 1) * divisor; |
|
|
|
|
} |
|
|
|
|
|
|
|
|
|
Size getInputSize() override |
|
|
|
@ -97,12 +102,14 @@ public: |
|
|
|
|
return 0; |
|
|
|
|
} |
|
|
|
|
CV_CheckEQ(input_image.size(), Size(inputW, inputH), "Size does not match. Call setInputSize(size) if input size does not match the preset size"); |
|
|
|
|
// Pad input_image with divisor 32
|
|
|
|
|
Mat pad_image = padWithDivisor(input_image); |
|
|
|
|
|
|
|
|
|
// Build blob from input image
|
|
|
|
|
Mat input_blob = dnn::blobFromImage(input_image); |
|
|
|
|
Mat input_blob = dnn::blobFromImage(pad_image); |
|
|
|
|
|
|
|
|
|
// Forward
|
|
|
|
|
std::vector<String> output_names = { "loc", "conf", "iou" }; |
|
|
|
|
std::vector<String> output_names = { "cls_8", "cls_16", "cls_32", "obj_8", "obj_16", "obj_32", "bbox_8", "bbox_16", "bbox_32", "kps_8", "kps_16", "kps_32" }; |
|
|
|
|
std::vector<Mat> output_blobs; |
|
|
|
|
net.setInput(input_blob); |
|
|
|
|
net.forward(output_blobs, output_names); |
|
|
|
@ -113,126 +120,70 @@ public: |
|
|
|
|
return 1; |
|
|
|
|
} |
|
|
|
|
private: |
|
|
|
|
// Rebuilds `priors` from the current inputW/inputH.
// Four feature-map sizes (feature_map_3rd..6th) are derived by repeated integer
// halving of the input size. Each is paired with a fixed list of box sizes
// (min_sizes) and a step (8, 16, 32, 64). For every feature-map cell and every
// box size, one Rect2f { cx, cy, s_kx, s_ky } is appended: the cell centre and
// the box size, each divided by the input width/height.
// NOTE(review): this listing looks like a rendered diff; the stride-based
// decoding also visible in postProcess does not read `priors` — confirm whether
// this function still exists in the current file.
void generatePriors() |
|
|
|
|
{ |
|
|
|
|
// Calculate shapes of different scales according to the shape of input image
// The 2nd map halves the input twice (the first halving rounds up via +1);
// each later map halves the previous one using integer division.
|
|
|
|
|
Size feature_map_2nd = { |
|
|
|
|
int(int((inputW+1)/2)/2), int(int((inputH+1)/2)/2) |
|
|
|
|
}; |
|
|
|
|
Size feature_map_3rd = { |
|
|
|
|
int(feature_map_2nd.width/2), int(feature_map_2nd.height/2) |
|
|
|
|
}; |
|
|
|
|
Size feature_map_4th = { |
|
|
|
|
int(feature_map_3rd.width/2), int(feature_map_3rd.height/2) |
|
|
|
|
}; |
|
|
|
|
Size feature_map_5th = { |
|
|
|
|
int(feature_map_4th.width/2), int(feature_map_4th.height/2) |
|
|
|
|
}; |
|
|
|
|
Size feature_map_6th = { |
|
|
|
|
int(feature_map_5th.width/2), int(feature_map_5th.height/2) |
|
|
|
|
}; |
|
|
|
|
|
|
|
|
|
std::vector<Size> feature_map_sizes; |
|
|
|
|
feature_map_sizes.push_back(feature_map_3rd); |
|
|
|
|
feature_map_sizes.push_back(feature_map_4th); |
|
|
|
|
feature_map_sizes.push_back(feature_map_5th); |
|
|
|
|
feature_map_sizes.push_back(feature_map_6th); |
|
|
|
|
|
|
|
|
|
// Fixed params for generating priors
// min_sizes[i] and steps[i] are used together with feature_map_sizes[i] in the
// loop below, so all three lists share the same ordering.
|
|
|
|
|
const std::vector<std::vector<float>> min_sizes = { |
|
|
|
|
{10.0f, 16.0f, 24.0f}, |
|
|
|
|
{32.0f, 48.0f}, |
|
|
|
|
{64.0f, 96.0f}, |
|
|
|
|
{128.0f, 192.0f, 256.0f} |
|
|
|
|
}; |
|
|
|
|
CV_Assert(min_sizes.size() == feature_map_sizes.size()); // just to keep vectors in sync
|
|
|
|
|
const std::vector<int> steps = { 8, 16, 32, 64 }; |
|
|
|
|
|
|
|
|
|
// Generate priors
// Any previously generated priors are discarded first; entries are then
// appended feature map by feature map, row by row, column by column, and
// finally box size by box size.
|
|
|
|
|
priors.clear(); |
|
|
|
|
for (size_t i = 0; i < feature_map_sizes.size(); ++i) |
|
|
|
|
{ |
|
|
|
|
Size feature_map_size = feature_map_sizes[i]; |
|
|
|
|
std::vector<float> min_size = min_sizes[i]; |
|
|
|
|
|
|
|
|
|
for (int _h = 0; _h < feature_map_size.height; ++_h) |
|
|
|
|
{ |
|
|
|
|
for (int _w = 0; _w < feature_map_size.width; ++_w) |
|
|
|
|
{ |
|
|
|
|
for (size_t j = 0; j < min_size.size(); ++j) |
|
|
|
|
{ |
|
|
|
|
// Box size divided by the input width / height.
float s_kx = min_size[j] / inputW; |
|
|
|
|
float s_ky = min_size[j] / inputH; |
|
|
|
|
|
|
|
|
|
// Cell centre, (index + 0.5) * step, divided by the input width / height.
float cx = (_w + 0.5f) * steps[i] / inputW; |
|
|
|
|
float cy = (_h + 0.5f) * steps[i] / inputH; |
|
|
|
|
|
|
|
|
|
Rect2f prior = { cx, cy, s_kx, s_ky }; |
|
|
|
|
priors.push_back(prior); |
|
|
|
|
} |
|
|
|
|
} |
|
|
|
|
} |
|
|
|
|
} |
|
|
|
|
} |
|
|
|
|
|
|
|
|
|
Mat postProcess(const std::vector<Mat>& output_blobs) |
|
|
|
|
{ |
|
|
|
|
// Extract from output_blobs
|
|
|
|
|
Mat loc = output_blobs[0]; |
|
|
|
|
Mat conf = output_blobs[1]; |
|
|
|
|
Mat iou = output_blobs[2]; |
|
|
|
|
|
|
|
|
|
// Decode from deltas and priors
|
|
|
|
|
const std::vector<float> variance = {0.1f, 0.2f}; |
|
|
|
|
float* loc_v = (float*)(loc.data); |
|
|
|
|
float* conf_v = (float*)(conf.data); |
|
|
|
|
float* iou_v = (float*)(iou.data); |
|
|
|
|
Mat faces; |
|
|
|
|
// (tl_x, tl_y, w, h, re_x, re_y, le_x, le_y, nt_x, nt_y, rcm_x, rcm_y, lcm_x, lcm_y, score)
|
|
|
|
|
// 'tl': top left point of the bounding box
|
|
|
|
|
// 're': right eye, 'le': left eye
|
|
|
|
|
// 'nt': nose tip
|
|
|
|
|
// 'rcm': right corner of mouth, 'lcm': left corner of mouth
|
|
|
|
|
Mat face(1, 15, CV_32FC1); |
|
|
|
|
for (size_t i = 0; i < priors.size(); ++i) { |
|
|
|
|
// Get score
|
|
|
|
|
float clsScore = conf_v[i*2+1]; |
|
|
|
|
float iouScore = iou_v[i]; |
|
|
|
|
// Clamp
|
|
|
|
|
if (iouScore < 0.f) { |
|
|
|
|
iouScore = 0.f; |
|
|
|
|
} |
|
|
|
|
else if (iouScore > 1.f) { |
|
|
|
|
iouScore = 1.f; |
|
|
|
|
for (size_t i = 0; i < strides.size(); ++i) { |
|
|
|
|
int cols = int(padW / strides[i]); |
|
|
|
|
int rows = int(padH / strides[i]); |
|
|
|
|
|
|
|
|
|
// Extract from output_blobs
|
|
|
|
|
Mat cls = output_blobs[i]; |
|
|
|
|
Mat obj = output_blobs[i + strides.size() * 1]; |
|
|
|
|
Mat bbox = output_blobs[i + strides.size() * 2]; |
|
|
|
|
Mat kps = output_blobs[i + strides.size() * 3]; |
|
|
|
|
|
|
|
|
|
// Decode from predictions
|
|
|
|
|
float* cls_v = (float*)(cls.data); |
|
|
|
|
float* obj_v = (float*)(obj.data); |
|
|
|
|
float* bbox_v = (float*)(bbox.data); |
|
|
|
|
float* kps_v = (float*)(kps.data); |
|
|
|
|
|
|
|
|
|
// (tl_x, tl_y, w, h, re_x, re_y, le_x, le_y, nt_x, nt_y, rcm_x, rcm_y, lcm_x, lcm_y, score)
|
|
|
|
|
// 'tl': top left point of the bounding box
|
|
|
|
|
// 're': right eye, 'le': left eye
|
|
|
|
|
// 'nt': nose tip
|
|
|
|
|
// 'rcm': right corner of mouth, 'lcm': left corner of mouth
|
|
|
|
|
Mat face(1, 15, CV_32FC1); |
|
|
|
|
|
|
|
|
|
for(int r = 0; r < rows; ++r) { |
|
|
|
|
for(int c = 0; c < cols; ++c) { |
|
|
|
|
size_t idx = r * cols + c; |
|
|
|
|
|
|
|
|
|
// Get score
|
|
|
|
|
float cls_score = cls_v[idx]; |
|
|
|
|
float obj_score = obj_v[idx]; |
|
|
|
|
|
|
|
|
|
// Clamp
|
|
|
|
|
cls_score = MIN(cls_score, 1.f); |
|
|
|
|
cls_score = MAX(cls_score, 0.f); |
|
|
|
|
obj_score = MIN(obj_score, 1.f); |
|
|
|
|
obj_score = MAX(obj_score, 0.f); |
|
|
|
|
float score = std::sqrt(cls_score * obj_score); |
|
|
|
|
face.at<float>(0, 14) = score; |
|
|
|
|
|
|
|
|
|
// Get bounding box
|
|
|
|
|
float cx = ((c + bbox_v[idx * 4 + 0]) * strides[i]); |
|
|
|
|
float cy = ((r + bbox_v[idx * 4 + 1]) * strides[i]); |
|
|
|
|
float w = exp(bbox_v[idx * 4 + 2]) * strides[i]; |
|
|
|
|
float h = exp(bbox_v[idx * 4 + 3]) * strides[i]; |
|
|
|
|
|
|
|
|
|
float x1 = cx - w / 2.f; |
|
|
|
|
float y1 = cy - h / 2.f; |
|
|
|
|
|
|
|
|
|
face.at<float>(0, 0) = x1; |
|
|
|
|
face.at<float>(0, 1) = y1; |
|
|
|
|
face.at<float>(0, 2) = w; |
|
|
|
|
face.at<float>(0, 3) = h; |
|
|
|
|
|
|
|
|
|
// Get landmarks
|
|
|
|
|
for(int n = 0; n < 5; ++n) { |
|
|
|
|
face.at<float>(0, 4 + 2 * n) = (kps_v[idx * 10 + 2 * n] + c) * strides[i]; |
|
|
|
|
face.at<float>(0, 4 + 2 * n + 1) = (kps_v[idx * 10 + 2 * n + 1]+ r) * strides[i]; |
|
|
|
|
} |
|
|
|
|
faces.push_back(face); |
|
|
|
|
} |
|
|
|
|
} |
|
|
|
|
float score = std::sqrt(clsScore * iouScore); |
|
|
|
|
face.at<float>(0, 14) = score; |
|
|
|
|
|
|
|
|
|
// Get bounding box
|
|
|
|
|
float cx = (priors[i].x + loc_v[i*14+0] * variance[0] * priors[i].width) * inputW; |
|
|
|
|
float cy = (priors[i].y + loc_v[i*14+1] * variance[0] * priors[i].height) * inputH; |
|
|
|
|
float w = priors[i].width * exp(loc_v[i*14+2] * variance[0]) * inputW; |
|
|
|
|
float h = priors[i].height * exp(loc_v[i*14+3] * variance[1]) * inputH; |
|
|
|
|
float x1 = cx - w / 2; |
|
|
|
|
float y1 = cy - h / 2; |
|
|
|
|
face.at<float>(0, 0) = x1; |
|
|
|
|
face.at<float>(0, 1) = y1; |
|
|
|
|
face.at<float>(0, 2) = w; |
|
|
|
|
face.at<float>(0, 3) = h; |
|
|
|
|
|
|
|
|
|
// Get landmarks
|
|
|
|
|
face.at<float>(0, 4) = (priors[i].x + loc_v[i*14+ 4] * variance[0] * priors[i].width) * inputW; // right eye, x
|
|
|
|
|
face.at<float>(0, 5) = (priors[i].y + loc_v[i*14+ 5] * variance[0] * priors[i].height) * inputH; // right eye, y
|
|
|
|
|
face.at<float>(0, 6) = (priors[i].x + loc_v[i*14+ 6] * variance[0] * priors[i].width) * inputW; // left eye, x
|
|
|
|
|
face.at<float>(0, 7) = (priors[i].y + loc_v[i*14+ 7] * variance[0] * priors[i].height) * inputH; // left eye, y
|
|
|
|
|
face.at<float>(0, 8) = (priors[i].x + loc_v[i*14+ 8] * variance[0] * priors[i].width) * inputW; // nose tip, x
|
|
|
|
|
face.at<float>(0, 9) = (priors[i].y + loc_v[i*14+ 9] * variance[0] * priors[i].height) * inputH; // nose tip, y
|
|
|
|
|
face.at<float>(0, 10) = (priors[i].x + loc_v[i*14+10] * variance[0] * priors[i].width) * inputW; // right corner of mouth, x
|
|
|
|
|
face.at<float>(0, 11) = (priors[i].y + loc_v[i*14+11] * variance[0] * priors[i].height) * inputH; // right corner of mouth, y
|
|
|
|
|
face.at<float>(0, 12) = (priors[i].x + loc_v[i*14+12] * variance[0] * priors[i].width) * inputW; // left corner of mouth, x
|
|
|
|
|
face.at<float>(0, 13) = (priors[i].y + loc_v[i*14+13] * variance[0] * priors[i].height) * inputH; // left corner of mouth, y
|
|
|
|
|
|
|
|
|
|
faces.push_back(face); |
|
|
|
|
} |
|
|
|
|
|
|
|
|
|
if (faces.rows > 1) |
|
|
|
@ -265,16 +216,27 @@ private: |
|
|
|
|
return faces; |
|
|
|
|
} |
|
|
|
|
} |
|
|
|
|
|
|
|
|
|
// Returns a padded copy of input_image: (padH - inputH) rows are added at the
// bottom and (padW - inputW) columns on the right via copyMakeBorder with
// BORDER_CONSTANT and value 0. Nothing is added at the top or on the left.
// The result is padW x padH only if input_image is inputW x inputH; this
// function does not check that itself.
Mat padWithDivisor(InputArray& input_image) |
|
|
|
|
{ |
|
|
|
|
int bottom = padH - inputH; |
|
|
|
|
int right = padW - inputW; |
|
|
|
|
Mat pad_image; |
|
|
|
|
// Argument order after src/dst is top, bottom, left, right.
copyMakeBorder(input_image, pad_image, 0, bottom, 0, right, BORDER_CONSTANT, 0); |
|
|
|
|
return pad_image; |
|
|
|
|
} |
|
|
|
|
private: |
|
|
|
|
dnn::Net net; |
|
|
|
|
|
|
|
|
|
int inputW; |
|
|
|
|
int inputH; |
|
|
|
|
int padW; |
|
|
|
|
int padH; |
|
|
|
|
const int divisor; |
|
|
|
|
int topK; |
|
|
|
|
float scoreThreshold; |
|
|
|
|
float nmsThreshold; |
|
|
|
|
int topK; |
|
|
|
|
|
|
|
|
|
std::vector<Rect2f> priors; |
|
|
|
|
const std::vector<int> strides; |
|
|
|
|
}; |
|
|
|
|
#endif |
|
|
|
|
|
|
|
|
|