Merge pull request #15189 from dvd42:keypoints_module

Keypoints module
Authored by Diego 5 years ago, committed by Alexander Alekhin
parent c2b6c67431
commit 5b0b59ecfb
  1. modules/dnn/include/opencv2/dnn/dnn.hpp (+32)
  2. modules/dnn/src/model.cpp (+58)
  3. modules/dnn/test/test_model.cpp (+51)

@@ -1135,6 +1135,38 @@ CV__DNN_INLINE_NS_BEGIN
    CV_WRAP void classify(InputArray frame, CV_OUT int& classId, CV_OUT float& conf);
};
/** @brief This class represents high-level API for keypoints models
 *
 * KeypointsModel allows setting params for preprocessing the input image.
 * KeypointsModel creates a net from a file with trained weights and config,
 * sets preprocessing input, runs a forward pass and returns the x and y
 * coordinates of each detected keypoint.
 */
class CV_EXPORTS_W KeypointsModel : public Model
{
public:
    /**
     * @brief Create keypoints model from network represented in one of the supported formats.
     * The order of @p model and @p config arguments does not matter.
     * @param[in] model Binary file containing trained weights.
     * @param[in] config Text file containing the network configuration.
     */
    CV_WRAP KeypointsModel(const String& model, const String& config = "");

    /**
     * @brief Create model from deep learning network.
     * @param[in] network Net object.
     */
    CV_WRAP KeypointsModel(const Net& network);

    /** @brief Given the @p frame, create input blob, run net and return the detected keypoints.
     * @param[in] frame The input image.
     * @param[in] thresh Minimum confidence threshold to select a keypoint.
     * @returns A vector holding the x and y coordinates of each detected keypoint.
     */
    CV_WRAP std::vector<Point2f> estimate(InputArray frame, float thresh = 0.5);
};
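
For a reader skimming the header, a minimal sketch of how this API is meant to be called (illustrative only, not part of the commit; the weight file "pose_estimation.onnx" and image "person.jpg" are hypothetical placeholders):

#include <opencv2/dnn.hpp>
#include <opencv2/imgcodecs.hpp>
#include <cstdio>
#include <vector>

int main()
{
    // Hypothetical ONNX weights file; any format readNet supports would do.
    cv::dnn::KeypointsModel model("pose_estimation.onnx");

    // Preprocessing params, chained through the Model setters used in the tests below.
    model.setInputSize(cv::Size(256, 256))
         .setInputScale(1.0 / 255)
         .setInputMean(cv::Scalar(128, 128, 128));

    cv::Mat frame = cv::imread("person.jpg");  // hypothetical input image
    std::vector<cv::Point2f> keypoints = model.estimate(frame, 0.5f);
    for (const cv::Point2f& p : keypoints)
        std::printf("keypoint at (%.1f, %.1f)\n", p.x, p.y);
    return 0;
}
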
/** @brief This class represents high-level API for segmentation models
 *
 * SegmentationModel allows setting params for preprocessing the input image.

@@ -137,6 +137,64 @@ void ClassificationModel::classify(InputArray frame, int& classId, float& conf)
    std::tie(classId, conf) = classify(frame);
}
KeypointsModel::KeypointsModel(const String& model, const String& config)
    : Model(model, config) {}

KeypointsModel::KeypointsModel(const Net& network) : Model(network) {}

std::vector<Point2f> KeypointsModel::estimate(InputArray frame, float thresh)
{
    int frameHeight = frame.getMat().size[0];
    int frameWidth = frame.getMat().size[1];

    std::vector<Mat> outs;
    impl->predict(*this, frame.getMat(), outs);
    CV_Assert(outs.size() == 1);
    Mat output = outs[0];

    const int nPoints = output.size[1];
    std::vector<Point2f> points;

    // If the output is a probability map, extract the keypoints
    if (output.dims == 4)
    {
        int height = output.size[2];
        int width = output.size[3];

        // find the position of the keypoints (ignore the background)
        for (int n = 0; n < nPoints - 1; n++)
        {
            // Probability map of the corresponding keypoint
            Mat probMap(height, width, CV_32F, output.ptr(0, n));

            Point2f p(-1, -1);
            Point maxLoc;
            double prob;
            minMaxLoc(probMap, NULL, &prob, NULL, &maxLoc);
            if (prob > thresh)
            {
                p = maxLoc;
                p.x *= (float)frameWidth / width;
                p.y *= (float)frameHeight / height;
                points.push_back(p);
            }
        }
    }
    // Otherwise the output is a vector of keypoints and we can just return it
    else
    {
        for (int n = 0; n < nPoints; n++)
        {
            Point2f p;
            p.x = *output.ptr<float>(0, n, 0);
            p.y = *output.ptr<float>(0, n, 1);
            points.push_back(p);
        }
    }
    return points;
}
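
The 4-D branch above is standard heatmap decoding: each channel of the output blob is a per-keypoint probability map, minMaxLoc finds its peak, and the peak is rescaled from heatmap resolution back to frame resolution (the else branch just reads (x, y) pairs straight out of the blob). A self-contained sketch of that decoding step on a synthetic map, with made-up sizes chosen for illustration:

#include <opencv2/core.hpp>
#include <cstdio>

int main()
{
    // Synthetic 46x46 probability map with a single confident peak.
    cv::Mat probMap = cv::Mat::zeros(46, 46, CV_32F);
    probMap.at<float>(12, 30) = 0.9f;  // at<float>(row, col), i.e. (y, x)

    const int frameWidth = 368, frameHeight = 368;  // made-up frame size
    double prob;
    cv::Point maxLoc;
    cv::minMaxLoc(probMap, NULL, &prob, NULL, &maxLoc);
    if (prob > 0.5)
    {
        // Rescale the peak from heatmap to frame coordinates, as estimate() does.
        cv::Point2f p(maxLoc.x * (float)frameWidth / probMap.cols,
                      maxLoc.y * (float)frameHeight / probMap.rows);
        std::printf("keypoint at (%.1f, %.1f), confidence %.2f\n", p.x, p.y, prob);
        // prints: keypoint at (240.0, 96.0), confidence 0.90
    }
    return 0;
}
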
SegmentationModel::SegmentationModel(const String& model, const String& config)
    : Model(model, config) {}

@@ -70,6 +70,25 @@ public:
        ASSERT_NEAR(prediction.second, ref.second, norm);
    }
    void testKeypointsModel(const std::string& weights, const std::string& cfg,
                            const Mat& frame, const Mat& exp, float norm,
                            const Size& size = {-1, -1}, Scalar mean = Scalar(),
                            double scale = 1.0, bool swapRB = false, bool crop = false)
    {
        checkBackend();

        std::vector<Point2f> points;

        KeypointsModel model(weights, cfg);
        model.setInputSize(size).setInputMean(mean).setInputScale(scale)
             .setInputSwapRB(swapRB).setInputCrop(crop);

        points = model.estimate(frame, 0.5);

        Mat out = Mat(points).reshape(1);
        normAssert(exp, out, "", norm, norm);
    }
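
The helper compares against the reference blob by flattening the returned keypoints into an N x 2 float matrix; a small sketch of what Mat(points).reshape(1) produces, using three made-up points:

#include <opencv2/core.hpp>
#include <iostream>
#include <vector>

int main()
{
    // Three made-up keypoints.
    std::vector<cv::Point2f> points = { {10.f, 20.f}, {30.f, 40.f}, {50.f, 60.f} };

    // Mat(points) wraps the vector as a 3x1, 2-channel float matrix;
    // reshape(1) reinterprets it as 3x2 single-channel CV_32F.
    cv::Mat out = cv::Mat(points).reshape(1);
    std::cout << out << std::endl;  // [10, 20; 30, 40; 50, 60]
    return 0;
}
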
    void testSegmentationModel(const std::string& weights_file, const std::string& config_file,
                               const std::string& inImgPath, const std::string& outImgPath,
                               float norm, const Size& size = {-1, -1}, Scalar mean = Scalar(),
@@ -221,6 +240,38 @@ TEST_P(Test_Model, DetectionMobilenetSSD)
                  scoreDiff, iouDiff, confThreshold, nmsThreshold, size, mean, scale);
}

TEST_P(Test_Model, Keypoints_pose)
{
    Mat inp = imread(_tf("pose.png"));
    std::string weights = _tf("lightweight_pose_estimation.onnx");
    Mat exp = blobFromNPY(_tf("keypoints_exp.npy"));

    Size size{256, 256};
    float norm = 1e-4;
    double scale = 1.0/255;
    Scalar mean = Scalar(128, 128, 128);
    bool swapRB = false;

    testKeypointsModel(weights, "", inp, exp, norm, size, mean, scale, swapRB);
}

TEST_P(Test_Model, Keypoints_face)
{
    Mat inp = imread(_tf("gray_face.png"), 0);
    std::string weights = _tf("facial_keypoints.onnx");
    Mat exp = blobFromNPY(_tf("facial_keypoints_exp.npy"));

    Size size{224, 224};
    float norm = 1e-4;
    double scale = 1.0/255;
    Scalar mean = Scalar();
    bool swapRB = false;

    testKeypointsModel(weights, "", inp, exp, norm, size, mean, scale, swapRB);
}
TEST_P(Test_Model, Detection_normalized)
{
    std::string img_path = _tf("grace_hopper_227.png");
