parent
1a1097ab23
commit
886319c81d
12 changed files with 489 additions and 20 deletions
@ -0,0 +1,155 @@ |
|||||||
|
.. _akazeTracking: |
||||||
|
|
||||||
|
|
||||||
|
AKAZE and ORB planar tracking |
||||||
|
****************************** |
||||||
|
|
||||||
|
Introduction |
||||||
|
============
||||||
|
|
||||||
|
In this tutorial we will compare *AKAZE* and *ORB* local features |
||||||
|
using them to find matches between video frames and track object movements. |
||||||
|
|
||||||
|
The algorithm is as follows: |
||||||
|
|
||||||
|
* Detect and describe keypoints on the first frame, manually set object boundaries |
||||||
|
* For every next frame: |
||||||
|
|
||||||
|
#. Detect and describe keypoints |
||||||
|
#. Match them using bruteforce matcher |
||||||
|
#. Estimate homography transformation using RANSAC |
||||||
|
#. Filter inliers from all the matches |
||||||
|
#. Apply homography transformation to the bounding box to find the object |
||||||
|
#. Draw bounding box and inliers, compute inlier ratio as evaluation metric |
||||||
|
|
||||||
|
.. image:: images/frame.png |
||||||
|
:height: 480pt |
||||||
|
:width: 640pt |
||||||
|
:alt: Result frame example |
||||||
|
:align: center |
||||||
|
|
||||||
|
Data |
||||||
|
=========== |
||||||
|
To do the tracking we need a video and object position on the first frame. |
||||||
|
|
||||||
|
You can download our example video and data from `here <https://docs.google.com/file/d/0B72G7D4snftJandBb0taLVJHMFk>`_. |
||||||
|
|
||||||
|
To run the code you have to specify input and output video path and object bounding box. |
||||||
|
|
||||||
|
.. code-block:: none |
||||||
|
|
||||||
|
./planar_tracking blais.mp4 result.avi blais_bb.xml.gz |
||||||
|
|
||||||
|
Source Code |
||||||
|
=========== |
||||||
|
.. literalinclude:: ../../../../samples/cpp/tutorial_code/features2D/AKAZE_tracking/planar_tracking.cpp |
||||||
|
:language: cpp |
||||||
|
:linenos: |
||||||
|
:tab-width: 4 |
||||||
|
|
||||||
|
Explanation |
||||||
|
=========== |
||||||
|
|
||||||
|
Tracker class |
||||||
|
-------------- |
||||||
|
|
||||||
|
This class implements the algorithm described above
||||||
|
using given feature detector and descriptor matcher. |
||||||
|
|
||||||
|
* **Setting up the first frame** |
||||||
|
|
||||||
|
.. code-block:: cpp |
||||||
|
|
||||||
|
void Tracker::setFirstFrame(const Mat frame, vector<Point2f> bb, string title, Stats& stats) |
||||||
|
{ |
||||||
|
first_frame = frame.clone(); |
||||||
|
(*detector)(first_frame, noArray(), first_kp, first_desc); |
||||||
|
stats.keypoints = (int)first_kp.size(); |
||||||
|
drawBoundingBox(first_frame, bb); |
||||||
|
putText(first_frame, title, Point(0, 60), FONT_HERSHEY_PLAIN, 5, Scalar::all(0), 4); |
||||||
|
object_bb = bb; |
||||||
|
} |
||||||
|
|
||||||
|
We compute and store keypoints and descriptors from the first frame and prepare it for the output. |
||||||
|
|
||||||
|
We need to save the number of detected keypoints to make sure both detectors locate roughly the same number of them.
||||||
|
|
||||||
|
* **Processing frames** |
||||||
|
|
||||||
|
#. Locate keypoints and compute descriptors |
||||||
|
|
||||||
|
.. code-block:: cpp |
||||||
|
|
||||||
|
(*detector)(frame, noArray(), kp, desc); |
||||||
|
|
||||||
|
To find matches between frames we have to locate the keypoints first. |
||||||
|
|
||||||
|
In this tutorial detectors are set up to find about 1000 keypoints on each frame. |
||||||
|
|
||||||
|
#. Use 2-nn matcher to find correspondences |
||||||
|
|
||||||
|
.. code-block:: cpp |
||||||
|
|
||||||
|
matcher->knnMatch(first_desc, desc, matches, 2); |
||||||
|
for(unsigned i = 0; i < matches.size(); i++) { |
||||||
|
if(matches[i][0].distance < nn_match_ratio * matches[i][1].distance) { |
||||||
|
matched1.push_back(first_kp[matches[i][0].queryIdx]); |
||||||
|
matched2.push_back( kp[matches[i][0].trainIdx]); |
||||||
|
} |
||||||
|
} |
||||||
|
|
||||||
|
If the distance to the closest match is less than *nn_match_ratio* times the distance to the second closest one, then it's a match.
||||||
|
|
||||||
|
#. Use *RANSAC* to estimate homography transformation
||||||
|
|
||||||
|
.. code-block:: cpp |
||||||
|
|
||||||
|
homography = findHomography(Points(matched1), Points(matched2), |
||||||
|
RANSAC, ransac_thresh, inlier_mask); |
||||||
|
|
||||||
|
If there are at least 4 matches we can use random sample consensus to estimate image transformation. |
||||||
|
|
||||||
|
#. Save the inliers
||||||
|
|
||||||
|
.. code-block:: cpp |
||||||
|
|
||||||
|
for(unsigned i = 0; i < matched1.size(); i++) { |
||||||
|
if(inlier_mask.at<uchar>(i)) { |
||||||
|
int new_i = static_cast<int>(inliers1.size()); |
||||||
|
inliers1.push_back(matched1[i]); |
||||||
|
inliers2.push_back(matched2[i]); |
||||||
|
inlier_matches.push_back(DMatch(new_i, new_i, 0)); |
||||||
|
} |
||||||
|
} |
||||||
|
|
||||||
|
Since *findHomography* computes the inliers we only have to save the chosen points and matches. |
||||||
|
|
||||||
|
#. Project object bounding box
||||||
|
|
||||||
|
.. code-block:: cpp |
||||||
|
|
||||||
|
perspectiveTransform(object_bb, new_bb, homography); |
||||||
|
|
||||||
|
If there is a reasonable number of inliers we can use estimated transformation to locate the object. |
||||||
|
|
||||||
|
Results |
||||||
|
======= |
||||||
|
You can watch the resulting `video on youtube <http://www.youtube.com/watch?v=LWY-w8AGGhE>`_. |
||||||
|
|
||||||
|
*AKAZE* statistics: |
||||||
|
|
||||||
|
.. code-block:: none |
||||||
|
|
||||||
|
Matches 626 |
||||||
|
Inliers 410 |
||||||
|
Inlier ratio 0.58 |
||||||
|
Keypoints 1117 |
||||||
|
|
||||||
|
*ORB* statistics: |
||||||
|
|
||||||
|
.. code-block:: none |
||||||
|
|
||||||
|
Matches 504 |
||||||
|
Inliers 319 |
||||||
|
Inlier ratio 0.56 |
||||||
|
Keypoints 1112 |
After Width: | Height: | Size: 318 KiB |
After Width: | Height: | Size: 31 KiB |
@ -0,0 +1,183 @@ |
|||||||
|
#include <opencv2/features2d.hpp> |
||||||
|
#include <opencv2/videoio.hpp> |
||||||
|
#include <opencv2/opencv.hpp> |
||||||
|
#include <vector> |
||||||
|
#include <iostream> |
||||||
|
#include <iomanip> |
||||||
|
|
||||||
|
#include "stats.h" // Stats structure definition |
||||||
|
#include "utils.h" // Drawing and printing functions |
||||||
|
|
||||||
|
using namespace std; |
||||||
|
using namespace cv; |
||||||
|
|
||||||
|
// Tuning parameters of the tracking demo.
// The floating-point constants are declared double, so they are initialised
// with double literals: a float literal such as 0.8f would be widened to
// 0.800000011920929 rather than the intended 0.8.
const double akaze_thresh = 3e-4;   // AKAZE detection threshold set to locate about 1000 keypoints
const double ransac_thresh = 2.5;   // RANSAC inlier threshold
const double nn_match_ratio = 0.8;  // Nearest-neighbour matching ratio
const int bb_min_inliers = 100;     // Minimal number of inliers to draw bounding box
const int stats_update_period = 10; // On-screen statistics are updated every 10 frames
||||||
|
|
||||||
|
class Tracker |
||||||
|
{ |
||||||
|
public: |
||||||
|
Tracker(Ptr<Feature2D> _detector, Ptr<DescriptorMatcher> _matcher) : |
||||||
|
detector(_detector), |
||||||
|
matcher(_matcher) |
||||||
|
{} |
||||||
|
|
||||||
|
void setFirstFrame(const Mat frame, vector<Point2f> bb, string title, Stats& stats); |
||||||
|
Mat process(const Mat frame, Stats& stats); |
||||||
|
Ptr<Feature2D> getDetector() { |
||||||
|
return detector; |
||||||
|
} |
||||||
|
protected: |
||||||
|
Ptr<Feature2D> detector; |
||||||
|
Ptr<DescriptorMatcher> matcher; |
||||||
|
Mat first_frame, first_desc; |
||||||
|
vector<KeyPoint> first_kp; |
||||||
|
vector<Point2f> object_bb; |
||||||
|
}; |
||||||
|
|
||||||
|
void Tracker::setFirstFrame(const Mat frame, vector<Point2f> bb, string title, Stats& stats) |
||||||
|
{ |
||||||
|
first_frame = frame.clone(); |
||||||
|
(*detector)(first_frame, noArray(), first_kp, first_desc); |
||||||
|
stats.keypoints = (int)first_kp.size(); |
||||||
|
drawBoundingBox(first_frame, bb); |
||||||
|
putText(first_frame, title, Point(0, 60), FONT_HERSHEY_PLAIN, 5, Scalar::all(0), 4); |
||||||
|
object_bb = bb; |
||||||
|
} |
||||||
|
|
||||||
|
// Processes one video frame against the reference frame.
//   frame - current frame
//   stats - receives keypoint, match and inlier counts and the inlier ratio
// Returns the reference frame and the current frame side by side. When a
// homography could be estimated the inlier matches are drawn, and the
// projected bounding box is drawn too if there are at least bb_min_inliers
// inliers.
Mat Tracker::process(const Mat frame, Stats& stats)
{
    vector<KeyPoint> kp;
    Mat desc;
    (*detector)(frame, noArray(), kp, desc);
    stats.keypoints = (int)kp.size();

    vector< vector<DMatch> > matches;
    vector<KeyPoint> matched1, matched2;
    matcher->knnMatch(first_desc, desc, matches, 2);
    for(unsigned i = 0; i < matches.size(); i++) {
        // knnMatch returns fewer than 2 neighbours when the current frame has
        // fewer than 2 descriptors; the ratio test cannot be applied then.
        if(matches[i].size() < 2) {
            continue;
        }
        if(matches[i][0].distance < nn_match_ratio * matches[i][1].distance) {
            matched1.push_back(first_kp[matches[i][0].queryIdx]);
            matched2.push_back(      kp[matches[i][0].trainIdx]);
        }
    }
    stats.matches = (int)matched1.size();

    Mat inlier_mask, homography;
    vector<KeyPoint> inliers1, inliers2;
    vector<DMatch> inlier_matches;
    // A homography needs at least 4 point correspondences.
    if(matched1.size() >= 4) {
        homography = findHomography(Points(matched1), Points(matched2),
                                    RANSAC, ransac_thresh, inlier_mask);
    }

    // Tracking failed on this frame: show both frames without any matches.
    if(matched1.size() < 4 || homography.empty()) {
        Mat res;
        hconcat(first_frame, frame, res);
        stats.inliers = 0;
        stats.ratio = 0;
        return res;
    }

    // Keep only the matches RANSAC marked as inliers; they are re-indexed so
    // that the i-th inlier of one frame corresponds to the i-th of the other.
    for(unsigned i = 0; i < matched1.size(); i++) {
        if(inlier_mask.at<uchar>(i)) {
            int new_i = static_cast<int>(inliers1.size());
            inliers1.push_back(matched1[i]);
            inliers2.push_back(matched2[i]);
            inlier_matches.push_back(DMatch(new_i, new_i, 0));
        }
    }
    stats.inliers = (int)inliers1.size();
    stats.ratio = stats.inliers * 1.0 / stats.matches; // matches >= 4 here

    vector<Point2f> new_bb;
    perspectiveTransform(object_bb, new_bb, homography);
    Mat frame_with_bb = frame.clone();
    if(stats.inliers >= bb_min_inliers) {
        drawBoundingBox(frame_with_bb, new_bb);
    }

    Mat res;
    drawMatches(first_frame, inliers1, frame_with_bb, inliers2,
                inlier_matches, res,
                Scalar(255, 0, 0), Scalar(255, 0, 0));
    return res;
}
||||||
|
|
||||||
|
int main(int argc, char **argv) |
||||||
|
{ |
||||||
|
if(argc < 4) { |
||||||
|
cerr << "Usage: " << endl << |
||||||
|
"akaze_track input_path output_path bounding_box" << endl; |
||||||
|
return 1; |
||||||
|
} |
||||||
|
VideoCapture video_in(argv[1]); |
||||||
|
VideoWriter video_out(argv[2], |
||||||
|
(int)video_in.get(CAP_PROP_FOURCC), |
||||||
|
(int)video_in.get(CAP_PROP_FPS), |
||||||
|
Size(2 * (int)video_in.get(CAP_PROP_FRAME_WIDTH), |
||||||
|
2 * (int)video_in.get(CAP_PROP_FRAME_HEIGHT))); |
||||||
|
|
||||||
|
if(!video_in.isOpened()) { |
||||||
|
cerr << "Couldn't open " << argv[1] << endl; |
||||||
|
return 1; |
||||||
|
} |
||||||
|
if(!video_out.isOpened()) { |
||||||
|
cerr << "Couldn't open " << argv[2] << endl; |
||||||
|
return 1; |
||||||
|
} |
||||||
|
|
||||||
|
vector<Point2f> bb; |
||||||
|
FileStorage fs(argv[3], FileStorage::READ); |
||||||
|
if(fs["bounding_box"].empty()) { |
||||||
|
cerr << "Couldn't read bounding_box from " << argv[3] << endl; |
||||||
|
return 1; |
||||||
|
} |
||||||
|
fs["bounding_box"] >> bb; |
||||||
|
Ptr<Feature2D> akaze = Feature2D::create("AKAZE"); |
||||||
|
akaze->set("threshold", akaze_thresh); |
||||||
|
Ptr<Feature2D> orb = Feature2D::create("ORB"); |
||||||
|
Ptr<DescriptorMatcher> matcher = DescriptorMatcher::create("BruteForce-Hamming"); |
||||||
|
Tracker akaze_tracker(akaze, matcher); |
||||||
|
Tracker orb_tracker(orb, matcher); |
||||||
|
|
||||||
|
Stats stats, akaze_stats, orb_stats; |
||||||
|
Mat frame; |
||||||
|
video_in >> frame; |
||||||
|
akaze_tracker.setFirstFrame(frame, bb, "AKAZE", stats); |
||||||
|
orb_tracker.getDetector()->set("nFeatures", stats.keypoints); |
||||||
|
orb_tracker.setFirstFrame(frame, bb, "ORB", stats); |
||||||
|
|
||||||
|
Stats akaze_draw_stats, orb_draw_stats; |
||||||
|
int frame_count = (int)video_in.get(CAP_PROP_FRAME_COUNT); |
||||||
|
Mat akaze_res, orb_res, res_frame; |
||||||
|
for(int i = 1; i < frame_count; i++) { |
||||||
|
bool update_stats = (i % stats_update_period == 0); |
||||||
|
video_in >> frame; |
||||||
|
|
||||||
|
akaze_res = akaze_tracker.process(frame, stats); |
||||||
|
akaze_stats += stats; |
||||||
|
if(update_stats) { |
||||||
|
akaze_draw_stats = stats; |
||||||
|
} |
||||||
|
|
||||||
|
orb_tracker.getDetector()->set("nFeatures", stats.keypoints); |
||||||
|
orb_res = orb_tracker.process(frame, stats); |
||||||
|
orb_stats += stats; |
||||||
|
if(update_stats) { |
||||||
|
orb_draw_stats = stats; |
||||||
|
} |
||||||
|
|
||||||
|
drawStatistics(akaze_res, akaze_draw_stats); |
||||||
|
drawStatistics(orb_res, orb_draw_stats); |
||||||
|
vconcat(akaze_res, orb_res, res_frame); |
||||||
|
video_out << res_frame; |
||||||
|
cout << i << "/" << frame_count - 1 << endl; |
||||||
|
} |
||||||
|
akaze_stats /= frame_count - 1; |
||||||
|
orb_stats /= frame_count - 1; |
||||||
|
printStatistics("AKAZE", akaze_stats); |
||||||
|
printStatistics("ORB", orb_stats); |
||||||
|
return 0; |
||||||
|
} |
@ -0,0 +1,34 @@ |
|||||||
|
#ifndef STATS_H |
||||||
|
#define STATS_H |
||||||
|
|
||||||
|
// Per-frame tracking statistics; also used as an accumulator for averaging.
struct Stats
{
    int matches;    // matches that passed the ratio test
    int inliers;    // matches kept as inliers
    double ratio;   // inliers / matches
    int keypoints;  // keypoints detected on the frame

    // All counters start at zero so a fresh object can be used as a sum.
    Stats() : matches(0),
        inliers(0),
        ratio(0),
        keypoints(0)
    {}

    // Adds every field of `op` to this object.
    Stats& operator+=(const Stats& op) {
        matches += op.matches;
        inliers += op.inliers;
        ratio += op.ratio;
        keypoints += op.keypoints;
        return *this;
    }

    // Divides every field by `num` (integer fields use integer division).
    // Dividing by zero is a no-op: integer division by zero is undefined
    // behaviour, and with zero samples there is nothing to average.
    Stats& operator/=(int num)
    {
        if(num == 0) {
            return *this;
        }
        matches /= num;
        inliers /= num;
        ratio /= num;
        keypoints /= num;
        return *this;
    }
};
||||||
|
|
||||||
|
#endif // STATS_H
|
@ -0,0 +1,59 @@ |
|||||||
|
#ifndef UTILS_H |
||||||
|
#define UTILS_H |
||||||
|
|
||||||
|
#include <opencv2/core.hpp> |
||||||
|
#include <vector> |
||||||
|
#include "stats.h" |
||||||
|
|
||||||
|
using namespace std; |
||||||
|
using namespace cv; |
||||||
|
|
||||||
|
void drawBoundingBox(Mat image, vector<Point2f> bb); |
||||||
|
void drawStatistics(Mat image, const Stats& stats); |
||||||
|
void printStatistics(string name, Stats stats); |
||||||
|
vector<Point2f> Points(vector<KeyPoint> keypoints); |
||||||
|
|
||||||
|
// Draws the closed polygon `bb` onto `image` in red with 2-pixel lines.
// Each vertex is connected to the next one and the last back to the first.
// An empty `bb` draws nothing (previously `bb.size() - 1` wrapped around for
// an empty vector and the loop indexed out of bounds).
void drawBoundingBox(Mat image, vector<Point2f> bb)
{
    if(bb.empty()) {
        return;
    }
    for(size_t i = 0; i < bb.size(); i++) {
        // The modulo closes the polygon on the last iteration.
        line(image, bb[i], bb[(i + 1) % bb.size()], Scalar(0, 0, 255), 2);
    }
}
||||||
|
|
||||||
|
// Writes the match count, inlier count and inlier ratio of `stats` as three
// white text lines near the bottom-left corner of `image`.
void drawStatistics(Mat image, const Stats& stats)
{
    static const int font = FONT_HERSHEY_PLAIN;

    stringstream matches_text, inliers_text, ratio_text;
    matches_text << "Matches: " << stats.matches;
    inliers_text << "Inliers: " << stats.inliers;
    ratio_text << "Inlier ratio: " << setprecision(2) << stats.ratio;

    const string captions[3] = { matches_text.str(), inliers_text.str(), ratio_text.str() };

    // Lines are placed 90, 60 and 30 pixels above the bottom edge.
    for(int row = 0; row < 3; ++row) {
        const int baseline_y = image.rows - 90 + 30 * row;
        putText(image, captions[row], Point(0, baseline_y), font, 2, Scalar::all(255), 3);
    }
}
||||||
|
|
||||||
|
// Prints a titled summary of `stats` to standard output: a header with
// `name`, then matches, inliers, inlier ratio (2 significant digits) and
// keypoints, followed by a blank line.
void printStatistics(string name, Stats stats)
{
    cout << name << endl
         << "----------" << endl;

    cout << "Matches " << stats.matches << endl
         << "Inliers " << stats.inliers << endl
         << "Inlier ratio " << setprecision(2) << stats.ratio << endl
         << "Keypoints " << stats.keypoints << endl
         << endl;
}
||||||
|
|
||||||
|
// Extracts the image coordinates (`pt`) of every keypoint, in order.
vector<Point2f> Points(vector<KeyPoint> keypoints)
{
    vector<Point2f> res;
    // The final size is known up front, so allocate once.
    res.reserve(keypoints.size());
    for(size_t i = 0; i < keypoints.size(); i++) {
        res.push_back(keypoints[i].pt);
    }
    return res;
}
||||||
|
|
||||||
|
#endif // UTILS_H
|
Loading…
Reference in new issue