mirror of https://github.com/opencv/opencv.git
Merge pull request #22808 from zihaomu:nanotrack
[test data in opencv_extra](https://github.com/opencv/opencv_extra/pull/1016) NanoTrack is an extremely lightweight and fast object-tracking model. The total size is **1.1 MB**. The FPS on an M1 chip is **150**, and on a Raspberry Pi 4 it is about **30** (Float32, CPU only). With this model, many users can run object tracking on edge devices. The author of NanoTrack is @HonglinChu. The original repo is https://github.com/HonglinChu/NanoTrack. ### Pull Request Readiness Checklist See details at https://github.com/opencv/opencv/wiki/How_to_contribute#making-a-good-pull-request - [x] I agree to contribute to the project under Apache 2 License. - [x] To the best of my knowledge, the proposed patch is not based on a code under GPL or another license that is incompatible with OpenCV - [x] The PR is proposed to the proper branch - [ ] There is a reference to the original bug report and related work - [ ] There is accuracy test, performance test and test data in opencv_extra repository, if applicable Patch to opencv_extra has the same branch name. - [ ] The feature is well documented and sample code can be built with the project CMake pull/18377/merge
parent
b16f76eede
commit
cb8f1dca3b
6 changed files with 655 additions and 43 deletions
@ -0,0 +1,359 @@ |
|||||||
|
// This file is part of OpenCV project.
|
||||||
|
// It is subject to the license terms in the LICENSE file found in the top-level directory
|
||||||
|
// of this distribution and at http://opencv.org/license.html.
|
||||||
|
|
||||||
|
// This file is modified from the https://github.com/HonglinChu/NanoTrack/blob/master/ncnn_macos_nanotrack/nanotrack.cpp
|
||||||
|
// Author, HongLinChu, 1628464345@qq.com
|
||||||
|
// Adapt to OpenCV, ZihaoMu: zihaomu@outlook.com
|
||||||
|
|
||||||
|
// Link to original inference code: https://github.com/HonglinChu/NanoTrack
|
||||||
|
// Link to original training repo: https://github.com/HonglinChu/SiamTrackers/tree/master/NanoTrack
|
||||||
|
|
||||||
|
#include "../precomp.hpp" |
||||||
|
#ifdef HAVE_OPENCV_DNN |
||||||
|
#include "opencv2/dnn.hpp" |
||||||
|
#endif |
||||||
|
|
||||||
|
namespace cv { |
||||||
|
|
||||||
|
// Intentionally empty: there is nothing to set up here.
TrackerNano::TrackerNano() {}
||||||
|
|
||||||
|
// Intentionally empty: there is nothing to release here.
TrackerNano::~TrackerNano() {}
||||||
|
|
||||||
|
// Fills in the default parameter set: the two ONNX model file names and the
// DNN backend/target identifiers.
TrackerNano::Params::Params()
{
#ifndef HAVE_OPENCV_DNN
    // Built without opencv_dnn: store sentinel values.
    backend = -1;  // invalid value
    target = -1;   // invalid value
#else
    backend = dnn::DNN_BACKEND_DEFAULT;
    target = dnn::DNN_TARGET_CPU;
#endif
    neckhead = "neckhead.onnx";
    backbone = "backbone.onnx";
}
||||||
|
|
||||||
|
#ifdef HAVE_OPENCV_DNN |
||||||
|
static void softmax(const Mat& src, Mat& dst) |
||||||
|
{ |
||||||
|
Mat maxVal; |
||||||
|
cv::max(src.row(1), src.row(0), maxVal); |
||||||
|
|
||||||
|
src.row(1) -= maxVal; |
||||||
|
src.row(0) -= maxVal; |
||||||
|
|
||||||
|
exp(src, dst); |
||||||
|
|
||||||
|
Mat sumVal = dst.row(0) + dst.row(1); |
||||||
|
dst.row(0) = dst.row(0) / sumVal; |
||||||
|
dst.row(1) = dst.row(1) / sumVal; |
||||||
|
} |
||||||
|
|
||||||
|
// Scalar padded-size measure: sqrt((w + p) * (h + p)) with p = (w + h) / 2.
static float sizeCal(float w, float h)
{
    const float margin = 0.5f * (w + h);
    const float paddedArea = (w + margin) * (h + margin);
    return std::sqrt(paddedArea);
}
||||||
|
|
||||||
|
// Element-wise matrix version of the padded-size measure:
// sqrt((w + p) .* (h + p)) with p = (w + h) / 2.
// Returns a newly computed matrix; `w` and `h` are only read.
static Mat sizeCal(const Mat& w, const Mat& h)
{
    const Mat margin = (w + h) * 0.5;

    Mat result = (w + margin).mul((h + margin));
    cv::sqrt(result, result);  // in-place square root of the padded area
    return result;
}
||||||
|
|
||||||
|
// Similar python code: r = np.maximum(r, 1. / r) # r is matrix
|
||||||
|
static void elementReciprocalMax(Mat& srcDst) |
||||||
|
{ |
||||||
|
size_t totalV = srcDst.total(); |
||||||
|
float* ptr = srcDst.ptr<float>(0); |
||||||
|
for (size_t i = 0; i < totalV; i++) |
||||||
|
{ |
||||||
|
float val = *(ptr + i); |
||||||
|
*(ptr + i) = std::max(val, 1.0f/val); |
||||||
|
} |
||||||
|
} |
||||||
|
|
||||||
|
class TrackerNanoImpl : public TrackerNano |
||||||
|
{ |
||||||
|
public: |
||||||
|
TrackerNanoImpl(const TrackerNano::Params& parameters) |
||||||
|
: params(parameters) |
||||||
|
{ |
||||||
|
backbone = dnn::readNet(params.backbone); |
||||||
|
neckhead = dnn::readNet(params.neckhead); |
||||||
|
|
||||||
|
CV_Assert(!backbone.empty()); |
||||||
|
CV_Assert(!neckhead.empty()); |
||||||
|
|
||||||
|
backbone.setPreferableBackend(params.backend); |
||||||
|
backbone.setPreferableTarget(params.target); |
||||||
|
neckhead.setPreferableBackend(params.backend); |
||||||
|
neckhead.setPreferableTarget(params.target); |
||||||
|
} |
||||||
|
|
||||||
|
void init(InputArray image, const Rect& boundingBox) CV_OVERRIDE; |
||||||
|
bool update(InputArray image, Rect& boundingBox) CV_OVERRIDE; |
||||||
|
float getTrackingScore() CV_OVERRIDE; |
||||||
|
|
||||||
|
// Save the target bounding box for each frame.
|
||||||
|
std::vector<float> targetSz = {0, 0}; // H and W of bounding box
|
||||||
|
std::vector<float> targetPos = {0, 0}; // center point of bounding box (x, y)
|
||||||
|
float tracking_score; |
||||||
|
|
||||||
|
TrackerNano::Params params; |
||||||
|
|
||||||
|
struct trackerConfig |
||||||
|
{ |
||||||
|
float windowInfluence = 0.455f; |
||||||
|
float lr = 0.37f; |
||||||
|
float contextAmount = 0.5; |
||||||
|
bool swapRB = true; |
||||||
|
int totalStride = 16; |
||||||
|
float penaltyK = 0.055f; |
||||||
|
}; |
||||||
|
|
||||||
|
protected: |
||||||
|
const int exemplarSize = 127; |
||||||
|
const int instanceSize = 255; |
||||||
|
|
||||||
|
trackerConfig trackState; |
||||||
|
int scoreSize; |
||||||
|
Size imgSize = {0, 0}; |
||||||
|
Mat hanningWindow; |
||||||
|
Mat grid2searchX, grid2searchY; |
||||||
|
|
||||||
|
dnn::Net backbone, neckhead; |
||||||
|
Mat image; |
||||||
|
|
||||||
|
void getSubwindow(Mat& dstCrop, Mat& srcImg, int originalSz, int resizeSz); |
||||||
|
void generateGrids(); |
||||||
|
}; |
||||||
|
|
||||||
|
void TrackerNanoImpl::generateGrids() |
||||||
|
{ |
||||||
|
int sz = scoreSize; |
||||||
|
const int sz2 = sz / 2; |
||||||
|
|
||||||
|
std::vector<float> x1Vec(sz, 0); |
||||||
|
|
||||||
|
for (int i = 0; i < sz; i++) |
||||||
|
{ |
||||||
|
x1Vec[i] = i - sz2; |
||||||
|
} |
||||||
|
|
||||||
|
Mat x1M(1, sz, CV_32FC1, x1Vec.data()); |
||||||
|
|
||||||
|
cv::repeat(x1M, sz, 1, grid2searchX); |
||||||
|
cv::repeat(x1M.t(), 1, sz, grid2searchY); |
||||||
|
|
||||||
|
grid2searchX *= trackState.totalStride; |
||||||
|
grid2searchY *= trackState.totalStride; |
||||||
|
|
||||||
|
grid2searchX += instanceSize/2; |
||||||
|
grid2searchY += instanceSize/2; |
||||||
|
} |
||||||
|
|
||||||
|
// Initializes the tracker on the first frame.
//
// image_:       first frame; a private copy is stored.
// boundingBox_: target box in image coordinates (top-left x/y, width, height).
//
// Raises a cv::Exception (via CV_Assert) when the frame is empty or the box has a
// non-positive width/height; previously such input only failed later, inside the
// crop/resize step, with an unrelated message.
void TrackerNanoImpl::init(InputArray image_, const Rect &boundingBox_)
{
    Mat frame = image_.getMat();
    CV_Assert(!frame.empty());
    CV_Assert(boundingBox_.width > 0 && boundingBox_.height > 0);

    // Reset the configuration first, then derive the score-map size from it.
    trackState = trackerConfig();
    scoreSize = (instanceSize - exemplarSize) / trackState.totalStride + 8;
    image = frame.clone();

    // Convert the rectangle from top-left corner to center representation.
    targetPos[0] = float(boundingBox_.x) + float(boundingBox_.width) * 0.5f;
    targetPos[1] = float(boundingBox_.y) + float(boundingBox_.height) * 0.5f;

    targetSz[0] = float(boundingBox_.width);
    targetSz[1] = float(boundingBox_.height);

    imgSize = image.size();

    // Extend the bounding box by the context margin.
    float sumSz = targetSz[0] + targetSz[1];
    float wExtent = targetSz[0] + trackState.contextAmount * (sumSz);
    float hExtent = targetSz[1] + trackState.contextAmount * (sumSz);
    int sz = int(cv::sqrt(wExtent * hExtent));

    // Crop the exemplar around the target and resize it to exemplarSize.
    Mat crop;
    getSubwindow(crop, image, sz, exemplarSize);
    Mat blob = dnn::blobFromImage(crop, 1.0, Size(), Scalar(), trackState.swapRB);

    backbone.setInput(blob);
    Mat out = backbone.forward(); // Feature extraction.
    // The exemplar features stay bound to "input1"; update() only sets "input2".
    neckhead.setInput(out, "input1");

    createHanningWindow(hanningWindow, Size(scoreSize, scoreSize), CV_32F);
    generateGrids();
}
||||||
|
|
||||||
|
// Extracts a square window of side `originalSz` positioned around targetPos and
// resizes it to resizeSz x resizeSz.
//
// dstCrop:    output crop (resizeSz x resizeSz).
// srcImg:     frame to crop from (only read).
// originalSz: side length of the window in frame pixels.
// resizeSz:   side length of the output crop.
//
// Parts of the window that fall outside the frame are filled with the per-channel
// mean of the frame.
void TrackerNanoImpl::getSubwindow(Mat& dstCrop, Mat& srcImg, int originalSz, int resizeSz)
{
    Size imgSz = srcImg.size();
    int c = (originalSz + 1) / 2;

    // Window bounds (inclusive) in frame coordinates; float -> int truncates.
    int context_xmin = targetPos[0] - c;
    int context_xmax = context_xmin + originalSz - 1;
    int context_ymin = targetPos[1] - c;
    int context_ymax = context_ymin + originalSz - 1;

    // How far the window sticks out on each side of the frame.
    int left_pad = std::max(0, -context_xmin);
    int top_pad = std::max(0, -context_ymin);
    int right_pad = std::max(0, context_xmax - imgSz.width + 1);
    int bottom_pad = std::max(0, context_ymax - imgSz.height + 1);

    // Shift the window into the coordinate frame of the (possibly) padded image.
    context_xmin += left_pad;
    context_xmax += left_pad;
    context_ymin += top_pad;
    context_ymax += top_pad;

    const cv::Rect cropRect(context_xmin, context_ymin,
                            context_xmax - context_xmin + 1, context_ymax - context_ymin + 1);

    Mat cropImg;
    if (left_pad == 0 && top_pad == 0 && right_pad == 0 && bottom_pad == 0)
    {
        // Crop image without padding.
        cropImg = srcImg(cropRect);
    }
    else // Crop image with padding, and the padding value is the channel mean.
    {
        // The full-image mean is only needed here, so it is not computed on the
        // (common) no-padding path.
        Scalar avgChans = mean(srcImg);

        cv::Mat tmpMat;
        cv::copyMakeBorder(srcImg, tmpMat, top_pad, bottom_pad, left_pad, right_pad, cv::BORDER_CONSTANT, avgChans);
        cropImg = tmpMat(cropRect);
    }
    resize(cropImg, dstCrop, Size(resizeSz, resizeSz));
}
||||||
|
|
||||||
|
// Tracks the target in a new frame.
//
// image_:         current frame; a private copy is stored.
// boundingBoxRes: receives the estimated target box (top-left x/y, width, height).
// Returns true unconditionally; use getTrackingScore() to judge the result.
//
// Steps: crop a search window around the previous position, run both networks,
// penalize candidates whose scale/aspect ratio differ from the previous target,
// blend with the Hanning window, pick the best cell and smooth the size update.
bool TrackerNanoImpl::update(InputArray image_, Rect &boundingBoxRes)
{
    image = image_.getMat().clone();

    // Kept in float (as init() does); truncating the sum to int skews the context size.
    const float targetSzSum = targetSz[0] + targetSz[1];

    float wc = targetSz[0] + trackState.contextAmount * targetSzSum;
    float hc = targetSz[1] + trackState.contextAmount * targetSzSum;
    float sz = cv::sqrt(wc * hc);
    float scale_z = exemplarSize / sz;
    // Use a floating-point ratio: instanceSize / exemplarSize on ints is 255 / 127 == 2,
    // which makes the search-crop scale disagree with scale_z.
    float sx = sz * (float(instanceSize) / float(exemplarSize));

    // From here until the division below, targetSz is expressed in search-crop pixels.
    targetSz[0] *= scale_z;
    targetSz[1] *= scale_z;

    Mat crop;
    getSubwindow(crop, image, int(sx), instanceSize);

    Mat blob = dnn::blobFromImage(crop, 1.0, Size(), Scalar(), trackState.swapRB);
    backbone.setInput(blob);
    Mat xf = backbone.forward();
    neckhead.setInput(xf, "input2");
    std::vector<String> outputName = {"output1", "output2"};
    std::vector<Mat> outs;
    neckhead.forward(outs, outputName);

    CV_Assert(outs.size() == 2);

    Mat clsScore = outs[0]; // 1x2x16x16
    Mat bboxPred = outs[1]; // 1x4x16x16

    clsScore = clsScore.reshape(0, {2, scoreSize, scoreSize});
    bboxPred = bboxPred.reshape(0, {4, scoreSize, scoreSize});

    Mat scoreSoftmax; // 2x16x16
    softmax(clsScore, scoreSoftmax);

    // Plane 1 of the softmax output is used as the per-cell score.
    Mat score = scoreSoftmax.row(1);
    score = score.reshape(0, {scoreSize, scoreSize});

    // Box corners per cell: the four planes are offsets from the grid point.
    Mat predX1 = grid2searchX - bboxPred.row(0).reshape(0, {scoreSize, scoreSize});
    Mat predY1 = grid2searchY - bboxPred.row(1).reshape(0, {scoreSize, scoreSize});
    Mat predX2 = grid2searchX + bboxPred.row(2).reshape(0, {scoreSize, scoreSize});
    Mat predY2 = grid2searchY + bboxPred.row(3).reshape(0, {scoreSize, scoreSize});

    // size penalty
    // scale penalty: predicted size relative to the previous target SIZE (already
    // scaled by scale_z above). Dividing by the target position would be meaningless.
    Mat sc = sizeCal(predX2 - predX1, predY2 - predY1)/sizeCal(targetSz[0], targetSz[1]);
    elementReciprocalMax(sc);

    // ratio penalty: previous aspect ratio relative to the predicted one.
    float ratioVal = targetSz[0] / targetSz[1];

    Mat ratioM(scoreSize, scoreSize, CV_32FC1, Scalar::all(ratioVal));
    Mat rc = ratioM / ((predX2 - predX1) / (predY2 - predY1));
    elementReciprocalMax(rc);

    // penalty = exp(-(rc * sc - 1) * penaltyK)
    Mat penalty;
    exp(((rc.mul(sc) - 1) * trackState.penaltyK * (-1)), penalty);
    Mat pscore = penalty.mul(score);

    // Window penalty
    pscore = pscore * (1.0 - trackState.windowInfluence) + hanningWindow * trackState.windowInfluence;

    // get Max
    int bestID[2] = { 0, 0 };
    minMaxIdx(pscore, 0, 0, 0, bestID);

    tracking_score = pscore.at<float>(bestID);

    float x1Val = predX1.at<float>(bestID);
    float x2Val = predX2.at<float>(bestID);
    float y1Val = predY1.at<float>(bestID);
    float y2Val = predY2.at<float>(bestID);

    // Best box: center in search-crop pixels, size converted back to frame pixels.
    float predXs = (x1Val + x2Val)/2;
    float predYs = (y1Val + y2Val)/2;
    float predW = (x2Val - x1Val)/scale_z;
    float predH = (y2Val - y1Val)/scale_z;

    // Displacement of the center relative to the crop center, in frame pixels.
    float diffXs = (predXs - instanceSize / 2) / scale_z;
    float diffYs = (predYs - instanceSize / 2) / scale_z;

    // Back to frame pixels.
    targetSz[0] /= scale_z;
    targetSz[1] /= scale_z;

    // Size update rate grows with the confidence of the selected cell.
    float lr = penalty.at<float>(bestID) * score.at<float>(bestID) * trackState.lr;

    float resX = targetPos[0] + diffXs;
    float resY = targetPos[1] + diffYs;
    float resW = predW * lr + (1 - lr) * targetSz[0];
    float resH = predH * lr + (1 - lr) * targetSz[1];

    // Keep the center inside the frame and the size within [10, frame size].
    resX = std::max(0.f, std::min((float)imgSize.width, resX));
    resY = std::max(0.f, std::min((float)imgSize.height, resY));
    resW = std::max(10.f, std::min((float)imgSize.width, resW));
    resH = std::max(10.f, std::min((float)imgSize.height, resH));

    targetPos[0] = resX;
    targetPos[1] = resY;
    targetSz[0] = resW;
    targetSz[1] = resH;

    // convert center to Rect.
    boundingBoxRes = { int(resX - resW/2), int(resY - resH/2), int(resW), int(resH)};
    return true;
}
||||||
|
|
||||||
|
float TrackerNanoImpl::getTrackingScore() |
||||||
|
{ |
||||||
|
return tracking_score; |
||||||
|
} |
||||||
|
|
||||||
|
// Factory: builds the DNN-backed implementation from `parameters`.
Ptr<TrackerNano> TrackerNano::create(const TrackerNano::Params& parameters)
{
    Ptr<TrackerNano> tracker = makePtr<TrackerNanoImpl>(parameters);
    return tracker;
}
||||||
|
|
||||||
|
#else // OPENCV_HAVE_DNN
|
||||||
|
// Factory stub used when the build has no opencv_dnn: always raises
// StsNotImplemented.
Ptr<TrackerNano> TrackerNano::create(const TrackerNano::Params& parameters)
{
    CV_UNUSED(parameters);
    CV_Error(Error::StsNotImplemented,
             "to use NanoTrack, the tracking module needs to be built with opencv_dnn !");
}
||||||
|
#endif // OPENCV_HAVE_DNN
|
||||||
|
} |
@ -0,0 +1,183 @@ |
|||||||
|
// NanoTrack
|
||||||
|
// Link to original inference code: https://github.com/HonglinChu/NanoTrack
|
||||||
|
// Link to original training repo: https://github.com/HonglinChu/SiamTrackers/tree/master/NanoTrack
|
||||||
|
// backBone model: https://github.com/HonglinChu/SiamTrackers/blob/master/NanoTrack/models/onnx/nanotrack_backbone_sim.onnx
|
||||||
|
// headNeck model: https://github.com/HonglinChu/SiamTrackers/blob/master/NanoTrack/models/onnx/nanotrack_head_sim.onnx
|
||||||
|
|
||||||
|
#include <iostream> |
||||||
|
#include <cmath> |
||||||
|
|
||||||
|
#include <opencv2/dnn.hpp> |
||||||
|
#include <opencv2/imgproc.hpp> |
||||||
|
#include <opencv2/highgui.hpp> |
||||||
|
#include <opencv2/video.hpp> |
||||||
|
|
||||||
|
using namespace cv; |
||||||
|
using namespace cv::dnn; |
||||||
|
|
||||||
|
// Command-line option table for cv::CommandLineParser.
// Each "{ name [alias] | default | help }" group declares one option; adjacent
// literals are concatenated, so nothing may appear between the groups.
const char *keys =
        "{ help h | | Print help message }"
        "{ input i | | Full path to input video folder, the specific camera index. (empty for camera 0) }"
        "{ backbone | backbone.onnx | Path to onnx model of backbone.onnx}"
        "{ headneck | headneck.onnx | Path to onnx model of headneck.onnx }"
        "{ backend | 0 | Choose one of computation backends: "
                        "0: automatically (by default), "
                        "1: Halide language (http://halide-lang.org/), "
                        "2: Intel's Deep Learning Inference Engine (https://software.intel.com/openvino-toolkit), "
                        "3: OpenCV implementation, "
                        "4: VKCOM, "
                        "5: CUDA }"  // stray ',' after the closing brace removed
        "{ target | 0 | Choose one of target computation devices: "
                        "0: CPU target (by default), "
                        "1: OpenCL, "
                        "2: OpenCL fp16 (half-float precision), "
                        "3: VPU, "
                        "4: Vulkan, "
                        "6: CUDA, "
                        "7: CUDA fp16 (half-float preprocess) }"
;
||||||
|
|
||||||
|
static |
||||||
|
int run(int argc, char** argv) |
||||||
|
{ |
||||||
|
// Parse command line arguments.
|
||||||
|
CommandLineParser parser(argc, argv, keys); |
||||||
|
|
||||||
|
if (parser.has("help")) |
||||||
|
{ |
||||||
|
parser.printMessage(); |
||||||
|
return 0; |
||||||
|
} |
||||||
|
|
||||||
|
std::string inputName = parser.get<String>("input"); |
||||||
|
std::string backbone = parser.get<String>("backbone"); |
||||||
|
std::string headneck = parser.get<String>("headneck"); |
||||||
|
int backend = parser.get<int>("backend"); |
||||||
|
int target = parser.get<int>("target"); |
||||||
|
|
||||||
|
Ptr<TrackerNano> tracker; |
||||||
|
try |
||||||
|
{ |
||||||
|
TrackerNano::Params params; |
||||||
|
params.backbone = samples::findFile(backbone); |
||||||
|
params.neckhead = samples::findFile(headneck); |
||||||
|
params.backend = backend; |
||||||
|
params.target = target; |
||||||
|
tracker = TrackerNano::create(params); |
||||||
|
} |
||||||
|
catch (const cv::Exception& ee) |
||||||
|
{ |
||||||
|
std::cerr << "Exception: " << ee.what() << std::endl; |
||||||
|
std::cout << "Can't load the network by using the following files:" << std::endl; |
||||||
|
std::cout << "backbone : " << backbone << std::endl; |
||||||
|
std::cout << "headneck : " << headneck << std::endl; |
||||||
|
return 2; |
||||||
|
} |
||||||
|
|
||||||
|
const std::string winName = "NanoTrack"; |
||||||
|
namedWindow(winName, WINDOW_AUTOSIZE); |
||||||
|
|
||||||
|
// Open a video file or an image file or a camera stream.
|
||||||
|
VideoCapture cap; |
||||||
|
|
||||||
|
if (inputName.empty() || (isdigit(inputName[0]) && inputName.size() == 1)) |
||||||
|
{ |
||||||
|
int c = inputName.empty() ? 0 : inputName[0] - '0'; |
||||||
|
std::cout << "Trying to open camera #" << c << " ..." << std::endl; |
||||||
|
if (!cap.open(c)) |
||||||
|
{ |
||||||
|
std::cout << "Capture from camera #" << c << " didn't work. Specify -i=<video> parameter to read from video file" << std::endl; |
||||||
|
return 2; |
||||||
|
} |
||||||
|
} |
||||||
|
else if (inputName.size()) |
||||||
|
{ |
||||||
|
inputName = samples::findFileOrKeep(inputName); |
||||||
|
if (!cap.open(inputName)) |
||||||
|
{ |
||||||
|
std::cout << "Could not open: " << inputName << std::endl; |
||||||
|
return 2; |
||||||
|
} |
||||||
|
} |
||||||
|
|
||||||
|
// Read the first image.
|
||||||
|
Mat image; |
||||||
|
cap >> image; |
||||||
|
if (image.empty()) |
||||||
|
{ |
||||||
|
std::cerr << "Can't capture frame!" << std::endl; |
||||||
|
return 2; |
||||||
|
} |
||||||
|
|
||||||
|
Mat image_select = image.clone(); |
||||||
|
putText(image_select, "Select initial bounding box you want to track.", Point(0, 15), FONT_HERSHEY_SIMPLEX, 0.5, Scalar(0, 255, 0)); |
||||||
|
putText(image_select, "And Press the ENTER key.", Point(0, 35), FONT_HERSHEY_SIMPLEX, 0.5, Scalar(0, 255, 0)); |
||||||
|
|
||||||
|
Rect selectRect = selectROI(winName, image_select); |
||||||
|
std::cout << "ROI=" << selectRect << std::endl; |
||||||
|
|
||||||
|
tracker->init(image, selectRect); |
||||||
|
|
||||||
|
TickMeter tickMeter; |
||||||
|
|
||||||
|
for (int count = 0; ; ++count) |
||||||
|
{ |
||||||
|
cap >> image; |
||||||
|
if (image.empty()) |
||||||
|
{ |
||||||
|
std::cerr << "Can't capture frame " << count << ". End of video stream?" << std::endl; |
||||||
|
break; |
||||||
|
} |
||||||
|
|
||||||
|
Rect rect; |
||||||
|
|
||||||
|
tickMeter.start(); |
||||||
|
bool ok = tracker->update(image, rect); |
||||||
|
tickMeter.stop(); |
||||||
|
|
||||||
|
float score = tracker->getTrackingScore(); |
||||||
|
|
||||||
|
std::cout << "frame " << count << |
||||||
|
": predicted score=" << score << |
||||||
|
" rect=" << rect << |
||||||
|
" time=" << tickMeter.getTimeMilli() << "ms" << |
||||||
|
std::endl; |
||||||
|
|
||||||
|
Mat render_image = image.clone(); |
||||||
|
|
||||||
|
if (ok) |
||||||
|
{ |
||||||
|
rectangle(render_image, rect, Scalar(0, 255, 0), 2); |
||||||
|
|
||||||
|
std::string timeLabel = format("Inference time: %.2f ms", tickMeter.getTimeMilli()); |
||||||
|
std::string scoreLabel = format("Score: %f", score); |
||||||
|
putText(render_image, timeLabel, Point(0, 15), FONT_HERSHEY_SIMPLEX, 0.5, Scalar(0, 255, 0)); |
||||||
|
putText(render_image, scoreLabel, Point(0, 35), FONT_HERSHEY_SIMPLEX, 0.5, Scalar(0, 255, 0)); |
||||||
|
} |
||||||
|
|
||||||
|
imshow(winName, render_image); |
||||||
|
|
||||||
|
tickMeter.reset(); |
||||||
|
|
||||||
|
int c = waitKey(1); |
||||||
|
if (c == 27 /*ESC*/) |
||||||
|
break; |
||||||
|
} |
||||||
|
|
||||||
|
std::cout << "Exit" << std::endl; |
||||||
|
return 0; |
||||||
|
} |
||||||
|
|
||||||
|
|
||||||
|
int main(int argc, char **argv) |
||||||
|
{ |
||||||
|
try |
||||||
|
{ |
||||||
|
return run(argc, argv); |
||||||
|
} |
||||||
|
catch (const std::exception& e) |
||||||
|
{ |
||||||
|
std::cerr << "FATAL: C++ exception: " << e.what() << std::endl; |
||||||
|
return 1; |
||||||
|
} |
||||||
|
} |
Loading…
Reference in new issue