diff --git a/modules/tracking/include/opencv2/tracking/tldDataset.hpp b/modules/tracking/include/opencv2/tracking/tldDataset.hpp index 6d21a7d48..d31919ab5 100644 --- a/modules/tracking/include/opencv2/tracking/tldDataset.hpp +++ b/modules/tracking/include/opencv2/tracking/tldDataset.hpp @@ -48,7 +48,7 @@ namespace cv { namespace tld { - CV_EXPORTS cv::Rect2d tld_InitDataset(int datasetInd, const char* rootPath = "TLD_dataset"); + CV_EXPORTS cv::Rect2d tld_InitDataset(int videoInd, const char* rootPath = "TLD_dataset", int datasetInd = 0); CV_EXPORTS cv::Mat tld_getNextDatasetFrame(); } } diff --git a/modules/tracking/include/opencv2/tracking/tracker.hpp b/modules/tracking/include/opencv2/tracking/tracker.hpp index 09ef94f66..04660d6a6 100644 --- a/modules/tracking/include/opencv2/tracking/tracker.hpp +++ b/modules/tracking/include/opencv2/tracking/tracker.hpp @@ -49,6 +49,7 @@ #include "onlineBoosting.hpp" #include + #define BOILERPLATE_CODE(name,classname) \ static Ptr createTracker(const classname::Params ¶meters=classname::Params());\ virtual ~classname(){}; @@ -564,6 +565,11 @@ class CV_EXPORTS_W Tracker : public virtual Algorithm virtual void read( const FileNode& fn )=0; virtual void write( FileStorage& fs ) const=0; + Ptr getModel() + { + return model; + } + protected: virtual bool initImpl( const Mat& image, const Rect2d& boundingBox ) = 0; @@ -576,6 +582,7 @@ class CV_EXPORTS_W Tracker : public virtual Algorithm Ptr model; }; + /************************************ Specific TrackerStateEstimator Classes ************************************/ /** @brief TrackerStateEstimator based on Boosting @@ -1243,6 +1250,85 @@ class CV_EXPORTS_W TrackerKCF : public Tracker BOILERPLATE_CODE("KCF",TrackerKCF); }; +/************************************ Multi-Tracker Classes ************************************/ + +/** @brief Base abstract class for the long-term Multi Object Trackers: + +@sa Tracker, MultiTrackerTLD +*/ +class CV_EXPORTS_W MultiTracker +{ +public: + /** @brief Constructor for Multitracker + */ + MultiTracker() + { + targetNum = 0; + } + + /** @brief Add a new target to a tracking-list and initialize the tracker with a know bounding box that surrounding the target + @param image The initial frame + @param boundingBox The initial boundig box of target + @param tracker_algorithm_name Multi-tracker algorithm name + + @return True if new target initialization went succesfully, false otherwise + */ + bool addTarget(const Mat& image, const Rect2d& boundingBox, char* tracker_algorithm_name); + + /** @brief Update all trackers from the tracking-list, find a new most likely bounding boxes for the targets + @param image The current frame + + @return True means that all targets were located and false means that tracker couldn't locate one of the targets in + current frame. Note, that latter *does not* imply that tracker has failed, maybe target is indeed + missing from the frame (say, out of sight) + */ + bool update(const Mat& image); + + /** @brief Current number of targets in tracking-list + */ + int targetNum; + + /** @brief Trackers list for Multi-Object-Tracker + */ + std::vector > trackers; + + /** @brief Bounding Boxes list for Multi-Object-Tracker + */ + std::vector boundingBoxes; + /** @brief List of randomly generated colors for bounding boxes display + */ + std::vector colors; +}; + +/** @brief Multi Object Tracker for TLD. TLD is a novel tracking framework that explicitly decomposes +the long-term tracking task into tracking, learning and detection. + +The tracker follows the object from frame to frame. The detector localizes all appearances that +have been observed so far and corrects the tracker if necessary. The learning estimates detector’s +errors and updates it to avoid these errors in the future. The implementation is based on @cite TLD . + +The Median Flow algorithm (see cv::TrackerMedianFlow) was chosen as a tracking component in this +implementation, following authors. Tracker is supposed to be able to handle rapid motions, partial +occlusions, object absence etc. + +@sa Tracker, MultiTracker, TrackerTLD +*/ +class CV_EXPORTS_W MultiTrackerTLD : public MultiTracker +{ +public: + /** @brief Update all trackers from the tracking-list, find a new most likely bounding boxes for the targets by + optimized update method using some techniques to speedup calculations specifically for MO TLD. The only limitation + is that all target bounding boxes should have approximately same aspect ratios. Speed boost is around 20% + + @param image The current frame. + + @return True means that all targets were located and false means that tracker couldn't locate one of the targets in + current frame. Note, that latter *does not* imply that tracker has failed, maybe target is indeed + missing from the frame (say, out of sight) + */ + bool update_opt(const Mat& image); +}; + //! @} } /* namespace cv */ diff --git a/modules/tracking/samples/multiTracker_test.cpp b/modules/tracking/samples/multiTracker_test.cpp new file mode 100644 index 000000000..68cefc7e3 --- /dev/null +++ b/modules/tracking/samples/multiTracker_test.cpp @@ -0,0 +1,226 @@ +/*M/////////////////////////////////////////////////////////////////////////////////////// +// +// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING. +// +// By downloading, copying, installing or using the software you agree to this license. +// If you do not agree to this license, do not download, install, +// copy or use the software. +// +// +// License Agreement +// For Open Source Computer Vision Library +// +// Copyright (C) 2013, OpenCV Foundation, all rights reserved. +// Third party copyrights are property of their respective owners. +// +// Redistribution and use in source and binary forms, with or without modification, +// are permitted provided that the following conditions are met: +// +// * Redistribution's of source code must retain the above copyright notice, +// this list of conditions and the following disclaimer. +// +// * Redistribution's in binary form must reproduce the above copyright notice, +// this list of conditions and the following disclaimer in the documentation +// and/or other materials provided with the distribution. +// +// * The name of the copyright holders may not be used to endorse or promote products +// derived from this software without specific prior written permission. +// +// This software is provided by the copyright holders and contributors "as is" and +// any express or implied warranties, including, but not limited to, the implied +// warranties of merchantability and fitness for a particular purpose are disclaimed. +// In no event shall the Intel Corporation or contributors be liable for any direct, +// indirect, incidental, special, exemplary, or consequential damages +// (including, but not limited to, procurement of substitute goods or services; +// loss of use, data, or profits; or business interruption) however caused +// and on any theory of liability, whether in contract, strict liability, +// or tort (including negligence or otherwise) arising in any way out of +// the use of this software, even if advised of the possibility of such damage. +// +//M*/ + +#include +#include +#include +#include +#include + +using namespace std; +using namespace cv; + +#define NUM_TEST_FRAMES 100 +#define TEST_VIDEO_INDEX 15 //TLD Dataset Video Index from 1-10 for TLD and 1-60 for VOT +//#define RECORD_VIDEO_FLG + +static Mat image; +static bool paused; +static bool selectObject = false; +static bool startSelection = false; +Rect2d boundingBox; + +static void onMouse(int event, int x, int y, int, void*) +{ + if (!selectObject) + { + switch (event) + { + case EVENT_LBUTTONDOWN: + //set origin of the bounding box + startSelection = true; + boundingBox.x = x; + boundingBox.y = y; + boundingBox.width = boundingBox.height = 0; + break; + case EVENT_LBUTTONUP: + //sei with and height of the bounding box + boundingBox.width = std::abs(x - boundingBox.x); + boundingBox.height = std::abs(y - boundingBox.y); + paused = false; + selectObject = true; + break; + case EVENT_MOUSEMOVE: + + if (startSelection && !selectObject) + { + //draw the bounding box + Mat currentFrame; + image.copyTo(currentFrame); + rectangle(currentFrame, Point((int)boundingBox.x, (int)boundingBox.y), Point(x, y), Scalar(255, 0, 0), 2, 1); + imshow("Tracking API", currentFrame); + } + break; + } + } +} + +int main() +{ + // + // "MIL", "BOOSTING", "MEDIANFLOW", "TLD" + // + char* tracker_algorithm_name = (char*)"TLD"; + + Mat frame; + paused = false; + namedWindow("Tracking API", 0); + setMouseCallback("Tracking API", onMouse, 0); + + MultiTrackerTLD mt; + + //Get the first frame + ////Open the capture + // VideoCapture cap(0); + // if( !cap.isOpened() ) + // { + // cout << "Video stream error"; + // return; + // } + //cap >> frame; + + //From TLD dataset + selectObject = true; + Rect2d boundingBox1 = tld::tld_InitDataset(TEST_VIDEO_INDEX, "D:/opencv/VOT 2015", 1); + Rect2d boundingBox2(470, 490, 50, 120); + + frame = tld::tld_getNextDatasetFrame(); + frame.copyTo(image); + + // Setup output video +#ifdef RECORD_VIDEO_FLG + String outputFilename = "test.avi"; + VideoWriter outputVideo; + outputVideo.open(outputFilename, -1, 15, Size(image.cols, image.rows)); + + if (!outputVideo.isOpened()) + { + std::cout << "!!! Output video could not be opened" << std::endl; + getchar(); + return 0; + + } +#endif + + rectangle(image, boundingBox, Scalar(255, 0, 0), 2, 1); + imshow("Tracking API", image); + + + bool initialized = false; + int frameCounter = 0; + + //Time measurment + int64 e3 = getTickCount(); + + for (;;) + { + //Time measurment + int64 e1 = getTickCount(); + //Frame num + frameCounter++; + if (frameCounter == NUM_TEST_FRAMES) break; + + char c = (char)waitKey(2); + if (c == 'q' || c == 27) + break; + if (c == 'p') + paused = !paused; + + if (!paused) + { + //cap >> frame; + frame = tld::tld_getNextDatasetFrame(); + if (frame.empty()) + { + break; + } + frame.copyTo(image); + + if (selectObject) + { + if (!initialized) + { + //initializes the tracker + mt.addTarget(frame, boundingBox1, tracker_algorithm_name); + rectangle(frame, boundingBox1, mt.colors[0], 2, 1); + + + mt.addTarget(frame, boundingBox2, tracker_algorithm_name); + rectangle(frame, boundingBox2, mt.colors[1], 2, 1); + initialized = true; + } + else + { + //updates the tracker + if (mt.update(frame)) + { + for (int i = 0; i < mt.targetNum; i++) + rectangle(frame, mt.boundingBoxes[i], mt.colors[i], 2, 1); + } + } + } + imshow("Tracking API", frame); + +#ifdef RECORD_VIDEO_FLG + outputVideo << frame; +#endif + + + //Time measurment + int64 e2 = getTickCount(); + double t1 = (e2 - e1) / getTickFrequency(); + cout << frameCounter << "\tframe : " << t1 * 1000.0 << "ms" << endl; + + //waitKey(0); + } + } + + //Time measurment + int64 e4 = getTickCount(); + double t2 = (e4 - e3) / getTickFrequency(); + cout << "Average Time for Frame: " << t2 * 1000.0 / frameCounter << "ms" << endl; + cout << "Average FPS: " << 1.0 / t2*frameCounter << endl; + + + waitKey(0); + + return 0; +} \ No newline at end of file diff --git a/modules/tracking/src/multiTracker.cpp b/modules/tracking/src/multiTracker.cpp new file mode 100644 index 000000000..3053f2174 --- /dev/null +++ b/modules/tracking/src/multiTracker.cpp @@ -0,0 +1,648 @@ +/*M/////////////////////////////////////////////////////////////////////////////////////// +// +// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING. +// +// By downloading, copying, installing or using the software you agree to this license. +// If you do not agree to this license, do not download, install, +// copy or use the software. +// +// +// License Agreement +// For Open Source Computer Vision Library +// +// Copyright (C) 2013, OpenCV Foundation, all rights reserved. +// Third party copyrights are property of their respective owners. +// +// Redistribution and use in source and binary forms, with or without modification, +// are permitted provided that the following conditions are met: +// +// * Redistribution's of source code must retain the above copyright notice, +// this list of conditions and the following disclaimer. +// +// * Redistribution's in binary form must reproduce the above copyright notice, +// this list of conditions and the following disclaimer in the documentation +// and/or other materials provided with the distribution. +// +// * The name of the copyright holders may not be used to endorse or promote products +// derived from this software without specific prior written permission. +// +// This software is provided by the copyright holders and contributors "as is" and +// any express or implied warranties, including, but not limited to, the implied +// warranties of merchantability and fitness for a particular purpose are disclaimed. +// In no event shall the Intel Corporation or contributors be liable for any direct, +// indirect, incidental, special, exemplary, or consequential damages +// (including, but not limited to, procurement of substitute goods or services; +// loss of use, data, or profits; or business interruption) however caused +// and on any theory of liability, whether in contract, strict liability, +// or tort (including negligence or otherwise) arising in any way out of +// the use of this software, even if advised of the possibility of such damage. +// +//M*/ + +#include "multiTracker.hpp" + +namespace cv +{ + //Multitracker + bool MultiTracker::addTarget(const Mat& image, const Rect2d& boundingBox, char* tracker_algorithm_name) + { + Ptr tracker = Tracker::create(tracker_algorithm_name); + if (tracker == NULL) + return false; + + if (!tracker->init(image, boundingBox)) + return false; + + //Add BB of target + boundingBoxes.push_back(boundingBox); + + //Add Tracker to stack + trackers.push_back(tracker); + + //Assign a random color to target + if (targetNum == 1) + colors.push_back(Scalar(0, 0, 255)); + else + colors.push_back(Scalar(rand() % 256, rand() % 256, rand() % 256)); + + + + //Target counter + targetNum++; + + return true; + } + + bool MultiTracker::update(const Mat& image) + { + for (int i = 0; i < (int)trackers.size(); i++) + if (!trackers[i]->update(image, boundingBoxes[i])) + return false; + + return true; + } + + //Multitracker TLD + /*Optimized update method for TLD Multitracker */ + bool MultiTrackerTLD::update_opt(const Mat& image) + { + //Get parameters from first object + //TLD Tracker data extraction + Tracker* trackerPtr = trackers[0]; + tld::TrackerTLDImpl* tracker = static_cast(trackerPtr); + //TLD Model Extraction + tld::TrackerTLDModel* tldModel = ((tld::TrackerTLDModel*)static_cast(tracker->getModel())); + Ptr data = tracker->data; + double scale = data->getScale(); + + Mat image_gray, image_blurred, imageForDetector; + cvtColor(image, image_gray, COLOR_BGR2GRAY); + + if (scale > 1.0) + resize(image_gray, imageForDetector, Size(cvRound(image.cols*scale), cvRound(image.rows*scale)), 0, 0, tld::DOWNSCALE_MODE); + else + imageForDetector = image_gray; + GaussianBlur(imageForDetector, image_blurred, tld::GaussBlurKernelSize, 0.0); + + //best overlap around 92% + Mat_ standardPatch(tld::STANDARD_PATCH_SIZE, tld::STANDARD_PATCH_SIZE); + + std::vector > detectorResults(targetNum); + std::vector > candidates(targetNum); + std::vector > candidatesRes(targetNum); + std::vector tmpCandidates(targetNum); + std::vector detect_flgs(targetNum); + std::vector trackerNeedsReInit(targetNum); + + bool DETECT_FLG = false; + + //Detect all + for (int k = 0; k < targetNum; k++) + tmpCandidates[k] = boundingBoxes[k]; + if (ocl::haveOpenCL()) + ocl_detect_all(imageForDetector, image_blurred, tmpCandidates, detectorResults, detect_flgs, trackers); + else + detect_all(imageForDetector, image_blurred, tmpCandidates, detectorResults, detect_flgs, trackers); + + for (int k = 0; k < targetNum; k++) + { + //TLD Tracker data extraction + trackerPtr = trackers[k]; + tracker = static_cast(trackerPtr); + //TLD Model Extraction + tldModel = ((tld::TrackerTLDModel*)static_cast(tracker->getModel())); + data = tracker->data; + + data->frameNum++; + + for (int i = 0; i < 2; i++) + { + Rect2d tmpCandid = boundingBoxes[k]; + + //if (i == 1) + { + DETECT_FLG = detect_flgs[k]; + tmpCandid = tmpCandidates[k]; + } + + if (((i == 0) && !data->failedLastTime && tracker->trackerProxy->update(image, tmpCandid)) || (DETECT_FLG)) + { + candidates[k].push_back(tmpCandid); + if (i == 0) + tld::resample(image_gray, tmpCandid, standardPatch); + else + tld::resample(imageForDetector, tmpCandid, standardPatch); + candidatesRes[k].push_back(tldModel->detector->Sc(standardPatch)); + } + else + { + if (i == 0) + trackerNeedsReInit[k] = true; + else + trackerNeedsReInit[k] = false; + } + } + + std::vector::iterator it = std::max_element(candidatesRes[k].begin(), candidatesRes[k].end()); + + + if (it == candidatesRes[k].end()) + { + + data->confident = false; + data->failedLastTime = true; + return false; + } + else + { + boundingBoxes[k] = candidates[k][it - candidatesRes[k].begin()]; + data->failedLastTime = false; + if (trackerNeedsReInit[k] || it != candidatesRes[k].begin()) + tracker->trackerProxy->init(image, boundingBoxes[k]); + } + +#if 1 + if (it != candidatesRes[k].end()) + tld::resample(imageForDetector, candidates[k][it - candidatesRes[k].begin()], standardPatch); +#endif + + if (*it > tld::CORE_THRESHOLD) + data->confident = true; + + if (data->confident) + { + tld::TrackerTLDImpl::Pexpert pExpert(imageForDetector, image_blurred, boundingBoxes[k], tldModel->detector, tracker->params, data->getMinSize()); + tld::TrackerTLDImpl::Nexpert nExpert(imageForDetector, boundingBoxes[k], tldModel->detector, tracker->params); + std::vector > examplesForModel, examplesForEnsemble; + examplesForModel.reserve(100); examplesForEnsemble.reserve(100); + int negRelabeled = 0; + for (int i = 0; i < (int)detectorResults[k].size(); i++) + { + bool expertResult; + if (detectorResults[k][i].isObject) + { + expertResult = nExpert(detectorResults[k][i].rect); + if (expertResult != detectorResults[k][i].isObject) + negRelabeled++; + } + else + { + expertResult = pExpert(detectorResults[k][i].rect); + } + + detectorResults[k][i].shouldBeIntegrated = detectorResults[k][i].shouldBeIntegrated || (detectorResults[k][i].isObject != expertResult); + detectorResults[k][i].isObject = expertResult; + } + tldModel->integrateRelabeled(imageForDetector, image_blurred, detectorResults[k]); + pExpert.additionalExamples(examplesForModel, examplesForEnsemble); + if (ocl::haveOpenCL()) + tldModel->ocl_integrateAdditional(examplesForModel, examplesForEnsemble, true); + else + tldModel->integrateAdditional(examplesForModel, examplesForEnsemble, true); + examplesForModel.clear(); examplesForEnsemble.clear(); + nExpert.additionalExamples(examplesForModel, examplesForEnsemble); + + if (ocl::haveOpenCL()) + tldModel->ocl_integrateAdditional(examplesForModel, examplesForEnsemble, false); + else + tldModel->integrateAdditional(examplesForModel, examplesForEnsemble, false); + } + else + { +#ifdef CLOSED_LOOP + tldModel->integrateRelabeled(imageForDetector, image_blurred, detectorResults); +#endif + } + + + } + + return true; + } + + void detect_all(const Mat& img, const Mat& imgBlurred, std::vector& res, std::vector < std::vector < tld::TLDDetector::LabeledPatch > > &patches, std::vector &detect_flgs, + std::vector > &trackers) + { + //TLD Tracker data extraction + Tracker* trackerPtr = trackers[0]; + cv::tld::TrackerTLDImpl* tracker = static_cast(trackerPtr); + //TLD Model Extraction + tld::TrackerTLDModel* tldModel = ((tld::TrackerTLDModel*)static_cast(tracker->getModel())); + Size initSize = tldModel->getMinSize(); + + for (int k = 0; k < (int)trackers.size(); k++) + patches[k].clear(); + + Mat_ standardPatch(tld::STANDARD_PATCH_SIZE, tld::STANDARD_PATCH_SIZE); + Mat tmp; + int dx = initSize.width / 10, dy = initSize.height / 10; + Size2d size = img.size(); + double scale = 1.0; + int npos = 0, nneg = 0; + double maxSc = -5.0; + Rect2d maxScRect; + int scaleID; + std::vector resized_imgs, blurred_imgs; + + std::vector > varBuffer(trackers.size()), ensBuffer(trackers.size()); + std::vector > varScaleIDs(trackers.size()), ensScaleIDs(trackers.size()); + + std::vector tmpP; + std::vector tmpI; + + //Detection part + //Generate windows and filter by variance + scaleID = 0; + resized_imgs.push_back(img); + blurred_imgs.push_back(imgBlurred); + do + { + Mat_ intImgP, intImgP2; + tld::TLDDetector::computeIntegralImages(resized_imgs[scaleID], intImgP, intImgP2); + for (int i = 0, imax = cvFloor((0.0 + resized_imgs[scaleID].cols - initSize.width) / dx); i < imax; i++) + { + for (int j = 0, jmax = cvFloor((0.0 + resized_imgs[scaleID].rows - initSize.height) / dy); j < jmax; j++) + { + //Optimized variance calculation + int x = dx * i, + y = dy * j, + width = initSize.width, + height = initSize.height; + double p = 0, p2 = 0; + double A, B, C, D; + + A = intImgP(y, x); + B = intImgP(y, x + width); + C = intImgP(y + height, x); + D = intImgP(y + height, x + width); + p = (A + D - B - C) / (width * height); + + A = intImgP2(y, x); + B = intImgP2(y, x + width); + C = intImgP2(y + height, x); + D = intImgP2(y + height, x + width); + p2 = (A + D - B - C) / (width * height); + double windowVar = p2 - p * p; + + //Loop for on all objects + for (int k = 0; k < (int)trackers.size(); k++) + { + //TLD Tracker data extraction + trackerPtr = trackers[k]; + tracker = static_cast(trackerPtr); + //TLD Model Extraction + tldModel = ((tld::TrackerTLDModel*)static_cast(tracker->getModel())); + + //Optimized variance calculation + bool varPass = (windowVar > tld::VARIANCE_THRESHOLD * *tldModel->detector->originalVariancePtr); + + if (!varPass) + continue; + varBuffer[k].push_back(Point(dx * i, dy * j)); + varScaleIDs[k].push_back(scaleID); + } + } + } + scaleID++; + size.width /= tld::SCALE_STEP; + size.height /= tld::SCALE_STEP; + scale *= tld::SCALE_STEP; + resize(img, tmp, size, 0, 0, tld::DOWNSCALE_MODE); + resized_imgs.push_back(tmp); + GaussianBlur(resized_imgs[scaleID], tmp, tld::GaussBlurKernelSize, 0.0f); + blurred_imgs.push_back(tmp); + } while (size.width >= initSize.width && size.height >= initSize.height); + + //Encsemble classification + for (int k = 0; k < (int)trackers.size(); k++) + { + //TLD Tracker data extraction + trackerPtr = trackers[k]; + tracker = static_cast(trackerPtr); + //TLD Model Extraction + tldModel = ((tld::TrackerTLDModel*)static_cast(tracker->getModel())); + + + for (int i = 0; i < (int)varBuffer[k].size(); i++) + { + tldModel->detector->prepareClassifiers(static_cast (blurred_imgs[varScaleIDs[k][i]].step[0])); + + double ensRes = 0; + uchar* data = &blurred_imgs[varScaleIDs[k][i]].at(varBuffer[k][i].y, varBuffer[k][i].x); + for (int x = 0; x < (int)tldModel->detector->classifiers.size(); x++) + { + int position = 0; + for (int n = 0; n < (int)tldModel->detector->classifiers[x].measurements.size(); n++) + { + position = position << 1; + if (data[tldModel->detector->classifiers[x].offset[n].x] < data[tldModel->detector->classifiers[x].offset[n].y]) + position++; + } + double posNum = (double)tldModel->detector->classifiers[x].posAndNeg[position].x; + double negNum = (double)tldModel->detector->classifiers[x].posAndNeg[position].y; + if (posNum == 0.0 && negNum == 0.0) + continue; + else + ensRes += posNum / (posNum + negNum); + } + ensRes /= tldModel->detector->classifiers.size(); + ensRes = tldModel->detector->ensembleClassifierNum(&blurred_imgs[varScaleIDs[k][i]].at(varBuffer[k][i].y, varBuffer[k][i].x)); + + if ( ensRes <= tld::ENSEMBLE_THRESHOLD) + continue; + ensBuffer[k].push_back(varBuffer[k][i]); + ensScaleIDs[k].push_back(varScaleIDs[k][i]); + } + } + + //NN classification + for (int k = 0; k < (int)trackers.size(); k++) + { + //TLD Tracker data extraction + trackerPtr = trackers[k]; + tracker = static_cast(trackerPtr); + //TLD Model Extraction + tldModel = ((tld::TrackerTLDModel*)static_cast(tracker->getModel())); + + npos = 0; + nneg = 0; + maxSc = -5.0; + + for (int i = 0; i < (int)ensBuffer[k].size(); i++) + { + tld::TLDDetector::LabeledPatch labPatch; + double curScale = pow(tld::SCALE_STEP, ensScaleIDs[k][i]); + labPatch.rect = Rect2d(ensBuffer[k][i].x*curScale, ensBuffer[k][i].y*curScale, initSize.width * curScale, initSize.height * curScale); + tld::resample(resized_imgs[ensScaleIDs[k][i]], Rect2d(ensBuffer[k][i], initSize), standardPatch); + + double srValue, scValue; + srValue = tldModel->detector->Sr(standardPatch); + + ////To fix: Check the paper, probably this cause wrong learning + // + labPatch.isObject = srValue > tld::THETA_NN; + labPatch.shouldBeIntegrated = abs(srValue - tld::THETA_NN) < 0.1; + patches[k].push_back(labPatch); + // + + if (!labPatch.isObject) + { + nneg++; + continue; + } + else + { + npos++; + } + scValue = tldModel->detector->Sc(standardPatch); + if (scValue > maxSc) + { + maxSc = scValue; + maxScRect = labPatch.rect; + } + } + + + + if (maxSc < 0) + detect_flgs[k] = false; + else + { + res[k] = maxScRect; + detect_flgs[k] = true; + } + } + } + + void ocl_detect_all(const Mat& img, const Mat& imgBlurred, std::vector& res, std::vector < std::vector < tld::TLDDetector::LabeledPatch > > &patches, std::vector &detect_flgs, + std::vector > &trackers) + { + //TLD Tracker data extraction + Tracker* trackerPtr = trackers[0]; + cv::tld::TrackerTLDImpl* tracker = static_cast(trackerPtr); + //TLD Model Extraction + tld::TrackerTLDModel* tldModel = ((tld::TrackerTLDModel*)static_cast(tracker->getModel())); + Size initSize = tldModel->getMinSize(); + + for (int k = 0; k < (int)trackers.size(); k++) + patches[k].clear(); + + Mat_ standardPatch(tld::STANDARD_PATCH_SIZE, tld::STANDARD_PATCH_SIZE); + Mat tmp; + int dx = initSize.width / 10, dy = initSize.height / 10; + Size2d size = img.size(); + double scale = 1.0; + int npos = 0, nneg = 0; + double maxSc = -5.0; + Rect2d maxScRect; + int scaleID; + std::vector resized_imgs, blurred_imgs; + + std::vector > varBuffer(trackers.size()), ensBuffer(trackers.size()); + std::vector > varScaleIDs(trackers.size()), ensScaleIDs(trackers.size()); + + std::vector tmpP; + std::vector tmpI; + + //Detection part + //Generate windows and filter by variance + scaleID = 0; + resized_imgs.push_back(img); + blurred_imgs.push_back(imgBlurred); + do + { + Mat_ intImgP, intImgP2; + tld::TLDDetector::computeIntegralImages(resized_imgs[scaleID], intImgP, intImgP2); + for (int i = 0, imax = cvFloor((0.0 + resized_imgs[scaleID].cols - initSize.width) / dx); i < imax; i++) + { + for (int j = 0, jmax = cvFloor((0.0 + resized_imgs[scaleID].rows - initSize.height) / dy); j < jmax; j++) + { + //Optimized variance calculation + int x = dx * i, + y = dy * j, + width = initSize.width, + height = initSize.height; + double p = 0, p2 = 0; + double A, B, C, D; + + A = intImgP(y, x); + B = intImgP(y, x + width); + C = intImgP(y + height, x); + D = intImgP(y + height, x + width); + p = (A + D - B - C) / (width * height); + + A = intImgP2(y, x); + B = intImgP2(y, x + width); + C = intImgP2(y + height, x); + D = intImgP2(y + height, x + width); + p2 = (A + D - B - C) / (width * height); + double windowVar = p2 - p * p; + + //Loop for on all objects + for (int k = 0; k < (int)trackers.size(); k++) + { + //TLD Tracker data extraction + trackerPtr = trackers[k]; + tracker = static_cast(trackerPtr); + //TLD Model Extraction + tldModel = ((tld::TrackerTLDModel*)static_cast(tracker->getModel())); + + //Optimized variance calculation + bool varPass = (windowVar > tld::VARIANCE_THRESHOLD * *tldModel->detector->originalVariancePtr); + + if (!varPass) + continue; + varBuffer[k].push_back(Point(dx * i, dy * j)); + varScaleIDs[k].push_back(scaleID); + } + } + } + scaleID++; + size.width /= tld::SCALE_STEP; + size.height /= tld::SCALE_STEP; + scale *= tld::SCALE_STEP; + resize(img, tmp, size, 0, 0, tld::DOWNSCALE_MODE); + resized_imgs.push_back(tmp); + GaussianBlur(resized_imgs[scaleID], tmp, tld::GaussBlurKernelSize, 0.0f); + blurred_imgs.push_back(tmp); + } while (size.width >= initSize.width && size.height >= initSize.height); + + //Encsemble classification + for (int k = 0; k < (int)trackers.size(); k++) + { + //TLD Tracker data extraction + trackerPtr = trackers[k]; + tracker = static_cast(trackerPtr); + //TLD Model Extraction + tldModel = ((tld::TrackerTLDModel*)static_cast(tracker->getModel())); + + + for (int i = 0; i < (int)varBuffer[k].size(); i++) + { + tldModel->detector->prepareClassifiers(static_cast (blurred_imgs[varScaleIDs[k][i]].step[0])); + + double ensRes = 0; + uchar* data = &blurred_imgs[varScaleIDs[k][i]].at(varBuffer[k][i].y, varBuffer[k][i].x); + for (int x = 0; x < (int)tldModel->detector->classifiers.size(); x++) + { + int position = 0; + for (int n = 0; n < (int)tldModel->detector->classifiers[x].measurements.size(); n++) + { + position = position << 1; + if (data[tldModel->detector->classifiers[x].offset[n].x] < data[tldModel->detector->classifiers[x].offset[n].y]) + position++; + } + double posNum = (double)tldModel->detector->classifiers[x].posAndNeg[position].x; + double negNum = (double)tldModel->detector->classifiers[x].posAndNeg[position].y; + if (posNum == 0.0 && negNum == 0.0) + continue; + else + ensRes += posNum / (posNum + negNum); + } + ensRes /= tldModel->detector->classifiers.size(); + ensRes = tldModel->detector->ensembleClassifierNum(&blurred_imgs[varScaleIDs[k][i]].at(varBuffer[k][i].y, varBuffer[k][i].x)); + + if (ensRes <= tld::ENSEMBLE_THRESHOLD) + continue; + ensBuffer[k].push_back(varBuffer[k][i]); + ensScaleIDs[k].push_back(varScaleIDs[k][i]); + } + } + + //NN classification + for (int k = 0; k < (int)trackers.size(); k++) + { + //TLD Tracker data extraction + trackerPtr = trackers[k]; + tracker = static_cast(trackerPtr); + //TLD Model Extraction + tldModel = ((tld::TrackerTLDModel*)static_cast(tracker->getModel())); + npos = 0; + nneg = 0; + maxSc = -5.0; + + //Prepare batch of patches + int numOfPatches = (int)ensBuffer[k].size(); + Mat_ stdPatches(numOfPatches, 225); + double *resultSr = new double[numOfPatches]; + double *resultSc = new double[numOfPatches]; + + uchar *patchesData = stdPatches.data; + for (int i = 0; i < (int)ensBuffer.size(); i++) + { + tld::resample(resized_imgs[ensScaleIDs[k][i]], Rect2d(ensBuffer[k][i], initSize), standardPatch); + uchar *stdPatchData = standardPatch.data; + for (int j = 0; j < 225; j++) + patchesData[225 * i + j] = stdPatchData[j]; + } + //Calculate Sr and Sc batches + tldModel->detector->ocl_batchSrSc(stdPatches, resultSr, resultSc, numOfPatches); + + for (int i = 0; i < (int)ensBuffer[k].size(); i++) + { + tld::TLDDetector::LabeledPatch labPatch; + standardPatch.data = &stdPatches.data[225 * i]; + double curScale = pow(tld::SCALE_STEP, ensScaleIDs[k][i]); + labPatch.rect = Rect2d(ensBuffer[k][i].x*curScale, ensBuffer[k][i].y*curScale, initSize.width * curScale, initSize.height * curScale); + tld::resample(resized_imgs[ensScaleIDs[k][i]], Rect2d(ensBuffer[k][i], initSize), standardPatch); + + double srValue, scValue; + srValue = resultSr[i]; + + ////To fix: Check the paper, probably this cause wrong learning + // + labPatch.isObject = srValue > tld::THETA_NN; + labPatch.shouldBeIntegrated = abs(srValue - tld::THETA_NN) < 0.1; + patches[k].push_back(labPatch); + // + + if (!labPatch.isObject) + { + nneg++; + continue; + } + else + { + npos++; + } + scValue = resultSc[i]; + if (scValue > maxSc) + { + maxSc = scValue; + maxScRect = labPatch.rect; + } + } + + + + if (maxSc < 0) + detect_flgs[k] = false; + else + { + res[k] = maxScRect; + detect_flgs[k] = true; + } + } + } + +} \ No newline at end of file diff --git a/modules/tracking/src/multiTracker.hpp b/modules/tracking/src/multiTracker.hpp new file mode 100644 index 000000000..9d363802c --- /dev/null +++ b/modules/tracking/src/multiTracker.hpp @@ -0,0 +1,57 @@ +/*M/////////////////////////////////////////////////////////////////////////////////////// +// +// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING. +// +// By downloading, copying, installing or using the software you agree to this license. +// If you do not agree to this license, do not download, install, +// copy or use the software. +// +// +// License Agreement +// For Open Source Computer Vision Library +// +// Copyright (C) 2013, OpenCV Foundation, all rights reserved. +// Third party copyrights are property of their respective owners. +// +// Redistribution and use in source and binary forms, with or without modification, +// are permitted provided that the following conditions are met: +// +// * Redistribution's of source code must retain the above copyright notice, +// this list of conditions and the following disclaimer. +// +// * Redistribution's in binary form must reproduce the above copyright notice, +// this list of conditions and the following disclaimer in the documentation +// and/or other materials provided with the distribution. +// +// * The name of the copyright holders may not be used to endorse or promote products +// derived from this software without specific prior written permission. +// +// This software is provided by the copyright holders and contributors "as is" and +// any express or implied warranties, including, but not limited to, the implied +// warranties of merchantability and fitness for a particular purpose are disclaimed. +// In no event shall the Intel Corporation or contributors be liable for any direct, +// indirect, incidental, special, exemplary, or consequential damages +// (including, but not limited to, procurement of substitute goods or services; +// loss of use, data, or profits; or business interruption) however caused +// and on any theory of liability, whether in contract, strict liability, +// or tort (including negligence or otherwise) arising in any way out of +// the use of this software, even if advised of the possibility of such damage. +// +//M*/ + +#ifndef OPENCV_MULTITRACKER +#define OPENCV_MULTITRACKER + +#include "precomp.hpp" +#include "tldTracker.hpp" +#include "tldUtils.hpp" +#include + +namespace cv +{ + void detect_all(const Mat& img, const Mat& imgBlurred, std::vector& res, std::vector < std::vector < tld::TLDDetector::LabeledPatch > > &patches, + std::vector& detect_flgs, std::vector >& trackers); + void ocl_detect_all(const Mat& img, const Mat& imgBlurred, std::vector& res, std::vector < std::vector < tld::TLDDetector::LabeledPatch > > &patches, + std::vector& detect_flgs, std::vector >& trackers); +} +#endif \ No newline at end of file diff --git a/modules/tracking/src/tldDataset.cpp b/modules/tracking/src/tldDataset.cpp index b93de2576..9ddbeed25 100644 --- a/modules/tracking/src/tldDataset.cpp +++ b/modules/tracking/src/tldDataset.cpp @@ -48,70 +48,108 @@ namespace cv char tldRootPath[100]; int frameNum = 0; bool flagPNG = false; + bool flagVOT = false; - cv::Rect2d tld_InitDataset(int datasetInd,const char* rootPath) + //TLD Dataset Parameters + const char* tldFolderName[10] = { + "01_david", + "02_jumping", + "03_pedestrian1", + "04_pedestrian2", + "05_pedestrian3", + "06_car", + "07_motocross", + "08_volkswagen", + "09_carchase", + "10_panda" + }; + const char* votFolderName[60] = { + "bag", "ball1", "ball2", "basketball", "birds1", "birds2", "blanket", "bmx", "bolt1", "bolt2", + "book", "butterfly", "car1", "car2", "crossing", "dinosaur", "fernando", "fish1", "fish2", "fish3", + "fish4", "girl", "glove", "godfather", "graduate", "gymnastics1", "gymnastics2 ", "gymnastics3", "gymnastics4", "hand", + "handball1", "handball2", "helicopter", "iceskater1", "iceskater2", "leaves", "marching", "matrix", "motocross1", "motocross2", + "nature", "octopus", "pedestrian1", "pedestrian2", "rabbit", "racing", "road", "shaking", "sheep", "singer1", + "singer2", "singer3", "soccer1", "soccer2", "soldier", "sphere", "tiger", "traffic", "tunnel", "wiper" + }; + + const Rect2d tldInitBB[10] = { + Rect2d(165, 93, 51, 54), Rect2d(147, 110, 33, 32), Rect2d(47, 51, 21, 36), Rect2d(130, 134, 21, 53), Rect2d(154, 102, 24, 52), + Rect2d(142, 125, 90, 39), Rect2d(290, 43, 23, 40), Rect2d(273, 77, 27, 25), Rect2d(337, 219, 54, 37), Rect2d(58, 100, 27, 22) + }; + const Rect2d votInitBB[60] = { + Rect2d(142, 125, 90, 39), Rect2d(490, 400, 40, 40), Rect2d(273, 77, 27, 25), Rect2d(145, 84, 54, 37), Rect2d(58, 100, 27, 22), + Rect2d(450, 380, 60, 60), Rect2d(290, 43, 23, 40), Rect2d(273, 77, 27, 25), Rect2d(225, 175, 50, 50), Rect2d(58, 100, 27, 22), + + Rect2d(142, 125, 90, 39), Rect2d(290, 43, 23, 40), Rect2d(273, 77, 27, 25), Rect2d(145, 84, 54, 37), Rect2d(560, 460, 50, 120), + Rect2d(142, 125, 90, 39), Rect2d(290, 43, 23, 40), Rect2d(273, 77, 27, 25), Rect2d(145, 84, 54, 37), Rect2d(58, 100, 27, 22), + + Rect2d(142, 125, 90, 39), Rect2d(290, 43, 23, 40), Rect2d(273, 77, 27, 25), Rect2d(145, 84, 54, 37), Rect2d(58, 100, 27, 22), + Rect2d(142, 125, 90, 39), Rect2d(290, 43, 23, 40), Rect2d(273, 77, 27, 25), Rect2d(145, 84, 54, 37), Rect2d(58, 100, 27, 22), + + Rect2d(142, 125, 90, 39), Rect2d(290, 43, 23, 40), Rect2d(273, 77, 27, 25), Rect2d(145, 84, 54, 37), Rect2d(58, 100, 27, 22), + Rect2d(142, 125, 90, 39), Rect2d(290, 43, 23, 40), Rect2d(273, 77, 27, 25), Rect2d(145, 84, 54, 37), Rect2d(58, 100, 27, 22), + + Rect2d(142, 125, 90, 39), Rect2d(290, 43, 23, 40), Rect2d(273, 77, 27, 25), Rect2d(145, 84, 54, 37), Rect2d(58, 100, 27, 22), + Rect2d(142, 125, 90, 39), Rect2d(290, 43, 23, 40), Rect2d(273, 77, 27, 25), Rect2d(145, 84, 54, 37), Rect2d(58, 100, 27, 22), + + Rect2d(142, 125, 90, 39), Rect2d(290, 43, 23, 40), Rect2d(273, 77, 27, 25), Rect2d(145, 84, 54, 37), Rect2d(58, 100, 27, 22), + Rect2d(142, 125, 90, 39), Rect2d(290, 43, 23, 40), Rect2d(273, 77, 27, 25), Rect2d(145, 84, 54, 37), Rect2d(58, 100, 27, 22), + }; + + int tldFrameOffset[10] = { 100, 1, 1, 1, 1, 1, 1, 1, 1, 1 }; + int votFrameOffset[60] = { + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1 + }; + bool tldFlagPNG[10] = { 0, 0, 0, 0, 0, 0, 1, 0, 0, 0 }; + bool votFlagPNG[60] = { + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + }; + + cv::Rect2d tld_InitDataset(int videoInd, const char* rootPath, int datasetInd) { char* folderName = (char *)""; - int x = 0; - int y = 0; - int w = 0; - int h = 0; - flagPNG = false; - - frameNum = 1; - - if (datasetInd == 1) { - folderName = (char *)"01_david"; - x = 165, y = 83; - w = 51; h = 54; - frameNum = 100; - } - if (datasetInd == 2) { - folderName = (char *)"02_jumping"; - x = 147, y = 110; - w = 33; h = 32; - } - if (datasetInd == 3) { - folderName = (char *)"03_pedestrian1"; - x = 47, y = 51; - w = 21; h = 36; - } - if (datasetInd == 4) { - folderName = (char *)"04_pedestrian2"; - x = 130, y = 134; - w = 21; h = 53; - } - if (datasetInd == 5) { - folderName = (char *)"05_pedestrian3"; - x = 154, y = 102; - w = 24; h = 52; - } - if (datasetInd == 6) { - folderName = (char *)"06_car"; - x = 142, y = 125; - w = 90; h = 39; - } - if (datasetInd == 7) { - folderName = (char *)"07_motocross"; - x = 290, y = 43; - w = 23; h = 40; - flagPNG = true; - } - if (datasetInd == 8) { - folderName = (char *)"08_volkswagen"; - x = 273, y = 77; - w = 27; h = 25; - } - if (datasetInd == 9) { - folderName = (char *)"09_carchase"; - x = 145, y = 84; - w = 54; h = 37; - } - if (datasetInd == 10){ - folderName = (char *)"10_panda"; - x = 58, y = 100; - w = 27; h = 22; + double x = 0, + y = 0, + w = 0, + h = 0; + + //Index range + // 1-10 TLD Dataset + // 1-60 VOT 2015 Dataset + int id = videoInd - 1; + + if (datasetInd == 0) + { + folderName = (char*)tldFolderName[id]; + x = tldInitBB[id].x; + y = tldInitBB[id].y; + w = tldInitBB[id].width; + h = tldInitBB[id].height; + frameNum = tldFrameOffset[id]; + flagPNG = tldFlagPNG[id]; + flagVOT = false; } + if (datasetInd == 1) + { + folderName = (char*)votFolderName[id]; + x = votInitBB[id].x; + y = votInitBB[id].y; + w = votInitBB[id].width; + h = votInitBB[id].height; + frameNum = votFrameOffset[id]; + flagPNG = votFlagPNG[id]; + flagVOT = true; + } strcpy(tldRootPath, rootPath); strcat(tldRootPath, "\\"); @@ -127,6 +165,8 @@ namespace cv char numStr[10]; strcpy(fullPath, tldRootPath); strcat(fullPath, "\\"); + if (flagVOT) + strcat(fullPath, "000"); if (frameNum < 10) strcat(fullPath, "0000"); else if (frameNum < 100) strcat(fullPath, "000"); else if (frameNum < 1000) strcat(fullPath, "00"); diff --git a/modules/tracking/src/tldDetector.cpp b/modules/tracking/src/tldDetector.cpp index cbb783ece..ba9f03d9f 100644 --- a/modules/tracking/src/tldDetector.cpp +++ b/modules/tracking/src/tldDetector.cpp @@ -65,25 +65,6 @@ namespace cv // Calculate Relative similarity of the patch (NN-Model) double TLDDetector::Sr(const Mat_& patch) { - /* - int64 e1, e2; - float t; - e1 = getTickCount(); - double splus = 0.0, sminus = 0.0; - for (int i = 0; i < (int)(*positiveExamples).size(); i++) - splus = std::max(splus, 0.5 * (NCC((*positiveExamples)[i], patch) + 1.0)); - for (int i = 0; i < (int)(*negativeExamples).size(); i++) - sminus = std::max(sminus, 0.5 * (NCC((*negativeExamples)[i], patch) + 1.0)); - e2 = getTickCount(); - t = (e2 - e1) / getTickFrequency()*1000.0; - printf("Sr: %f\n", t); - if (splus + sminus == 0.0) - return 0.0; - return splus / (sminus + splus); - */ - //int64 e1, e2; - //float t; - //e1 = getTickCount(); double splus = 0.0, sminus = 0.0; Mat_ modelSample(STANDARD_PATCH_SIZE, STANDARD_PATCH_SIZE); for (int i = 0; i < *posNum; i++) @@ -96,9 +77,7 @@ namespace cv modelSample.data = &(negExp->data[i * 225]); sminus = std::max(sminus, 0.5 * (NCC(modelSample, patch) + 1.0)); } - //e2 = getTickCount(); - //t = (e2 - e1) / getTickFrequency()*1000.0; - //printf("Sr CPU: %f\n", t); + if (splus + sminus == 0.0) return 0.0; return splus / (sminus + splus); @@ -106,10 +85,6 @@ namespace cv double TLDDetector::ocl_Sr(const Mat_& patch) { - //int64 e1, e2, e3, e4; - //double t; - //e1 = getTickCount(); - //e3 = getTickCount(); double splus = 0.0, sminus = 0.0; @@ -131,41 +106,15 @@ namespace cv ocl::KernelArg::PtrReadOnly(devPositiveSamples), ocl::KernelArg::PtrReadOnly(devNegativeSamples), ocl::KernelArg::PtrWriteOnly(devNCC), - posNum, - negNum); - - //e4 = getTickCount(); - //t = (e4 - e3) / getTickFrequency()*1000.0; - //printf("Mem Cpy GPU: %f\n", t); + *posNum, + *negNum); size_t globSize = 1000; - //e3 = getTickCount(); + if (!k.run(1, &globSize, NULL, false)) printf("Kernel Run Error!!!"); - //e4 = getTickCount(); - //t = (e4 - e3) / getTickFrequency()*1000.0; - //printf("Kernel Run GPU: %f\n", t); - //e3 = getTickCount(); Mat resNCC = devNCC.getMat(ACCESS_READ); - //e4 = getTickCount(); - //t = (e4 - e3) / getTickFrequency()*1000.0; - //printf("Read Mem GPU: %f\n", t); - - ////Compare - //Mat_ modelSample(STANDARD_PATCH_SIZE, STANDARD_PATCH_SIZE); - //for (int i = 0; i < 200; i+=17) - //{ - // modelSample.data = &(posExp->data[i * 225]); - // printf("%f\t%f\n\n", resNCC.at(i), NCC(modelSample, patch)); - //} - - //for (int i = 0; i < 200; i+=23) - //{ - // modelSample.data = &(negExp->data[i * 225]); - // printf("%f\t%f\n", resNCC.at(500+i), NCC(modelSample, patch)); - //} - for (int i = 0; i < *posNum; i++) splus = std::max(splus, 0.5 * (resNCC.at(i) + 1.0)); @@ -173,10 +122,6 @@ namespace cv for (int i = 0; i < *negNum; i++) sminus = std::max(sminus, 0.5 * (resNCC.at(i+500) +1.0)); - //e2 = getTickCount(); - //t = (e2 - e1) / getTickFrequency()*1000.0; - //printf("Sr GPU: %f\n\n", t); - if (splus + sminus == 0.0) return 0.0; return splus / (sminus + splus); @@ -184,11 +129,6 @@ namespace cv void TLDDetector::ocl_batchSrSc(const Mat_& patches, double *resultSr, double *resultSc, int numOfPatches) { - //int64 e1, e2, e3, e4; - //double t; - //e1 = getTickCount(); - //e3 = getTickCount(); - UMat devPatches = patches.getUMat(ACCESS_READ, USAGE_ALLOCATE_DEVICE_MEMORY); UMat devPositiveSamples = posExp->getUMat(ACCESS_READ, USAGE_ALLOCATE_DEVICE_MEMORY); UMat devNegativeSamples = negExp->getUMat(ACCESS_READ, USAGE_ALLOCATE_DEVICE_MEMORY); @@ -208,29 +148,17 @@ namespace cv ocl::KernelArg::PtrReadOnly(devNegativeSamples), ocl::KernelArg::PtrWriteOnly(devPosNCC), ocl::KernelArg::PtrWriteOnly(devNegNCC), - posNum, - negNum, + *posNum, + *negNum, numOfPatches); - //e4 = getTickCount(); - //t = (e4 - e3) / getTickFrequency()*1000.0; - //printf("Mem Cpy GPU: %f\n", t); - - // 2 -> Pos&Neg size_t globSize = 2 * numOfPatches*MAX_EXAMPLES_IN_MODEL; - //e3 = getTickCount(); - if (!k.run(1, &globSize, NULL, false)) + + if (!k.run(1, &globSize, NULL, true)) printf("Kernel Run Error!!!"); - //e4 = getTickCount(); - //t = (e4 - e3) / getTickFrequency()*1000.0; - //printf("Kernel Run GPU: %f\n", t); - //e3 = getTickCount(); Mat posNCC = devPosNCC.getMat(ACCESS_READ); Mat negNCC = devNegNCC.getMat(ACCESS_READ); - //e4 = getTickCount(); - //t = (e4 - e3) / getTickFrequency()*1000.0; - //printf("Read Mem GPU: %f\n", t); //Calculate Srs for (int id = 0; id < numOfPatches; id++) @@ -256,62 +184,11 @@ namespace cv else resultSc[id] = spc / (smc + spc); } - - ////Compare positive NCCs - /*Mat_ modelSample(STANDARD_PATCH_SIZE, STANDARD_PATCH_SIZE); - Mat_ patch(STANDARD_PATCH_SIZE, STANDARD_PATCH_SIZE); - for (int j = 0; j < numOfPatches; j++) - { - for (int i = 0; i < 1; i++) - { - modelSample.data = &(posExp->data[i * 225]); - patch.data = &(patches.data[j * 225]); - printf("%f\t%f\n", resultSr[j], Sr(patch)); - printf("%f\t%f\n", resultSc[j], Sc(patch)); - } - }*/ - - //for (int i = 0; i < 200; i+=23) - //{ - // modelSample.data = &(negExp->data[i * 225]); - // printf("%f\t%f\n", resNCC.at(500+i), NCC(modelSample, patch)); - //} - - - - //e2 = getTickCount(); - //t = (e2 - e1) / getTickFrequency()*1000.0; - //printf("Sr GPU: %f\n\n", t); } // Calculate Conservative similarity of the patch (NN-Model) double TLDDetector::Sc(const Mat_& patch) { - /* - int64 e1, e2; - float t; - e1 = getTickCount(); - double splus = 0.0, sminus = 0.0; - int med = getMedian((*timeStampsPositive)); - for (int i = 0; i < (int)(*positiveExamples).size(); i++) - { - if ((int)(*timeStampsPositive)[i] <= med) - splus = std::max(splus, 0.5 * (NCC((*positiveExamples)[i], patch) + 1.0)); - } - for (int i = 0; i < (int)(*negativeExamples).size(); i++) - sminus = std::max(sminus, 0.5 * (NCC((*negativeExamples)[i], patch) + 1.0)); - e2 = getTickCount(); - t = (e2 - e1) / getTickFrequency()*1000.0; - printf("Sc: %f\n", t); - if (splus + sminus == 0.0) - return 0.0; - - return splus / (sminus + splus); - */ - - //int64 e1, e2; - //double t; - //e1 = getTickCount(); double splus = 0.0, sminus = 0.0; Mat_ modelSample(STANDARD_PATCH_SIZE, STANDARD_PATCH_SIZE); int med = getMedian((*timeStampsPositive)); @@ -328,9 +205,7 @@ namespace cv modelSample.data = &(negExp->data[i * 225]); sminus = std::max(sminus, 0.5 * (NCC(modelSample, patch) + 1.0)); } - //e2 = getTickCount(); - //t = (e2 - e1) / getTickFrequency()*1000.0; - //printf("Sc: %f\n", t); + if (splus + sminus == 0.0) return 0.0; @@ -339,13 +214,8 @@ namespace cv double TLDDetector::ocl_Sc(const Mat_& patch) { - //int64 e1, e2, e3, e4; - //float t; - //e1 = getTickCount(); double splus = 0.0, sminus = 0.0; - //e3 = getTickCount(); - UMat devPatch = patch.getUMat(ACCESS_READ, USAGE_ALLOCATE_DEVICE_MEMORY); UMat devPositiveSamples = posExp->getUMat(ACCESS_READ, USAGE_ALLOCATE_DEVICE_MEMORY); UMat devNegativeSamples = negExp->getUMat(ACCESS_READ, USAGE_ALLOCATE_DEVICE_MEMORY); @@ -364,40 +234,15 @@ namespace cv ocl::KernelArg::PtrReadOnly(devPositiveSamples), ocl::KernelArg::PtrReadOnly(devNegativeSamples), ocl::KernelArg::PtrWriteOnly(devNCC), - posNum, - negNum); - - //e4 = getTickCount(); - //t = (e4 - e3) / getTickFrequency()*1000.0; - //printf("Mem Cpy GPU: %f\n", t); + *posNum, + *negNum); size_t globSize = 1000; - //e3 = getTickCount(); + if (!k.run(1, &globSize, NULL, false)) printf("Kernel Run Error!!!"); - //e4 = getTickCount(); - //t = (e4 - e3) / getTickFrequency()*1000.0; - //printf("Kernel Run GPU: %f\n", t); - //e3 = getTickCount(); Mat resNCC = devNCC.getMat(ACCESS_READ); - //e4 = getTickCount(); - //t = (e4 - e3) / getTickFrequency()*1000.0; - //printf("Read Mem GPU: %f\n", t); - - ////Compare - //Mat_ modelSample(STANDARD_PATCH_SIZE, STANDARD_PATCH_SIZE); - //for (int i = 0; i < 200; i+=17) - //{ - // modelSample.data = &(posExp->data[i * 225]); - // printf("%f\t%f\n\n", resNCC.at(i), NCC(modelSample, patch)); - //} - - //for (int i = 0; i < 200; i+=23) - //{ - // modelSample.data = &(negExp->data[i * 225]); - // printf("%f\t%f\n", resNCC.at(500+i), NCC(modelSample, patch)); - //} int med = getMedian((*timeStampsPositive)); for (int i = 0; i < *posNum; i++) @@ -407,10 +252,6 @@ namespace cv for (int i = 0; i < *negNum; i++) sminus = std::max(sminus, 0.5 * (resNCC.at(i + 500) + 1.0)); - //e2 = getTickCount(); - //t = (e2 - e1) / getTickFrequency()*1000.0; - //printf("Sc GPU: %f\n\n", t); - if (splus + sminus == 0.0) return 0.0; return splus / (sminus + splus); @@ -449,7 +290,6 @@ namespace cv break; } } - //dprintf(("%d rects in res\n", (int)res.size())); } //Detection - returns most probable new target location (Max Sc) @@ -469,10 +309,6 @@ namespace cv std::vector resized_imgs, blurred_imgs; std::vector varBuffer, ensBuffer; std::vector varScaleIDs, ensScaleIDs; - //int64 e1, e2; - //double t; - - //e1 = getTickCount(); //Detection part //Generate windows and filter by variance @@ -502,12 +338,8 @@ namespace cv GaussianBlur(resized_imgs[scaleID], tmp, GaussBlurKernelSize, 0.0f); blurred_imgs.push_back(tmp); } while (size.width >= initSize.width && size.height >= initSize.height); - //e2 = getTickCount(); - //t = (e2 - e1) / getTickFrequency()*1000.0; - //printf("Variance: %d\t%f\n", varBuffer.size(), t); //Encsemble classification - //e1 = getTickCount(); for (int i = 0; i < (int)varBuffer.size(); i++) { prepareClassifiers(static_cast (blurred_imgs[varScaleIDs[i]].step[0])); @@ -516,12 +348,8 @@ namespace cv ensBuffer.push_back(varBuffer[i]); ensScaleIDs.push_back(varScaleIDs[i]); } - //e2 = getTickCount(); - //t = (e2 - e1) / getTickFrequency()*1000.0; - //printf("Ensemble: %d\t%f\n", ensBuffer.size(), t); //NN classification - //e1 = getTickCount(); for (int i = 0; i < (int)ensBuffer.size(); i++) { LabeledPatch labPatch; @@ -555,14 +383,14 @@ namespace cv maxScRect = labPatch.rect; } } - //e2 = getTickCount(); - //t = (e2 - e1) / getTickFrequency()*1000.0; - //printf("NN: %d\t%f\n", patches.size(), t); if (maxSc < 0) return false; - res = maxScRect; - return true; + else + { + res = maxScRect; + return true; + } } bool TLDDetector::ocl_detect(const Mat& img, const Mat& imgBlurred, Rect2d& res, std::vector& patches, Size initSize) @@ -580,10 +408,7 @@ namespace cv std::vector resized_imgs, blurred_imgs; std::vector varBuffer, ensBuffer; std::vector varScaleIDs, ensScaleIDs; - //int64 e1, e2; - //double t; - //e1 = getTickCount(); //Detection part //Generate windows and filter by variance scaleID = 0; @@ -612,12 +437,8 @@ namespace cv GaussianBlur(resized_imgs[scaleID], tmp, GaussBlurKernelSize, 0.0f); blurred_imgs.push_back(tmp); } while (size.width >= initSize.width && size.height >= initSize.height); - //e2 = getTickCount(); - //t = (e2 - e1) / getTickFrequency()*1000.0; - //printf("Variance: %d\t%f\n", varBuffer.size(), t); //Encsemble classification - //e1 = getTickCount(); for (int i = 0; i < (int)varBuffer.size(); i++) { prepareClassifiers((int)blurred_imgs[varScaleIDs[i]].step[0]); @@ -626,12 +447,8 @@ namespace cv ensBuffer.push_back(varBuffer[i]); ensScaleIDs.push_back(varScaleIDs[i]); } - //e2 = getTickCount(); - //t = (e2 - e1) / getTickFrequency()*1000.0; - //printf("Ensemble: %d\t%f\n", ensBuffer.size(), t); //NN classification - //e1 = getTickCount(); //Prepare batch of patches int numOfPatches = (int)ensBuffer.size(); Mat_ stdPatches(numOfPatches, 225); @@ -661,9 +478,6 @@ namespace cv srValue = resultSr[i]; - //srValue = Sr(standardPatch); - //printf("%f\t%f\t\n", srValue, resultSr[i]); - ////To fix: Check the paper, probably this cause wrong learning // labPatch.isObject = srValue > THETA_NN; @@ -687,9 +501,6 @@ namespace cv maxScRect = labPatch.rect; } } - //e2 = getTickCount(); - //t = (e2 - e1) / getTickFrequency()*1000.0; - //printf("NN: %d\t%f\n", patches.size(), t); if (maxSc < 0) return false; diff --git a/modules/tracking/src/tldDetector.hpp b/modules/tracking/src/tldDetector.hpp index 3e1189592..7f5a3f9e5 100644 --- a/modules/tracking/src/tldDetector.hpp +++ b/modules/tracking/src/tldDetector.hpp @@ -66,13 +66,15 @@ namespace cv static const cv::Size GaussBlurKernelSize(3, 3); + + class TLDDetector { public: TLDDetector(){} ~TLDDetector(){} - inline double ensembleClassifierNum(const uchar* data); - inline void prepareClassifiers(int rowstep); + double ensembleClassifierNum(const uchar* data); + void prepareClassifiers(int rowstep); double Sr(const Mat_& patch); double ocl_Sr(const Mat_& patch); double Sc(const Mat_& patch); @@ -93,15 +95,14 @@ namespace cv bool isObject, shouldBeIntegrated; }; bool detect(const Mat& img, const Mat& imgBlurred, Rect2d& res, std::vector& patches, Size initSize); - bool ocl_detect(const Mat& img, const Mat& imgBlurred, Rect2d& res, std::vector& patches, Size initSize); - protected: - - + bool ocl_detect(const Mat& img, const Mat& imgBlurred, Rect2d& res, std::vector& patches, Size initSize); friend class MyMouseCallbackDEBUG; - void computeIntegralImages(const Mat& img, Mat_& intImgP, Mat_& intImgP2){ integral(img, intImgP, intImgP2, CV_64F); } - inline bool patchVariance(Mat_& intImgP, Mat_& intImgP2, double *originalVariance, Point pt, Size size); + static void computeIntegralImages(const Mat& img, Mat_& intImgP, Mat_& intImgP2){ integral(img, intImgP, intImgP2, CV_64F); } + static inline bool patchVariance(Mat_& intImgP, Mat_& intImgP2, double *originalVariance, Point pt, Size size); }; + + } } diff --git a/modules/tracking/src/tldEnsembleClassifier.hpp b/modules/tracking/src/tldEnsembleClassifier.hpp index 97221372d..f0ec175ba 100644 --- a/modules/tracking/src/tldEnsembleClassifier.hpp +++ b/modules/tracking/src/tldEnsembleClassifier.hpp @@ -54,7 +54,7 @@ namespace cv double posteriorProbability(const uchar* data, int rowstep) const; double posteriorProbabilityFast(const uchar* data) const; void prepareClassifier(int rowstep); - private: + TLDEnsembleClassifier(const std::vector& meas, int beg, int end); static void stepPrefSuff(std::vector & arr, int pos, int len, int gridSize); int code(const uchar* data, int rowstep) const; diff --git a/modules/tracking/src/tldModel.cpp b/modules/tracking/src/tldModel.cpp index 085402265..9415c2a99 100644 --- a/modules/tracking/src/tldModel.cpp +++ b/modules/tracking/src/tldModel.cpp @@ -140,7 +140,6 @@ namespace cv detector->classifiers[k].integrate(blurredPatch, false); } } - //dprintf(("positive patches: %d\nnegative patches: %d\n", (int)positiveExamples.size(), (int)negativeExamples.size())); } @@ -180,16 +179,6 @@ namespace cv detector->classifiers[i].integrate(blurredPatch, patches[k].isObject); } } - /* - if( negativeIntoModel > 0 ) - dfprintf((stdout, "negativeIntoModel = %d ", negativeIntoModel)); - if( positiveIntoModel > 0) - dfprintf((stdout, "positiveIntoModel = %d ", positiveIntoModel)); - if( negativeIntoEnsemble > 0 ) - dfprintf((stdout, "negativeIntoEnsemble = %d ", negativeIntoEnsemble)); - if( positiveIntoEnsemble > 0 ) - dfprintf((stdout, "positiveIntoEnsemble = %d ", positiveIntoEnsemble)); - dfprintf((stdout, "\n"));*/ } @@ -198,9 +187,6 @@ namespace cv int positiveIntoModel = 0, negativeIntoModel = 0, positiveIntoEnsemble = 0, negativeIntoEnsemble = 0; if ((int)eForModel.size() == 0) return; - //int64 e1, e2; - //double t; - //e1 = getTickCount(); for (int k = 0; k < (int)eForModel.size(); k++) { double sr = detector->Sr(eForModel[k]); @@ -231,19 +217,6 @@ namespace cv detector->classifiers[i].integrate(eForEnsemble[k], isPositive); } } - //e2 = getTickCount(); - //t = (e2 - e1) / getTickFrequency() * 1000; - //printf("Integrate Additional: %fms\n", t); - /* - if( negativeIntoModel > 0 ) - dfprintf((stdout, "negativeIntoModel = %d ", negativeIntoModel)); - if( positiveIntoModel > 0 ) - dfprintf((stdout, "positiveIntoModel = %d ", positiveIntoModel)); - if( negativeIntoEnsemble > 0 ) - dfprintf((stdout, "negativeIntoEnsemble = %d ", negativeIntoEnsemble)); - if( positiveIntoEnsemble > 0 ) - dfprintf((stdout, "positiveIntoEnsemble = %d ", positiveIntoEnsemble)); - dfprintf((stdout, "\n"));*/ } void TrackerTLDModel::ocl_integrateAdditional(const std::vector >& eForModel, const std::vector >& eForEnsemble, bool isPositive) @@ -251,10 +224,6 @@ namespace cv int positiveIntoModel = 0, negativeIntoModel = 0, positiveIntoEnsemble = 0, negativeIntoEnsemble = 0; if ((int)eForModel.size() == 0) return; - //int64 e1, e2; - //double t; - //e1 = getTickCount(); - //Prepare batch of patches int numOfPatches = (int)eForModel.size(); Mat_ stdPatches(numOfPatches, 225); @@ -301,19 +270,6 @@ namespace cv detector->classifiers[i].integrate(eForEnsemble[k], isPositive); } } - //e2 = getTickCount(); - //t = (e2 - e1) / getTickFrequency() * 1000; - //printf("Integrate Additional OCL: %fms\n", t); - /* - if( negativeIntoModel > 0 ) - dfprintf((stdout, "negativeIntoModel = %d ", negativeIntoModel)); - if( positiveIntoModel > 0 ) - dfprintf((stdout, "positiveIntoModel = %d ", positiveIntoModel)); - if( negativeIntoEnsemble > 0 ) - dfprintf((stdout, "negativeIntoEnsemble = %d ", negativeIntoEnsemble)); - if( positiveIntoEnsemble > 0 ) - dfprintf((stdout, "positiveIntoEnsemble = %d ", positiveIntoEnsemble)); - dfprintf((stdout, "\n"));*/ } //Push the patch to the model diff --git a/modules/tracking/src/tldTracker.cpp b/modules/tracking/src/tldTracker.cpp index 0f1e2f78e..a1334a91c 100644 --- a/modules/tracking/src/tldTracker.cpp +++ b/modules/tracking/src/tldTracker.cpp @@ -45,6 +45,13 @@ namespace cv { + TrackerTLD::Params::Params(){} + + void TrackerTLD::Params::read(const cv::FileNode& /*fn*/){} + + void TrackerTLD::Params::write(cv::FileStorage& /*fs*/) const {} + + Ptr TrackerTLD::createTracker(const TrackerTLD::Params ¶meters) { return Ptr(new tld::TrackerTLDImpl(parameters)); @@ -112,7 +119,6 @@ bool TrackerTLDImpl::updateImpl(const Mat& image, Rect2d& boundingBox) Mat_ standardPatch(STANDARD_PATCH_SIZE, STANDARD_PATCH_SIZE); std::vector detectorResults; //best overlap around 92% - std::vector candidates; std::vector candidatesRes; bool trackerNeedsReInit = false; @@ -128,7 +134,6 @@ bool TrackerTLDImpl::updateImpl(const Mat& image, Rect2d& boundingBox) else DETECT_FLG = tldModel->detector->detect(imageForDetector, image_blurred, tmpCandid, detectorResults, tldModel->getMinSize()); } - if( ( (i == 0) && !data->failedLastTime && trackerProxy->update(image, tmpCandid) ) || ( DETECT_FLG)) { candidates.push_back(tmpCandid); @@ -144,15 +149,8 @@ bool TrackerTLDImpl::updateImpl(const Mat& image, Rect2d& boundingBox) trackerNeedsReInit = true; } } - std::vector::iterator it = std::max_element(candidatesRes.begin(), candidatesRes.end()); - //dfprintf((stdout, "scale = %f\n", log(1.0 * boundingBox.width / (data->getMinSize()).width) / log(SCALE_STEP))); - //for( int i = 0; i < (int)candidatesRes.size(); i++ ) - //dprintf(("\tcandidatesRes[%d] = %f\n", i, candidatesRes[i])); - //data->printme(); - //tldModel->printme(stdout); - if( it == candidatesRes.end() ) { data->confident = false; @@ -169,16 +167,7 @@ bool TrackerTLDImpl::updateImpl(const Mat& image, Rect2d& boundingBox) #if 1 if( it != candidatesRes.end() ) - { resample(imageForDetector, candidates[it - candidatesRes.begin()], standardPatch); - //dfprintf((stderr, "%d %f %f\n", data->frameNum, tldModel->Sc(standardPatch), tldModel->Sr(standardPatch))); - //if( candidatesRes.size() == 2 && it == (candidatesRes.begin() + 1) ) - //dfprintf((stderr, "detector WON\n")); - } - else - { - //dfprintf((stderr, "%d x x\n", data->frameNum)); - } #endif if( *it > CORE_THRESHOLD ) @@ -209,7 +198,6 @@ bool TrackerTLDImpl::updateImpl(const Mat& image, Rect2d& boundingBox) detectorResults[i].isObject = expertResult; } tldModel->integrateRelabeled(imageForDetector, image_blurred, detectorResults); - //dprintf(("%d relabeled by nExpert\n", negRelabeled)); pExpert.additionalExamples(examplesForModel, examplesForEnsemble); if (ocl::haveOpenCL()) tldModel->ocl_integrateAdditional(examplesForModel, examplesForEnsemble, true); @@ -296,7 +284,6 @@ Data::Data(Rect2d initBox) minSize.width = (int)(initBox.width * 20.0 / minDim); minSize.height = (int)(initBox.height * 20.0 / minDim); frameNum = 0; - //dprintf(("minSize = %dx%d\n", minSize.width, minSize.height)); } void Data::printme(FILE* port) diff --git a/modules/tracking/src/tldTracker.hpp b/modules/tracking/src/tldTracker.hpp index e04926d5d..4a5610de3 100644 --- a/modules/tracking/src/tldTracker.hpp +++ b/modules/tracking/src/tldTracker.hpp @@ -52,12 +52,6 @@ namespace cv { -TrackerTLD::Params::Params(){} - -void TrackerTLD::Params::read(const cv::FileNode& /*fn*/){} - -void TrackerTLD::Params::write(cv::FileStorage& /*fs*/) const {} - namespace tld { class TrackerProxy @@ -128,7 +122,6 @@ public: void read(const FileNode& fn); void write(FileStorage& fs) const; -protected: class Pexpert { public: