You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
537 lines
17 KiB
537 lines
17 KiB
/*M/////////////////////////////////////////////////////////////////////////////////////// |
|
// |
|
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING. |
|
// |
|
// By downloading, copying, installing or using the software you agree to this license. |
|
// If you do not agree to this license, do not download, install, |
|
// copy or use the software. |
|
// |
|
// |
|
// License Agreement |
|
// For Open Source Computer Vision Library |
|
// |
|
// Copyright (C) 2013, OpenCV Foundation, all rights reserved. |
|
// Third party copyrights are property of their respective owners. |
|
// |
|
// Redistribution and use in source and binary forms, with or without modification, |
|
// are permitted provided that the following conditions are met: |
|
// |
|
// * Redistribution's of source code must retain the above copyright notice, |
|
// this list of conditions and the following disclaimer. |
|
// |
|
// * Redistribution's in binary form must reproduce the above copyright notice, |
|
// this list of conditions and the following disclaimer in the documentation |
|
// and/or other materials provided with the distribution. |
|
// |
|
// * The name of the copyright holders may not be used to endorse or promote products |
|
// derived from this software without specific prior written permission. |
|
// |
|
// This software is provided by the copyright holders and contributors "as is" and |
|
// any express or implied warranties, including, but not limited to, the implied |
|
// warranties of merchantability and fitness for a particular purpose are disclaimed. |
|
// In no event shall the Intel Corporation or contributors be liable for any direct, |
|
// indirect, incidental, special, exemplary, or consequential damages |
|
// (including, but not limited to, procurement of substitute goods or services; |
|
// loss of use, data, or profits; or business interruption) however caused |
|
// and on any theory of liability, whether in contract, strict liability, |
|
// or tort (including negligence or otherwise) arising in any way out of |
|
// the use of this software, even if advised of the possibility of such damage. |
|
// |
|
//M*/ |
|
|
|
#include "tldDetector.hpp" |
|
|
|
namespace cv |
|
{ |
|
namespace tld |
|
{ |
|
// Calculate offsets for classifiers |
|
void TLDDetector::prepareClassifiers(int rowstep) |
|
{ |
|
for (int i = 0; i < (int)classifiers.size(); i++) |
|
classifiers[i].prepareClassifier(rowstep); |
|
} |
|
|
|
// Calculate posterior probability, that the patch belongs to the current EC model |
|
double TLDDetector::ensembleClassifierNum(const uchar* data) |
|
{ |
|
double p = 0; |
|
for (int k = 0; k < (int)classifiers.size(); k++) |
|
p += classifiers[k].posteriorProbabilityFast(data); |
|
p /= classifiers.size(); |
|
return p; |
|
} |
|
|
|
// Calculate Relative similarity of the patch (NN-Model) |
|
double TLDDetector::Sr(const Mat_<uchar>& patch) |
|
{ |
|
double splus = 0.0, sminus = 0.0; |
|
Mat_<uchar> modelSample(STANDARD_PATCH_SIZE, STANDARD_PATCH_SIZE); |
|
for (int i = 0; i < *posNum; i++) |
|
{ |
|
modelSample.data = &(posExp->data[i * 225]); |
|
splus = std::max(splus, 0.5 * (NCC(modelSample, patch) + 1.0)); |
|
} |
|
for (int i = 0; i < *negNum; i++) |
|
{ |
|
modelSample.data = &(negExp->data[i * 225]); |
|
sminus = std::max(sminus, 0.5 * (NCC(modelSample, patch) + 1.0)); |
|
} |
|
|
|
if (splus + sminus == 0.0) |
|
return 0.0; |
|
return splus / (sminus + splus); |
|
} |
|
|
|
double TLDDetector::ocl_Sr(const Mat_<uchar>& patch) |
|
{ |
|
double splus = 0.0, sminus = 0.0; |
|
|
|
|
|
UMat devPatch = patch.getUMat(ACCESS_READ, USAGE_ALLOCATE_DEVICE_MEMORY); |
|
UMat devPositiveSamples = posExp->getUMat(ACCESS_READ, USAGE_ALLOCATE_DEVICE_MEMORY); |
|
UMat devNegativeSamples = negExp->getUMat(ACCESS_READ, USAGE_ALLOCATE_DEVICE_MEMORY); |
|
UMat devNCC(1, 2*MAX_EXAMPLES_IN_MODEL, CV_32FC1, ACCESS_RW, USAGE_ALLOCATE_DEVICE_MEMORY); |
|
|
|
|
|
ocl::Kernel k; |
|
ocl::ProgramSource src = ocl::tracking::tldDetector_oclsrc; |
|
String error; |
|
ocl::Program prog(src, NULL, error); |
|
k.create("NCC", prog); |
|
if (k.empty()) |
|
printf("Kernel create failed!!!\n"); |
|
k.args( |
|
ocl::KernelArg::PtrReadOnly(devPatch), |
|
ocl::KernelArg::PtrReadOnly(devPositiveSamples), |
|
ocl::KernelArg::PtrReadOnly(devNegativeSamples), |
|
ocl::KernelArg::PtrWriteOnly(devNCC), |
|
*posNum, |
|
*negNum); |
|
|
|
size_t globSize = 1000; |
|
|
|
if (!k.run(1, &globSize, NULL, false)) |
|
printf("Kernel Run Error!!!"); |
|
|
|
Mat resNCC = devNCC.getMat(ACCESS_READ); |
|
|
|
for (int i = 0; i < *posNum; i++) |
|
splus = std::max(splus, 0.5 * (resNCC.at<float>(i) + 1.0)); |
|
|
|
for (int i = 0; i < *negNum; i++) |
|
sminus = std::max(sminus, 0.5 * (resNCC.at<float>(i+500) +1.0)); |
|
|
|
if (splus + sminus == 0.0) |
|
return 0.0; |
|
return splus / (sminus + splus); |
|
} |
|
|
|
void TLDDetector::ocl_batchSrSc(const Mat_<uchar>& patches, double *resultSr, double *resultSc, int numOfPatches) |
|
{ |
|
UMat devPatches = patches.getUMat(ACCESS_READ, USAGE_ALLOCATE_DEVICE_MEMORY); |
|
UMat devPositiveSamples = posExp->getUMat(ACCESS_READ, USAGE_ALLOCATE_DEVICE_MEMORY); |
|
UMat devNegativeSamples = negExp->getUMat(ACCESS_READ, USAGE_ALLOCATE_DEVICE_MEMORY); |
|
UMat devPosNCC(MAX_EXAMPLES_IN_MODEL, numOfPatches, CV_32FC1, ACCESS_RW, USAGE_ALLOCATE_DEVICE_MEMORY); |
|
UMat devNegNCC(MAX_EXAMPLES_IN_MODEL, numOfPatches, CV_32FC1, ACCESS_RW, USAGE_ALLOCATE_DEVICE_MEMORY); |
|
|
|
ocl::Kernel k; |
|
ocl::ProgramSource src = ocl::tracking::tldDetector_oclsrc; |
|
String error; |
|
ocl::Program prog(src, NULL, error); |
|
k.create("batchNCC", prog); |
|
if (k.empty()) |
|
printf("Kernel create failed!!!\n"); |
|
k.args( |
|
ocl::KernelArg::PtrReadOnly(devPatches), |
|
ocl::KernelArg::PtrReadOnly(devPositiveSamples), |
|
ocl::KernelArg::PtrReadOnly(devNegativeSamples), |
|
ocl::KernelArg::PtrWriteOnly(devPosNCC), |
|
ocl::KernelArg::PtrWriteOnly(devNegNCC), |
|
*posNum, |
|
*negNum, |
|
numOfPatches); |
|
|
|
size_t globSize = 2 * numOfPatches*MAX_EXAMPLES_IN_MODEL; |
|
|
|
if (!k.run(1, &globSize, NULL, true)) |
|
printf("Kernel Run Error!!!"); |
|
|
|
Mat posNCC = devPosNCC.getMat(ACCESS_READ); |
|
Mat negNCC = devNegNCC.getMat(ACCESS_READ); |
|
|
|
//Calculate Srs |
|
for (int id = 0; id < numOfPatches; id++) |
|
{ |
|
double spr = 0.0, smr = 0.0, spc = 0.0, smc = 0; |
|
int med = getMedian((*timeStampsPositive)); |
|
for (int i = 0; i < *posNum; i++) |
|
{ |
|
spr = std::max(spr, 0.5 * (posNCC.at<float>(id * 500 + i) + 1.0)); |
|
if ((int)(*timeStampsPositive)[i] <= med) |
|
spc = std::max(spr, 0.5 * (posNCC.at<float>(id * 500 + i) + 1.0)); |
|
} |
|
for (int i = 0; i < *negNum; i++) |
|
smc = smr = std::max(smr, 0.5 * (negNCC.at<float>(id * 500 + i) + 1.0)); |
|
|
|
if (spr + smr == 0.0) |
|
resultSr[id] = 0.0; |
|
else |
|
resultSr[id] = spr / (smr + spr); |
|
|
|
if (spc + smc == 0.0) |
|
resultSc[id] = 0.0; |
|
else |
|
resultSc[id] = spc / (smc + spc); |
|
} |
|
} |
|
|
|
// Calculate Conservative similarity of the patch (NN-Model) |
|
double TLDDetector::Sc(const Mat_<uchar>& patch) |
|
{ |
|
double splus = 0.0, sminus = 0.0; |
|
Mat_<uchar> modelSample(STANDARD_PATCH_SIZE, STANDARD_PATCH_SIZE); |
|
int med = getMedian((*timeStampsPositive)); |
|
for (int i = 0; i < *posNum; i++) |
|
{ |
|
if ((int)(*timeStampsPositive)[i] <= med) |
|
{ |
|
modelSample.data = &(posExp->data[i * 225]); |
|
splus = std::max(splus, 0.5 * (NCC(modelSample, patch) + 1.0)); |
|
} |
|
} |
|
for (int i = 0; i < *negNum; i++) |
|
{ |
|
modelSample.data = &(negExp->data[i * 225]); |
|
sminus = std::max(sminus, 0.5 * (NCC(modelSample, patch) + 1.0)); |
|
} |
|
|
|
if (splus + sminus == 0.0) |
|
return 0.0; |
|
|
|
return splus / (sminus + splus); |
|
} |
|
|
|
double TLDDetector::ocl_Sc(const Mat_<uchar>& patch) |
|
{ |
|
double splus = 0.0, sminus = 0.0; |
|
|
|
UMat devPatch = patch.getUMat(ACCESS_READ, USAGE_ALLOCATE_DEVICE_MEMORY); |
|
UMat devPositiveSamples = posExp->getUMat(ACCESS_READ, USAGE_ALLOCATE_DEVICE_MEMORY); |
|
UMat devNegativeSamples = negExp->getUMat(ACCESS_READ, USAGE_ALLOCATE_DEVICE_MEMORY); |
|
UMat devNCC(1, 2 * MAX_EXAMPLES_IN_MODEL, CV_32FC1, ACCESS_RW, USAGE_ALLOCATE_DEVICE_MEMORY); |
|
|
|
|
|
ocl::Kernel k; |
|
ocl::ProgramSource src = ocl::tracking::tldDetector_oclsrc; |
|
String error; |
|
ocl::Program prog(src, NULL, error); |
|
k.create("NCC", prog); |
|
if (k.empty()) |
|
printf("Kernel create failed!!!\n"); |
|
k.args( |
|
ocl::KernelArg::PtrReadOnly(devPatch), |
|
ocl::KernelArg::PtrReadOnly(devPositiveSamples), |
|
ocl::KernelArg::PtrReadOnly(devNegativeSamples), |
|
ocl::KernelArg::PtrWriteOnly(devNCC), |
|
*posNum, |
|
*negNum); |
|
|
|
size_t globSize = 1000; |
|
|
|
if (!k.run(1, &globSize, NULL, false)) |
|
printf("Kernel Run Error!!!"); |
|
|
|
Mat resNCC = devNCC.getMat(ACCESS_READ); |
|
|
|
int med = getMedian((*timeStampsPositive)); |
|
for (int i = 0; i < *posNum; i++) |
|
if ((int)(*timeStampsPositive)[i] <= med) |
|
splus = std::max(splus, 0.5 * (resNCC.at<float>(i) +1.0)); |
|
|
|
for (int i = 0; i < *negNum; i++) |
|
sminus = std::max(sminus, 0.5 * (resNCC.at<float>(i + 500) + 1.0)); |
|
|
|
if (splus + sminus == 0.0) |
|
return 0.0; |
|
return splus / (sminus + splus); |
|
} |
|
|
|
// Generate Search Windows for detector from aspect ratio of initial BBs |
|
void TLDDetector::generateScanGrid(int rows, int cols, Size initBox, std::vector<Rect2d>& res, bool withScaling) |
|
{ |
|
res.clear(); |
|
//Scales step: SCALE_STEP; Translation steps: 10% of width & 10% of height; minSize: 20pix |
|
for (double h = initBox.height, w = initBox.width; h < cols && w < rows;) |
|
{ |
|
for (double x = 0; (x + w + 1.0) <= cols; x += (0.1 * w)) |
|
{ |
|
for (double y = 0; (y + h + 1.0) <= rows; y += (0.1 * h)) |
|
res.push_back(Rect2d(x, y, w, h)); |
|
} |
|
if (withScaling) |
|
{ |
|
if (h <= initBox.height) |
|
{ |
|
h /= SCALE_STEP; w /= SCALE_STEP; |
|
if (h < 20 || w < 20) |
|
{ |
|
h = initBox.height * SCALE_STEP; w = initBox.width * SCALE_STEP; |
|
CV_Assert(h > initBox.height || w > initBox.width); |
|
} |
|
} |
|
else |
|
{ |
|
h *= SCALE_STEP; w *= SCALE_STEP; |
|
} |
|
} |
|
else |
|
{ |
|
break; |
|
} |
|
} |
|
} |
|
|
|
//Detection - returns most probable new target location (Max Sc) |
|
|
|
bool TLDDetector::detect(const Mat& img, const Mat& imgBlurred, Rect2d& res, std::vector<LabeledPatch>& patches, Size initSize) |
|
{ |
|
patches.clear(); |
|
Mat_<uchar> standardPatch(STANDARD_PATCH_SIZE, STANDARD_PATCH_SIZE); |
|
Mat tmp; |
|
int dx = initSize.width / 10, dy = initSize.height / 10; |
|
Size2d size = img.size(); |
|
double scale = 1.0; |
|
int npos = 0, nneg = 0; |
|
double maxSc = -5.0; |
|
Rect2d maxScRect; |
|
int scaleID; |
|
std::vector <Mat> resized_imgs, blurred_imgs; |
|
std::vector <Point> varBuffer, ensBuffer; |
|
std::vector <int> varScaleIDs, ensScaleIDs; |
|
|
|
//Detection part |
|
//Generate windows and filter by variance |
|
scaleID = 0; |
|
resized_imgs.push_back(img); |
|
blurred_imgs.push_back(imgBlurred); |
|
do |
|
{ |
|
Mat_<double> intImgP, intImgP2; |
|
computeIntegralImages(resized_imgs[scaleID], intImgP, intImgP2); |
|
for (int i = 0, imax = cvFloor((0.0 + resized_imgs[scaleID].cols - initSize.width) / dx); i < imax; i++) |
|
{ |
|
for (int j = 0, jmax = cvFloor((0.0 + resized_imgs[scaleID].rows - initSize.height) / dy); j < jmax; j++) |
|
{ |
|
if (!patchVariance(intImgP, intImgP2, originalVariancePtr, Point(dx * i, dy * j), initSize)) |
|
continue; |
|
varBuffer.push_back(Point(dx * i, dy * j)); |
|
varScaleIDs.push_back(scaleID); |
|
} |
|
} |
|
scaleID++; |
|
size.width /= SCALE_STEP; |
|
size.height /= SCALE_STEP; |
|
scale *= SCALE_STEP; |
|
resize(img, tmp, size, 0, 0, DOWNSCALE_MODE); |
|
resized_imgs.push_back(tmp); |
|
GaussianBlur(resized_imgs[scaleID], tmp, GaussBlurKernelSize, 0.0f); |
|
blurred_imgs.push_back(tmp); |
|
} while (size.width >= initSize.width && size.height >= initSize.height); |
|
|
|
//Encsemble classification |
|
for (int i = 0; i < (int)varBuffer.size(); i++) |
|
{ |
|
prepareClassifiers(static_cast<int> (blurred_imgs[varScaleIDs[i]].step[0])); |
|
if (ensembleClassifierNum(&blurred_imgs[varScaleIDs[i]].at<uchar>(varBuffer[i].y, varBuffer[i].x)) <= ENSEMBLE_THRESHOLD) |
|
continue; |
|
ensBuffer.push_back(varBuffer[i]); |
|
ensScaleIDs.push_back(varScaleIDs[i]); |
|
} |
|
|
|
//NN classification |
|
for (int i = 0; i < (int)ensBuffer.size(); i++) |
|
{ |
|
LabeledPatch labPatch; |
|
double curScale = pow(SCALE_STEP, ensScaleIDs[i]); |
|
labPatch.rect = Rect2d(ensBuffer[i].x*curScale, ensBuffer[i].y*curScale, initSize.width * curScale, initSize.height * curScale); |
|
resample(resized_imgs[ensScaleIDs[i]], Rect2d(ensBuffer[i], initSize), standardPatch); |
|
|
|
double srValue, scValue; |
|
srValue = Sr(standardPatch); |
|
|
|
////To fix: Check the paper, probably this cause wrong learning |
|
// |
|
labPatch.isObject = srValue > THETA_NN; |
|
labPatch.shouldBeIntegrated = abs(srValue - THETA_NN) < 0.1; |
|
patches.push_back(labPatch); |
|
// |
|
|
|
if (!labPatch.isObject) |
|
{ |
|
nneg++; |
|
continue; |
|
} |
|
else |
|
{ |
|
npos++; |
|
} |
|
scValue = Sc(standardPatch); |
|
if (scValue > maxSc) |
|
{ |
|
maxSc = scValue; |
|
maxScRect = labPatch.rect; |
|
} |
|
} |
|
|
|
if (maxSc < 0) |
|
return false; |
|
else |
|
{ |
|
res = maxScRect; |
|
return true; |
|
} |
|
} |
|
|
|
bool TLDDetector::ocl_detect(const Mat& img, const Mat& imgBlurred, Rect2d& res, std::vector<LabeledPatch>& patches, Size initSize) |
|
{ |
|
patches.clear(); |
|
Mat_<uchar> standardPatch(STANDARD_PATCH_SIZE, STANDARD_PATCH_SIZE); |
|
Mat tmp; |
|
int dx = initSize.width / 10, dy = initSize.height / 10; |
|
Size2d size = img.size(); |
|
double scale = 1.0; |
|
int npos = 0, nneg = 0; |
|
double maxSc = -5.0; |
|
Rect2d maxScRect; |
|
int scaleID; |
|
std::vector <Mat> resized_imgs, blurred_imgs; |
|
std::vector <Point> varBuffer, ensBuffer; |
|
std::vector <int> varScaleIDs, ensScaleIDs; |
|
|
|
//Detection part |
|
//Generate windows and filter by variance |
|
scaleID = 0; |
|
resized_imgs.push_back(img); |
|
blurred_imgs.push_back(imgBlurred); |
|
do |
|
{ |
|
Mat_<double> intImgP, intImgP2; |
|
computeIntegralImages(resized_imgs[scaleID], intImgP, intImgP2); |
|
for (int i = 0, imax = cvFloor((0.0 + resized_imgs[scaleID].cols - initSize.width) / dx); i < imax; i++) |
|
{ |
|
for (int j = 0, jmax = cvFloor((0.0 + resized_imgs[scaleID].rows - initSize.height) / dy); j < jmax; j++) |
|
{ |
|
if (!patchVariance(intImgP, intImgP2, originalVariancePtr, Point(dx * i, dy * j), initSize)) |
|
continue; |
|
varBuffer.push_back(Point(dx * i, dy * j)); |
|
varScaleIDs.push_back(scaleID); |
|
} |
|
} |
|
scaleID++; |
|
size.width /= SCALE_STEP; |
|
size.height /= SCALE_STEP; |
|
scale *= SCALE_STEP; |
|
resize(img, tmp, size, 0, 0, DOWNSCALE_MODE); |
|
resized_imgs.push_back(tmp); |
|
GaussianBlur(resized_imgs[scaleID], tmp, GaussBlurKernelSize, 0.0f); |
|
blurred_imgs.push_back(tmp); |
|
} while (size.width >= initSize.width && size.height >= initSize.height); |
|
|
|
//Encsemble classification |
|
for (int i = 0; i < (int)varBuffer.size(); i++) |
|
{ |
|
prepareClassifiers((int)blurred_imgs[varScaleIDs[i]].step[0]); |
|
if (ensembleClassifierNum(&blurred_imgs[varScaleIDs[i]].at<uchar>(varBuffer[i].y, varBuffer[i].x)) <= ENSEMBLE_THRESHOLD) |
|
continue; |
|
ensBuffer.push_back(varBuffer[i]); |
|
ensScaleIDs.push_back(varScaleIDs[i]); |
|
} |
|
|
|
//NN classification |
|
//Prepare batch of patches |
|
int numOfPatches = (int)ensBuffer.size(); |
|
Mat_<uchar> stdPatches(numOfPatches, 225); |
|
double *resultSr = new double[numOfPatches]; |
|
double *resultSc = new double[numOfPatches]; |
|
|
|
uchar *patchesData = stdPatches.data; |
|
for (int i = 0; i < (int)ensBuffer.size(); i++) |
|
{ |
|
resample(resized_imgs[ensScaleIDs[i]], Rect2d(ensBuffer[i], initSize), standardPatch); |
|
uchar *stdPatchData = standardPatch.data; |
|
for (int j = 0; j < 225; j++) |
|
patchesData[225*i+j] = stdPatchData[j]; |
|
} |
|
//Calculate Sr and Sc batches |
|
ocl_batchSrSc(stdPatches, resultSr, resultSc, numOfPatches); |
|
|
|
|
|
for (int i = 0; i < (int)ensBuffer.size(); i++) |
|
{ |
|
LabeledPatch labPatch; |
|
standardPatch.data = &stdPatches.data[225 * i]; |
|
double curScale = pow(SCALE_STEP, ensScaleIDs[i]); |
|
labPatch.rect = Rect2d(ensBuffer[i].x*curScale, ensBuffer[i].y*curScale, initSize.width * curScale, initSize.height * curScale); |
|
|
|
double srValue, scValue; |
|
|
|
srValue = resultSr[i]; |
|
|
|
////To fix: Check the paper, probably this cause wrong learning |
|
// |
|
labPatch.isObject = srValue > THETA_NN; |
|
labPatch.shouldBeIntegrated = abs(srValue - THETA_NN) < 0.1; |
|
patches.push_back(labPatch); |
|
// |
|
|
|
if (!labPatch.isObject) |
|
{ |
|
nneg++; |
|
continue; |
|
} |
|
else |
|
{ |
|
npos++; |
|
} |
|
scValue = resultSc[i]; |
|
if (scValue > maxSc) |
|
{ |
|
maxSc = scValue; |
|
maxScRect = labPatch.rect; |
|
} |
|
} |
|
|
|
if (maxSc < 0) |
|
return false; |
|
res = maxScRect; |
|
return true; |
|
} |
|
|
|
// Computes the variance of subimage given by box, with the help of two integral |
|
// images intImgP and intImgP2 (sum of squares), which should be also provided. |
|
/**
 * Variance test for one window, evaluated from two integral images.
 *
 * The sum over the window is taken from four corner look-ups in each integral
 * image; dividing by the window area gives the mean (from intImgP) and the mean of
 * squares (from intImgP2). The window passes when
 * mean_of_squares - mean^2 > VARIANCE_THRESHOLD * *originalVariance.
 *
 * @param intImgP          integral image of the pixel values
 * @param intImgP2         integral image of the squared pixel values
 * @param originalVariance pointer to the reference variance
 * @param pt               top-left corner of the window
 * @param size             window size
 * @return true when the window variance exceeds the threshold
 */
bool TLDDetector::patchVariance(Mat_<double>& intImgP, Mat_<double>& intImgP2, double *originalVariance, Point pt, Size size)
{
    int x = (pt.x), y = (pt.y), width = (size.width), height = (size.height);
    CV_Assert(0 <= x && (x + width) < intImgP.cols && (x + width) < intImgP2.cols);
    CV_Assert(0 <= y && (y + height) < intImgP.rows && (y + height) < intImgP2.rows);

    const int right = x + width;
    const int bottom = y + height;
    const int area = width * height;

    // Mean of the pixel values inside the window.
    const double sumTL = intImgP(y, x);
    const double sumTR = intImgP(y, right);
    const double sumBL = intImgP(bottom, x);
    const double sumBR = intImgP(bottom, right);
    const double p = (sumTL + sumBR - sumTR - sumBL) / area;

    // Mean of the squared pixel values inside the window.
    const double sqTL = intImgP2(y, x);
    const double sqTR = intImgP2(y, right);
    const double sqBL = intImgP2(bottom, x);
    const double sqBR = intImgP2(bottom, right);
    const double p2 = (sqTL + sqBR - sqTR - sqBL) / area;

    const double limit = VARIANCE_THRESHOLD * *originalVariance;
    return ((p2 - p * p) > limit);
}
|
|
|
} |
|
} |