opencv_contrib/modules/tracking/src/tldDetector.cpp

/*M///////////////////////////////////////////////////////////////////////////////////////
//
//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
//
//  By downloading, copying, installing or using the software you agree to this license.
//  If you do not agree to this license, do not download, install,
//  copy or use the software.
//
//
//                           License Agreement
//                For Open Source Computer Vision Library
//
// Copyright (C) 2013, OpenCV Foundation, all rights reserved.
// Third party copyrights are property of their respective owners.
//
// Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met:
//
//   * Redistribution's of source code must retain the above copyright notice,
//     this list of conditions and the following disclaimer.
//
//   * Redistribution's in binary form must reproduce the above copyright notice,
//     this list of conditions and the following disclaimer in the documentation
//     and/or other materials provided with the distribution.
//
//   * The name of the copyright holders may not be used to endorse or promote products
//     derived from this software without specific prior written permission.
//
// This software is provided by the copyright holders and contributors "as is" and
// any express or implied warranties, including, but not limited to, the implied
// warranties of merchantability and fitness for a particular purpose are disclaimed.
// In no event shall the Intel Corporation or contributors be liable for any direct,
// indirect, incidental, special, exemplary, or consequential damages
// (including, but not limited to, procurement of substitute goods or services;
// loss of use, data, or profits; or business interruption) however caused
// and on any theory of liability, whether in contract, strict liability,
// or tort (including negligence or otherwise) arising in any way out of
// the use of this software, even if advised of the possibility of such damage.
//
//M*/

#include "tldDetector.hpp"

namespace cv
{
	namespace tld
	{
		// Calculate offsets for classifiers
		void TLDDetector::prepareClassifiers(int rowstep)
		{
			for (int i = 0; i < (int)classifiers.size(); i++)
				classifiers[i].prepareClassifier(rowstep);
		}

		// Calculate posterior probability, that the patch belongs to the current EC model
		double TLDDetector::ensembleClassifierNum(const uchar* data)
		{
			double p = 0;
			for (int k = 0; k < (int)classifiers.size(); k++)
				p += classifiers[k].posteriorProbabilityFast(data);
			p /= classifiers.size();
			return p;
		}

		// Calculate Relative similarity of the patch (NN-Model)
		double TLDDetector::Sr(const Mat_<uchar>& patch)
		{
			double splus = 0.0, sminus = 0.0;
			Mat_<uchar> modelSample(STANDARD_PATCH_SIZE, STANDARD_PATCH_SIZE);
			for (int i = 0; i < *posNum; i++)
			{
				modelSample.data = &(posExp->data[i * 225]);
				splus = std::max(splus, 0.5 * (NCC(modelSample, patch) + 1.0));
			}
			for (int i = 0; i < *negNum; i++)
			{
				modelSample.data = &(negExp->data[i * 225]);
				sminus = std::max(sminus, 0.5 * (NCC(modelSample, patch) + 1.0));
			}

			if (splus + sminus == 0.0)
				return 0.0;
			return splus / (sminus + splus);
		}

		double TLDDetector::ocl_Sr(const Mat_<uchar>& patch)
		{
			double splus = 0.0, sminus = 0.0;


			UMat devPatch = patch.getUMat(ACCESS_READ, USAGE_ALLOCATE_DEVICE_MEMORY);
			UMat devPositiveSamples = posExp->getUMat(ACCESS_READ, USAGE_ALLOCATE_DEVICE_MEMORY);
			UMat devNegativeSamples = negExp->getUMat(ACCESS_READ, USAGE_ALLOCATE_DEVICE_MEMORY);
			UMat devNCC(1, 2*MAX_EXAMPLES_IN_MODEL, CV_32FC1, ACCESS_RW, USAGE_ALLOCATE_DEVICE_MEMORY);


			ocl::Kernel k;
			ocl::ProgramSource src = ocl::tracking::tldDetector_oclsrc;
			String error;
			ocl::Program prog(src, NULL, error);
			k.create("NCC", prog);
			if (k.empty())
				printf("Kernel create failed!!!\n");
			k.args(
				ocl::KernelArg::PtrReadOnly(devPatch),
				ocl::KernelArg::PtrReadOnly(devPositiveSamples),
				ocl::KernelArg::PtrReadOnly(devNegativeSamples),
				ocl::KernelArg::PtrWriteOnly(devNCC),
				*posNum,
				*negNum);

			size_t globSize = 1000;

			if (!k.run(1, &globSize, NULL, false))
				printf("Kernel Run Error!!!");

			Mat resNCC = devNCC.getMat(ACCESS_READ);

			for (int i = 0; i < *posNum; i++)
				splus = std::max(splus, 0.5 * (resNCC.at<float>(i) + 1.0));

			for (int i = 0; i < *negNum; i++)
				sminus = std::max(sminus, 0.5 * (resNCC.at<float>(i+500) +1.0));

			if (splus + sminus == 0.0)
				return 0.0;
			return splus / (sminus + splus);
		}

		void TLDDetector::ocl_batchSrSc(const Mat_<uchar>& patches, double *resultSr, double *resultSc, int numOfPatches)
		{
			UMat devPatches = patches.getUMat(ACCESS_READ, USAGE_ALLOCATE_DEVICE_MEMORY);
			UMat devPositiveSamples = posExp->getUMat(ACCESS_READ, USAGE_ALLOCATE_DEVICE_MEMORY);
			UMat devNegativeSamples = negExp->getUMat(ACCESS_READ, USAGE_ALLOCATE_DEVICE_MEMORY);
			UMat devPosNCC(MAX_EXAMPLES_IN_MODEL, numOfPatches, CV_32FC1, ACCESS_RW, USAGE_ALLOCATE_DEVICE_MEMORY);
			UMat devNegNCC(MAX_EXAMPLES_IN_MODEL, numOfPatches, CV_32FC1, ACCESS_RW, USAGE_ALLOCATE_DEVICE_MEMORY);

			ocl::Kernel k;
			ocl::ProgramSource src = ocl::tracking::tldDetector_oclsrc;
			String error;
			ocl::Program prog(src, NULL, error);
			k.create("batchNCC", prog);
			if (k.empty())
				printf("Kernel create failed!!!\n");
			k.args(
				ocl::KernelArg::PtrReadOnly(devPatches),
				ocl::KernelArg::PtrReadOnly(devPositiveSamples),
				ocl::KernelArg::PtrReadOnly(devNegativeSamples),
				ocl::KernelArg::PtrWriteOnly(devPosNCC),
				ocl::KernelArg::PtrWriteOnly(devNegNCC),
				*posNum,
				*negNum,
				numOfPatches);

			size_t globSize = 2 * numOfPatches*MAX_EXAMPLES_IN_MODEL;

			if (!k.run(1, &globSize, NULL, true))
				printf("Kernel Run Error!!!");

			Mat posNCC = devPosNCC.getMat(ACCESS_READ);
			Mat negNCC = devNegNCC.getMat(ACCESS_READ);

			//Calculate Srs
			for (int id = 0; id < numOfPatches; id++)
			{
				double spr = 0.0, smr = 0.0, spc = 0.0, smc = 0;
				int med = getMedian((*timeStampsPositive));
				for (int i = 0; i < *posNum; i++)
				{
					spr = std::max(spr, 0.5 * (posNCC.at<float>(id * 500 + i) + 1.0));
					if ((int)(*timeStampsPositive)[i] <= med)
						spc = std::max(spr, 0.5 * (posNCC.at<float>(id * 500 + i) + 1.0));
				}
				for (int i = 0; i < *negNum; i++)
					smc = smr = std::max(smr, 0.5 * (negNCC.at<float>(id * 500 + i) + 1.0));

				if (spr + smr == 0.0)
					resultSr[id] = 0.0;
				else
					resultSr[id] = spr / (smr + spr);

				if (spc + smc == 0.0)
					resultSc[id] = 0.0;
				else
					resultSc[id] = spc / (smc + spc);
			}
		}

		// Calculate Conservative similarity of the patch (NN-Model)
		double TLDDetector::Sc(const Mat_<uchar>& patch)
		{
			double splus = 0.0, sminus = 0.0;
			Mat_<uchar> modelSample(STANDARD_PATCH_SIZE, STANDARD_PATCH_SIZE);
			int med = getMedian((*timeStampsPositive));
			for (int i = 0; i < *posNum; i++)
			{
				if ((int)(*timeStampsPositive)[i] <= med)
				{
					modelSample.data = &(posExp->data[i * 225]);
					splus = std::max(splus, 0.5 * (NCC(modelSample, patch) + 1.0));
				}
			}
			for (int i = 0; i < *negNum; i++)
			{
				modelSample.data = &(negExp->data[i * 225]);
				sminus = std::max(sminus, 0.5 * (NCC(modelSample, patch) + 1.0));
			}

			if (splus + sminus == 0.0)
				return 0.0;

			return splus / (sminus + splus);
		}

		double TLDDetector::ocl_Sc(const Mat_<uchar>& patch)
		{
			double splus = 0.0, sminus = 0.0;

			UMat devPatch = patch.getUMat(ACCESS_READ, USAGE_ALLOCATE_DEVICE_MEMORY);
			UMat devPositiveSamples = posExp->getUMat(ACCESS_READ, USAGE_ALLOCATE_DEVICE_MEMORY);
			UMat devNegativeSamples = negExp->getUMat(ACCESS_READ, USAGE_ALLOCATE_DEVICE_MEMORY);
			UMat devNCC(1, 2 * MAX_EXAMPLES_IN_MODEL, CV_32FC1, ACCESS_RW, USAGE_ALLOCATE_DEVICE_MEMORY);


			ocl::Kernel k;
			ocl::ProgramSource src = ocl::tracking::tldDetector_oclsrc;
			String error;
			ocl::Program prog(src, NULL, error);
			k.create("NCC", prog);
			if (k.empty())
				printf("Kernel create failed!!!\n");
			k.args(
				ocl::KernelArg::PtrReadOnly(devPatch),
				ocl::KernelArg::PtrReadOnly(devPositiveSamples),
				ocl::KernelArg::PtrReadOnly(devNegativeSamples),
				ocl::KernelArg::PtrWriteOnly(devNCC),
				*posNum,
				*negNum);

			size_t globSize = 1000;

			if (!k.run(1, &globSize, NULL, false))
				printf("Kernel Run Error!!!");

			Mat resNCC = devNCC.getMat(ACCESS_READ);

			int med = getMedian((*timeStampsPositive));
			for (int i = 0; i < *posNum; i++)
				if ((int)(*timeStampsPositive)[i] <= med)
					splus = std::max(splus, 0.5 * (resNCC.at<float>(i) +1.0));

			for (int i = 0; i < *negNum; i++)
				sminus = std::max(sminus, 0.5 * (resNCC.at<float>(i + 500) + 1.0));

			if (splus + sminus == 0.0)
				return 0.0;
			return splus / (sminus + splus);
		}

		// Generate Search Windows for detector from aspect ratio of initial BBs
		void TLDDetector::generateScanGrid(int rows, int cols, Size initBox, std::vector<Rect2d>& res, bool withScaling)
		{
			res.clear();
			//Scales step: SCALE_STEP; Translation steps: 10% of width & 10% of height; minSize: 20pix
			for (double h = initBox.height, w = initBox.width; h < cols && w < rows;)
			{
				for (double x = 0; (x + w + 1.0) <= cols; x += (0.1 * w))
				{
					for (double y = 0; (y + h + 1.0) <= rows; y += (0.1 * h))
						res.push_back(Rect2d(x, y, w, h));
				}
				if (withScaling)
				{
					if (h <= initBox.height)
					{
						h /= SCALE_STEP; w /= SCALE_STEP;
						if (h < 20 || w < 20)
						{
							h = initBox.height * SCALE_STEP; w = initBox.width * SCALE_STEP;
							CV_Assert(h > initBox.height || w > initBox.width);
						}
					}
					else
					{
						h *= SCALE_STEP; w *= SCALE_STEP;
					}
				}
				else
				{
					break;
				}
			}
		}

		//Detection - returns most probable new target location (Max Sc)

		bool TLDDetector::detect(const Mat& img, const Mat& imgBlurred, Rect2d& res, std::vector<LabeledPatch>& patches, Size initSize)
		{
			patches.clear();
			Mat_<uchar> standardPatch(STANDARD_PATCH_SIZE, STANDARD_PATCH_SIZE);
			Mat tmp;
			int dx = initSize.width / 10, dy = initSize.height / 10;
			Size2d size = img.size();
			double scale = 1.0;
			int npos = 0, nneg = 0;
			double maxSc = -5.0;
			Rect2d maxScRect;
			int scaleID;
			std::vector <Mat> resized_imgs, blurred_imgs;
			std::vector <Point> varBuffer, ensBuffer;
			std::vector <int> varScaleIDs, ensScaleIDs;

			//Detection part
			//Generate windows and filter by variance
			scaleID = 0;
			resized_imgs.push_back(img);
			blurred_imgs.push_back(imgBlurred);
			do
			{
				Mat_<double> intImgP, intImgP2;
				computeIntegralImages(resized_imgs[scaleID], intImgP, intImgP2);
				for (int i = 0, imax = cvFloor((0.0 + resized_imgs[scaleID].cols - initSize.width) / dx); i < imax; i++)
				{
					for (int j = 0, jmax = cvFloor((0.0 + resized_imgs[scaleID].rows - initSize.height) / dy); j < jmax; j++)
					{
						if (!patchVariance(intImgP, intImgP2, originalVariancePtr, Point(dx * i, dy * j), initSize))
							continue;
						varBuffer.push_back(Point(dx * i, dy * j));
						varScaleIDs.push_back(scaleID);
					}
				}
				scaleID++;
				size.width /= SCALE_STEP;
				size.height /= SCALE_STEP;
				scale *= SCALE_STEP;
				resize(img, tmp, size, 0, 0, DOWNSCALE_MODE);
				resized_imgs.push_back(tmp);
				GaussianBlur(resized_imgs[scaleID], tmp, GaussBlurKernelSize, 0.0f);
				blurred_imgs.push_back(tmp);
			} while (size.width >= initSize.width && size.height >= initSize.height);

			//Encsemble classification
			for (int i = 0; i < (int)varBuffer.size(); i++)
			{
				prepareClassifiers(static_cast<int> (blurred_imgs[varScaleIDs[i]].step[0]));
				if (ensembleClassifierNum(&blurred_imgs[varScaleIDs[i]].at<uchar>(varBuffer[i].y, varBuffer[i].x)) <= ENSEMBLE_THRESHOLD)
					continue;
				ensBuffer.push_back(varBuffer[i]);
				ensScaleIDs.push_back(varScaleIDs[i]);
			}

			//NN classification
			for (int i = 0; i < (int)ensBuffer.size(); i++)
			{
				LabeledPatch labPatch;
				double curScale = pow(SCALE_STEP, ensScaleIDs[i]);
				labPatch.rect = Rect2d(ensBuffer[i].x*curScale, ensBuffer[i].y*curScale, initSize.width * curScale, initSize.height * curScale);
				resample(resized_imgs[ensScaleIDs[i]], Rect2d(ensBuffer[i], initSize), standardPatch);

				double srValue, scValue;
				srValue = Sr(standardPatch);

				////To fix: Check the paper, probably this cause wrong learning
				//
				labPatch.isObject = srValue > THETA_NN;
				labPatch.shouldBeIntegrated = abs(srValue - THETA_NN) < 0.1;
				patches.push_back(labPatch);
				//

				if (!labPatch.isObject)
				{
					nneg++;
					continue;
				}
				else
				{
					npos++;
				}
				scValue = Sc(standardPatch);
				if (scValue > maxSc)
				{
					maxSc = scValue;
					maxScRect = labPatch.rect;
				}
			}

			if (maxSc < 0)
				return false;
			else
			{
				res = maxScRect;
				return true;
			}
		}

		bool TLDDetector::ocl_detect(const Mat& img, const Mat& imgBlurred, Rect2d& res, std::vector<LabeledPatch>& patches, Size initSize)
		{
			patches.clear();
			Mat_<uchar> standardPatch(STANDARD_PATCH_SIZE, STANDARD_PATCH_SIZE);
			Mat tmp;
			int dx = initSize.width / 10, dy = initSize.height / 10;
			Size2d size = img.size();
			double scale = 1.0;
			int npos = 0, nneg = 0;
			double maxSc = -5.0;
			Rect2d maxScRect;
			int scaleID;
			std::vector <Mat> resized_imgs, blurred_imgs;
			std::vector <Point> varBuffer, ensBuffer;
			std::vector <int> varScaleIDs, ensScaleIDs;

			//Detection part
			//Generate windows and filter by variance
			scaleID = 0;
			resized_imgs.push_back(img);
			blurred_imgs.push_back(imgBlurred);
			do
			{
				Mat_<double> intImgP, intImgP2;
				computeIntegralImages(resized_imgs[scaleID], intImgP, intImgP2);
				for (int i = 0, imax = cvFloor((0.0 + resized_imgs[scaleID].cols - initSize.width) / dx); i < imax; i++)
				{
					for (int j = 0, jmax = cvFloor((0.0 + resized_imgs[scaleID].rows - initSize.height) / dy); j < jmax; j++)
					{
						if (!patchVariance(intImgP, intImgP2, originalVariancePtr, Point(dx * i, dy * j), initSize))
							continue;
						varBuffer.push_back(Point(dx * i, dy * j));
						varScaleIDs.push_back(scaleID);
					}
				}
				scaleID++;
				size.width /= SCALE_STEP;
				size.height /= SCALE_STEP;
				scale *= SCALE_STEP;
				resize(img, tmp, size, 0, 0, DOWNSCALE_MODE);
				resized_imgs.push_back(tmp);
				GaussianBlur(resized_imgs[scaleID], tmp, GaussBlurKernelSize, 0.0f);
				blurred_imgs.push_back(tmp);
			} while (size.width >= initSize.width && size.height >= initSize.height);

			//Encsemble classification
			for (int i = 0; i < (int)varBuffer.size(); i++)
			{
				prepareClassifiers((int)blurred_imgs[varScaleIDs[i]].step[0]);
				if (ensembleClassifierNum(&blurred_imgs[varScaleIDs[i]].at<uchar>(varBuffer[i].y, varBuffer[i].x)) <= ENSEMBLE_THRESHOLD)
					continue;
				ensBuffer.push_back(varBuffer[i]);
				ensScaleIDs.push_back(varScaleIDs[i]);
			}

			//NN classification
			//Prepare batch of patches
			int numOfPatches = (int)ensBuffer.size();
			Mat_<uchar> stdPatches(numOfPatches, 225);
			double *resultSr = new double[numOfPatches];
			double *resultSc = new double[numOfPatches];

			uchar *patchesData = stdPatches.data;
			for (int i = 0; i < (int)ensBuffer.size(); i++)
			{
				resample(resized_imgs[ensScaleIDs[i]], Rect2d(ensBuffer[i], initSize), standardPatch);
				uchar *stdPatchData = standardPatch.data;
				for (int j = 0; j < 225; j++)
					patchesData[225*i+j] = stdPatchData[j];
			}
			//Calculate Sr and Sc batches
			ocl_batchSrSc(stdPatches, resultSr, resultSc, numOfPatches);


			for (int i = 0; i < (int)ensBuffer.size(); i++)
			{
				LabeledPatch labPatch;
				standardPatch.data = &stdPatches.data[225 * i];
				double curScale = pow(SCALE_STEP, ensScaleIDs[i]);
				labPatch.rect = Rect2d(ensBuffer[i].x*curScale, ensBuffer[i].y*curScale, initSize.width * curScale, initSize.height * curScale);

				double srValue, scValue;

				srValue = resultSr[i];

				////To fix: Check the paper, probably this cause wrong learning
				//
				labPatch.isObject = srValue > THETA_NN;
				labPatch.shouldBeIntegrated = abs(srValue - THETA_NN) < 0.1;
				patches.push_back(labPatch);
				//

				if (!labPatch.isObject)
				{
					nneg++;
					continue;
				}
				else
				{
					npos++;
				}
				scValue = resultSc[i];
				if (scValue > maxSc)
				{
					maxSc = scValue;
					maxScRect = labPatch.rect;
				}
			}

			if (maxSc < 0)
				return false;
			res = maxScRect;
			return true;
		}

		// Computes the variance of subimage given by box, with the help of two integral
		// images intImgP and intImgP2 (sum of squares), which should be also provided.
		bool TLDDetector::patchVariance(Mat_<double>& intImgP, Mat_<double>& intImgP2, double *originalVariance, Point pt, Size size)
		{
			int x = (pt.x), y = (pt.y), width = (size.width), height = (size.height);
			CV_Assert(0 <= x && (x + width) < intImgP.cols && (x + width) < intImgP2.cols);
			CV_Assert(0 <= y && (y + height) < intImgP.rows && (y + height) < intImgP2.rows);
			double p = 0, p2 = 0;
			double A, B, C, D;

			A = intImgP(y, x);
			B = intImgP(y, x + width);
			C = intImgP(y + height, x);
			D = intImgP(y + height, x + width);
			p = (A + D - B - C) / (width * height);

			A = intImgP2(y, x);
			B = intImgP2(y, x + width);
			C = intImgP2(y + height, x);
			D = intImgP2(y + height, x + width);
			p2 = (A + D - B - C) / (width * height);

			return ((p2 - p * p) > VARIANCE_THRESHOLD * *originalVariance);
		}

	}
}