diff --git a/modules/tracking/src/opencl/tldDetector.cl b/modules/tracking/src/opencl/tldDetector.cl
new file mode 100644
index 000000000..9610bd72c
--- /dev/null
+++ b/modules/tracking/src/opencl/tldDetector.cl
@@ -0,0 +1,69 @@
+// This file is part of OpenCV project.
+// It is subject to the license terms in the LICENSE file found in the top-level directory
+// of this distribution and at http://opencv.org/license.html.
+
+// Copyright (C) 2014, Advanced Micro Devices, Inc., all rights reserved.
+// Third party copyrights are property of their respective owners.
+
+// NCC: one work-item per model sample. Work-items 0..499 correlate `patch` with positive sample `id`
+// (result in ncc[id]); work-items 500..999 use negative sample `id - 500` (result in ncc[id]).
+__kernel void NCC(__global const uchar *patch,
+                  __global const uchar *positiveSamples,
+                  __global const uchar *negativeSamples,
+                  __global float *ncc,
+                  int posNum,
+                  int negNum)
+{
+    int id = get_global_id(0);
+    if (id >= 1000) return; // only 2 x 500 result slots are addressed below
+    bool posFlg;
+
+    if (id < 500)
+        posFlg = true;
+    if (id >= 500)
+    {
+        //Negative index
+        id = id - 500;
+        posFlg = false;
+    }
+
+    //Running sums: s = sum, n = sum of squares, prod = cross term (1 = model sample, 2 = patch)
+    int s1 = 0, s2 = 0, n1 = 0, n2 = 0, prod = 0;
+    float sq1 = 0, sq2 = 0, ares = 0;
+    int N = 225; // bytes per sample; the host indexes samples with the same stride
+    //NCC with positive patch (slots at or beyond posNum are left unwritten)
+    if (posFlg && id < posNum)
+    {
+        for (int i = 0; i < N; i++)
+        {
+
+            s1 += positiveSamples[id * N + i];
+            s2 += patch[i];
+            n1 += positiveSamples[id * N + i] * positiveSamples[id * N + i];
+            n2 += patch[i] * patch[i];
+            prod += positiveSamples[id * N + i] * patch[i];
+        }
+        sq1 = sqrt(max(0.0, n1 - 1.0 * s1 * s1 / N));
+        sq2 = sqrt(max(0.0, n2 - 1.0 * s2 * s2 / N));
+        ares = (sq2 == 0) ? sq1 / fabs(sq1) : (prod - 1.0 * s1 * s2 / N) / sq1 / sq2; // 1.0 *: s1 * s2 can exceed INT_MAX
+        ncc[id] = ares;
+    }
+
+    //NCC with negative patch (slots at or beyond negNum are left unwritten)
+    if (!posFlg && id < negNum)
+    {
+        for (int i = 0; i < N; i++)
+        {
+
+            s1 += negativeSamples[id * N + i];
+            s2 += patch[i];
+            n1 += negativeSamples[id * N + i] * negativeSamples[id * N + i];
+            n2 += patch[i] * patch[i];
+            prod += negativeSamples[id * N + i] * patch[i];
+        }
+        sq1 = sqrt(max(0.0, n1 - 1.0 * s1 * s1 / N));
+        sq2 = sqrt(max(0.0, n2 - 1.0 * s2 * s2 / N));
+        ares = (sq2 == 0) ? sq1 / fabs(sq1) : (prod - 1.0 * s1 * s2 / N) / sq1 / sq2; // 1.0 *: s1 * s2 can exceed INT_MAX
+        ncc[id+500] = ares;
+    }
+}
diff --git a/modules/tracking/src/precomp.hpp b/modules/tracking/src/precomp.hpp
index 277f9d85b..78acf7971 100644
--- a/modules/tracking/src/precomp.hpp
+++ b/modules/tracking/src/precomp.hpp
@@ -44,6 +44,7 @@
 
 #include "opencv2/tracking.hpp"
 #include "opencv2/core/utility.hpp"
+#include "opencv2/core/ocl.hpp"
 
 namespace cv
 {
diff --git a/modules/tracking/src/tldDetector.cpp b/modules/tracking/src/tldDetector.cpp
index a30974fbd..22a910ac5 100644
--- a/modules/tracking/src/tldDetector.cpp
+++ b/modules/tracking/src/tldDetector.cpp
@@ -65,11 +65,119 @@ namespace cv
         // Calculate Relative similarity of the patch (NN-Model)
         double TLDDetector::Sr(const Mat_& patch)
         {
+            /*
+            int64 e1, e2;
+            float t;
+            e1 = getTickCount();
             double splus = 0.0, sminus = 0.0;
             for (int i = 0; i < (int)(*positiveExamples).size(); i++)
                 splus = std::max(splus, 0.5 * (NCC((*positiveExamples)[i], patch) + 1.0));
             for (int i = 0; i < (int)(*negativeExamples).size(); i++)
                 sminus = std::max(sminus, 0.5 * (NCC((*negativeExamples)[i], patch) + 1.0));
+            e2 = getTickCount();
+            t = (e2 - e1) / getTickFrequency()*1000.0;
+            printf("Sr: %f\n", t);
+            if (splus + sminus == 0.0)
+                return 0.0;
+            return splus / (sminus + splus);
+            */
+            int64 e1, e2;
+            float t;
+            e1 = getTickCount();
+            double splus = 0.0, sminus = 0.0;
+            Mat_ modelSample(STANDARD_PATCH_SIZE, STANDARD_PATCH_SIZE);
+            for (int i = 0; i < *posNum; i++)
+            {
+                modelSample.data = &(posExp->data[i * 225]); // point the header at packed sample i (no copy)
+                splus = std::max(splus, 0.5 * (NCC(modelSample, patch) + 1.0));
+            }
+            for (int i = 0; i < *negNum; i++)
+            {
+                modelSample.data = &(negExp->data[i * 225]); // point the header at packed sample i (no copy)
+                sminus = std::max(sminus, 0.5 * (NCC(modelSample, patch) + 1.0));
+            }
+            e2 = getTickCount();
+            t = (e2 - e1) / getTickFrequency()*1000.0;
+            //printf("Sr CPU: %f\n", t);
+            if (splus + sminus == 0.0)
+                return 0.0;
+            return splus / (sminus + splus);
+        }
+        // OpenCL variant of Sr: scores the patch against the packed samples with one NCC kernel launch.
+        double TLDDetector::ocl_Sr(const Mat_& patch)
+        {
+            int64 e1, e2, e3, e4;
+            float t;
+            e1 = getTickCount();
+            double splus = 0.0, sminus = 0.0;
+
+            e3 = getTickCount();
+
+            UMat devPatch = patch.getUMat(ACCESS_READ, USAGE_ALLOCATE_DEVICE_MEMORY);
+            UMat devPositiveSamples = posExp->getUMat(ACCESS_READ, USAGE_ALLOCATE_DEVICE_MEMORY);
+            UMat devNegativeSamples = negExp->getUMat(ACCESS_READ, USAGE_ALLOCATE_DEVICE_MEMORY);
+            UMat devNCC(1, 2*MAX_EXAMPLES_IN_MODEL, CV_32FC1, ACCESS_RW, USAGE_ALLOCATE_DEVICE_MEMORY);
+
+            // The kernel writes positive scores at index i and negative scores at index i + 500.
+            ocl::Kernel k;
+            ocl::ProgramSource src = ocl::tracking::tldDetector_oclsrc;
+            String error;
+            ocl::Program prog(src, NULL, error);
+            k.create("NCC", prog);
+            if (k.empty())
+                return Sr(patch); // kernel unavailable: fall back to the CPU path
+            k.args(
+                ocl::KernelArg::PtrReadOnly(devPatch),
+                ocl::KernelArg::PtrReadOnly(devPositiveSamples),
+                ocl::KernelArg::PtrReadOnly(devNegativeSamples),
+                ocl::KernelArg::PtrWriteOnly(devNCC),
+                *posNum, // pass the counts themselves, not the pointers to them
+                *negNum);
+
+            e4 = getTickCount();
+            t = (e4 - e3) / getTickFrequency()*1000.0;
+            //printf("Mem Cpy GPU: %f\n", t);
+
+            size_t globSize = 1000;
+            size_t localSize = 128;
+            e3 = getTickCount();
+            if (!k.run(1, &globSize, &localSize, true))
+                return Sr(patch); // devNCC was never written: fall back to the CPU path
+            e4 = getTickCount();
+            t = (e4 - e3) / getTickFrequency()*1000.0;
+            //printf("Kernel Run GPU: %f\n", t);
+
+            e3 = getTickCount();
+            Mat resNCC = devNCC.getMat(ACCESS_READ);
+            e4 = getTickCount();
+            t = (e4 - e3) / getTickFrequency()*1000.0;
+            //printf("Read Mem GPU: %f\n", t);
+
+            ////Compare
+            //Mat_ modelSample(STANDARD_PATCH_SIZE, STANDARD_PATCH_SIZE);
+            //for (int i = 0; i < 200; i+=17)
+            //{
+            //    modelSample.data = &(posExp->data[i * 225]);
+            //    printf("%f\t%f\n\n", resNCC.at(i), NCC(modelSample, patch));
+            //}
+
+            //for (int i = 0; i < 200; i+=23)
+            //{
+            //    modelSample.data = &(negExp->data[i * 225]);
+            //    printf("%f\t%f\n", resNCC.at(500+i), NCC(modelSample, patch));
+            //}
+
+
+            for (int i = 0; i < *posNum; i++)
+                splus = std::max(splus, 0.5 * (resNCC.at(i) + 1.0));
+
+            for (int i = 0; i < *negNum; i++)
+                sminus = std::max(sminus, 0.5 * (resNCC.at(i+500) +1.0));
+
+            e2 = getTickCount();
+            t = (e2 - e1) / getTickFrequency()*1000.0;
+            //printf("Sr GPU: %f\n\n", t);
+
             if (splus + sminus == 0.0)
                 return 0.0;
             return splus / (sminus + splus);
@@ -78,6 +186,10 @@ namespace cv
         // Calculate Conservative similarity of the patch (NN-Model)
         double TLDDetector::Sc(const Mat_& patch)
         {
+            /*
+            int64 e1, e2;
+            float t;
+            e1 = getTickCount();
             double splus = 0.0, sminus = 0.0;
             int med = getMedian((*timeStampsPositive));
             for (int i = 0; i < (int)(*positiveExamples).size(); i++)
@@ -87,6 +199,118 @@ namespace cv
             }
             for (int i = 0; i < (int)(*negativeExamples).size(); i++)
                 sminus = std::max(sminus, 0.5 * (NCC((*negativeExamples)[i], patch) + 1.0));
+            e2 = getTickCount();
+            t = (e2 - e1) / getTickFrequency()*1000.0;
+            printf("Sc: %f\n", t);
+            if (splus + sminus == 0.0)
+                return 0.0;
+
+            return splus / (sminus + splus);
+            */
+
+            int64 e1, e2;
+            float t;
+            e1 = getTickCount();
+            double splus = 0.0, sminus = 0.0;
+            Mat_ modelSample(STANDARD_PATCH_SIZE, STANDARD_PATCH_SIZE);
+            int med = getMedian((*timeStampsPositive));
+            for (int i = 0; i < *posNum; i++)
+            {
+                if ((int)(*timeStampsPositive)[i] <= med) // only positives stamped at or before the median count
+                {
+                    modelSample.data = &(posExp->data[i * 225]);
+                    splus = std::max(splus, 0.5 * (NCC(modelSample, patch) + 1.0));
+                }
+            }
+            for (int i = 0; i < *negNum; i++)
+            {
+                modelSample.data = &(negExp->data[i * 225]);
+                sminus = std::max(sminus, 0.5 * (NCC(modelSample, patch) + 1.0));
+            }
+            e2 = getTickCount();
+            t = (e2 - e1) / getTickFrequency()*1000.0;
+            //printf("Sc: %f\n", t);
+            if (splus + sminus == 0.0)
+                return 0.0;
+
+            return splus / (sminus + splus);
+        }
+        // OpenCL variant of Sc: same kernel launch as ocl_Sr, then the median time-stamp filter on the host.
+        double TLDDetector::ocl_Sc(const Mat_& patch)
+        {
+            int64 e1, e2, e3, e4;
+            float t;
+            e1 = getTickCount();
+            double splus = 0.0, sminus = 0.0;
+
+            e3 = getTickCount();
+
+            UMat devPatch = patch.getUMat(ACCESS_READ, USAGE_ALLOCATE_DEVICE_MEMORY);
+            UMat devPositiveSamples = posExp->getUMat(ACCESS_READ, USAGE_ALLOCATE_DEVICE_MEMORY);
+            UMat devNegativeSamples = negExp->getUMat(ACCESS_READ, USAGE_ALLOCATE_DEVICE_MEMORY);
+            UMat devNCC(1, 2 * MAX_EXAMPLES_IN_MODEL, CV_32FC1, ACCESS_RW, USAGE_ALLOCATE_DEVICE_MEMORY);
+
+            // The kernel writes positive scores at index i and negative scores at index i + 500.
+            ocl::Kernel k;
+            ocl::ProgramSource src = ocl::tracking::tldDetector_oclsrc;
+            String error;
+            ocl::Program prog(src, NULL, error);
+            k.create("NCC", prog);
+            if (k.empty())
+                return Sc(patch); // kernel unavailable: fall back to the CPU path
+            k.args(
+                ocl::KernelArg::PtrReadOnly(devPatch),
+                ocl::KernelArg::PtrReadOnly(devPositiveSamples),
+                ocl::KernelArg::PtrReadOnly(devNegativeSamples),
+                ocl::KernelArg::PtrWriteOnly(devNCC),
+                *posNum, // pass the counts themselves, not the pointers to them
+                *negNum);
+
+            e4 = getTickCount();
+            t = (e4 - e3) / getTickFrequency()*1000.0;
+            //printf("Mem Cpy GPU: %f\n", t);
+
+            size_t globSize = 1000;
+            size_t localSize = 128;
+            e3 = getTickCount();
+            if (!k.run(1, &globSize, &localSize, true))
+                return Sc(patch); // devNCC was never written: fall back to the CPU path
+            e4 = getTickCount();
+            t = (e4 - e3) / getTickFrequency()*1000.0;
+            //printf("Kernel Run GPU: %f\n", t);
+
+            e3 = getTickCount();
+            Mat resNCC = devNCC.getMat(ACCESS_READ);
+            e4 = getTickCount();
+            t = (e4 - e3) / getTickFrequency()*1000.0;
+            //printf("Read Mem GPU: %f\n", t);
+
+            ////Compare
+            //Mat_ modelSample(STANDARD_PATCH_SIZE, STANDARD_PATCH_SIZE);
+            //for (int i = 0; i < 200; i+=17)
+            //{
+            //    modelSample.data = &(posExp->data[i * 225]);
+            //    printf("%f\t%f\n\n", resNCC.at(i), NCC(modelSample, patch));
+            //}
+
+            //for (int i = 0; i < 200; i+=23)
+            //{
+            //    modelSample.data = &(negExp->data[i * 225]);
+            //    printf("%f\t%f\n", resNCC.at(500+i), NCC(modelSample, patch));
+            //}
+
+            int med = getMedian((*timeStampsPositive));
+            for (int i = 0; i < *posNum; i++)
+                if ((int)(*timeStampsPositive)[i] <= med)
+                    splus = std::max(splus, 0.5 * (resNCC.at(i) +1.0));
+
+            for (int i = 0; i < *negNum; i++)
+                sminus = std::max(sminus, 0.5 * (resNCC.at(i + 500) + 1.0));
+
+            e2 = getTickCount();
+            t = (e2 - e1) / getTickFrequency()*1000.0;
+            //printf("Sc GPU: %f\n\n", t);
+
             if (splus + sminus == 0.0)
                 return 0.0;
             return splus / (sminus + splus);
@@ -166,7 +390,8 @@ namespace cv
 
                     labPatch.rect = Rect2d(dx * i * scale, dy * j * scale, initSize.width * scale, initSize.height * scale);
                     resample(resized_img, Rect2d(Point(dx * i, dy * j), initSize), standardPatch);
-                    tmp = Sr(standardPatch);
+
+                    tmp = ocl_Sr(standardPatch);
 
                     ////To fix: Check the paper, probably this cause wrong learning
                     //
@@ -184,7 +409,7 @@ namespace cv
                     {
                         npos++;
                     }
-                    tmp = Sc(standardPatch);
+                    tmp = ocl_Sc(standardPatch);
                     if (tmp > maxSc)
                     {
                         maxSc = tmp;
diff --git a/modules/tracking/src/tldDetector.hpp b/modules/tracking/src/tldDetector.hpp
index 77d57dd43..0d75f04c7 100644
--- a/modules/tracking/src/tldDetector.hpp
+++ b/modules/tracking/src/tldDetector.hpp
@@ -43,6 +43,7 @@
 #define OPENCV_TLD_DETECTOR
 
 #include "precomp.hpp"
+#include "opencl_kernels_tracking.hpp"
 #include "tldEnsembleClassifier.hpp"
 #include "tldUtils.hpp"
 
@@ -73,9 +74,13 @@ namespace cv
             inline double ensembleClassifierNum(const uchar* data);
             inline void prepareClassifiers(int rowstep);
             double Sr(const Mat_& patch);
+            double ocl_Sr(const Mat_& patch); // Sr computed through the OpenCL NCC kernel
             double Sc(const Mat_& patch);
+            double ocl_Sc(const Mat_& patch); // Sc computed through the OpenCL NCC kernel
 
             std::vector classifiers;
+            Mat *posExp, *negExp; // packed sample bytes owned by the model (set in the tldModel.cpp hunk)
+            int *posNum, *negNum; // number of samples currently stored in posExp / negExp
             std::vector > *positiveExamples, *negativeExamples;
             std::vector *timeStampsPositive, *timeStampsNegative;
             double *originalVariancePtr;
@@ -87,6 +92,7 @@ namespace cv
                 bool isObject, shouldBeIntegrated;
             };
             bool detect(const Mat& img, const Mat& imgBlurred, Rect2d& res, std::vector& patches, Size initSize);
+            bool ocl_detect(const Mat& img, const Mat& imgBlurred, Rect2d& res, std::vector& patches, Size initSize); // NOTE(review): no definition appears in this patch - confirm
 
         protected:
 
diff --git a/modules/tracking/src/tldModel.cpp b/modules/tracking/src/tldModel.cpp
index 154433d9b..69540f05a 100644
--- a/modules/tracking/src/tldModel.cpp
+++ b/modules/tracking/src/tldModel.cpp
@@ -56,7 +56,16 @@ namespace cv
             detector = Ptr(new TLDDetector());
 
             //Propagate data to Detector
-            detector->positiveExamples = &positiveExamples;
+            posNum = 0;
+            negNum = 0;
+            posExp = Mat(Size(225, 500), CV_8UC1); // 500 rows x 225 bytes; assumes 225 == STANDARD_PATCH_SIZE^2 - TODO confirm
+            negExp = Mat(Size(225, 500), CV_8UC1); // same layout for the negative samples
+            detector->posNum = &posNum;
+            detector->negNum = &negNum;
+            detector->posExp = &posExp;
+            detector->negExp = &negExp;
+
+            detector->positiveExamples = &positiveExamples;
             detector->negativeExamples = &negativeExamples;
             detector->timeStampsPositive = &timeStampsPositive;
             detector->timeStampsNegative = &timeStampsNegative;
@@ -77,6 +86,7 @@ namespace cv
 
             //Generate initial positive samples and put them to the model
             positiveExamples.reserve(200);
+
             for (int i = 0; i < (int)closest.size(); i++)
             {
                 for (int j = 0; j < 20; j++)
@@ -238,12 +248,30 @@ namespace cv
             std::vector* proxyT;
             if (positive)
             {
+                if (posNum < 500) // the packed buffer has 500 rows; further samples are not mirrored into it
+                {
+                    uchar *patchPtr = example.data;
+                    uchar *modelPtr = posExp.data;
+                    for (int i = 0; i < STANDARD_PATCH_SIZE*STANDARD_PATCH_SIZE; i++)
+                        modelPtr[posNum*STANDARD_PATCH_SIZE*STANDARD_PATCH_SIZE + i] = patchPtr[i];
+                    posNum++;
+                }
+
                 proxyV = &positiveExamples;
                 proxyN = &timeStampPositiveNext;
                 proxyT = &timeStampsPositive;
             }
             else
             {
+                if (negNum < 500) // the packed buffer has 500 rows; further samples are not mirrored into it
+                {
+                    uchar *patchPtr = example.data;
+                    uchar *modelPtr = negExp.data;
+                    for (int i = 0; i < STANDARD_PATCH_SIZE*STANDARD_PATCH_SIZE; i++)
+                        modelPtr[negNum*STANDARD_PATCH_SIZE*STANDARD_PATCH_SIZE + i] = patchPtr[i];
+                    negNum++;
+                }
+
                 proxyV = &negativeExamples;
                 proxyN = &timeStampNegativeNext;
                 proxyT = &timeStampsNegative;
diff --git a/modules/tracking/src/tldModel.hpp b/modules/tracking/src/tldModel.hpp
index 1386872ac..8aa57f18c 100644
--- a/modules/tracking/src/tldModel.hpp
+++ b/modules/tracking/src/tldModel.hpp
@@ -66,6 +66,8 @@ namespace cv
             Ptr detector;
 
             std::vector > positiveExamples, negativeExamples;
+            Mat posExp, negExp; // packed copies of the samples, one sample per row (filled in tldModel.cpp)
+            int posNum, negNum; // number of rows of posExp / negExp currently in use
             std::vector timeStampsPositive, timeStampsNegative;
             int timeStampPositiveNext, timeStampNegativeNext;
             double originalVariance_;