Added OCL versions of Sr and Sc functions

87190737 · Vladimir · 26f16e64 · 87190737 · 87190737 · 87190737
Commit 87190737 authored Jun 22, 2015 by Vladimir
6 changed files
--- a/modules/tracking/src/opencl/tldDetector.cl
+++ b/modules/tracking/src/opencl/tldDetector.cl
+// This file is part of OpenCV project.
+// It is subject to the license terms in the LICENSE file found in the top-level directory
+// of this distribution and at http://opencv.org/license.html.
+
+// Copyright (C) 2014, Advanced Micro Devices, Inc., all rights reserved.
+// Third party copyrights are property of their respective owners.
+
+
+
+// Normalized cross-correlation (NCC) between `patch` and every stored model
+// sample, one work-item per sample.
+//
+//   patch            - the candidate patch, N = 225 uchar pixels
+//   positiveSamples  - positive samples stored back to back, 225 bytes each
+//   negativeSamples  - negative samples, same layout
+//   ncc              - output: slot [i] receives the result for positive
+//                      sample i, slot [500 + i] the result for negative sample i
+//   posNum, negNum   - number of valid samples of each class; work-items past
+//                      these counts write nothing
+__kernel void NCC(__global const uchar *patch,
+				 __global const uchar *positiveSamples,
+				 __global const uchar *negativeSamples,
+				 __global float *ncc,
+				 int posNum,
+				 int negNum)
+{
+	const int SLOTS = 500;	// result slots reserved per class
+	const int N = 225;		// pixels per sample
+
+	const int gid = get_global_id(0);
+	if (gid >= 2 * SLOTS) return;
+
+	// Work-items [0, 500) handle positive samples, [500, 1000) negative ones.
+	const bool posFlg = gid < SLOTS;
+	const int id = posFlg ? gid : gid - SLOTS;
+	if (id >= (posFlg ? posNum : negNum)) return;
+
+	__global const uchar *sample = (posFlg ? positiveSamples : negativeSamples) + id * N;
+
+	// Per-pixel sums; each stays far below INT_MAX (at most 225 * 255 * 255).
+	int s1 = 0, s2 = 0, n1 = 0, n2 = 0, prod = 0;
+	for (int i = 0; i < N; i++)
+	{
+		const int a = sample[i];
+		const int b = patch[i];
+		s1 += a;
+		s2 += b;
+		n1 += a * a;
+		n2 += b * b;
+		prod += a * b;
+	}
+
+	// s1 * s2 (and s1 * s1) can reach 57375^2, which does not fit in a 32-bit
+	// int, so the cross terms are formed exactly in 64-bit integers and only
+	// converted to float afterwards. Each value is N times the corresponding
+	// centered sum (e.g. var1 == N * (n1 - s1 * s1 / N)).
+	const long var1 = (long)N * n1 - (long)s1 * s1;
+	const long var2 = (long)N * n2 - (long)s2 * s2;
+	const long cov = (long)N * prod - (long)s1 * s2;
+
+	const float sq1 = sqrt(max(0.0f, (float)var1 / N));
+	const float sq2 = sqrt(max(0.0f, (float)var2 / N));
+
+	// A constant patch (sq2 == 0) takes the first branch; otherwise the
+	// covariance is normalized by both standard deviations.
+	ncc[gid] = (sq2 == 0) ? sq1 / fabs(sq1) : ((float)cov / N) / sq1 / sq2;
+}
--- a/modules/tracking/src/precomp.hpp
+++ b/modules/tracking/src/precomp.hpp
@@ -44,5 +44,6 @@

 #include "opencv2/tracking.hpp"
 #include "opencv2/core/utility.hpp"
+#include "opencv2/core/ocl.hpp"

 #endif
--- a/modules/tracking/src/tldDetector.cpp
+++ b/modules/tracking/src/tldDetector.cpp
@@ -65,11 +65,119 @@ namespace cv
 		// Calculate Relative similarity of the patch (NN-Model)
 		double TLDDetector::Sr(const Mat_<uchar>& patch)
 		{
+			// Disabled reference implementation that walks the positiveExamples /
+			// negativeExamples vectors; it ends at the closing comment marker below.
+			/*
+			int64 e1, e2;
+			float t;
+			e1 = getTickCount();
 			double splus = 0.0, sminus = 0.0;
 			for (int i = 0; i < (int)(*positiveExamples).size(); i++)
 				splus = std::max(splus, 0.5 * (NCC((*positiveExamples)[i], patch) + 1.0));
 			for (int i = 0; i < (int)(*negativeExamples).size(); i++)
 				sminus = std::max(sminus, 0.5 * (NCC((*negativeExamples)[i], patch) + 1.0));
+			e2 = getTickCount();
+			t = (e2 - e1) / getTickFrequency()*1000.0;
+			printf("Sr: %f\n", t);
+			if (splus + sminus == 0.0)
+				return 0.0;
+			return splus / (sminus + splus);
+			*/
+			// Active implementation: reads the samples from the flat posExp / negExp
+			// buffers instead of the example vectors.
+			//   splus  = max over the first *posNum positive samples of 0.5 * (NCC + 1)
+			//   sminus = max over the first *negNum negative samples of 0.5 * (NCC + 1)
+			// Returns splus / (splus + sminus), or 0.0 when both maxima are zero.
+			int64 e1, e2;
+			float t;
+			e1 = getTickCount();
+			double splus = 0.0, sminus = 0.0;
+			// Scratch header whose data pointer is re-aimed at each stored sample below.
+			Mat_<uchar> modelSample(STANDARD_PATCH_SIZE, STANDARD_PATCH_SIZE);
+			for (int i = 0; i < *posNum; i++)
+			{
+				// Sample i starts 225 bytes after sample i - 1
+				// (presumably 225 == STANDARD_PATCH_SIZE^2 -- TODO confirm).
+				modelSample.data = &(posExp->data[i * 225]);
+				splus = std::max(splus, 0.5 * (NCC(modelSample, patch) + 1.0));
+			}
+			for (int i = 0; i < *negNum; i++)
+			{
+				modelSample.data = &(negExp->data[i * 225]);
+				sminus = std::max(sminus, 0.5 * (NCC(modelSample, patch) + 1.0));
+			}
+			// NOTE(review): the elapsed time in milliseconds is printed to stdout on
+			// every call; this looks like benchmarking output left in place.
+			e2 = getTickCount();
+			t = (e2 - e1) / getTickFrequency()*1000.0;
+			printf("Sr CPU: %f\n", t);
+			if (splus + sminus == 0.0)
+				return 0.0;
+			return splus / (sminus + splus);
+		}
+
+		// Calculate Relative similarity of the patch (NN-Model) on the OpenCL device.
+		//
+		// Uploads the patch and the flat posExp / negExp sample buffers, runs the
+		// "NCC" kernel once for all samples and reduces its output on the host:
+		//   splus  = max over the first *posNum positive results of 0.5 * (ncc + 1)
+		//   sminus = max over the first *negNum negative results of 0.5 * (ncc + 1)
+		// Returns splus / (splus + sminus), or 0.0 when both maxima are zero.
+		// If the kernel cannot be created or run, the CPU version Sr() is used.
+		double TLDDetector::ocl_Sr(const Mat_<uchar>& patch)
+		{
+			// The NCC kernel writes the negative results starting at slot 500;
+			// this offset must stay in sync with the kernel source.
+			const int negOffset = 500;
+
+			UMat devPatch = patch.getUMat(ACCESS_READ, USAGE_ALLOCATE_DEVICE_MEMORY);
+			UMat devPositiveSamples = posExp->getUMat(ACCESS_READ, USAGE_ALLOCATE_DEVICE_MEMORY);
+			UMat devNegativeSamples = negExp->getUMat(ACCESS_READ, USAGE_ALLOCATE_DEVICE_MEMORY);
+			// UMat constructors take no access flag: an extra argument between the
+			// type and the usage flags is the fill Scalar, so only the usage flag
+			// is passed here.
+			UMat devNCC(1, 2 * MAX_EXAMPLES_IN_MODEL, CV_32FC1, USAGE_ALLOCATE_DEVICE_MEMORY);
+
+			ocl::ProgramSource src = ocl::tracking::tldDetector_oclsrc;
+			String error;
+			ocl::Program prog(src, String(), error);	// no extra build options
+			ocl::Kernel k;
+			k.create("NCC", prog);
+			if (k.empty())
+			{
+				printf("Kernel create failed!!!\n");
+				return Sr(patch);
+			}
+			// posNum / negNum are pointers to the model's counters: pass the
+			// counts themselves, not the pointer values.
+			k.args(
+				ocl::KernelArg::PtrReadOnly(devPatch),
+				ocl::KernelArg::PtrReadOnly(devPositiveSamples),
+				ocl::KernelArg::PtrReadOnly(devNegativeSamples),
+				ocl::KernelArg::PtrWriteOnly(devNCC),
+				*posNum,
+				*negNum);
+
+			// One work-item per result slot (500 positive + 500 negative).
+			size_t globSize = 1000;
+			size_t localSize = 128;
+			if (!k.run(1, &globSize, &localSize, true))
+			{
+				printf("Kernel Run Error!!!");
+				return Sr(patch);
+			}
+
+			// Map the results back to host memory.
+			Mat resNCC = devNCC.getMat(ACCESS_READ);
+
+			double splus = 0.0, sminus = 0.0;
+			for (int i = 0; i < *posNum; i++)
+				splus = std::max(splus, 0.5 * (resNCC.at<float>(i) + 1.0));
+
+			for (int i = 0; i < *negNum; i++)
+				sminus = std::max(sminus, 0.5 * (resNCC.at<float>(i + negOffset) + 1.0));
+
 			if (splus + sminus == 0.0)
 				return 0.0;
 			return splus / (sminus + splus);
@@ -78,6 +186,10 @@ namespace cv
 		// Calculate Conservative similarity of the patch (NN-Model)
 		double TLDDetector::Sc(const Mat_<uchar>& patch)
 		{
+			// Disabled reference implementation that walks the positiveExamples /
+			// negativeExamples vectors; it ends at the closing comment marker below.
+			/*
+			int64 e1, e2;
+			float t;
+			e1 = getTickCount();
 			double splus = 0.0, sminus = 0.0;
 			int med = getMedian((*timeStampsPositive));
 			for (int i = 0; i < (int)(*positiveExamples).size(); i++)
@@ -87,6 +199,118 @@ namespace cv
 			}
 			for (int i = 0; i < (int)(*negativeExamples).size(); i++)
 				sminus = std::max(sminus, 0.5 * (NCC((*negativeExamples)[i], patch) + 1.0));
+			e2 = getTickCount();
+			t = (e2 - e1) / getTickFrequency()*1000.0;
+			printf("Sc: %f\n", t);
+			if (splus + sminus == 0.0)
+				return 0.0;
+			
+			return splus / (sminus + splus);
+			*/
+
+			// Active implementation: reads the samples from the flat posExp / negExp
+			// buffers instead of the example vectors.
+			//   splus  = max of 0.5 * (NCC + 1) over those of the first *posNum positive
+			//            samples whose time stamp is <= the median positive time stamp
+			//   sminus = max of 0.5 * (NCC + 1) over the first *negNum negative samples
+			// Returns splus / (splus + sminus), or 0.0 when both maxima are zero.
+			int64 e1, e2;
+			float t;
+			e1 = getTickCount();
+			double splus = 0.0, sminus = 0.0;
+			// Scratch header whose data pointer is re-aimed at each stored sample below.
+			Mat_<uchar> modelSample(STANDARD_PATCH_SIZE, STANDARD_PATCH_SIZE);
+			int med = getMedian((*timeStampsPositive));
+			for (int i = 0; i < *posNum; i++)
+			{
+				// Positive samples with a time stamp above the median are skipped.
+				if ((int)(*timeStampsPositive)[i] <= med)
+				{
+					// Sample i starts 225 bytes after sample i - 1
+					// (presumably 225 == STANDARD_PATCH_SIZE^2 -- TODO confirm).
+					modelSample.data = &(posExp->data[i * 225]);
+					splus = std::max(splus, 0.5 * (NCC(modelSample, patch) + 1.0));
+				}
+			}
+			for (int i = 0; i < *negNum; i++)
+			{
+				modelSample.data = &(negExp->data[i * 225]);
+				sminus = std::max(sminus, 0.5 * (NCC(modelSample, patch) + 1.0));
+			}
+			// NOTE(review): the elapsed time in milliseconds is printed to stdout on
+			// every call; this looks like benchmarking output left in place.
+			e2 = getTickCount();
+			t = (e2 - e1) / getTickFrequency()*1000.0;
+			printf("Sc: %f\n", t);
+			if (splus + sminus == 0.0)
+				return 0.0;
+
+			return splus / (sminus + splus);
+		}
+
+		// Calculate Conservative similarity of the patch (NN-Model) on the OpenCL device.
+		//
+		// Uploads the patch and the flat posExp / negExp sample buffers, runs the
+		// "NCC" kernel once for all samples and reduces its output on the host:
+		//   splus  = max of 0.5 * (ncc + 1) over those of the first *posNum positive
+		//            results whose time stamp is <= the median positive time stamp
+		//   sminus = max of 0.5 * (ncc + 1) over the first *negNum negative results
+		// Returns splus / (splus + sminus), or 0.0 when both maxima are zero.
+		// If the kernel cannot be created or run, the CPU version Sc() is used.
+		double TLDDetector::ocl_Sc(const Mat_<uchar>& patch)
+		{
+			// The NCC kernel writes the negative results starting at slot 500;
+			// this offset must stay in sync with the kernel source.
+			const int negOffset = 500;
+
+			UMat devPatch = patch.getUMat(ACCESS_READ, USAGE_ALLOCATE_DEVICE_MEMORY);
+			UMat devPositiveSamples = posExp->getUMat(ACCESS_READ, USAGE_ALLOCATE_DEVICE_MEMORY);
+			UMat devNegativeSamples = negExp->getUMat(ACCESS_READ, USAGE_ALLOCATE_DEVICE_MEMORY);
+			// UMat constructors take no access flag: an extra argument between the
+			// type and the usage flags is the fill Scalar, so only the usage flag
+			// is passed here.
+			UMat devNCC(1, 2 * MAX_EXAMPLES_IN_MODEL, CV_32FC1, USAGE_ALLOCATE_DEVICE_MEMORY);
+
+			ocl::ProgramSource src = ocl::tracking::tldDetector_oclsrc;
+			String error;
+			ocl::Program prog(src, String(), error);	// no extra build options
+			ocl::Kernel k;
+			k.create("NCC", prog);
+			if (k.empty())
+			{
+				printf("Kernel create failed!!!\n");
+				return Sc(patch);
+			}
+			// posNum / negNum are pointers to the model's counters: pass the
+			// counts themselves, not the pointer values.
+			k.args(
+				ocl::KernelArg::PtrReadOnly(devPatch),
+				ocl::KernelArg::PtrReadOnly(devPositiveSamples),
+				ocl::KernelArg::PtrReadOnly(devNegativeSamples),
+				ocl::KernelArg::PtrWriteOnly(devNCC),
+				*posNum,
+				*negNum);
+
+			// One work-item per result slot (500 positive + 500 negative).
+			size_t globSize = 1000;
+			size_t localSize = 128;
+			if (!k.run(1, &globSize, &localSize, true))
+			{
+				printf("Kernel Run Error!!!");
+				return Sc(patch);
+			}
+
+			// Map the results back to host memory.
+			Mat resNCC = devNCC.getMat(ACCESS_READ);
+
+			double splus = 0.0, sminus = 0.0;
+			// Positive samples with a time stamp above the median are skipped.
+			int med = getMedian((*timeStampsPositive));
+			for (int i = 0; i < *posNum; i++)
+				if ((int)(*timeStampsPositive)[i] <= med)
+					splus = std::max(splus, 0.5 * (resNCC.at<float>(i) + 1.0));
+
+			for (int i = 0; i < *negNum; i++)
+				sminus = std::max(sminus, 0.5 * (resNCC.at<float>(i + negOffset) + 1.0));
+
 			if (splus + sminus == 0.0)
 				return 0.0;
 			return splus / (sminus + splus);
@@ -167,7 +391,8 @@ namespace cv

 						labPatch.rect = Rect2d(dx * i * scale, dy * j * scale, initSize.width * scale, initSize.height * scale);
 						resample(resized_img, Rect2d(Point(dx * i, dy * j), initSize), standardPatch);
-						tmp = Sr(standardPatch);
+						
+						tmp = ocl_Sr(standardPatch);

 						////To fix: Check the paper, probably this cause wrong learning
 						//
@@ -185,7 +410,7 @@ namespace cv
 						{
 							npos++;
 						}
-						tmp = Sc(standardPatch);
+						tmp = ocl_Sc(standardPatch);
 						if (tmp > maxSc)
 						{
 							maxSc = tmp;

--- a/modules/tracking/src/tldDetector.hpp
+++ b/modules/tracking/src/tldDetector.hpp
@@ -43,6 +43,7 @@
 #define OPENCV_TLD_DETECTOR

 #include "precomp.hpp"
+#include "opencl_kernels_tracking.hpp"
 #include "tldEnsembleClassifier.hpp"
 #include "tldUtils.hpp"

@@ -74,9 +75,13 @@ namespace cv
 			inline double ensembleClassifierNum(const uchar* data);
 			inline void prepareClassifiers(int rowstep);
 			double Sr(const Mat_<uchar>& patch);
+			double ocl_Sr(const Mat_<uchar>& patch);
 			double Sc(const Mat_<uchar>& patch);
+			double ocl_Sc(const Mat_<uchar>& patch);

 			std::vector<TLDEnsembleClassifier> classifiers;
+			Mat *posExp, *negExp;
+			int *posNum, *negNum;
 			std::vector<Mat_<uchar> > *positiveExamples, *negativeExamples;
 			std::vector<int> *timeStampsPositive, *timeStampsNegative;
 			double *originalVariancePtr;
@@ -88,6 +93,7 @@ namespace cv
 				bool isObject, shouldBeIntegrated;
 			};
 			bool detect(const Mat& img, const Mat& imgBlurred, Rect2d& res, std::vector<LabeledPatch>& patches, Size initSize);
+			bool ocl_detect(const Mat& img, const Mat& imgBlurred, Rect2d& res, std::vector<LabeledPatch>& patches, Size initSize);
 		protected:
 			
 			

--- a/modules/tracking/src/tldModel.cpp
+++ b/modules/tracking/src/tldModel.cpp
@@ -56,7 +56,16 @@ namespace cv
 			detector = Ptr<TLDDetector>(new TLDDetector());

 			//Propagate data to Detector
-			detector->positiveExamples = &positiveExamples;
+			posNum = 0;
+			negNum = 0;
+			posExp = Mat(Size(225, 500), CV_8UC1);
+			negExp = Mat(Size(225, 500), CV_8UC1);
+			detector->posNum = &posNum;
+			detector->negNum = &negNum;
+			detector->posExp = &posExp;
+			detector->negExp = &negExp;
+
+			detector->positiveExamples = &positiveExamples;			
 			detector->negativeExamples = &negativeExamples;
 			detector->timeStampsPositive = &timeStampsPositive;
 			detector->timeStampsNegative = &timeStampsNegative;
@@ -78,6 +87,7 @@ namespace cv

 			//Generate initial positive samples and put them to the model
 			positiveExamples.reserve(200);
+			
 			for (int i = 0; i < (int)closest.size(); i++)
 			{
 				for (int j = 0; j < 20; j++)
@@ -239,12 +249,30 @@ namespace cv
 			std::vector<int>* proxyT;
 			if (positive)
 			{
+				if (posNum < 500)
+				{
+					uchar *patchPtr = example.data;
+					uchar *modelPtr = posExp.data;
+					for (int i = 0; i < STANDARD_PATCH_SIZE*STANDARD_PATCH_SIZE; i++)
+						modelPtr[posNum*STANDARD_PATCH_SIZE*STANDARD_PATCH_SIZE + i] = patchPtr[i];
+					posNum++;
+				}
+
 				proxyV = &positiveExamples;
 				proxyN = &timeStampPositiveNext;
 				proxyT = &timeStampsPositive;
 			}
 			else
 			{
+				if (negNum < 500)
+				{
+					uchar *patchPtr = example.data;
+					uchar *modelPtr = negExp.data;
+					for (int i = 0; i < STANDARD_PATCH_SIZE*STANDARD_PATCH_SIZE; i++)
+						modelPtr[negNum*STANDARD_PATCH_SIZE*STANDARD_PATCH_SIZE + i] = patchPtr[i];
+					negNum++;
+				}
+
 				proxyV = &negativeExamples;
 				proxyN = &timeStampNegativeNext;
 				proxyT = &timeStampsNegative;

--- a/modules/tracking/src/tldModel.hpp
+++ b/modules/tracking/src/tldModel.hpp
@@ -67,6 +67,8 @@ namespace cv
 			Ptr<TLDDetector> detector;

 			std::vector<Mat_<uchar> > positiveExamples, negativeExamples;
+			Mat posExp, negExp;
+			int posNum, negNum;
 			std::vector<int> timeStampsPositive, timeStampsNegative;
 			int timeStampPositiveNext, timeStampNegativeNext;
 			double originalVariance_;