2nd level of parallelization + detector remake

1. Added a 2nd level of parallelization of the NN on OpenCL. 2. Restructured the detector: all filters now work independently (Variance Filter -> Ensemble -> NN), connected through "buffers".

2nd level of parallelization + detector remake
1. Added a 2nd level of parallelization of the NN on OpenCL. 2. Restructured the detector: all filters now work independently (Variance Filter -> Ensemble -> NN), connected through "buffers".
d4011aae · Vladimir · 87190737 · d4011aae · d4011aae · d4011aae
Commit d4011aae authored Jun 28, 2015 by Vladimir
5 changed files
--- a/modules/tracking/samples/tld_test.cpp
+++ b/modules/tracking/samples/tld_test.cpp
@@ -48,8 +48,8 @@
 using namespace std;
 using namespace cv;

-#define NUM_TEST_FRAMES 500
-#define TEST_VIDEO_INDEX 1		//TLD Dataset Video Index from 1-10
+#define NUM_TEST_FRAMES 100
+#define TEST_VIDEO_INDEX 7		//TLD Dataset Video Index from 1-10
 //#define RECORD_VIDEO_FLG

 static Mat image;

--- a/modules/tracking/src/opencl/tldDetector.cl
+++ b/modules/tracking/src/opencl/tldDetector.cl
@@ -31,12 +31,11 @@ __kernel void NCC(__global const uchar *patch,
 	int s1 = 0, s2 = 0, n1 = 0, n2 = 0, prod = 0;
 	float sq1 = 0, sq2 = 0, ares = 0;
 	int N = 225;
-	//NCC with positive patch
+	//NCC with positive sample
 	if (posFlg && id < posNum)
 	{
 		for (int i = 0; i < N; i++)
 		{
-			
 			s1 += positiveSamples[id * N + i];
 			s2 += patch[i];
 			n1 += positiveSamples[id * N + i] * positiveSamples[id * N + i];
@@ -49,7 +48,7 @@ __kernel void NCC(__global const uchar *patch,
 		ncc[id] = ares;
 	}

-	//NCC with negative patch
+	//NCC with negative sample
 	if (!posFlg && id < negNum)
 	{
 		for (int i = 0; i < N; i++)
@@ -67,3 +66,68 @@ __kernel void NCC(__global const uchar *patch,
 		ncc[id+500] = ares;
 	}
 }
+
+//batchNCC: normalized cross-correlation (NCC) between a batch of patches and
+//the positive / negative model samples, one (patch, sample) pair per work-item.
+//
+//Index layout along dimension 0, as decoded below:
+//  [0, 500*patchNum)                -> positive samples, result goes to posNcc
+//  [500*patchNum, 2*500*patchNum)   -> negative samples, result goes to negNcc
+//Inside each half: patchID = id / 500, modelSampleID = id % 500, and the
+//result is stored at index id (= patchID * 500 + modelSampleID).
+//
+//patches         - patchNum patches, 225 uchar values each, stored back to back
+//positiveSamples - positive model samples, 225 uchar values each
+//negativeSamples - negative model samples, 225 uchar values each
+//posNcc, negNcc  - output buffers, 500 slots per patch
+//posNum, negNum  - number of valid positive / negative samples; slots with
+//                  modelSampleID >= posNum (negNum) are left unwritten
+//patchNum        - number of patches in the batch
+__kernel void batchNCC(__global const uchar *patches,
+	__global const uchar *positiveSamples,
+	__global const uchar *negativeSamples,
+	__global float *posNcc,
+	__global float *negNcc,
+	int posNum,
+	int negNum,
+	int patchNum)
+{
+	const int SLOTS = 500;	//Result slots reserved per patch
+	const int N = 225;	//Values per patch / model sample
+
+	int id = get_global_id(0);
+
+	//First half of the index space handles positive samples, second half negative ones
+	const bool posFlg = id < SLOTS * patchNum;
+	if (!posFlg)
+		id -= SLOTS * patchNum;	//Negative index
+
+	const int modelSampleID = id % SLOTS;
+	const int patchID = id / SLOTS;
+
+	//Nothing to do for work-items past the last patch (e.g. a rounded-up
+	//global size) or past the last valid model sample
+	if (patchID >= patchNum || modelSampleID >= (posFlg ? posNum : negNum))
+		return;
+
+	__global const uchar *sample = (posFlg ? positiveSamples : negativeSamples) + modelSampleID * N;
+	__global const uchar *patch = patches + patchID * N;
+	__global float *result = posFlg ? posNcc : negNcc;
+
+	//Sums (s), sums of squares (n) and cross product of sample (1) and patch (2)
+	int s1 = 0, s2 = 0, n1 = 0, n2 = 0, prod = 0;
+	for (int i = 0; i < N; i++)
+	{
+		const int a = sample[i];
+		const int b = patch[i];
+		s1 += a;
+		s2 += b;
+		n1 += a * a;
+		n2 += b * b;
+		prod += a * b;
+	}
+
+	const float sq1 = sqrt(max(0.0, n1 - 1.0 * s1 * s1 / N));
+	const float sq2 = sqrt(max(0.0, n2 - 1.0 * s2 * s2 / N));
+
+	//s1 and s2 can each reach 225 * 255 = 57375, so s1 * s2 does not fit in a
+	//32-bit int; form the product in 64 bits. The integer division is kept,
+	//and the final difference fits in an int again.
+	const int cross = (int)(prod - (long)s1 * s2 / N);
+
+	//NOTE(review): a constant model sample gives sq1 == 0 and therefore a
+	//division by zero in either branch below - confirm the host never uploads one.
+	const float ares = (sq2 == 0) ? sq1 / fabs(sq1) : cross / sq1 / sq2;
+
+	result[id] = ares;
+}
--- a/modules/tracking/src/tldDetector.cpp
+++ b/modules/tracking/src/tldDetector.cpp
--- a/modules/tracking/src/tldDetector.hpp
+++ b/modules/tracking/src/tldDetector.hpp
@@ -78,6 +78,7 @@ namespace cv
 			double ocl_Sr(const Mat_<uchar>& patch);
 			double Sc(const Mat_<uchar>& patch);
 			double ocl_Sc(const Mat_<uchar>& patch);
+			void ocl_batchSrSc(const Mat_<uchar>& patches, double *resultSr, double *resultSc, int numOfPatches);

 			std::vector<TLDEnsembleClassifier> classifiers;
 			Mat *posExp, *negExp;

--- a/modules/tracking/src/tldTracker.cpp
+++ b/modules/tracking/src/tldTracker.cpp
@@ -120,7 +120,7 @@ bool TrackerTLDImpl::updateImpl(const Mat& image, Rect2d& boundingBox)
    {
        Rect2d tmpCandid = boundingBox;
        if( ( (i == 0) && !data->failedLastTime && trackerProxy->update(image, tmpCandid) ) ||
-			((i == 1) && (tldModel->detector->detect(imageForDetector, image_blurred, tmpCandid, detectorResults, tldModel->getMinSize()))))
+			((i == 1) && (tldModel->detector->ocl_detect(imageForDetector, image_blurred, tmpCandid, detectorResults, tldModel->getMinSize()))))
        {
            candidates.push_back(tmpCandid);
            if( i == 0 )