Commit 87190737 authored by Vladimir's avatar Vladimir

Added OCL versions of Sr and Sc functions

parent 26f16e64
// This file is part of OpenCV project.
// It is subject to the license terms in the LICENSE file found in the top-level directory
// of this distribution and at http://opencv.org/license.html.
// Copyright (C) 2014, Advanced Micro Devices, Inc., all rights reserved.
// Third party copyrights are property of their respective owners.
__kernel void NCC(__global const uchar *patch,
__global const uchar *positiveSamples,
__global const uchar *negativeSamples,
__global float *ncc,
int posNum,
int negNum)
{
int id = get_global_id(0);
if (id >= 1000) return;
bool posFlg;
if (id < 500)
posFlg = true;
if (id >= 500)
{
//Negative index
id = id - 500;
posFlg = false;
}
//Variables
int s1 = 0, s2 = 0, n1 = 0, n2 = 0, prod = 0;
float sq1 = 0, sq2 = 0, ares = 0;
int N = 225;
//NCC with positive patch
if (posFlg && id < posNum)
{
for (int i = 0; i < N; i++)
{
s1 += positiveSamples[id * N + i];
s2 += patch[i];
n1 += positiveSamples[id * N + i] * positiveSamples[id * N + i];
n2 += patch[i] * patch[i];
prod += positiveSamples[id * N + i] * patch[i];
}
sq1 = sqrt(max(0.0, n1 - 1.0 * s1 * s1 / N));
sq2 = sqrt(max(0.0, n2 - 1.0 * s2 * s2 / N));
ares = (sq2 == 0) ? sq1 / fabs(sq1) : (prod - s1 * s2 / N) / sq1 / sq2;
ncc[id] = ares;
}
//NCC with negative patch
if (!posFlg && id < negNum)
{
for (int i = 0; i < N; i++)
{
s1 += negativeSamples[id * N + i];
s2 += patch[i];
n1 += negativeSamples[id * N + i] * negativeSamples[id * N + i];
n2 += patch[i] * patch[i];
prod += negativeSamples[id * N + i] * patch[i];
}
sq1 = sqrt(max(0.0, n1 - 1.0 * s1 * s1 / N));
sq2 = sqrt(max(0.0, n2 - 1.0 * s2 * s2 / N));
ares = (sq2 == 0) ? sq1 / fabs(sq1) : (prod - s1 * s2 / N) / sq1 / sq2;
ncc[id+500] = ares;
}
}
......@@ -44,5 +44,6 @@
#include "opencv2/tracking.hpp"
#include "opencv2/core/utility.hpp"
#include "opencv2/core/ocl.hpp"
#endif
......@@ -65,11 +65,119 @@ namespace cv
// Calculate Relative similarity of the patch (NN-Model)
double TLDDetector::Sr(const Mat_<uchar>& patch)
{
/*
int64 e1, e2;
float t;
e1 = getTickCount();
double splus = 0.0, sminus = 0.0;
for (int i = 0; i < (int)(*positiveExamples).size(); i++)
splus = std::max(splus, 0.5 * (NCC((*positiveExamples)[i], patch) + 1.0));
for (int i = 0; i < (int)(*negativeExamples).size(); i++)
sminus = std::max(sminus, 0.5 * (NCC((*negativeExamples)[i], patch) + 1.0));
e2 = getTickCount();
t = (e2 - e1) / getTickFrequency()*1000.0;
printf("Sr: %f\n", t);
if (splus + sminus == 0.0)
return 0.0;
return splus / (sminus + splus);
*/
int64 e1, e2;
float t;
e1 = getTickCount();
double splus = 0.0, sminus = 0.0;
Mat_<uchar> modelSample(STANDARD_PATCH_SIZE, STANDARD_PATCH_SIZE);
for (int i = 0; i < *posNum; i++)
{
modelSample.data = &(posExp->data[i * 225]);
splus = std::max(splus, 0.5 * (NCC(modelSample, patch) + 1.0));
}
for (int i = 0; i < *negNum; i++)
{
modelSample.data = &(negExp->data[i * 225]);
sminus = std::max(sminus, 0.5 * (NCC(modelSample, patch) + 1.0));
}
e2 = getTickCount();
t = (e2 - e1) / getTickFrequency()*1000.0;
printf("Sr CPU: %f\n", t);
if (splus + sminus == 0.0)
return 0.0;
return splus / (sminus + splus);
}
double TLDDetector::ocl_Sr(const Mat_<uchar>& patch)
{
int64 e1, e2, e3, e4;
float t;
e1 = getTickCount();
double splus = 0.0, sminus = 0.0;
e3 = getTickCount();
UMat devPatch = patch.getUMat(ACCESS_READ, USAGE_ALLOCATE_DEVICE_MEMORY);
UMat devPositiveSamples = posExp->getUMat(ACCESS_READ, USAGE_ALLOCATE_DEVICE_MEMORY);
UMat devNegativeSamples = negExp->getUMat(ACCESS_READ, USAGE_ALLOCATE_DEVICE_MEMORY);
UMat devNCC(1, 2*MAX_EXAMPLES_IN_MODEL, CV_32FC1, ACCESS_RW, USAGE_ALLOCATE_DEVICE_MEMORY);
ocl::Kernel k;
ocl::ProgramSource src = ocl::tracking::tldDetector_oclsrc;
String error;
ocl::Program prog(src, NULL, error);
k.create("NCC", prog);
if (k.empty())
printf("Kernel create failed!!!\n");
k.args(
ocl::KernelArg::PtrReadOnly(devPatch),
ocl::KernelArg::PtrReadOnly(devPositiveSamples),
ocl::KernelArg::PtrReadOnly(devNegativeSamples),
ocl::KernelArg::PtrWriteOnly(devNCC),
(int)posNum,
(int)negNum);
e4 = getTickCount();
t = (e4 - e3) / getTickFrequency()*1000.0;
//printf("Mem Cpy GPU: %f\n", t);
size_t globSize = 1000;
size_t localSize = 128;
e3 = getTickCount();
if (!k.run(1, &globSize, &localSize, true))
printf("Kernel Run Error!!!");
e4 = getTickCount();
t = (e4 - e3) / getTickFrequency()*1000.0;
//printf("Kernel Run GPU: %f\n", t);
e3 = getTickCount();
Mat resNCC = devNCC.getMat(ACCESS_READ);
e4 = getTickCount();
t = (e4 - e3) / getTickFrequency()*1000.0;
//printf("Read Mem GPU: %f\n", t);
////Compare
//Mat_<uchar> modelSample(STANDARD_PATCH_SIZE, STANDARD_PATCH_SIZE);
//for (int i = 0; i < 200; i+=17)
//{
// modelSample.data = &(posExp->data[i * 225]);
// printf("%f\t%f\n\n", resNCC.at<float>(i), NCC(modelSample, patch));
//}
//for (int i = 0; i < 200; i+=23)
//{
// modelSample.data = &(negExp->data[i * 225]);
// printf("%f\t%f\n", resNCC.at<float>(500+i), NCC(modelSample, patch));
//}
for (int i = 0; i < *posNum; i++)
splus = std::max(splus, 0.5 * (resNCC.at<float>(i) + 1.0));
for (int i = 0; i < *negNum; i++)
sminus = std::max(sminus, 0.5 * (resNCC.at<float>(i+500) +1.0));
e2 = getTickCount();
t = (e2 - e1) / getTickFrequency()*1000.0;
//printf("Sr GPU: %f\n\n", t);
if (splus + sminus == 0.0)
return 0.0;
return splus / (sminus + splus);
......@@ -78,6 +186,10 @@ namespace cv
// Calculate Conservative similarity of the patch (NN-Model)
double TLDDetector::Sc(const Mat_<uchar>& patch)
{
/*
int64 e1, e2;
float t;
e1 = getTickCount();
double splus = 0.0, sminus = 0.0;
int med = getMedian((*timeStampsPositive));
for (int i = 0; i < (int)(*positiveExamples).size(); i++)
......@@ -87,6 +199,118 @@ namespace cv
}
for (int i = 0; i < (int)(*negativeExamples).size(); i++)
sminus = std::max(sminus, 0.5 * (NCC((*negativeExamples)[i], patch) + 1.0));
e2 = getTickCount();
t = (e2 - e1) / getTickFrequency()*1000.0;
printf("Sc: %f\n", t);
if (splus + sminus == 0.0)
return 0.0;
return splus / (sminus + splus);
*/
int64 e1, e2;
float t;
e1 = getTickCount();
double splus = 0.0, sminus = 0.0;
Mat_<uchar> modelSample(STANDARD_PATCH_SIZE, STANDARD_PATCH_SIZE);
int med = getMedian((*timeStampsPositive));
for (int i = 0; i < *posNum; i++)
{
if ((int)(*timeStampsPositive)[i] <= med)
{
modelSample.data = &(posExp->data[i * 225]);
splus = std::max(splus, 0.5 * (NCC(modelSample, patch) + 1.0));
}
}
for (int i = 0; i < *negNum; i++)
{
modelSample.data = &(negExp->data[i * 225]);
sminus = std::max(sminus, 0.5 * (NCC(modelSample, patch) + 1.0));
}
e2 = getTickCount();
t = (e2 - e1) / getTickFrequency()*1000.0;
printf("Sc: %f\n", t);
if (splus + sminus == 0.0)
return 0.0;
return splus / (sminus + splus);
}
double TLDDetector::ocl_Sc(const Mat_<uchar>& patch)
{
int64 e1, e2, e3, e4;
float t;
e1 = getTickCount();
double splus = 0.0, sminus = 0.0;
e3 = getTickCount();
UMat devPatch = patch.getUMat(ACCESS_READ, USAGE_ALLOCATE_DEVICE_MEMORY);
UMat devPositiveSamples = posExp->getUMat(ACCESS_READ, USAGE_ALLOCATE_DEVICE_MEMORY);
UMat devNegativeSamples = negExp->getUMat(ACCESS_READ, USAGE_ALLOCATE_DEVICE_MEMORY);
UMat devNCC(1, 2 * MAX_EXAMPLES_IN_MODEL, CV_32FC1, ACCESS_RW, USAGE_ALLOCATE_DEVICE_MEMORY);
ocl::Kernel k;
ocl::ProgramSource src = ocl::tracking::tldDetector_oclsrc;
String error;
ocl::Program prog(src, NULL, error);
k.create("NCC", prog);
if (k.empty())
printf("Kernel create failed!!!\n");
k.args(
ocl::KernelArg::PtrReadOnly(devPatch),
ocl::KernelArg::PtrReadOnly(devPositiveSamples),
ocl::KernelArg::PtrReadOnly(devNegativeSamples),
ocl::KernelArg::PtrWriteOnly(devNCC),
(int)posNum,
(int)negNum);
e4 = getTickCount();
t = (e4 - e3) / getTickFrequency()*1000.0;
//printf("Mem Cpy GPU: %f\n", t);
size_t globSize = 1000;
size_t localSize = 128;
e3 = getTickCount();
if (!k.run(1, &globSize, &localSize, true))
printf("Kernel Run Error!!!");
e4 = getTickCount();
t = (e4 - e3) / getTickFrequency()*1000.0;
//printf("Kernel Run GPU: %f\n", t);
e3 = getTickCount();
Mat resNCC = devNCC.getMat(ACCESS_READ);
e4 = getTickCount();
t = (e4 - e3) / getTickFrequency()*1000.0;
//printf("Read Mem GPU: %f\n", t);
////Compare
//Mat_<uchar> modelSample(STANDARD_PATCH_SIZE, STANDARD_PATCH_SIZE);
//for (int i = 0; i < 200; i+=17)
//{
// modelSample.data = &(posExp->data[i * 225]);
// printf("%f\t%f\n\n", resNCC.at<float>(i), NCC(modelSample, patch));
//}
//for (int i = 0; i < 200; i+=23)
//{
// modelSample.data = &(negExp->data[i * 225]);
// printf("%f\t%f\n", resNCC.at<float>(500+i), NCC(modelSample, patch));
//}
int med = getMedian((*timeStampsPositive));
for (int i = 0; i < *posNum; i++)
if ((int)(*timeStampsPositive)[i] <= med)
splus = std::max(splus, 0.5 * (resNCC.at<float>(i) +1.0));
for (int i = 0; i < *negNum; i++)
sminus = std::max(sminus, 0.5 * (resNCC.at<float>(i + 500) + 1.0));
e2 = getTickCount();
t = (e2 - e1) / getTickFrequency()*1000.0;
//printf("Sc GPU: %f\n\n", t);
if (splus + sminus == 0.0)
return 0.0;
return splus / (sminus + splus);
......@@ -167,7 +391,8 @@ namespace cv
labPatch.rect = Rect2d(dx * i * scale, dy * j * scale, initSize.width * scale, initSize.height * scale);
resample(resized_img, Rect2d(Point(dx * i, dy * j), initSize), standardPatch);
tmp = Sr(standardPatch);
tmp = ocl_Sr(standardPatch);
////To fix: Check the paper, probably this cause wrong learning
//
......@@ -185,7 +410,7 @@ namespace cv
{
npos++;
}
tmp = Sc(standardPatch);
tmp = ocl_Sc(standardPatch);
if (tmp > maxSc)
{
maxSc = tmp;
......
......@@ -43,6 +43,7 @@
#define OPENCV_TLD_DETECTOR
#include "precomp.hpp"
#include "opencl_kernels_tracking.hpp"
#include "tldEnsembleClassifier.hpp"
#include "tldUtils.hpp"
......@@ -74,9 +75,13 @@ namespace cv
inline double ensembleClassifierNum(const uchar* data);
inline void prepareClassifiers(int rowstep);
double Sr(const Mat_<uchar>& patch);
double ocl_Sr(const Mat_<uchar>& patch);
double Sc(const Mat_<uchar>& patch);
double ocl_Sc(const Mat_<uchar>& patch);
std::vector<TLDEnsembleClassifier> classifiers;
Mat *posExp, *negExp;
int *posNum, *negNum;
std::vector<Mat_<uchar> > *positiveExamples, *negativeExamples;
std::vector<int> *timeStampsPositive, *timeStampsNegative;
double *originalVariancePtr;
......@@ -88,6 +93,7 @@ namespace cv
bool isObject, shouldBeIntegrated;
};
bool detect(const Mat& img, const Mat& imgBlurred, Rect2d& res, std::vector<LabeledPatch>& patches, Size initSize);
bool ocl_detect(const Mat& img, const Mat& imgBlurred, Rect2d& res, std::vector<LabeledPatch>& patches, Size initSize);
protected:
......
......@@ -56,7 +56,16 @@ namespace cv
detector = Ptr<TLDDetector>(new TLDDetector());
//Propagate data to Detector
detector->positiveExamples = &positiveExamples;
posNum = 0;
negNum = 0;
posExp = Mat(Size(225, 500), CV_8UC1);
negExp = Mat(Size(225, 500), CV_8UC1);
detector->posNum = &posNum;
detector->negNum = &negNum;
detector->posExp = &posExp;
detector->negExp = &negExp;
detector->positiveExamples = &positiveExamples;
detector->negativeExamples = &negativeExamples;
detector->timeStampsPositive = &timeStampsPositive;
detector->timeStampsNegative = &timeStampsNegative;
......@@ -78,6 +87,7 @@ namespace cv
//Generate initial positive samples and put them to the model
positiveExamples.reserve(200);
for (int i = 0; i < (int)closest.size(); i++)
{
for (int j = 0; j < 20; j++)
......@@ -239,12 +249,30 @@ namespace cv
std::vector<int>* proxyT;
if (positive)
{
if (posNum < 500)
{
uchar *patchPtr = example.data;
uchar *modelPtr = posExp.data;
for (int i = 0; i < STANDARD_PATCH_SIZE*STANDARD_PATCH_SIZE; i++)
modelPtr[posNum*STANDARD_PATCH_SIZE*STANDARD_PATCH_SIZE + i] = patchPtr[i];
posNum++;
}
proxyV = &positiveExamples;
proxyN = &timeStampPositiveNext;
proxyT = &timeStampsPositive;
}
else
{
if (negNum < 500)
{
uchar *patchPtr = example.data;
uchar *modelPtr = negExp.data;
for (int i = 0; i < STANDARD_PATCH_SIZE*STANDARD_PATCH_SIZE; i++)
modelPtr[negNum*STANDARD_PATCH_SIZE*STANDARD_PATCH_SIZE + i] = patchPtr[i];
negNum++;
}
proxyV = &negativeExamples;
proxyN = &timeStampNegativeNext;
proxyT = &timeStampsNegative;
......
......@@ -67,6 +67,8 @@ namespace cv
Ptr<TLDDetector> detector;
std::vector<Mat_<uchar> > positiveExamples, negativeExamples;
Mat posExp, negExp;
int posNum, negNum;
std::vector<int> timeStampsPositive, timeStampsNegative;
int timeStampPositiveNext, timeStampNegativeNext;
double originalVariance_;
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment