Commit 92c8b94b authored by Andrey Pavlenko's avatar Andrey Pavlenko Committed by OpenCV Buildbot

Merge pull request #2265 from vpisarev:ocl_facedetect_amd3

parents 9dede73d de4b1c66
......@@ -615,7 +615,7 @@ static void* initOpenCLAndLoad(const char* funcname)
initialized = true;
g_haveOpenCL = handle != 0 && dlsym(handle, oclFuncToCheck) != 0;
if( g_haveOpenCL )
fprintf(stderr, "Succesffuly loaded OpenCL v1.1+ runtime from %s\n", oclpath);
fprintf(stderr, "Successfully loaded OpenCL v1.1+ runtime from %s\n", oclpath);
else
fprintf(stderr, "Failed to load OpenCL runtime\n");
}
......@@ -1335,11 +1335,13 @@ inline bool operator < (const HashKey& h1, const HashKey& h2)
return h1.a < h2.a || (h1.a == h2.a && h1.b < h2.b);
}
static bool g_isOpenCLInitialized = false;
static bool g_isOpenCLAvailable = false;
bool haveOpenCL()
{
#ifdef HAVE_OPENCL
static bool g_isOpenCLInitialized = false;
static bool g_isOpenCLAvailable = false;
if (!g_isOpenCLInitialized)
{
try
......@@ -1354,6 +1356,9 @@ bool haveOpenCL()
g_isOpenCLInitialized = true;
}
return g_isOpenCLAvailable;
#else
return false;
#endif
}
bool useOpenCL()
......
......@@ -32,112 +32,6 @@ The following reference is for the detection part only. There is a separate appl
.. [Lienhart02] Rainer Lienhart and Jochen Maydt. An Extended Set of Haar-like Features for Rapid Object Detection. IEEE ICIP 2002, Vol. 1, pp. 900-903, Sep. 2002. This paper, as well as the extended technical report, can be retrieved at http://www.multimedia-computing.de/mediawiki//images/5/52/MRL-TR-May02-revised-Dec02.pdf
FeatureEvaluator
----------------
.. ocv:class:: FeatureEvaluator
Base class for computing feature values in cascade classifiers. ::
class CV_EXPORTS FeatureEvaluator
{
public:
enum { HAAR = 0, LBP = 1 }; // supported feature types
virtual ~FeatureEvaluator(); // destructor
virtual bool read(const FileNode& node);
virtual Ptr<FeatureEvaluator> clone() const;
virtual int getFeatureType() const;
virtual bool setImage(const Mat& img, Size origWinSize);
virtual bool setWindow(Point p);
virtual double calcOrd(int featureIdx) const;
virtual int calcCat(int featureIdx) const;
static Ptr<FeatureEvaluator> create(int type);
};
FeatureEvaluator::read
--------------------------
Reads parameters of features from the ``FileStorage`` node.
.. ocv:function:: bool FeatureEvaluator::read(const FileNode& node)
:param node: File node from which the feature parameters are read.
FeatureEvaluator::clone
---------------------------
Returns a full copy of the feature evaluator.
.. ocv:function:: Ptr<FeatureEvaluator> FeatureEvaluator::clone() const
FeatureEvaluator::getFeatureType
------------------------------------
Returns the feature type (``HAAR`` or ``LBP`` for now).
.. ocv:function:: int FeatureEvaluator::getFeatureType() const
FeatureEvaluator::setImage
------------------------------
Assigns an image to feature evaluator.
.. ocv:function:: bool FeatureEvaluator::setImage(InputArray img, Size origWinSize, Size sumSize)
:param img: Matrix of the type ``CV_8UC1`` containing an image where the features are computed.
:param origWinSize: Size of training images.
:param sumSize: The requested size of integral images (so if the integral image is smaller, it resides in the top-left corner of the larger image of requested size). Because the features are represented using offsets from the image origin, using the same sumSize for all scales helps to avoid constant readjustments of the features to different scales.
The method assigns an image, where the features will be computed, to the feature evaluator.
FeatureEvaluator::setWindow
-------------------------------
Assigns a window in the current image where the features will be computed.
.. ocv:function:: bool FeatureEvaluator::setWindow(Point p)
:param p: Upper left point of the window where the features are computed. Size of the window is equal to the size of training images.
FeatureEvaluator::calcOrd
-----------------------------
Computes the value of an ordered (numerical) feature.
.. ocv:function:: double FeatureEvaluator::calcOrd(int featureIdx) const
:param featureIdx: Index of the feature whose value is computed.
The function returns the computed value of an ordered feature.
FeatureEvaluator::calcCat
-----------------------------
Computes the value of a categorical feature.
.. ocv:function:: int FeatureEvaluator::calcCat(int featureIdx) const
:param featureIdx: Index of the feature whose value is computed.
The function returns the computed label of a categorical feature, which is the value from [0,... (number of categories - 1)].
FeatureEvaluator::create
----------------------------
Constructs the feature evaluator.
.. ocv:function:: Ptr<FeatureEvaluator> FeatureEvaluator::create(int type)
:param type: Type of features evaluated by cascade (``HAAR`` or ``LBP`` for now).
CascadeClassifier
-----------------
.. ocv:class:: CascadeClassifier
......
......@@ -121,29 +121,6 @@ CV_EXPORTS void groupRectangles_meanshift(std::vector<Rect>& rectList, std::ve
std::vector<double>& foundScales,
double detectThreshold = 0.0, Size winDetSize = Size(64, 128));
class CV_EXPORTS FeatureEvaluator
{
public:
enum { HAAR = 0,
LBP = 1,
HOG = 2
};
virtual ~FeatureEvaluator();
virtual bool read(const FileNode& node);
virtual Ptr<FeatureEvaluator> clone() const;
virtual int getFeatureType() const;
virtual bool setImage(InputArray img, Size origWinSize, Size sumSize);
virtual bool setWindow(Point p);
virtual double calcOrd(int featureIdx) const;
virtual int calcCat(int featureIdx) const;
static Ptr<FeatureEvaluator> create(int type);
};
template<> CV_EXPORTS void DefaultDeleter<CvHaarClassifierCascade>::operator ()(CvHaarClassifierCascade* obj) const;
enum { CASCADE_DO_CANNY_PRUNING = 1,
......
......@@ -24,14 +24,14 @@ OCL_PERF_TEST_P(Cascade_Image_MinSize, CascadeClassifier,
string("cv/cascadeandhog/images/class57.png") ),
testing::Values(30, 64, 90) ) )
{
const string cascasePath = get<0>(GetParam());
const string cascadePath = get<0>(GetParam());
const string imagePath = get<1>(GetParam());
int min_size = get<2>(GetParam());
Size minSize(min_size, min_size);
CascadeClassifier cc( getDataPath(cascasePath) );
CascadeClassifier cc( getDataPath(cascadePath) );
if (cc.empty())
FAIL() << "Can't load cascade file: " << getDataPath(cascasePath);
FAIL() << "Can't load cascade file: " << getDataPath(cascadePath);
Mat img = imread(getDataPath(imagePath), IMREAD_GRAYSCALE);
if (img.empty())
......
......@@ -46,71 +46,6 @@
#include "opencv2/objdetect/objdetect_c.h"
#include "opencl_kernels.hpp"
#if defined (LOG_CASCADE_STATISTIC)
struct Logger
{
enum { STADIES_NUM = 20 };
int gid;
cv::Mat mask;
cv::Size sz0;
int step;
Logger() : gid (0), step(2) {}
void setImage(const cv::Mat& image)
{
if (gid == 0)
sz0 = image.size();
mask.create(image.rows, image.cols * (STADIES_NUM + 1) + STADIES_NUM, CV_8UC1);
mask = cv::Scalar(0);
cv::Mat roi = mask(cv::Rect(cv::Point(0,0), image.size()));
image.copyTo(roi);
printf("%d) Size = (%d, %d)\n", gid, image.cols, image.rows);
for(int i = 0; i < STADIES_NUM; ++i)
{
int x = image.cols + i * (image.cols + 1);
cv::line(mask, cv::Point(x, 0), cv::Point(x, mask.rows-1), cv::Scalar(255));
}
if (sz0.width/image.cols > 2 && sz0.height/image.rows > 2)
step = 1;
}
void setPoint(const cv::Point& p, int passed_stadies)
{
int cols = mask.cols / (STADIES_NUM + 1);
passed_stadies = -passed_stadies;
passed_stadies = (passed_stadies == -1) ? STADIES_NUM : passed_stadies;
unsigned char* ptr = mask.ptr<unsigned char>(p.y) + cols + 1 + p.x;
for(int i = 0; i < passed_stadies; ++i, ptr += cols + 1)
{
*ptr = 255;
if (step == 2)
{
ptr[1] = 255;
ptr[mask.step] = 255;
ptr[mask.step + 1] = 255;
}
}
};
void write()
{
char buf[4096];
sprintf(buf, "%04d.png", gid++);
cv::imwrite(buf, mask);
}
} logger;
#endif
namespace cv
{
......@@ -121,7 +56,8 @@ template<typename _Tp> void copyVectorToUMat(const std::vector<_Tp>& v, UMat& um
Mat(1, (int)(v.size()*sizeof(v[0])), CV_8U, (void*)&v[0]).copyTo(um);
}
void groupRectangles(std::vector<Rect>& rectList, int groupThreshold, double eps, std::vector<int>* weights, std::vector<double>* levelWeights)
void groupRectangles(std::vector<Rect>& rectList, int groupThreshold, double eps,
std::vector<int>* weights, std::vector<double>* levelWeights)
{
if( groupThreshold <= 0 || rectList.empty() )
{
......@@ -426,7 +362,8 @@ void groupRectangles(std::vector<Rect>& rectList, std::vector<int>& weights, int
groupRectangles(rectList, groupThreshold, eps, &weights, 0);
}
//used for cascade detection algorithm for ROC-curve calculating
void groupRectangles(std::vector<Rect>& rectList, std::vector<int>& rejectLevels, std::vector<double>& levelWeights, int groupThreshold, double eps)
void groupRectangles(std::vector<Rect>& rectList, std::vector<int>& rejectLevels,
std::vector<double>& levelWeights, int groupThreshold, double eps)
{
groupRectangles(rectList, groupThreshold, eps, &rejectLevels, &levelWeights);
}
......@@ -439,14 +376,138 @@ void groupRectangles_meanshift(std::vector<Rect>& rectList, std::vector<double>&
FeatureEvaluator::~FeatureEvaluator() {}
bool FeatureEvaluator::read(const FileNode&) {return true;}
bool FeatureEvaluator::read(const FileNode&, Size _origWinSize)
{
origWinSize = _origWinSize;
localSize = lbufSize = Size(0, 0);
if (scaleData.empty())
scaleData = makePtr<std::vector<ScaleData> >();
else
scaleData->clear();
return true;
}
Ptr<FeatureEvaluator> FeatureEvaluator::clone() const { return Ptr<FeatureEvaluator>(); }
int FeatureEvaluator::getFeatureType() const {return -1;}
bool FeatureEvaluator::setImage(InputArray, Size, Size) {return true;}
bool FeatureEvaluator::setWindow(Point) { return true; }
double FeatureEvaluator::calcOrd(int) const { return 0.; }
bool FeatureEvaluator::setWindow(Point, int) { return true; }
void FeatureEvaluator::getUMats(std::vector<UMat>& bufs)
{
if (!(sbufFlag & USBUF_VALID))
{
sbuf.copyTo(usbuf);
sbufFlag |= USBUF_VALID;
}
bufs.clear();
bufs.push_back(uscaleData);
bufs.push_back(usbuf);
bufs.push_back(ufbuf);
}
void FeatureEvaluator::getMats()
{
if (!(sbufFlag & SBUF_VALID))
{
usbuf.copyTo(sbuf);
sbufFlag |= SBUF_VALID;
}
}
float FeatureEvaluator::calcOrd(int) const { return 0.; }
int FeatureEvaluator::calcCat(int) const { return 0; }
bool FeatureEvaluator::updateScaleData( Size imgsz, const std::vector<float>& _scales )
{
if( scaleData.empty() )
scaleData = makePtr<std::vector<ScaleData> >();
size_t i, nscales = _scales.size();
bool recalcOptFeatures = nscales != scaleData->size();
scaleData->resize(nscales);
int layer_dy = 0;
Point layer_ofs(0,0);
Size prevBufSize = sbufSize;
sbufSize.width = std::max(sbufSize.width, (int)alignSize(cvRound(imgsz.width/_scales[0]) + 31, 32));
recalcOptFeatures = recalcOptFeatures || sbufSize.width != prevBufSize.width;
for( i = 0; i < nscales; i++ )
{
FeatureEvaluator::ScaleData& s = scaleData->at(i);
if( !recalcOptFeatures && fabs(s.scale - _scales[i]) > FLT_EPSILON*100*_scales[i] )
recalcOptFeatures = true;
float sc = _scales[i];
Size sz;
sz.width = cvRound(imgsz.width/sc);
sz.height = cvRound(imgsz.height/sc);
s.ystep = sc >= 2 ? 1 : 2;
s.scale = sc;
s.szi = Size(sz.width+1, sz.height+1);
if( layer_ofs.x + s.szi.width > sbufSize.width )
{
layer_ofs = Point(0, layer_ofs.y + layer_dy);
layer_dy = s.szi.height;
}
s.layer_ofs = layer_ofs.y*sbufSize.width + layer_ofs.x;
layer_ofs.x += s.szi.width;
}
layer_ofs.y += layer_dy;
sbufSize.height = std::max(sbufSize.height, layer_ofs.y);
recalcOptFeatures = recalcOptFeatures || sbufSize.height != prevBufSize.height;
return recalcOptFeatures;
}
bool FeatureEvaluator::setImage( InputArray _image, const std::vector<float>& _scales )
{
Size imgsz = _image.size();
bool recalcOptFeatures = updateScaleData(imgsz, _scales);
size_t i, nscales = scaleData->size();
Size sz0 = scaleData->at(0).szi;
sz0 = Size(std::max(rbuf.cols, (int)alignSize(sz0.width, 16)), std::max(rbuf.rows, sz0.height));
if (recalcOptFeatures)
{
computeOptFeatures();
copyVectorToUMat(*scaleData, uscaleData);
}
if (_image.isUMat() && localSize.area() > 0)
{
usbuf.create(sbufSize.height*nchannels, sbufSize.width, CV_32S);
urbuf.create(sz0, CV_8U);
for (i = 0; i < nscales; i++)
{
const ScaleData& s = scaleData->at(i);
UMat dst(urbuf, Rect(0, 0, s.szi.width - 1, s.szi.height - 1));
resize(_image, dst, dst.size(), 1. / s.scale, 1. / s.scale, INTER_LINEAR);
computeChannels((int)i, dst);
}
sbufFlag = USBUF_VALID;
}
else
{
Mat image = _image.getMat();
sbuf.create(sbufSize.height*nchannels, sbufSize.width, CV_32S);
rbuf.create(sz0, CV_8U);
for (i = 0; i < nscales; i++)
{
const ScaleData& s = scaleData->at(i);
Mat dst(s.szi.height - 1, s.szi.width - 1, CV_8U, rbuf.data);
resize(image, dst, dst.size(), 1. / s.scale, 1. / s.scale, INTER_LINEAR);
computeChannels((int)i, dst);
}
sbufFlag = SBUF_VALID;
}
return true;
}
//---------------------------------------------- HaarEvaluator ---------------------------------------
bool HaarEvaluator::Feature :: read( const FileNode& node )
......@@ -476,24 +537,32 @@ HaarEvaluator::HaarEvaluator()
{
optfeaturesPtr = 0;
pwin = 0;
localSize = Size(4, 2);
lbufSize = Size(0, 0);
nchannels = 0;
}
HaarEvaluator::~HaarEvaluator()
{
}
bool HaarEvaluator::read(const FileNode& node)
bool HaarEvaluator::read(const FileNode& node, Size _origWinSize)
{
if (!FeatureEvaluator::read(node, _origWinSize))
return false;
size_t i, n = node.size();
CV_Assert(n > 0);
if(features.empty())
features = makePtr<std::vector<Feature> >();
if(optfeatures.empty())
optfeatures = makePtr<std::vector<OptFeature> >();
if (optfeatures_lbuf.empty())
optfeatures_lbuf = makePtr<std::vector<OptFeature> >();
features->resize(n);
FileNodeIterator it = node.begin();
hasTiltedFeatures = false;
std::vector<Feature>& ff = *features;
sumSize0 = Size();
sbufSize = Size();
ufbuf.release();
for(i = 0; i < n; i++, ++it)
......@@ -503,143 +572,148 @@ bool HaarEvaluator::read(const FileNode& node)
if( ff[i].tilted )
hasTiltedFeatures = true;
}
nchannels = hasTiltedFeatures ? 3 : 2;
normrect = Rect(1, 1, origWinSize.width - 2, origWinSize.height - 2);
if (ocl::haveOpenCL())
{
String vname = ocl::Device::getDefault().vendor();
if (vname == "Advanced Micro Devices, Inc." ||
vname == "AMD")
localSize = Size(8, 8);
lbufSize = Size(origWinSize.width + localSize.width,
origWinSize.height + localSize.height);
if (lbufSize.area() > 1024)
lbufSize = Size(0, 0);
}
return true;
}
Ptr<FeatureEvaluator> HaarEvaluator::clone() const
{
Ptr<HaarEvaluator> ret = makePtr<HaarEvaluator>();
ret->origWinSize = origWinSize;
ret->features = features;
ret->optfeatures = optfeatures;
ret->optfeaturesPtr = optfeatures->empty() ? 0 : &(*(ret->optfeatures))[0];
ret->hasTiltedFeatures = hasTiltedFeatures;
ret->sum0 = sum0; ret->sqsum0 = sqsum0;
ret->sum = sum; ret->sqsum = sqsum;
ret->usum0 = usum0; ret->usqsum0 = usqsum0; ret->ufbuf = ufbuf;
ret->normrect = normrect;
memcpy( ret->nofs, nofs, 4*sizeof(nofs[0]) );
ret->pwin = pwin;
ret->varianceNormFactor = varianceNormFactor;
*ret = *this;
return ret;
}
bool HaarEvaluator::setImage( InputArray _image, Size _origWinSize, Size _sumSize )
{
Size imgsz = _image.size();
int cols = imgsz.width, rows = imgsz.height;
if (imgsz.width < origWinSize.width || imgsz.height < origWinSize.height)
return false;
origWinSize = _origWinSize;
normrect = Rect(1, 1, origWinSize.width-2, origWinSize.height-2);
int rn = _sumSize.height, cn = _sumSize.width, rn_scale = hasTiltedFeatures ? 2 : 1;
int sumStep, tofs = 0;
CV_Assert(rn >= rows+1 && cn >= cols+1);
void HaarEvaluator::computeChannels(int scaleIdx, InputArray img)
{
const ScaleData& s = scaleData->at(scaleIdx);
tofs = (int)sbufSize.area();
sqofs = hasTiltedFeatures ? tofs*2 : tofs;
if( _image.isUMat() )
if (img.isUMat())
{
usum0.create(rn*rn_scale, cn, CV_32S);
usqsum0.create(rn, cn, CV_32S);
usum = UMat(usum0, Rect(0, 0, cols+1, rows+1));
usqsum = UMat(usqsum0, Rect(0, 0, cols, rows));
if( hasTiltedFeatures )
int sx = s.layer_ofs % sbufSize.width;
int sy = s.layer_ofs / sbufSize.width;
int sqy = sy + (sqofs / sbufSize.width);
UMat sum(usbuf, Rect(sx, sy, s.szi.width, s.szi.height));
UMat sqsum(usbuf, Rect(sx, sqy, s.szi.width, s.szi.height));
sqsum.flags = (sqsum.flags & ~UMat::DEPTH_MASK) | CV_32F;
if (hasTiltedFeatures)
{
UMat utilted(usum0, Rect(0, _sumSize.height, cols+1, rows+1));
integral(_image, usum, noArray(), utilted, CV_32S);
tofs = (int)((utilted.offset - usum.offset)/sizeof(int));
int sty = sy + (tofs / sbufSize.width);
UMat tilted(usbuf, Rect(sx, sty, s.szi.width, s.szi.height));
integral(img, sum, sqsum, tilted, CV_32S, CV_32F);
}
else
{
integral(_image, usum, noArray(), noArray(), CV_32S);
UMatData* u = sqsum.u;
integral(img, sum, sqsum, noArray(), CV_32S, CV_32F);
CV_Assert(sqsum.u == u && sqsum.size() == s.szi && sqsum.type()==CV_32F);
}
sqrBoxFilter(_image, usqsum, CV_32S,
Size(normrect.width, normrect.height),
Point(0, 0), false);
/*sqrBoxFilter(_image.getMat(), sqsum, CV_32S,
Size(normrect.width, normrect.height),
Point(0, 0), false);
sqsum.copyTo(usqsum);*/
sumStep = (int)(usum.step/usum.elemSize());
}
else
{
sum0.create(rn*rn_scale, cn, CV_32S);
sqsum0.create(rn, cn, CV_32S);
sum = sum0(Rect(0, 0, cols+1, rows+1));
sqsum = sqsum0(Rect(0, 0, cols, rows));
Mat sum(s.szi, CV_32S, sbuf.ptr<int>() + s.layer_ofs, sbuf.step);
Mat sqsum(s.szi, CV_32F, sum.ptr<int>() + sqofs, sbuf.step);
if( hasTiltedFeatures )
if (hasTiltedFeatures)
{
Mat tilted = sum0(Rect(0, _sumSize.height, cols+1, rows+1));
integral(_image, sum, noArray(), tilted, CV_32S);
tofs = (int)((tilted.data - sum.data)/sizeof(int));
Mat tilted(s.szi, CV_32S, sum.ptr<int>() + tofs, sbuf.step);
integral(img, sum, sqsum, tilted, CV_32S, CV_32F);
}
else
integral(_image, sum, noArray(), noArray(), CV_32S);
sqrBoxFilter(_image, sqsum, CV_32S,
Size(normrect.width, normrect.height),
Point(0, 0), false);
sumStep = (int)(sum.step/sum.elemSize());
integral(img, sum, sqsum, noArray(), CV_32S, CV_32F);
}
}
CV_SUM_OFS( nofs[0], nofs[1], nofs[2], nofs[3], 0, normrect, sumStep );
void HaarEvaluator::computeOptFeatures()
{
int sstep = sbufSize.width;
CV_SUM_OFS( nofs[0], nofs[1], nofs[2], nofs[3], 0, normrect, sstep );
size_t fi, nfeatures = features->size();
const std::vector<Feature>& ff = *features;
optfeatures->resize(nfeatures);
optfeaturesPtr = &(*optfeatures)[0];
for( fi = 0; fi < nfeatures; fi++ )
optfeaturesPtr[fi].setOffsets( ff[fi], sstep, tofs );
optfeatures_lbuf->resize(nfeatures);
if( sumSize0 != _sumSize )
{
optfeatures->resize(nfeatures);
optfeaturesPtr = &(*optfeatures)[0];
for( fi = 0; fi < nfeatures; fi++ )
optfeaturesPtr[fi].setOffsets( ff[fi], sumStep, tofs );
}
if( _image.isUMat() && (sumSize0 != _sumSize || ufbuf.empty()) )
copyVectorToUMat(*optfeatures, ufbuf);
sumSize0 = _sumSize;
for( fi = 0; fi < nfeatures; fi++ )
optfeatures_lbuf->at(fi).setOffsets(ff[fi], lbufSize.width > 0 ? lbufSize.width : sstep, tofs);
return true;
copyVectorToUMat(*optfeatures_lbuf, ufbuf);
}
bool HaarEvaluator::setWindow( Point pt )
bool HaarEvaluator::setWindow( Point pt, int scaleIdx )
{
const ScaleData& s = getScaleData(scaleIdx);
if( pt.x < 0 || pt.y < 0 ||
pt.x + origWinSize.width >= sum.cols ||
pt.y + origWinSize.height >= sum.rows )
pt.x + origWinSize.width >= s.szi.width ||
pt.y + origWinSize.height >= s.szi.height )
return false;
const int* p = &sum.at<int>(pt);
int valsum = CALC_SUM_OFS(nofs, p);
double valsqsum = sqsum.at<int>(pt.y + normrect.y, pt.x + normrect.x);
pwin = &sbuf.at<int>(pt) + s.layer_ofs;
const float* pq = (const float*)(pwin + sqofs);
int valsum = CALC_SUM_OFS(nofs, pwin);
float valsqsum = CALC_SUM_OFS(nofs, pq);
double nf = (double)normrect.area() * valsqsum - (double)valsum * valsum;
if( nf > 0. )
nf = std::sqrt(nf);
else
nf = 1.;
varianceNormFactor = 1./nf;
pwin = p;
varianceNormFactor = (float)(1./nf);
return true;
}
void HaarEvaluator::OptFeature::setOffsets( const Feature& _f, int step, int _tofs )
{
weight[0] = _f.rect[0].weight;
weight[1] = _f.rect[1].weight;
weight[2] = _f.rect[2].weight;
if( _f.tilted )
{
CV_TILTED_OFS( ofs[0][0], ofs[0][1], ofs[0][2], ofs[0][3], _tofs, _f.rect[0].r, step );
CV_TILTED_OFS( ofs[1][0], ofs[1][1], ofs[1][2], ofs[1][3], _tofs, _f.rect[1].r, step );
CV_TILTED_OFS( ofs[2][0], ofs[2][1], ofs[2][2], ofs[2][3], _tofs, _f.rect[2].r, step );
}
else
{
CV_SUM_OFS( ofs[0][0], ofs[0][1], ofs[0][2], ofs[0][3], 0, _f.rect[0].r, step );
CV_SUM_OFS( ofs[1][0], ofs[1][1], ofs[1][2], ofs[1][3], 0, _f.rect[1].r, step );
CV_SUM_OFS( ofs[2][0], ofs[2][1], ofs[2][2], ofs[2][3], 0, _f.rect[2].r, step );
}
}
Rect HaarEvaluator::getNormRect() const
{
return normrect;
}
void HaarEvaluator::getUMats(std::vector<UMat>& bufs)
int HaarEvaluator::getSquaresOffset() const
{
bufs.clear();
bufs.push_back(usum);
bufs.push_back(usqsum);
bufs.push_back(ufbuf);
return sqofs;
}
//---------------------------------------------- LBPEvaluator -------------------------------------
......@@ -655,15 +729,26 @@ LBPEvaluator::LBPEvaluator()
{
features = makePtr<std::vector<Feature> >();
optfeatures = makePtr<std::vector<OptFeature> >();
scaleData = makePtr<std::vector<ScaleData> >();
}
LBPEvaluator::~LBPEvaluator()
{
}
bool LBPEvaluator::read( const FileNode& node )
bool LBPEvaluator::read( const FileNode& node, Size _origWinSize )
{
if (!FeatureEvaluator::read(node, _origWinSize))
return false;
if(features.empty())
features = makePtr<std::vector<Feature> >();
if(optfeatures.empty())
optfeatures = makePtr<std::vector<OptFeature> >();
if (optfeatures_lbuf.empty())
optfeatures_lbuf = makePtr<std::vector<OptFeature> >();
features->resize(node.size());
optfeaturesPtr = &(*optfeatures)[0];
optfeaturesPtr = 0;
FileNodeIterator it = node.begin(), it_end = node.end();
std::vector<Feature>& ff = *features;
for(int i = 0; it != it_end; ++it, i++)
......@@ -671,274 +756,92 @@ bool LBPEvaluator::read( const FileNode& node )
if(!ff[i].read(*it))
return false;
}
nchannels = 1;
if (ocl::haveOpenCL())
{
const ocl::Device& device = ocl::Device::getDefault();
String vname = device.vendor();
if ((vname == "Advanced Micro Devices, Inc." ||
vname == "AMD") && !device.hostUnifiedMemory())
localSize = Size(8, 8);
}
return true;
}
Ptr<FeatureEvaluator> LBPEvaluator::clone() const
{
Ptr<LBPEvaluator> ret = makePtr<LBPEvaluator>();
ret->origWinSize = origWinSize;
ret->features = features;
ret->optfeatures = optfeatures;
ret->optfeaturesPtr = ret->optfeatures.empty() ? 0 : &(*ret->optfeatures)[0];
ret->sum0 = sum0, ret->sum = sum;
ret->pwin = pwin;
*ret = *this;
return ret;
}
bool LBPEvaluator::setImage( InputArray _image, Size _origWinSize, Size _sumSize )
void LBPEvaluator::computeChannels(int scaleIdx, InputArray _img)
{
Size imgsz = _image.size();
int cols = imgsz.width, rows = imgsz.height;
if (imgsz.width < origWinSize.width || imgsz.height < origWinSize.height)
return false;
origWinSize = _origWinSize;
const ScaleData& s = scaleData->at(scaleIdx);
int rn = _sumSize.height, cn = _sumSize.width;
int sumStep;
CV_Assert(rn >= rows+1 && cn >= cols+1);
if( _image.isUMat() )
if (_img.isUMat())
{
usum0.create(rn, cn, CV_32S);
usum = UMat(usum0, Rect(0, 0, cols+1, rows+1));
integral(_image, usum, noArray(), noArray(), CV_32S);
sumStep = (int)(usum.step/usum.elemSize());
int sx = s.layer_ofs % sbufSize.width;
int sy = s.layer_ofs / sbufSize.width;
UMat sum(usbuf, Rect(sx, sy, s.szi.width, s.szi.height));
integral(_img, sum, noArray(), noArray(), CV_32S);
}
else
{
sum0.create(rn, cn, CV_32S);
sum = sum0(Rect(0, 0, cols+1, rows+1));
integral(_image, sum, noArray(), noArray(), CV_32S);
sumStep = (int)(sum.step/sum.elemSize());
}
size_t fi, nfeatures = features->size();
const std::vector<Feature>& ff = *features;
if( sumSize0 != _sumSize )
{
optfeatures->resize(nfeatures);
optfeaturesPtr = &(*optfeatures)[0];
for( fi = 0; fi < nfeatures; fi++ )
optfeaturesPtr[fi].setOffsets( ff[fi], sumStep );
Mat sum(s.szi, CV_32S, sbuf.ptr<int>() + s.layer_ofs, sbuf.step);
integral(_img, sum, noArray(), noArray(), CV_32S);
}
if( _image.isUMat() && (sumSize0 != _sumSize || ufbuf.empty()) )
copyVectorToUMat(*optfeatures, ufbuf);
sumSize0 = _sumSize;
return true;
}
bool LBPEvaluator::setWindow( Point pt )
void LBPEvaluator::computeOptFeatures()
{
if( pt.x < 0 || pt.y < 0 ||
pt.x + origWinSize.width >= sum.cols ||
pt.y + origWinSize.height >= sum.rows )
return false;
pwin = &sum.at<int>(pt);
return true;
}
int sstep = sbufSize.width;
void LBPEvaluator::getUMats(std::vector<UMat>& bufs)
{
bufs.clear();
bufs.push_back(usum);
bufs.push_back(ufbuf);
}
//---------------------------------------------- HOGEvaluator ---------------------------------------
bool HOGEvaluator::Feature :: read( const FileNode& node )
{
FileNode rnode = node[CC_RECT];
FileNodeIterator it = rnode.begin();
it >> rect[0].x >> rect[0].y >> rect[0].width >> rect[0].height >> featComponent;
rect[1].x = rect[0].x + rect[0].width;
rect[1].y = rect[0].y;
rect[2].x = rect[0].x;
rect[2].y = rect[0].y + rect[0].height;
rect[3].x = rect[0].x + rect[0].width;
rect[3].y = rect[0].y + rect[0].height;
rect[1].width = rect[2].width = rect[3].width = rect[0].width;
rect[1].height = rect[2].height = rect[3].height = rect[0].height;
return true;
size_t fi, nfeatures = features->size();
const std::vector<Feature>& ff = *features;
optfeatures->resize(nfeatures);
optfeaturesPtr = &(*optfeatures)[0];
for( fi = 0; fi < nfeatures; fi++ )
optfeaturesPtr[fi].setOffsets( ff[fi], sstep );
copyVectorToUMat(*optfeatures, ufbuf);
}
HOGEvaluator::HOGEvaluator()
{
features = makePtr<std::vector<Feature> >();
}
HOGEvaluator::~HOGEvaluator()
void LBPEvaluator::OptFeature::setOffsets( const Feature& _f, int step )
{
}
Rect tr = _f.rect;
int w0 = tr.width;
int h0 = tr.height;
bool HOGEvaluator::read( const FileNode& node )
{
features->resize(node.size());
featuresPtr = &(*features)[0];
FileNodeIterator it = node.begin(), it_end = node.end();
for(int i = 0; it != it_end; ++it, i++)
{
if(!featuresPtr[i].read(*it))
return false;
}
return true;
CV_SUM_OFS( ofs[0], ofs[1], ofs[4], ofs[5], 0, tr, step );
tr.x += 2*w0;
CV_SUM_OFS( ofs[2], ofs[3], ofs[6], ofs[7], 0, tr, step );
tr.y += 2*h0;
CV_SUM_OFS( ofs[10], ofs[11], ofs[14], ofs[15], 0, tr, step );
tr.x -= 2*w0;
CV_SUM_OFS( ofs[8], ofs[9], ofs[12], ofs[13], 0, tr, step );
}
Ptr<FeatureEvaluator> HOGEvaluator::clone() const
{
Ptr<HOGEvaluator> ret = makePtr<HOGEvaluator>();
ret->origWinSize = origWinSize;
ret->features = features;
ret->featuresPtr = &(*ret->features)[0];
ret->offset = offset;
ret->hist = hist;
ret->normSum = normSum;
return ret;
}
bool HOGEvaluator::setImage( InputArray _image, Size winSize, Size )
bool LBPEvaluator::setWindow( Point pt, int scaleIdx )
{
Mat image = _image.getMat();
int rows = image.rows + 1;
int cols = image.cols + 1;
origWinSize = winSize;
if( image.cols < origWinSize.width || image.rows < origWinSize.height )
return false;
hist.clear();
for( int bin = 0; bin < Feature::BIN_NUM; bin++ )
{
hist.push_back( Mat(rows, cols, CV_32FC1) );
}
normSum.create( rows, cols, CV_32FC1 );
CV_Assert(0 <= scaleIdx && scaleIdx < (int)scaleData->size());
const ScaleData& s = scaleData->at(scaleIdx);
integralHistogram( image, hist, normSum, Feature::BIN_NUM );
size_t featIdx, featCount = features->size();
for( featIdx = 0; featIdx < featCount; featIdx++ )
{
featuresPtr[featIdx].updatePtrs( hist, normSum );
}
return true;
}
bool HOGEvaluator::setWindow(Point pt)
{
if( pt.x < 0 || pt.y < 0 ||
pt.x + origWinSize.width >= hist[0].cols-2 ||
pt.y + origWinSize.height >= hist[0].rows-2 )
pt.x + origWinSize.width >= s.szi.width ||
pt.y + origWinSize.height >= s.szi.height )
return false;
offset = pt.y * ((int)hist[0].step/sizeof(float)) + pt.x;
pwin = &sbuf.at<int>(pt) + s.layer_ofs;
return true;
}
void HOGEvaluator::integralHistogram(const Mat &img, std::vector<Mat> &histogram, Mat &norm, int nbins) const
{
CV_Assert( img.type() == CV_8U || img.type() == CV_8UC3 );
int x, y, binIdx;
Size gradSize(img.size());
Size histSize(histogram[0].size());
Mat grad(gradSize, CV_32F);
Mat qangle(gradSize, CV_8U);
AutoBuffer<int> mapbuf(gradSize.width + gradSize.height + 4);
int* xmap = (int*)mapbuf + 1;
int* ymap = xmap + gradSize.width + 2;
const int borderType = (int)BORDER_REPLICATE;
for( x = -1; x < gradSize.width + 1; x++ )
xmap[x] = borderInterpolate(x, gradSize.width, borderType);
for( y = -1; y < gradSize.height + 1; y++ )
ymap[y] = borderInterpolate(y, gradSize.height, borderType);
int width = gradSize.width;
AutoBuffer<float> _dbuf(width*4);
float* dbuf = _dbuf;
Mat Dx(1, width, CV_32F, dbuf);
Mat Dy(1, width, CV_32F, dbuf + width);
Mat Mag(1, width, CV_32F, dbuf + width*2);
Mat Angle(1, width, CV_32F, dbuf + width*3);
float angleScale = (float)(nbins/CV_PI);
for( y = 0; y < gradSize.height; y++ )
{
const uchar* currPtr = img.data + img.step*ymap[y];
const uchar* prevPtr = img.data + img.step*ymap[y-1];
const uchar* nextPtr = img.data + img.step*ymap[y+1];
float* gradPtr = (float*)grad.ptr(y);
uchar* qanglePtr = (uchar*)qangle.ptr(y);
for( x = 0; x < width; x++ )
{
dbuf[x] = (float)(currPtr[xmap[x+1]] - currPtr[xmap[x-1]]);
dbuf[width + x] = (float)(nextPtr[xmap[x]] - prevPtr[xmap[x]]);
}
cartToPolar( Dx, Dy, Mag, Angle, false );
for( x = 0; x < width; x++ )
{
float mag = dbuf[x+width*2];
float angle = dbuf[x+width*3];
angle = angle*angleScale - 0.5f;
int bidx = cvFloor(angle);
angle -= bidx;
if( bidx < 0 )
bidx += nbins;
else if( bidx >= nbins )
bidx -= nbins;
qanglePtr[x] = (uchar)bidx;
gradPtr[x] = mag;
}
}
integral(grad, norm, grad.depth());
float* histBuf;
const float* magBuf;
const uchar* binsBuf;
int binsStep = (int)( qangle.step / sizeof(uchar) );
int histStep = (int)( histogram[0].step / sizeof(float) );
int magStep = (int)( grad.step / sizeof(float) );
for( binIdx = 0; binIdx < nbins; binIdx++ )
{
histBuf = (float*)histogram[binIdx].data;
magBuf = (const float*)grad.data;
binsBuf = (const uchar*)qangle.data;
memset( histBuf, 0, histSize.width * sizeof(histBuf[0]) );
histBuf += histStep + 1;
for( y = 0; y < qangle.rows; y++ )
{
histBuf[-1] = 0.f;
float strSum = 0.f;
for( x = 0; x < qangle.cols; x++ )
{
if( binsBuf[x] == binIdx )
strSum += magBuf[x];
histBuf[x] = histBuf[-histStep + x] + strSum;
}
histBuf += histStep;
binsBuf += binsStep;
magBuf += magStep;
}
}
}
Ptr<FeatureEvaluator> FeatureEvaluator::create( int featureType )
{
return featureType == HAAR ? Ptr<FeatureEvaluator>(new HaarEvaluator) :
featureType == LBP ? Ptr<FeatureEvaluator>(new LBPEvaluator) :
featureType == HOG ? Ptr<FeatureEvaluator>(new HOGEvaluator) :
Ptr<FeatureEvaluator>();
}
......@@ -981,24 +884,21 @@ void CascadeClassifierImpl::read(const FileNode& node)
read_(node);
}
int CascadeClassifierImpl::runAt( Ptr<FeatureEvaluator>& evaluator, Point pt, double& weight )
int CascadeClassifierImpl::runAt( Ptr<FeatureEvaluator>& evaluator, Point pt, int scaleIdx, double& weight )
{
CV_Assert( !oldCascade );
assert( data.featureType == FeatureEvaluator::HAAR ||
assert( !oldCascade &&
(data.featureType == FeatureEvaluator::HAAR ||
data.featureType == FeatureEvaluator::LBP ||
data.featureType == FeatureEvaluator::HOG );
data.featureType == FeatureEvaluator::HOG) );
if( !evaluator->setWindow(pt) )
if( !evaluator->setWindow(pt, scaleIdx) )
return -1;
if( data.isStumpBased() )
if( data.maxNodesPerTree == 1 )
{
if( data.featureType == FeatureEvaluator::HAAR )
return predictOrderedStump<HaarEvaluator>( *this, evaluator, weight );
else if( data.featureType == FeatureEvaluator::LBP )
return predictCategoricalStump<LBPEvaluator>( *this, evaluator, weight );
else if( data.featureType == FeatureEvaluator::HOG )
return predictOrderedStump<HOGEvaluator>( *this, evaluator, weight );
else
return -2;
}
......@@ -1008,8 +908,6 @@ int CascadeClassifierImpl::runAt( Ptr<FeatureEvaluator>& evaluator, Point pt, do
return predictOrdered<HaarEvaluator>( *this, evaluator, weight );
else if( data.featureType == FeatureEvaluator::LBP )
return predictCategorical<LBPEvaluator>( *this, evaluator, weight );
else if( data.featureType == FeatureEvaluator::HOG )
return predictOrdered<HOGEvaluator>( *this, evaluator, weight );
else
return -2;
}
......@@ -1036,14 +934,17 @@ Ptr<BaseCascadeClassifier::MaskGenerator> createFaceDetectionMaskGenerator()
class CascadeClassifierInvoker : public ParallelLoopBody
{
public:
CascadeClassifierInvoker( CascadeClassifierImpl& _cc, Size _sz1, int _stripSize, int _yStep, double _factor,
std::vector<Rect>& _vec, std::vector<int>& _levels, std::vector<double>& _weights, bool outputLevels, const Mat& _mask, Mutex* _mtx)
CascadeClassifierInvoker( CascadeClassifierImpl& _cc, int _nscales, int _nstripes,
const FeatureEvaluator::ScaleData* _scaleData,
const int* _stripeSizes, std::vector<Rect>& _vec,
std::vector<int>& _levels, std::vector<double>& _weights,
bool outputLevels, const Mat& _mask, Mutex* _mtx)
{
classifier = &_cc;
processingRectSize = _sz1;
stripSize = _stripSize;
yStep = _yStep;
scalingFactor = _factor;
nscales = _nscales;
nstripes = _nstripes;
scaleData = _scaleData;
stripeSizes = _stripeSizes;
rectangles = &_vec;
rejectLevels = outputLevels ? &_levels : 0;
levelWeights = outputLevels ? &_weights : 0;
......@@ -1054,201 +955,203 @@ public:
void operator()(const Range& range) const
{
Ptr<FeatureEvaluator> evaluator = classifier->featureEvaluator->clone();
double gypWeight = 0.;
Size origWinSize = classifier->data.origWinSize;
Size winSize(cvRound(classifier->data.origWinSize.width * scalingFactor),
cvRound(classifier->data.origWinSize.height * scalingFactor));
int y1 = range.start * stripSize;
int y2 = std::min(range.end * stripSize, processingRectSize.height);
for( int y = y1; y < y2; y += yStep )
for( int scaleIdx = 0; scaleIdx < nscales; scaleIdx++ )
{
for( int x = 0; x < processingRectSize.width; x += yStep )
const FeatureEvaluator::ScaleData& s = scaleData[scaleIdx];
float scalingFactor = s.scale;
int yStep = s.ystep;
int stripeSize = stripeSizes[scaleIdx];
int y0 = range.start*stripeSize;
Size szw = s.getWorkingSize(origWinSize);
int y1 = std::min(range.end*stripeSize, szw.height);
Size winSize(cvRound(origWinSize.width * scalingFactor),
cvRound(origWinSize.height * scalingFactor));
for( int y = y0; y < y1; y += yStep )
{
if ( (!mask.empty()) && (mask.at<uchar>(Point(x,y))==0)) {
continue;
}
double gypWeight;
int result = classifier->runAt(evaluator, Point(x, y), gypWeight);
#if defined (LOG_CASCADE_STATISTIC)
logger.setPoint(Point(x, y), result);
#endif
if( rejectLevels )
for( int x = 0; x < szw.width; x += yStep )
{
if( result == 1 )
result = -(int)classifier->data.stages.size();
if( classifier->data.stages.size() + result == 0 )
int result = classifier->runAt(evaluator, Point(x, y), scaleIdx, gypWeight);
if( rejectLevels )
{
if( result == 1 )
result = -(int)classifier->data.stages.size();
if( classifier->data.stages.size() + result == 0 )
{
mtx->lock();
rectangles->push_back(Rect(cvRound(x*scalingFactor),
cvRound(y*scalingFactor),
winSize.width, winSize.height));
rejectLevels->push_back(-result);
levelWeights->push_back(gypWeight);
mtx->unlock();
}
}
else if( result > 0 )
{
mtx->lock();
rectangles->push_back(Rect(cvRound(x*scalingFactor), cvRound(y*scalingFactor), winSize.width, winSize.height));
rejectLevels->push_back(-result);
levelWeights->push_back(gypWeight);
rectangles->push_back(Rect(cvRound(x*scalingFactor),
cvRound(y*scalingFactor),
winSize.width, winSize.height));
mtx->unlock();
}
if( result == 0 )
x += yStep;
}
else if( result > 0 )
{
mtx->lock();
rectangles->push_back(Rect(cvRound(x*scalingFactor), cvRound(y*scalingFactor),
winSize.width, winSize.height));
mtx->unlock();
}
if( result == 0 )
x += yStep;
}
}
}
CascadeClassifierImpl* classifier;
std::vector<Rect>* rectangles;
Size processingRectSize;
int stripSize, yStep;
double scalingFactor;
int nscales, nstripes;
const FeatureEvaluator::ScaleData* scaleData;
const int* stripeSizes;
std::vector<int> *rejectLevels;
std::vector<double> *levelWeights;
std::vector<float> scales;
Mat mask;
Mutex* mtx;
};
struct getRect { Rect operator ()(const CvAvgComp& e) const { return e.rect; } };
struct getNeighbors { int operator ()(const CvAvgComp& e) const { return e.neighbors; } };
bool CascadeClassifierImpl::detectSingleScale( InputArray _image, Size processingRectSize,
int yStep, double factor, std::vector<Rect>& candidates,
std::vector<int>& levels, std::vector<double>& weights,
Size sumSize0, bool outputRejectLevels )
{
if( !featureEvaluator->setImage(_image, data.origWinSize, sumSize0) )
return false;
#if defined (LOG_CASCADE_STATISTIC)
logger.setImage(image);
#endif
Mat currentMask;
if (maskGenerator) {
Mat image = _image.getMat();
currentMask=maskGenerator->generateMask(image);
}
std::vector<Rect> candidatesVector;
std::vector<int> rejectLevels;
std::vector<double> levelWeights;
int stripCount, stripSize;
const int PTS_PER_THREAD = 1000;
stripCount = ((processingRectSize.width/yStep)*(processingRectSize.height + yStep-1)/yStep + PTS_PER_THREAD/2)/PTS_PER_THREAD;
stripCount = std::min(std::max(stripCount, 1), 100);
stripSize = (((processingRectSize.height + stripCount - 1)/stripCount + yStep-1)/yStep)*yStep;
if( outputRejectLevels )
{
parallel_for_(Range(0, stripCount), CascadeClassifierInvoker( *this, processingRectSize, stripSize, yStep, factor,
candidatesVector, rejectLevels, levelWeights, true, currentMask, &mtx));
levels.insert( levels.end(), rejectLevels.begin(), rejectLevels.end() );
weights.insert( weights.end(), levelWeights.begin(), levelWeights.end() );
}
else
{
parallel_for_(Range(0, stripCount), CascadeClassifierInvoker( *this, processingRectSize, stripSize, yStep, factor,
candidatesVector, rejectLevels, levelWeights, false, currentMask, &mtx));
}
candidates.insert( candidates.end(), candidatesVector.begin(), candidatesVector.end() );
#if defined (LOG_CASCADE_STATISTIC)
logger.write();
#endif
return true;
}
bool CascadeClassifierImpl::ocl_detectSingleScale( InputArray _image, Size processingRectSize,
int yStep, double factor, Size sumSize0 )
bool CascadeClassifierImpl::ocl_detectMultiScaleNoGrouping( const std::vector<float>& scales,
std::vector<Rect>& candidates )
{
int featureType = getFeatureType();
std::vector<UMat> bufs;
size_t globalsize[] = { processingRectSize.width/yStep, processingRectSize.height/yStep };
featureEvaluator->getUMats(bufs);
Size localsz = featureEvaluator->getLocalSize();
if( localsz.area() == 0 )
return false;
Size lbufSize = featureEvaluator->getLocalBufSize();
size_t localsize[] = { localsz.width, localsz.height };
const int grp_per_CU = 12;
size_t globalsize[] = { grp_per_CU*ocl::Device::getDefault().maxComputeUnits()*localsize[0], localsize[1] };
bool ok = false;
ufacepos.create(1, MAX_FACES*3+1, CV_32S);
UMat ufacepos_count(ufacepos, Rect(0, 0, 1, 1));
ufacepos_count.setTo(Scalar::all(0));
if( ustages.empty() )
{
copyVectorToUMat(data.stages, ustages);
copyVectorToUMat(data.stumps, ustumps);
if (!data.stumps.empty())
copyVectorToUMat(data.stumps, unodes);
else
copyVectorToUMat(data.nodes, unodes);
copyVectorToUMat(data.leaves, uleaves);
if( !data.subsets.empty() )
copyVectorToUMat(data.subsets, usubsets);
}
int nstages = (int)data.stages.size();
if( featureType == FeatureEvaluator::HAAR )
{
Ptr<HaarEvaluator> haar = featureEvaluator.dynamicCast<HaarEvaluator>();
if( haar.empty() )
return false;
haar->setImage(_image, data.origWinSize, sumSize0);
if( haarKernel.empty() )
{
haarKernel.create("runHaarClassifierStump", ocl::objdetect::cascadedetect_oclsrc, "");
String opts;
if (lbufSize.area())
opts = format("-D LOCAL_SIZE_X=%d -D LOCAL_SIZE_Y=%d -D SUM_BUF_SIZE=%d -D SUM_BUF_STEP=%d -D NODE_COUNT=%d",
localsz.width, localsz.height, lbufSize.area(), lbufSize.width, data.maxNodesPerTree);
else
opts = format("-D LOCAL_SIZE_X=%d -D LOCAL_SIZE_Y=%d -D NODE_COUNT=%d",
localsz.width, localsz.height, data.maxNodesPerTree);
haarKernel.create("runHaarClassifier", ocl::objdetect::cascadedetect_oclsrc, opts);
if( haarKernel.empty() )
return false;
}
haar->getUMats(bufs);
Rect normrect = haar->getNormRect();
int sqofs = haar->getSquaresOffset();
int splitstage_ocl = 1;
haarKernel.args(ocl::KernelArg::ReadOnlyNoSize(bufs[0]), // sum
ocl::KernelArg::ReadOnlyNoSize(bufs[1]), // sqsum
haarKernel.args((int)scales.size(),
ocl::KernelArg::PtrReadOnly(bufs[0]), // scaleData
ocl::KernelArg::ReadOnlyNoSize(bufs[1]), // sum
ocl::KernelArg::PtrReadOnly(bufs[2]), // optfeatures
// cascade classifier
(int)data.stages.size(),
splitstage_ocl, nstages,
ocl::KernelArg::PtrReadOnly(ustages),
ocl::KernelArg::PtrReadOnly(ustumps),
ocl::KernelArg::PtrReadOnly(unodes),
ocl::KernelArg::PtrReadOnly(uleaves),
ocl::KernelArg::PtrWriteOnly(ufacepos), // positions
processingRectSize,
yStep, (float)factor,
normrect, data.origWinSize, (int)MAX_FACES);
ok = haarKernel.run(2, globalsize, 0, true);
normrect, sqofs, data.origWinSize, (int)MAX_FACES);
ok = haarKernel.run(2, globalsize, localsize, true);
}
else if( featureType == FeatureEvaluator::LBP )
{
if (data.maxNodesPerTree > 1)
return false;
Ptr<LBPEvaluator> lbp = featureEvaluator.dynamicCast<LBPEvaluator>();
if( lbp.empty() )
return false;
lbp->setImage(_image, data.origWinSize, sumSize0);
if( lbpKernel.empty() )
{
lbpKernel.create("runLBPClassifierStump", ocl::objdetect::cascadedetect_oclsrc, "");
String opts;
if (lbufSize.area())
opts = format("-D LOCAL_SIZE_X=%d -D LOCAL_SIZE_Y=%d -D SUM_BUF_SIZE=%d -D SUM_BUF_STEP=%d",
localsz.width, localsz.height, lbufSize.area(), lbufSize.width);
else
opts = format("-D LOCAL_SIZE_X=%d -D LOCAL_SIZE_Y=%d", localsz.width, localsz.height);
lbpKernel.create("runLBPClassifierStumpSimple", ocl::objdetect::cascadedetect_oclsrc, opts);
if( lbpKernel.empty() )
return false;
}
lbp->getUMats(bufs);
int splitstage_ocl = 1;
int subsetSize = (data.ncategories + 31)/32;
lbpKernel.args(ocl::KernelArg::ReadOnlyNoSize(bufs[0]), // sum
ocl::KernelArg::PtrReadOnly(bufs[1]), // optfeatures
lbpKernel.args((int)scales.size(),
ocl::KernelArg::PtrReadOnly(bufs[0]), // scaleData
ocl::KernelArg::ReadOnlyNoSize(bufs[1]), // sum
ocl::KernelArg::PtrReadOnly(bufs[2]), // optfeatures
// cascade classifier
splitstage_ocl, nstages,
ocl::KernelArg::PtrReadOnly(ustages),
ocl::KernelArg::PtrReadOnly(unodes),
ocl::KernelArg::PtrReadOnly(usubsets),
subsetSize,
ocl::KernelArg::PtrWriteOnly(ufacepos), // positions
data.origWinSize, (int)MAX_FACES);
ok = lbpKernel.run(2, globalsize, localsize, true);
}
// cascade classifier
(int)data.stages.size(),
ocl::KernelArg::PtrReadOnly(ustages),
ocl::KernelArg::PtrReadOnly(ustumps),
ocl::KernelArg::PtrReadOnly(usubsets),
subsetSize,
if( ok )
{
Mat facepos = ufacepos.getMat(ACCESS_READ);
const int* fptr = facepos.ptr<int>();
int nfaces = fptr[0];
nfaces = std::min(nfaces, (int)MAX_FACES);
ocl::KernelArg::PtrWriteOnly(ufacepos), // positions
processingRectSize,
yStep, (float)factor,
data.origWinSize, (int)MAX_FACES);
ok = lbpKernel.run(2, globalsize, 0, true);
for( int i = 0; i < nfaces; i++ )
{
const FeatureEvaluator::ScaleData& s = featureEvaluator->getScaleData(fptr[i*3 + 1]);
candidates.push_back(Rect(cvRound(fptr[i*3 + 2]*s.scale),
cvRound(fptr[i*3 + 3]*s.scale),
cvRound(data.origWinSize.width*s.scale),
cvRound(data.origWinSize.height*s.scale)));
}
}
//CV_Assert(ok);
return ok;
}
......@@ -1296,11 +1199,10 @@ void CascadeClassifierImpl::detectMultiScaleNoGrouping( InputArray _image, std::
double scaleFactor, Size minObjectSize, Size maxObjectSize,
bool outputRejectLevels )
{
int featureType = getFeatureType();
Size imgsz = _image.size();
int imgtype = _image.type();
Mat grayImage, imageBuffer;
Mat grayImage;
_InputArray gray;
candidates.clear();
rejectLevels.clear();
......@@ -1309,120 +1211,86 @@ void CascadeClassifierImpl::detectMultiScaleNoGrouping( InputArray _image, std::
if( maxObjectSize.height == 0 || maxObjectSize.width == 0 )
maxObjectSize = imgsz;
bool use_ocl = ocl::useOpenCL() &&
(featureType == FeatureEvaluator::HAAR ||
featureType == FeatureEvaluator::LBP) &&
ocl::Device::getDefault().type() != ocl::Device::TYPE_CPU &&
!isOldFormatCascade() &&
data.isStumpBased() &&
maskGenerator.empty() &&
!outputRejectLevels &&
tryOpenCL;
if( !use_ocl )
{
Mat image = _image.getMat();
if (maskGenerator)
maskGenerator->initializeMask(image);
grayImage = image;
if( CV_MAT_CN(imgtype) > 1 )
{
Mat temp;
cvtColor(grayImage, temp, COLOR_BGR2GRAY);
grayImage = temp;
}
bool use_ocl = tryOpenCL && ocl::useOpenCL() &&
featureEvaluator->getLocalSize().area() > 0 &&
ocl::Device::getDefault().type() != ocl::Device::TYPE_CPU &&
(data.minNodesPerTree == data.maxNodesPerTree) &&
!isOldFormatCascade() &&
maskGenerator.empty() &&
!outputRejectLevels;
imageBuffer.create(imgsz.height + 1, imgsz.width + 1, CV_8U);
}
else
/*if( use_ocl )
{
UMat uimage = _image.getUMat();
if( CV_MAT_CN(imgtype) > 1 )
cvtColor(uimage, ugrayImage, COLOR_BGR2GRAY);
if (_image.channels() > 1)
cvtColor(_image, ugrayImage, COLOR_BGR2GRAY);
else if (_image.isUMat())
ugrayImage = _image.getUMat();
else
uimage.copyTo(ugrayImage);
uimageBuffer.create(imgsz.height + 1, imgsz.width + 1, CV_8U);
_image.copyTo(ugrayImage);
gray = ugrayImage;
}
Size sumSize0((imgsz.width + SUM_ALIGN) & -SUM_ALIGN, imgsz.height+1);
if( use_ocl )
else*/
{
ufacepos.create(1, MAX_FACES*4 + 1, CV_32S);
UMat ufacecount(ufacepos, Rect(0,0,1,1));
ufacecount.setTo(Scalar::all(0));
if (_image.channels() > 1)
cvtColor(_image, grayImage, COLOR_BGR2GRAY);
else if (_image.isMat())
grayImage = _image.getMat();
else
_image.copyTo(grayImage);
gray = grayImage;
}
std::vector<float> scales;
scales.reserve(1024);
for( double factor = 1; ; factor *= scaleFactor )
{
Size originalWindowSize = getOriginalWindowSize();
Size windowSize( cvRound(originalWindowSize.width*factor), cvRound(originalWindowSize.height*factor) );
Size scaledImageSize( cvRound( imgsz.width/factor ), cvRound( imgsz.height/factor ) );
Size processingRectSize( scaledImageSize.width - originalWindowSize.width,
scaledImageSize.height - originalWindowSize.height );
if( processingRectSize.width <= 0 || processingRectSize.height <= 0 )
break;
if( windowSize.width > maxObjectSize.width || windowSize.height > maxObjectSize.height )
if( windowSize.width > maxObjectSize.width || windowSize.height > maxObjectSize.height ||
windowSize.width > imgsz.width || windowSize.height > imgsz.height )
break;
if( windowSize.width < minObjectSize.width || windowSize.height < minObjectSize.height )
continue;
scales.push_back((float)factor);
}
int yStep;
if( getFeatureType() == cv::FeatureEvaluator::HOG )
{
yStep = 4;
}
else
{
yStep = factor > 2. ? 1 : 2;
}
if( use_ocl )
{
UMat uscaledImage(uimageBuffer, Rect(0, 0, scaledImageSize.width, scaledImageSize.height));
resize( ugrayImage, uscaledImage, scaledImageSize, 0, 0, INTER_LINEAR );
if( ocl_detectSingleScale( uscaledImage, processingRectSize, yStep, factor, sumSize0 ) )
continue;
/////// if the OpenCL branch has been executed but failed, fall back to CPU: /////
tryOpenCL = false; // for this cascade do not try OpenCL anymore
// since we may already have some partial results from OpenCL code (unlikely, but still),
// we just recursively call the function again, but with tryOpenCL==false it will
// go with CPU route, so there is no infinite recursion
detectMultiScaleNoGrouping( _image, candidates, rejectLevels, levelWeights,
scaleFactor, minObjectSize, maxObjectSize,
outputRejectLevels);
return;
}
else
{
Mat scaledImage( scaledImageSize, CV_8U, imageBuffer.data );
resize( grayImage, scaledImage, scaledImageSize, 0, 0, INTER_LINEAR );
if( !featureEvaluator->setImage(gray, scales) )
return;
if( !detectSingleScale( scaledImage, processingRectSize, yStep, factor, candidates,
rejectLevels, levelWeights, sumSize0, outputRejectLevels ) )
break;
}
}
// OpenCL code
if( use_ocl && ocl_detectMultiScaleNoGrouping( scales, candidates ))
return;
tryOpenCL = false;
if( use_ocl && tryOpenCL )
// CPU code
featureEvaluator->getMats();
{
Mat facepos = ufacepos.getMat(ACCESS_READ);
const int* fptr = facepos.ptr<int>();
int i, nfaces = fptr[0];
for( i = 0; i < nfaces; i++ )
Mat currentMask;
if (maskGenerator)
currentMask = maskGenerator->generateMask(gray.getMat());
size_t i, nscales = scales.size();
cv::AutoBuffer<int> stripeSizeBuf(nscales);
int* stripeSizes = stripeSizeBuf;
const FeatureEvaluator::ScaleData* s = &featureEvaluator->getScaleData(0);
Size szw = s->getWorkingSize(data.origWinSize);
int nstripes = cvCeil(szw.width/32.);
for( i = 0; i < nscales; i++ )
{
candidates.push_back(Rect(fptr[i*4+1], fptr[i*4+2], fptr[i*4+3], fptr[i*4+4]));
szw = s[i].getWorkingSize(data.origWinSize);
stripeSizes[i] = std::max((szw.height/s[i].ystep + nstripes-1)/nstripes, 1)*s[i].ystep;
}
CascadeClassifierInvoker invoker(*this, (int)nscales, nstripes, s, stripeSizes,
candidates, rejectLevels, levelWeights,
outputRejectLevels, currentMask, &mtx);
parallel_for_(Range(0, nstripes), invoker);
}
}
void CascadeClassifierImpl::detectMultiScale( InputArray _image, std::vector<Rect>& objects,
std::vector<int>& rejectLevels,
std::vector<double>& levelWeights,
......@@ -1462,10 +1330,9 @@ void CascadeClassifierImpl::detectMultiScale( InputArray _image, std::vector<Rec
double scaleFactor, int minNeighbors,
int flags, Size minObjectSize, Size maxObjectSize)
{
Mat image = _image.getMat();
std::vector<int> fakeLevels;
std::vector<double> fakeWeights;
detectMultiScale( image, objects, fakeLevels, fakeWeights, scaleFactor,
detectMultiScale( _image, objects, fakeLevels, fakeWeights, scaleFactor,
minNeighbors, flags, minObjectSize, maxObjectSize );
}
......@@ -1550,6 +1417,7 @@ bool CascadeClassifierImpl::Data::read(const FileNode &root)
stumps.clear();
FileNodeIterator it = fn.begin(), it_end = fn.end();
minNodesPerTree = INT_MAX;
maxNodesPerTree = 0;
for( int si = 0; it != it_end; si++, ++it )
......@@ -1576,6 +1444,7 @@ bool CascadeClassifierImpl::Data::read(const FileNode &root)
DTree tree;
tree.nodeCount = (int)internalNodes.size()/nodeStep;
minNodesPerTree = std::min(minNodesPerTree, tree.nodeCount);
maxNodesPerTree = std::max(maxNodesPerTree, tree.nodeCount);
classifiers.push_back(tree);
......@@ -1613,7 +1482,7 @@ bool CascadeClassifierImpl::Data::read(const FileNode &root)
}
}
if( isStumpBased() )
if( maxNodesPerTree == 1 )
{
int nodeOfs = 0, leafOfs = 0;
size_t nstages = stages.size();
......@@ -1641,7 +1510,8 @@ bool CascadeClassifierImpl::read_(const FileNode& root)
haarKernel = ocl::Kernel();
lbpKernel = ocl::Kernel();
ustages.release();
ustumps.release();
unodes.release();
uleaves.release();
if( !data.read(root) )
return false;
......@@ -1651,7 +1521,7 @@ bool CascadeClassifierImpl::read_(const FileNode& root)
if( fn.empty() )
return false;
return featureEvaluator->read(fn);
return featureEvaluator->read(fn, data.origWinSize);
}
template<> void DefaultDeleter<CvHaarClassifierCascade>::operator ()(CvHaarClassifierCascade* obj) const
......
......@@ -3,6 +3,72 @@
namespace cv
{
class FeatureEvaluator
{
public:
enum
{
HAAR = 0,
LBP = 1,
HOG = 2
};
struct ScaleData
{
ScaleData() { scale = 0.f; layer_ofs = ystep = 0; }
Size getWorkingSize(Size winSize) const
{
return Size(std::max(szi.width - winSize.width, 0),
std::max(szi.height - winSize.height, 0));
}
float scale;
Size szi;
int layer_ofs, ystep;
};
virtual ~FeatureEvaluator();
virtual bool read(const FileNode& node, Size origWinSize);
virtual Ptr<FeatureEvaluator> clone() const;
virtual int getFeatureType() const;
int getNumChannels() const { return nchannels; }
virtual bool setImage(InputArray img, const std::vector<float>& scales);
virtual bool setWindow(Point p, int scaleIdx);
const ScaleData& getScaleData(int scaleIdx) const
{
CV_Assert( 0 <= scaleIdx && scaleIdx < (int)scaleData->size());
return scaleData->at(scaleIdx);
}
virtual void getUMats(std::vector<UMat>& bufs);
virtual void getMats();
Size getLocalSize() const { return localSize; }
Size getLocalBufSize() const { return lbufSize; }
virtual float calcOrd(int featureIdx) const;
virtual int calcCat(int featureIdx) const;
static Ptr<FeatureEvaluator> create(int type);
protected:
enum { SBUF_VALID=1, USBUF_VALID=2 };
int sbufFlag;
bool updateScaleData( Size imgsz, const std::vector<float>& _scales );
virtual void computeChannels( int, InputArray ) {}
virtual void computeOptFeatures() {}
Size origWinSize, sbufSize, localSize, lbufSize;
int nchannels;
Mat sbuf, rbuf;
UMat urbuf, usbuf, ufbuf, uscaleData;
Ptr<std::vector<ScaleData> > scaleData;
};
class CascadeClassifierImpl : public BaseCascadeClassifier
{
public:
......@@ -54,9 +120,8 @@ protected:
int yStep, double factor, std::vector<Rect>& candidates,
std::vector<int>& rejectLevels, std::vector<double>& levelWeights,
Size sumSize0, bool outputRejectLevels = false );
bool ocl_detectSingleScale( InputArray image, Size processingRectSize,
int yStep, double factor, Size sumSize0 );
bool ocl_detectMultiScaleNoGrouping( const std::vector<float>& scales,
std::vector<Rect>& candidates );
void detectMultiScaleNoGrouping( InputArray image, std::vector<Rect>& candidates,
std::vector<int>& rejectLevels, std::vector<double>& levelWeights,
......@@ -72,6 +137,7 @@ protected:
};
friend class CascadeClassifierInvoker;
friend class SparseCascadeClassifierInvoker;
template<class FEval>
friend int predictOrdered( CascadeClassifierImpl& cascade, Ptr<FeatureEvaluator> &featureEvaluator, double& weight);
......@@ -85,7 +151,7 @@ protected:
template<class FEval>
friend int predictCategoricalStump( CascadeClassifierImpl& cascade, Ptr<FeatureEvaluator> &featureEvaluator, double& weight);
int runAt( Ptr<FeatureEvaluator>& feval, Point pt, double& weight );
int runAt( Ptr<FeatureEvaluator>& feval, Point pt, int scaleIdx, double& weight );
class Data
{
......@@ -126,12 +192,10 @@ protected:
bool read(const FileNode &node);
bool isStumpBased() const { return maxNodesPerTree == 1; }
int stageType;
int featureType;
int ncategories;
int maxNodesPerTree;
int minNodesPerTree, maxNodesPerTree;
Size origWinSize;
std::vector<Stage> stages;
......@@ -147,8 +211,8 @@ protected:
Ptr<CvHaarClassifierCascade> oldCascade;
Ptr<MaskGenerator> maskGenerator;
UMat ugrayImage, uimageBuffer;
UMat ufacepos, ustages, ustumps, usubsets;
UMat ugrayImage;
UMat ufacepos, ustages, unodes, uleaves, usubsets;
ocl::Kernel haarKernel, lbpKernel;
bool tryOpenCL;
......@@ -268,7 +332,6 @@ public:
enum { RECT_NUM = Feature::RECT_NUM };
float calc( const int* pwin ) const;
void setOffsets( const Feature& _f, int step, int tofs );
int ofs[RECT_NUM][4];
......@@ -278,35 +341,34 @@ public:
HaarEvaluator();
virtual ~HaarEvaluator();
virtual bool read( const FileNode& node );
virtual bool read( const FileNode& node, Size origWinSize);
virtual Ptr<FeatureEvaluator> clone() const;
virtual int getFeatureType() const { return FeatureEvaluator::HAAR; }
virtual bool setImage(InputArray, Size origWinSize, Size sumSize);
virtual bool setWindow(Point pt);
virtual Rect getNormRect() const;
virtual void getUMats(std::vector<UMat>& bufs);
virtual bool setWindow(Point p, int scaleIdx);
Rect getNormRect() const;
int getSquaresOffset() const;
double operator()(int featureIdx) const
float operator()(int featureIdx) const
{ return optfeaturesPtr[featureIdx].calc(pwin) * varianceNormFactor; }
virtual double calcOrd(int featureIdx) const
virtual float calcOrd(int featureIdx) const
{ return (*this)(featureIdx); }
protected:
Size origWinSize, sumSize0;
virtual void computeChannels( int i, InputArray img );
virtual void computeOptFeatures();
Ptr<std::vector<Feature> > features;
Ptr<std::vector<OptFeature> > optfeatures;
OptFeature* optfeaturesPtr; // optimization
Ptr<std::vector<OptFeature> > optfeatures_lbuf;
bool hasTiltedFeatures;
Mat sum0, sum, sqsum0, sqsum;
UMat usum0, usum, usqsum0, usqsum, ufbuf;
int tofs, sqofs;
Vec4i nofs;
Rect normrect;
int nofs[4];
const int* pwin;
double varianceNormFactor;
OptFeature* optfeaturesPtr; // optimization
float varianceNormFactor;
};
inline HaarEvaluator::Feature :: Feature()
......@@ -336,28 +398,6 @@ inline float HaarEvaluator::OptFeature :: calc( const int* ptr ) const
return ret;
}
inline void HaarEvaluator::OptFeature :: setOffsets( const Feature& _f, int step, int tofs )
{
weight[0] = _f.rect[0].weight;
weight[1] = _f.rect[1].weight;
weight[2] = _f.rect[2].weight;
Rect r2 = weight[2] > 0 ? _f.rect[2].r : Rect(0,0,0,0);
if (_f.tilted)
{
CV_TILTED_OFS( ofs[0][0], ofs[0][1], ofs[0][2], ofs[0][3], tofs, _f.rect[0].r, step );
CV_TILTED_OFS( ofs[1][0], ofs[1][1], ofs[1][2], ofs[1][3], tofs, _f.rect[1].r, step );
CV_TILTED_PTRS( ofs[2][0], ofs[2][1], ofs[2][2], ofs[2][3], tofs, r2, step );
}
else
{
CV_SUM_OFS( ofs[0][0], ofs[0][1], ofs[0][2], ofs[0][3], 0, _f.rect[0].r, step );
CV_SUM_OFS( ofs[1][0], ofs[1][1], ofs[1][2], ofs[1][3], 0, _f.rect[1].r, step );
CV_SUM_OFS( ofs[2][0], ofs[2][1], ofs[2][2], ofs[2][3], 0, r2, step );
}
}
//---------------------------------------------- LBPEvaluator -------------------------------------
class LBPEvaluator : public FeatureEvaluator
......@@ -367,7 +407,7 @@ public:
{
Feature();
Feature( int x, int y, int _block_w, int _block_h ) :
rect(x, y, _block_w, _block_h) {}
rect(x, y, _block_w, _block_h) {}
bool read(const FileNode& node );
......@@ -386,27 +426,25 @@ public:
LBPEvaluator();
virtual ~LBPEvaluator();
virtual bool read( const FileNode& node );
virtual bool read( const FileNode& node, Size origWinSize );
virtual Ptr<FeatureEvaluator> clone() const;
virtual int getFeatureType() const { return FeatureEvaluator::LBP; }
virtual bool setImage(InputArray image, Size _origWinSize, Size);
virtual bool setWindow(Point pt);
virtual void getUMats(std::vector<UMat>& bufs);
virtual bool setWindow(Point p, int scaleIdx);
int operator()(int featureIdx) const
{ return optfeaturesPtr[featureIdx].calc(pwin); }
virtual int calcCat(int featureIdx) const
{ return (*this)(featureIdx); }
protected:
Size origWinSize, sumSize0;
virtual void computeChannels( int i, InputArray img );
virtual void computeOptFeatures();
Ptr<std::vector<Feature> > features;
Ptr<std::vector<OptFeature> > optfeatures;
Ptr<std::vector<OptFeature> > optfeatures_lbuf;
OptFeature* optfeaturesPtr; // optimization
Mat sum0, sum;
UMat usum0, usum, ufbuf;
const int* pwin;
};
......@@ -436,98 +474,6 @@ inline int LBPEvaluator::OptFeature :: calc( const int* p ) const
(CALC_SUM_OFS_( ofs[4], ofs[5], ofs[8], ofs[9], p ) >= cval ? 1 : 0);
}
inline void LBPEvaluator::OptFeature :: setOffsets( const Feature& _f, int step )
{
Rect tr = _f.rect;
CV_SUM_OFS( ofs[0], ofs[1], ofs[4], ofs[5], 0, tr, step );
tr.x += 2*_f.rect.width;
CV_SUM_OFS( ofs[2], ofs[3], ofs[6], ofs[7], 0, tr, step );
tr.y += 2*_f.rect.height;
CV_SUM_OFS( ofs[10], ofs[11], ofs[14], ofs[15], 0, tr, step );
tr.x -= 2*_f.rect.width;
CV_SUM_OFS( ofs[8], ofs[9], ofs[12], ofs[13], 0, tr, step );
}
//---------------------------------------------- HOGEvaluator -------------------------------------------
class HOGEvaluator : public FeatureEvaluator
{
public:
struct Feature
{
Feature();
float calc( int offset ) const;
void updatePtrs( const std::vector<Mat>& _hist, const Mat &_normSum );
bool read( const FileNode& node );
enum { CELL_NUM = 4, BIN_NUM = 9 };
Rect rect[CELL_NUM];
int featComponent; //component index from 0 to 35
const float* pF[4]; //for feature calculation
const float* pN[4]; //for normalization calculation
};
HOGEvaluator();
virtual ~HOGEvaluator();
virtual bool read( const FileNode& node );
virtual Ptr<FeatureEvaluator> clone() const;
virtual int getFeatureType() const { return FeatureEvaluator::HOG; }
virtual bool setImage( InputArray image, Size winSize, Size );
virtual bool setWindow( Point pt );
double operator()(int featureIdx) const
{
return featuresPtr[featureIdx].calc(offset);
}
virtual double calcOrd( int featureIdx ) const
{
return (*this)(featureIdx);
}
private:
virtual void integralHistogram( const Mat& srcImage, std::vector<Mat> &histogram, Mat &norm, int nbins ) const;
Size origWinSize;
Ptr<std::vector<Feature> > features;
Feature* featuresPtr;
std::vector<Mat> hist;
Mat normSum;
int offset;
};
inline HOGEvaluator::Feature :: Feature()
{
rect[0] = rect[1] = rect[2] = rect[3] = Rect();
pF[0] = pF[1] = pF[2] = pF[3] = 0;
pN[0] = pN[1] = pN[2] = pN[3] = 0;
featComponent = 0;
}
inline float HOGEvaluator::Feature :: calc( int _offset ) const
{
float res = CALC_SUM(pF, _offset);
float normFactor = CALC_SUM(pN, _offset);
res = (res > 0.001f) ? (res / ( normFactor + 0.001f) ) : 0.f;
return res;
}
inline void HOGEvaluator::Feature :: updatePtrs( const std::vector<Mat> &_hist, const Mat &_normSum )
{
int binIdx = featComponent % BIN_NUM;
int cellIdx = featComponent / BIN_NUM;
Rect normRect = Rect( rect[0].x, rect[0].y, 2*rect[0].width, 2*rect[0].height );
const float* featBuf = (const float*)_hist[binIdx].data;
size_t featStep = _hist[0].step / sizeof(featBuf[0]);
const float* normBuf = (const float*)_normSum.data;
size_t normStep = _normSum.step / sizeof(normBuf[0]);
CV_SUM_PTRS( pF[0], pF[1], pF[2], pF[3], featBuf, rect[cellIdx], featStep );
CV_SUM_PTRS( pN[0], pN[1], pN[2], pN[3], normBuf, normRect, normStep );
}
//---------------------------------------------- predictor functions -------------------------------------
......@@ -662,11 +608,7 @@ inline int predictCategoricalStump( CascadeClassifierImpl& cascade,
const CascadeClassifierImpl::Data::Stump* cascadeStumps = &cascade.data.stumps[0];
const CascadeClassifierImpl::Data::Stage* cascadeStages = &cascade.data.stages[0];
#ifdef HAVE_TEGRA_OPTIMIZATION
float tmp = 0; // float accumulator -- float operations are quicker
#else
double tmp = 0;
#endif
float tmp = 0;
for( int si = 0; si < nstages; si++ )
{
const CascadeClassifierImpl::Data::Stage& stage = cascadeStages[si];
......
///////////////////////////// OpenCL kernels for face detection //////////////////////////////
////////////////////////////// see the opencv/doc/license.txt ///////////////////////////////
//
// the code has been derived from the OpenCL Haar cascade kernel by
//
// Niko Li, newlife20080214@gmail.com
// Wang Weiyan, wangweiyanster@gmail.com
// Jia Haipeng, jiahaipeng95@gmail.com
// Nathan, liujun@multicorewareinc.com
// Peng Xiao, pengxiao@outlook.com
// Erping Pang, erping@multicorewareinc.com
//
typedef struct __attribute__((aligned(4))) OptHaarFeature
{
int4 ofs[3] __attribute__((aligned (4)));
......@@ -20,6 +32,12 @@ typedef struct __attribute__((aligned(4))) Stump
}
Stump;
typedef struct __attribute__((aligned(4))) Node
{
int4 n __attribute__((aligned (4)));
}
Node;
typedef struct __attribute__((aligned (4))) Stage
{
int first __attribute__((aligned (4)));
......@@ -28,151 +46,614 @@ typedef struct __attribute__((aligned (4))) Stage
}
Stage;
__kernel void runHaarClassifierStump(
typedef struct __attribute__((aligned (4))) ScaleData
{
float scale __attribute__((aligned (4)));
int szi_width __attribute__((aligned (4)));
int szi_height __attribute__((aligned (4)));
int layer_ofs __attribute__((aligned (4)));
int ystep __attribute__((aligned (4)));
}
ScaleData;
#ifndef SUM_BUF_SIZE
#define SUM_BUF_SIZE 0
#endif
#ifndef NODE_COUNT
#define NODE_COUNT 1
#endif
__kernel __attribute__((reqd_work_group_size(LOCAL_SIZE_X,LOCAL_SIZE_Y,1)))
void runHaarClassifier(
int nscales, __global const ScaleData* scaleData,
__global const int* sum,
int sumstep, int sumoffset,
__global const int* sqsum,
int sqsumstep, int sqsumoffset,
int _sumstep, int sumoffset,
__global const OptHaarFeature* optfeatures,
int nstages,
int splitstage, int nstages,
__global const Stage* stages,
__global const Stump* stumps,
__global const Node* nodes,
__global const float* leaves0,
volatile __global int* facepos,
int2 imgsize, int xyscale, float factor,
int4 normrect, int2 windowsize, int maxFaces)
int4 normrect, int sqofs, int2 windowsize, int maxFaces)
{
int ix = get_global_id(0)*xyscale;
int iy = get_global_id(1)*xyscale;
sumstep /= sizeof(int);
sqsumstep /= sizeof(int);
int lx = get_local_id(0);
int ly = get_local_id(1);
int groupIdx = get_group_id(0);
int i, ngroups = get_global_size(0)/LOCAL_SIZE_X;
int scaleIdx, tileIdx, stageIdx;
int sumstep = (int)(_sumstep/sizeof(int));
int4 nofs0 = (int4)(mad24(normrect.y, sumstep, normrect.x),
mad24(normrect.y, sumstep, normrect.x + normrect.z),
mad24(normrect.y + normrect.w, sumstep, normrect.x),
mad24(normrect.y + normrect.w, sumstep, normrect.x + normrect.z));
int normarea = normrect.z * normrect.w;
float invarea = 1.f/normarea;
int lidx = ly*LOCAL_SIZE_X + lx;
if( ix < imgsize.x && iy < imgsize.y )
#if SUM_BUF_SIZE > 0
int4 nofs = (int4)(mad24(normrect.y, SUM_BUF_STEP, normrect.x),
mad24(normrect.y, SUM_BUF_STEP, normrect.x + normrect.z),
mad24(normrect.y + normrect.w, SUM_BUF_STEP, normrect.x),
mad24(normrect.y + normrect.w, SUM_BUF_STEP, normrect.x + normrect.z));
#else
int4 nofs = nofs0;
#endif
#define LOCAL_SIZE (LOCAL_SIZE_X*LOCAL_SIZE_Y)
__local int lstore[SUM_BUF_SIZE + LOCAL_SIZE*5/2+1];
#if SUM_BUF_SIZE > 0
__local int* ibuf = lstore;
__local int* lcount = ibuf + SUM_BUF_SIZE;
#else
__local int* lcount = lstore;
#endif
__local float* lnf = (__local float*)(lcount + 1);
__local float* lpartsum = lnf + LOCAL_SIZE;
__local short* lbuf = (__local short*)(lpartsum + LOCAL_SIZE);
for( scaleIdx = nscales-1; scaleIdx >= 0; scaleIdx-- )
{
int stageIdx;
__global const Stump* stump = stumps;
__global const int* psum = sum + mad24(iy, sumstep, ix);
__global const int* pnsum = psum + mad24(normrect.y, sumstep, normrect.x);
int normarea = normrect.z * normrect.w;
float invarea = 1.f/normarea;
float sval = (pnsum[0] - pnsum[normrect.z] - pnsum[mul24(normrect.w, sumstep)] +
pnsum[mad24(normrect.w, sumstep, normrect.z)])*invarea;
float sqval = (sqsum[mad24(iy + normrect.y, sqsumstep, ix + normrect.x)])*invarea;
float nf = (float)normarea * sqrt(max(sqval - sval * sval, 0.f));
nf = nf > 0 ? nf : 1.f;
for( stageIdx = 0; stageIdx < nstages; stageIdx++ )
__global const ScaleData* s = scaleData + scaleIdx;
int ystep = s->ystep;
int2 worksize = (int2)(max(s->szi_width - windowsize.x, 0), max(s->szi_height - windowsize.y, 0));
int2 ntiles = (int2)((worksize.x + LOCAL_SIZE_X-1)/LOCAL_SIZE_X,
(worksize.y + LOCAL_SIZE_Y-1)/LOCAL_SIZE_Y);
int totalTiles = ntiles.x*ntiles.y;
for( tileIdx = groupIdx; tileIdx < totalTiles; tileIdx += ngroups )
{
int i, ntrees = stages[stageIdx].ntrees;
float s = 0.f;
for( i = 0; i < ntrees; i++, stump++ )
int ix0 = (tileIdx % ntiles.x)*LOCAL_SIZE_X;
int iy0 = (tileIdx / ntiles.x)*LOCAL_SIZE_Y;
int ix = lx, iy = ly;
__global const int* psum0 = sum + mad24(iy0, sumstep, ix0) + s->layer_ofs;
__global const int* psum1 = psum0 + mad24(iy, sumstep, ix);
if( ix0 >= worksize.x || iy0 >= worksize.y )
continue;
#if SUM_BUF_SIZE > 0
for( i = lidx*4; i < SUM_BUF_SIZE; i += LOCAL_SIZE_X*LOCAL_SIZE_Y*4 )
{
float4 st = stump->st;
__global const OptHaarFeature* f = optfeatures + as_int(st.x);
float4 weight = f->weight;
int4 ofs = f->ofs[0];
sval = (psum[ofs.x] - psum[ofs.y] - psum[ofs.z] + psum[ofs.w])*weight.x;
ofs = f->ofs[1];
sval += (psum[ofs.x] - psum[ofs.y] - psum[ofs.z] + psum[ofs.w])*weight.y;
if( weight.z > 0 )
int dy = i/SUM_BUF_STEP, dx = i - dy*SUM_BUF_STEP;
vstore4(vload4(0, psum0 + mad24(dy, sumstep, dx)), 0, ibuf+i);
}
barrier(CLK_LOCAL_MEM_FENCE);
#endif
if( lidx == 0 )
lcount[0] = 0;
barrier(CLK_LOCAL_MEM_FENCE);
if( ix0 + ix < worksize.x && iy0 + iy < worksize.y )
{
#if NODE_COUNT==1
__global const Stump* stump = (__global const Stump*)nodes;
#else
__global const Node* node = nodes;
__global const float* leaves = leaves0;
#endif
#if SUM_BUF_SIZE > 0
__local const int* psum = ibuf + mad24(iy, SUM_BUF_STEP, ix);
#else
__global const int* psum = psum1;
#endif
__global const float* psqsum = (__global const float*)(psum1 + sqofs);
float sval = (psum[nofs.x] - psum[nofs.y] - psum[nofs.z] + psum[nofs.w])*invarea;
float sqval = (psqsum[nofs0.x] - psqsum[nofs0.y] - psqsum[nofs0.z] + psqsum[nofs0.w])*invarea;
float nf = (float)normarea * sqrt(max(sqval - sval * sval, 0.f));
nf = nf > 0 ? nf : 1.f;
for( stageIdx = 0; stageIdx < splitstage; stageIdx++ )
{
ofs = f->ofs[2];
sval += (psum[ofs.x] - psum[ofs.y] - psum[ofs.z] + psum[ofs.w])*weight.z;
int ntrees = stages[stageIdx].ntrees;
float s = 0.f;
#if NODE_COUNT==1
for( i = 0; i < ntrees; i++ )
{
float4 st = stump[i].st;
__global const OptHaarFeature* f = optfeatures + as_int(st.x);
float4 weight = f->weight;
int4 ofs = f->ofs[0];
sval = (psum[ofs.x] - psum[ofs.y] - psum[ofs.z] + psum[ofs.w])*weight.x;
ofs = f->ofs[1];
sval += (psum[ofs.x] - psum[ofs.y] - psum[ofs.z] + psum[ofs.w])*weight.y;
if( weight.z > 0 )
{
ofs = f->ofs[2];
sval += (psum[ofs.x] - psum[ofs.y] - psum[ofs.z] + psum[ofs.w])*weight.z;
}
s += (sval < st.y*nf) ? st.z : st.w;
}
stump += ntrees;
#else
for( i = 0; i < ntrees; i++, node += NODE_COUNT, leaves += NODE_COUNT+1 )
{
int idx = 0;
do
{
int4 n = node[idx].n;
__global const OptHaarFeature* f = optfeatures + n.x;
float4 weight = f->weight;
int4 ofs = f->ofs[0];
sval = (psum[ofs.x] - psum[ofs.y] - psum[ofs.z] + psum[ofs.w])*weight.x;
ofs = f->ofs[1];
sval += (psum[ofs.x] - psum[ofs.y] - psum[ofs.z] + psum[ofs.w])*weight.y;
if( weight.z > 0 )
{
ofs = f->ofs[2];
sval += (psum[ofs.x] - psum[ofs.y] - psum[ofs.z] + psum[ofs.w])*weight.z;
}
idx = (sval < as_float(n.y)*nf) ? n.z : n.w;
}
while(idx > 0);
s += leaves[-idx];
}
#endif
if( s < stages[stageIdx].threshold )
break;
}
s += (sval < st.y*nf) ? st.z : st.w;
if( stageIdx == splitstage && (ystep == 1 || ((ix | iy) & 1) == 0) )
{
int count = atomic_inc(lcount);
lbuf[count] = (int)(ix | (iy << 8));
lnf[count] = nf;
}
}
if( s < stages[stageIdx].threshold )
break;
}
for( stageIdx = splitstage; stageIdx < nstages; stageIdx++ )
{
int nrects = lcount[0];
if( stageIdx == nstages )
{
int nfaces = atomic_inc(facepos);
if( nfaces < maxFaces )
barrier(CLK_LOCAL_MEM_FENCE);
if( nrects == 0 )
break;
if( lidx == 0 )
lcount[0] = 0;
{
#if NODE_COUNT == 1
__global const Stump* stump = (__global const Stump*)nodes + stages[stageIdx].first;
#else
__global const Node* node = nodes + stages[stageIdx].first*NODE_COUNT;
__global const float* leaves = leaves0 + stages[stageIdx].first*(NODE_COUNT+1);
#endif
int nparts = LOCAL_SIZE / nrects;
int ntrees = stages[stageIdx].ntrees;
int ntrees_p = (ntrees + nparts - 1)/nparts;
int nr = lidx / nparts;
int partidx = -1, idxval = 0;
float partsum = 0.f, nf = 0.f;
if( nr < nrects )
{
partidx = lidx % nparts;
idxval = lbuf[nr];
nf = lnf[nr];
{
int ntrees0 = ntrees_p*partidx;
int ntrees1 = min(ntrees0 + ntrees_p, ntrees);
int ix1 = idxval & 255, iy1 = idxval >> 8;
#if SUM_BUF_SIZE > 0
__local const int* psum = ibuf + mad24(iy1, SUM_BUF_STEP, ix1);
#else
__global const int* psum = psum0 + mad24(iy1, sumstep, ix1);
#endif
#if NODE_COUNT == 1
for( i = ntrees0; i < ntrees1; i++ )
{
float4 st = stump[i].st;
__global const OptHaarFeature* f = optfeatures + as_int(st.x);
float4 weight = f->weight;
int4 ofs = f->ofs[0];
float sval = (psum[ofs.x] - psum[ofs.y] - psum[ofs.z] + psum[ofs.w])*weight.x;
ofs = f->ofs[1];
sval += (psum[ofs.x] - psum[ofs.y] - psum[ofs.z] + psum[ofs.w])*weight.y;
//if( weight.z > 0 )
{
ofs = f->ofs[2];
sval += (psum[ofs.x] - psum[ofs.y] - psum[ofs.z] + psum[ofs.w])*weight.z;
}
partsum += (sval < st.y*nf) ? st.z : st.w;
}
#else
for( i = ntrees0; i < ntrees1; i++ )
{
int idx = 0;
do
{
int4 n = node[i*2 + idx].n;
__global const OptHaarFeature* f = optfeatures + n.x;
float4 weight = f->weight;
int4 ofs = f->ofs[0];
float sval = (psum[ofs.x] - psum[ofs.y] - psum[ofs.z] + psum[ofs.w])*weight.x;
ofs = f->ofs[1];
sval += (psum[ofs.x] - psum[ofs.y] - psum[ofs.z] + psum[ofs.w])*weight.y;
if( weight.z > 0 )
{
ofs = f->ofs[2];
sval += (psum[ofs.x] - psum[ofs.y] - psum[ofs.z] + psum[ofs.w])*weight.z;
}
idx = (sval < as_float(n.y)*nf) ? n.z : n.w;
}
while(idx > 0);
partsum += leaves[i*3-idx];
}
#endif
}
}
lpartsum[lidx] = partsum;
barrier(CLK_LOCAL_MEM_FENCE);
if( partidx == 0 )
{
float s = lpartsum[nr*nparts];
for( i = 1; i < nparts; i++ )
s += lpartsum[i + nr*nparts];
if( s >= stages[stageIdx].threshold )
{
int count = atomic_inc(lcount);
lbuf[count] = idxval;
lnf[count] = nf;
}
}
}
}
barrier(CLK_LOCAL_MEM_FENCE);
if( stageIdx == nstages )
{
volatile __global int* face = facepos + 1 + nfaces*4;
face[0] = convert_int_rte(ix*factor);
face[1] = convert_int_rte(iy*factor);
face[2] = convert_int_rte(windowsize.x*factor);
face[3] = convert_int_rte(windowsize.y*factor);
int nrects = lcount[0];
if( lidx < nrects )
{
int nfaces = atomic_inc(facepos);
if( nfaces < maxFaces )
{
volatile __global int* face = facepos + 1 + nfaces*3;
int val = lbuf[lidx];
face[0] = scaleIdx;
face[1] = ix0 + (val & 255);
face[2] = iy0 + (val >> 8);
}
}
}
}
}
}
#undef CALC_SUM_OFS_
#define CALC_SUM_OFS_(p0, p1, p2, p3, ptr) \
((ptr)[p0] - (ptr)[p1] - (ptr)[p2] + (ptr)[p3])
__kernel void runLBPClassifierStump(
__kernel void runLBPClassifierStumpSimple(
int nscales, __global const ScaleData* scaleData,
__global const int* sum,
int sumstep, int sumoffset,
int _sumstep, int sumoffset,
__global const OptLBPFeature* optfeatures,
int nstages,
int splitstage, int nstages,
__global const Stage* stages,
__global const Stump* stumps,
__global const int* bitsets,
int bitsetSize,
volatile __global int* facepos,
int2 imgsize, int xyscale, float factor,
int2 windowsize, int maxFaces)
{
int ix = get_global_id(0)*xyscale;
int iy = get_global_id(1)*xyscale;
sumstep /= sizeof(int);
int lx = get_local_id(0);
int ly = get_local_id(1);
int local_size_x = get_local_size(0);
int local_size_y = get_local_size(1);
int groupIdx = get_group_id(1)*get_num_groups(0) + get_group_id(0);
int ngroups = get_num_groups(0)*get_num_groups(1);
int scaleIdx, tileIdx, stageIdx;
int startStage = 0, endStage = nstages;
int sumstep = (int)(_sumstep/sizeof(int));
if( ix < imgsize.x && iy < imgsize.y )
for( scaleIdx = nscales-1; scaleIdx >= 0; scaleIdx-- )
{
int stageIdx;
__global const Stump* stump = stumps;
__global const int* p = sum + mad24(iy, sumstep, ix);
__global const ScaleData* s = scaleData + scaleIdx;
int ystep = s->ystep;
int2 worksize = (int2)(max(s->szi_width - windowsize.x, 0), max(s->szi_height - windowsize.y, 0));
int2 ntiles = (int2)((worksize.x/ystep + local_size_x-1)/local_size_x,
(worksize.y/ystep + local_size_y-1)/local_size_y);
int totalTiles = ntiles.x*ntiles.y;
for( stageIdx = 0; stageIdx < nstages; stageIdx++ )
for( tileIdx = groupIdx; tileIdx < totalTiles; tileIdx += ngroups )
{
int i, ntrees = stages[stageIdx].ntrees;
float s = 0.f;
for( i = 0; i < ntrees; i++, stump++, bitsets += bitsetSize )
int iy = ((tileIdx / ntiles.x)*local_size_y + ly)*ystep;
int ix = ((tileIdx % ntiles.x)*local_size_x + lx)*ystep;
if( ix < worksize.x && iy < worksize.y )
{
float4 st = stump->st;
__global const OptLBPFeature* f = optfeatures + as_int(st.x);
int16 ofs = f->ofs;
__global const int* p = sum + mad24(iy, sumstep, ix) + s->layer_ofs;
__global const Stump* stump = stumps;
__global const int* bitset = bitsets;
#define CALC_SUM_OFS_(p0, p1, p2, p3, ptr) \
((ptr)[p0] - (ptr)[p1] - (ptr)[p2] + (ptr)[p3])
for( stageIdx = 0; stageIdx < endStage; stageIdx++ )
{
int i, ntrees = stages[stageIdx].ntrees;
float s = 0.f;
for( i = 0; i < ntrees; i++, stump++, bitset += bitsetSize )
{
float4 st = stump->st;
__global const OptLBPFeature* f = optfeatures + as_int(st.x);
int16 ofs = f->ofs;
int cval = CALC_SUM_OFS_( ofs.s5, ofs.s6, ofs.s9, ofs.sa, p );
int cval = CALC_SUM_OFS_( ofs.s5, ofs.s6, ofs.s9, ofs.sa, p );
int mask, idx = (CALC_SUM_OFS_( ofs.s0, ofs.s1, ofs.s4, ofs.s5, p ) >= cval ? 4 : 0); // 0
idx |= (CALC_SUM_OFS_( ofs.s1, ofs.s2, ofs.s5, ofs.s6, p ) >= cval ? 2 : 0); // 1
idx |= (CALC_SUM_OFS_( ofs.s2, ofs.s3, ofs.s6, ofs.s7, p ) >= cval ? 1 : 0); // 2
int mask, idx = (CALC_SUM_OFS_( ofs.s0, ofs.s1, ofs.s4, ofs.s5, p ) >= cval ? 4 : 0); // 0
idx |= (CALC_SUM_OFS_( ofs.s1, ofs.s2, ofs.s5, ofs.s6, p ) >= cval ? 2 : 0); // 1
idx |= (CALC_SUM_OFS_( ofs.s2, ofs.s3, ofs.s6, ofs.s7, p ) >= cval ? 1 : 0); // 2
mask = (CALC_SUM_OFS_( ofs.s6, ofs.s7, ofs.sa, ofs.sb, p ) >= cval ? 16 : 0); // 5
mask |= (CALC_SUM_OFS_( ofs.sa, ofs.sb, ofs.se, ofs.sf, p ) >= cval ? 8 : 0); // 8
mask |= (CALC_SUM_OFS_( ofs.s9, ofs.sa, ofs.sd, ofs.se, p ) >= cval ? 4 : 0); // 7
mask |= (CALC_SUM_OFS_( ofs.s8, ofs.s9, ofs.sc, ofs.sd, p ) >= cval ? 2 : 0); // 6
mask |= (CALC_SUM_OFS_( ofs.s4, ofs.s5, ofs.s8, ofs.s9, p ) >= cval ? 1 : 0); // 7
mask = (CALC_SUM_OFS_( ofs.s6, ofs.s7, ofs.sa, ofs.sb, p ) >= cval ? 16 : 0); // 5
mask |= (CALC_SUM_OFS_( ofs.sa, ofs.sb, ofs.se, ofs.sf, p ) >= cval ? 8 : 0); // 8
mask |= (CALC_SUM_OFS_( ofs.s9, ofs.sa, ofs.sd, ofs.se, p ) >= cval ? 4 : 0); // 7
mask |= (CALC_SUM_OFS_( ofs.s8, ofs.s9, ofs.sc, ofs.sd, p ) >= cval ? 2 : 0); // 6
mask |= (CALC_SUM_OFS_( ofs.s4, ofs.s5, ofs.s8, ofs.s9, p ) >= cval ? 1 : 0); // 7
s += (bitsets[idx] & (1 << mask)) ? st.z : st.w;
}
s += (bitset[idx] & (1 << mask)) ? st.z : st.w;
}
if( s < stages[stageIdx].threshold )
break;
}
if( s < stages[stageIdx].threshold )
break;
if( stageIdx == nstages )
{
int nfaces = atomic_inc(facepos);
if( nfaces < maxFaces )
{
volatile __global int* face = facepos + 1 + nfaces*3;
face[0] = scaleIdx;
face[1] = ix;
face[2] = iy;
}
}
}
}
}
}
__kernel __attribute__((reqd_work_group_size(LOCAL_SIZE_X,LOCAL_SIZE_Y,1)))
void runLBPClassifierStump(
int nscales, __global const ScaleData* scaleData,
__global const int* sum,
int _sumstep, int sumoffset,
__global const OptLBPFeature* optfeatures,
int splitstage, int nstages,
__global const Stage* stages,
__global const Stump* stumps,
__global const int* bitsets,
int bitsetSize,
volatile __global int* facepos,
int2 windowsize, int maxFaces)
{
int lx = get_local_id(0);
int ly = get_local_id(1);
int groupIdx = get_group_id(0);
int i, ngroups = get_global_size(0)/LOCAL_SIZE_X;
int scaleIdx, tileIdx, stageIdx;
int sumstep = (int)(_sumstep/sizeof(int));
int lidx = ly*LOCAL_SIZE_X + lx;
if( stageIdx == nstages )
#define LOCAL_SIZE (LOCAL_SIZE_X*LOCAL_SIZE_Y)
__local int lstore[SUM_BUF_SIZE + LOCAL_SIZE*3/2+1];
#if SUM_BUF_SIZE > 0
__local int* ibuf = lstore;
__local int* lcount = ibuf + SUM_BUF_SIZE;
#else
__local int* lcount = lstore;
#endif
__local float* lpartsum = (__local float*)(lcount + 1);
__local short* lbuf = (__local short*)(lpartsum + LOCAL_SIZE);
for( scaleIdx = nscales-1; scaleIdx >= 0; scaleIdx-- )
{
__global const ScaleData* s = scaleData + scaleIdx;
int ystep = s->ystep;
int2 worksize = (int2)(max(s->szi_width - windowsize.x, 0), max(s->szi_height - windowsize.y, 0));
int2 ntiles = (int2)((worksize.x + LOCAL_SIZE_X-1)/LOCAL_SIZE_X,
(worksize.y + LOCAL_SIZE_Y-1)/LOCAL_SIZE_Y);
int totalTiles = ntiles.x*ntiles.y;
for( tileIdx = groupIdx; tileIdx < totalTiles; tileIdx += ngroups )
{
int nfaces = atomic_inc(facepos);
if( nfaces < maxFaces )
int ix0 = (tileIdx % ntiles.x)*LOCAL_SIZE_X;
int iy0 = (tileIdx / ntiles.x)*LOCAL_SIZE_Y;
int ix = lx, iy = ly;
__global const int* psum0 = sum + mad24(iy0, sumstep, ix0) + s->layer_ofs;
if( ix0 >= worksize.x || iy0 >= worksize.y )
continue;
#if SUM_BUF_SIZE > 0
for( i = lidx*4; i < SUM_BUF_SIZE; i += LOCAL_SIZE_X*LOCAL_SIZE_Y*4 )
{
int dy = i/SUM_BUF_STEP, dx = i - dy*SUM_BUF_STEP;
vstore4(vload4(0, psum0 + mad24(dy, sumstep, dx)), 0, ibuf+i);
}
barrier(CLK_LOCAL_MEM_FENCE);
#endif
if( lidx == 0 )
lcount[0] = 0;
barrier(CLK_LOCAL_MEM_FENCE);
if( ix0 + ix < worksize.x && iy0 + iy < worksize.y )
{
__global const Stump* stump = stumps;
__global const int* bitset = bitsets;
#if SUM_BUF_SIZE > 0
__local const int* p = ibuf + mad24(iy, SUM_BUF_STEP, ix);
#else
__global const int* p = psum0 + mad24(iy, sumstep, ix);
#endif
for( stageIdx = 0; stageIdx < splitstage; stageIdx++ )
{
int ntrees = stages[stageIdx].ntrees;
float s = 0.f;
for( i = 0; i < ntrees; i++, stump++, bitset += bitsetSize )
{
float4 st = stump->st;
__global const OptLBPFeature* f = optfeatures + as_int(st.x);
int16 ofs = f->ofs;
int cval = CALC_SUM_OFS_( ofs.s5, ofs.s6, ofs.s9, ofs.sa, p );
int mask, idx = (CALC_SUM_OFS_( ofs.s0, ofs.s1, ofs.s4, ofs.s5, p ) >= cval ? 4 : 0); // 0
idx |= (CALC_SUM_OFS_( ofs.s1, ofs.s2, ofs.s5, ofs.s6, p ) >= cval ? 2 : 0); // 1
idx |= (CALC_SUM_OFS_( ofs.s2, ofs.s3, ofs.s6, ofs.s7, p ) >= cval ? 1 : 0); // 2
mask = (CALC_SUM_OFS_( ofs.s6, ofs.s7, ofs.sa, ofs.sb, p ) >= cval ? 16 : 0); // 5
mask |= (CALC_SUM_OFS_( ofs.sa, ofs.sb, ofs.se, ofs.sf, p ) >= cval ? 8 : 0); // 8
mask |= (CALC_SUM_OFS_( ofs.s9, ofs.sa, ofs.sd, ofs.se, p ) >= cval ? 4 : 0); // 7
mask |= (CALC_SUM_OFS_( ofs.s8, ofs.s9, ofs.sc, ofs.sd, p ) >= cval ? 2 : 0); // 6
mask |= (CALC_SUM_OFS_( ofs.s4, ofs.s5, ofs.s8, ofs.s9, p ) >= cval ? 1 : 0); // 7
s += (bitset[idx] & (1 << mask)) ? st.z : st.w;
}
if( s < stages[stageIdx].threshold )
break;
}
if( stageIdx == splitstage && (ystep == 1 || ((ix | iy) & 1) == 0) )
{
int count = atomic_inc(lcount);
lbuf[count] = (int)(ix | (iy << 8));
}
}
for( stageIdx = splitstage; stageIdx < nstages; stageIdx++ )
{
volatile __global int* face = facepos + 1 + nfaces*4;
face[0] = convert_int_rte(ix*factor);
face[1] = convert_int_rte(iy*factor);
face[2] = convert_int_rte(windowsize.x*factor);
face[3] = convert_int_rte(windowsize.y*factor);
int nrects = lcount[0];
barrier(CLK_LOCAL_MEM_FENCE);
if( nrects == 0 )
break;
if( lidx == 0 )
lcount[0] = 0;
{
__global const Stump* stump = stumps + stages[stageIdx].first;
__global const int* bitset = bitsets + stages[stageIdx].first*bitsetSize;
int nparts = LOCAL_SIZE / nrects;
int ntrees = stages[stageIdx].ntrees;
int ntrees_p = (ntrees + nparts - 1)/nparts;
int nr = lidx / nparts;
int partidx = -1, idxval = 0;
float partsum = 0.f, nf = 0.f;
if( nr < nrects )
{
partidx = lidx % nparts;
idxval = lbuf[nr];
{
int ntrees0 = ntrees_p*partidx;
int ntrees1 = min(ntrees0 + ntrees_p, ntrees);
int ix1 = idxval & 255, iy1 = idxval >> 8;
#if SUM_BUF_SIZE > 0
__local const int* p = ibuf + mad24(iy1, SUM_BUF_STEP, ix1);
#else
__global const int* p = psum0 + mad24(iy1, sumstep, ix1);
#endif
for( i = ntrees0; i < ntrees1; i++ )
{
float4 st = stump[i].st;
__global const OptLBPFeature* f = optfeatures + as_int(st.x);
int16 ofs = f->ofs;
#define CALC_SUM_OFS_(p0, p1, p2, p3, ptr) \
((ptr)[p0] - (ptr)[p1] - (ptr)[p2] + (ptr)[p3])
int cval = CALC_SUM_OFS_( ofs.s5, ofs.s6, ofs.s9, ofs.sa, p );
int mask, idx = (CALC_SUM_OFS_( ofs.s0, ofs.s1, ofs.s4, ofs.s5, p ) >= cval ? 4 : 0); // 0
idx |= (CALC_SUM_OFS_( ofs.s1, ofs.s2, ofs.s5, ofs.s6, p ) >= cval ? 2 : 0); // 1
idx |= (CALC_SUM_OFS_( ofs.s2, ofs.s3, ofs.s6, ofs.s7, p ) >= cval ? 1 : 0); // 2
mask = (CALC_SUM_OFS_( ofs.s6, ofs.s7, ofs.sa, ofs.sb, p ) >= cval ? 16 : 0); // 5
mask |= (CALC_SUM_OFS_( ofs.sa, ofs.sb, ofs.se, ofs.sf, p ) >= cval ? 8 : 0); // 8
mask |= (CALC_SUM_OFS_( ofs.s9, ofs.sa, ofs.sd, ofs.se, p ) >= cval ? 4 : 0); // 7
mask |= (CALC_SUM_OFS_( ofs.s8, ofs.s9, ofs.sc, ofs.sd, p ) >= cval ? 2 : 0); // 6
mask |= (CALC_SUM_OFS_( ofs.s4, ofs.s5, ofs.s8, ofs.s9, p ) >= cval ? 1 : 0); // 7
partsum += (bitset[i*bitsetSize + idx] & (1 << mask)) ? st.z : st.w;
}
}
}
lpartsum[lidx] = partsum;
barrier(CLK_LOCAL_MEM_FENCE);
if( partidx == 0 )
{
float s = lpartsum[nr*nparts];
for( i = 1; i < nparts; i++ )
s += lpartsum[i + nr*nparts];
if( s >= stages[stageIdx].threshold )
{
int count = atomic_inc(lcount);
lbuf[count] = idxval;
}
}
}
}
barrier(CLK_LOCAL_MEM_FENCE);
if( stageIdx == nstages )
{
int nrects = lcount[0];
if( lidx < nrects )
{
int nfaces = atomic_inc(facepos);
if( nfaces < maxFaces )
{
volatile __global int* face = facepos + 1 + nfaces*3;
int val = lbuf[lidx];
face[0] = scaleIdx;
face[1] = ix0 + (val & 255);
face[2] = iy0 + (val >> 8);
}
}
}
}
}
......
......@@ -257,6 +257,7 @@ int CV_DetectorTest::runTestCase( int detectorIdx, vector<vector<Rect> >& object
string dataPath = ts->get_data_path(), detectorFilename;
if( !detectorFilenames[detectorIdx].empty() )
detectorFilename = dataPath + detectorFilenames[detectorIdx];
printf("detector %s\n", detectorFilename.c_str());
for( int ii = 0; ii < (int)imageFilenames.size(); ++ii )
{
......
......@@ -231,9 +231,14 @@ void detectAndDraw( UMat& img, Mat& canvas, CascadeClassifier& cascade,
smallImg.copyTo(canvas);
double fps = getTickFrequency()/t;
static double avgfps = 0;
static int nframes = 0;
nframes++;
double alpha = nframes > 50 ? 0.01 : 1./nframes;
avgfps = avgfps*(1-alpha) + fps*alpha;
putText(canvas, format("OpenCL: %s, fps: %.1f", ocl::useOpenCL() ? "ON" : "OFF", fps), Point(250, 50),
FONT_HERSHEY_SIMPLEX, 1, Scalar(0,255,0), 3);
putText(canvas, format("OpenCL: %s, fps: %.1f", ocl::useOpenCL() ? "ON" : "OFF", avgfps), Point(50, 30),
FONT_HERSHEY_SIMPLEX, 0.8, Scalar(0,255,0), 2);
for( vector<Rect>::const_iterator r = faces.begin(); r != faces.end(); r++, i++ )
{
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment