Commit 30593ee5 authored by Vadim Pisarevsky

3rd attempt to prepare patch with improved OpenCL kernels of CascadeClassifier.

parent 0fef7f8b
...@@ -615,7 +615,7 @@ static void* initOpenCLAndLoad(const char* funcname) ...@@ -615,7 +615,7 @@ static void* initOpenCLAndLoad(const char* funcname)
initialized = true; initialized = true;
g_haveOpenCL = handle != 0 && dlsym(handle, oclFuncToCheck) != 0; g_haveOpenCL = handle != 0 && dlsym(handle, oclFuncToCheck) != 0;
if( g_haveOpenCL ) if( g_haveOpenCL )
fprintf(stderr, "Succesffuly loaded OpenCL v1.1+ runtime from %s\n", oclpath); fprintf(stderr, "Successfully loaded OpenCL v1.1+ runtime from %s\n", oclpath);
else else
fprintf(stderr, "Failed to load OpenCL runtime\n"); fprintf(stderr, "Failed to load OpenCL runtime\n");
} }
...@@ -1335,11 +1335,13 @@ inline bool operator < (const HashKey& h1, const HashKey& h2) ...@@ -1335,11 +1335,13 @@ inline bool operator < (const HashKey& h1, const HashKey& h2)
return h1.a < h2.a || (h1.a == h2.a && h1.b < h2.b); return h1.a < h2.a || (h1.a == h2.a && h1.b < h2.b);
} }
static bool g_isOpenCLInitialized = false;
static bool g_isOpenCLAvailable = false;
bool haveOpenCL() bool haveOpenCL()
{ {
#ifdef HAVE_OPENCL
static bool g_isOpenCLInitialized = false;
static bool g_isOpenCLAvailable = false;
if (!g_isOpenCLInitialized) if (!g_isOpenCLInitialized)
{ {
try try
...@@ -1354,6 +1356,9 @@ bool haveOpenCL() ...@@ -1354,6 +1356,9 @@ bool haveOpenCL()
g_isOpenCLInitialized = true; g_isOpenCLInitialized = true;
} }
return g_isOpenCLAvailable; return g_isOpenCLAvailable;
#else
return false;
#endif
} }
bool useOpenCL() bool useOpenCL()
......
...@@ -32,112 +32,6 @@ The following reference is for the detection part only. There is a separate appl ...@@ -32,112 +32,6 @@ The following reference is for the detection part only. There is a separate appl
.. [Lienhart02] Rainer Lienhart and Jochen Maydt. An Extended Set of Haar-like Features for Rapid Object Detection. IEEE ICIP 2002, Vol. 1, pp. 900-903, Sep. 2002. This paper, as well as the extended technical report, can be retrieved at http://www.multimedia-computing.de/mediawiki//images/5/52/MRL-TR-May02-revised-Dec02.pdf .. [Lienhart02] Rainer Lienhart and Jochen Maydt. An Extended Set of Haar-like Features for Rapid Object Detection. IEEE ICIP 2002, Vol. 1, pp. 900-903, Sep. 2002. This paper, as well as the extended technical report, can be retrieved at http://www.multimedia-computing.de/mediawiki//images/5/52/MRL-TR-May02-revised-Dec02.pdf
FeatureEvaluator
----------------
.. ocv:class:: FeatureEvaluator
Base class for computing feature values in cascade classifiers. ::
class CV_EXPORTS FeatureEvaluator
{
public:
enum { HAAR = 0, LBP = 1 }; // supported feature types
virtual ~FeatureEvaluator(); // destructor
virtual bool read(const FileNode& node);
virtual Ptr<FeatureEvaluator> clone() const;
virtual int getFeatureType() const;
virtual bool setImage(const Mat& img, Size origWinSize);
virtual bool setWindow(Point p);
virtual double calcOrd(int featureIdx) const;
virtual int calcCat(int featureIdx) const;
static Ptr<FeatureEvaluator> create(int type);
};
FeatureEvaluator::read
--------------------------
Reads parameters of features from the ``FileStorage`` node.
.. ocv:function:: bool FeatureEvaluator::read(const FileNode& node)
:param node: File node from which the feature parameters are read.
FeatureEvaluator::clone
---------------------------
Returns a full copy of the feature evaluator.
.. ocv:function:: Ptr<FeatureEvaluator> FeatureEvaluator::clone() const
FeatureEvaluator::getFeatureType
------------------------------------
Returns the feature type (``HAAR`` or ``LBP`` for now).
.. ocv:function:: int FeatureEvaluator::getFeatureType() const
FeatureEvaluator::setImage
------------------------------
Assigns an image to the feature evaluator.
.. ocv:function:: bool FeatureEvaluator::setImage(InputArray img, Size origWinSize, Size sumSize)
:param img: Matrix of the type ``CV_8UC1`` containing an image where the features are computed.
:param origWinSize: Size of training images.
:param sumSize: The requested size of integral images (so if the integral image is smaller, it resides in the top-left corner of the larger image of requested size). Because the features are represented using offsets from the image origin, using the same sumSize for all scales helps to avoid constant readjustments of the features to different scales.
The method assigns an image, where the features will be computed, to the feature evaluator.
FeatureEvaluator::setWindow
-------------------------------
Assigns a window in the current image where the features will be computed.
.. ocv:function:: bool FeatureEvaluator::setWindow(Point p)
:param p: Upper left point of the window where the features are computed. Size of the window is equal to the size of training images.
FeatureEvaluator::calcOrd
-----------------------------
Computes the value of an ordered (numerical) feature.
.. ocv:function:: double FeatureEvaluator::calcOrd(int featureIdx) const
:param featureIdx: Index of the feature whose value is computed.
The function returns the computed value of an ordered feature.
FeatureEvaluator::calcCat
-----------------------------
Computes the value of a categorical feature.
.. ocv:function:: int FeatureEvaluator::calcCat(int featureIdx) const
:param featureIdx: Index of the feature whose value is computed.
The function returns the computed label of a categorical feature, which is a value in the range [0, number of categories - 1].
FeatureEvaluator::create
----------------------------
Constructs the feature evaluator.
.. ocv:function:: Ptr<FeatureEvaluator> FeatureEvaluator::create(int type)
:param type: Type of features evaluated by cascade (``HAAR`` or ``LBP`` for now).
CascadeClassifier CascadeClassifier
----------------- -----------------
.. ocv:class:: CascadeClassifier .. ocv:class:: CascadeClassifier
......
...@@ -121,29 +121,6 @@ CV_EXPORTS void groupRectangles_meanshift(std::vector<Rect>& rectList, std::ve ...@@ -121,29 +121,6 @@ CV_EXPORTS void groupRectangles_meanshift(std::vector<Rect>& rectList, std::ve
std::vector<double>& foundScales, std::vector<double>& foundScales,
double detectThreshold = 0.0, Size winDetSize = Size(64, 128)); double detectThreshold = 0.0, Size winDetSize = Size(64, 128));
// Base class for computing feature values in cascade classifiers.
class CV_EXPORTS FeatureEvaluator
{
public:
// Feature types a cascade can be built on.
enum { HAAR = 0,
LBP = 1,
HOG = 2
};
virtual ~FeatureEvaluator();
// Reads the feature parameters from the given file storage node.
virtual bool read(const FileNode& node);
// Returns a full copy of the feature evaluator.
virtual Ptr<FeatureEvaluator> clone() const;
// Returns the feature type (one of the enum values above).
virtual int getFeatureType() const;
// Assigns the image in which features will be computed.
//   img         - image where the features are computed
//   origWinSize - size of the training images
//   sumSize     - requested size of the integral images
virtual bool setImage(InputArray img, Size origWinSize, Size sumSize);
// Selects the window of the current image where features are computed;
// p is the upper-left corner of that window.
virtual bool setWindow(Point p);
// Computes the value of an ordered (numerical) feature with the given index.
virtual double calcOrd(int featureIdx) const;
// Computes the value (category label) of a categorical feature with the given index.
virtual int calcCat(int featureIdx) const;
// Constructs a feature evaluator for the given feature type.
static Ptr<FeatureEvaluator> create(int type);
};
template<> CV_EXPORTS void DefaultDeleter<CvHaarClassifierCascade>::operator ()(CvHaarClassifierCascade* obj) const; template<> CV_EXPORTS void DefaultDeleter<CvHaarClassifierCascade>::operator ()(CvHaarClassifierCascade* obj) const;
enum { CASCADE_DO_CANNY_PRUNING = 1, enum { CASCADE_DO_CANNY_PRUNING = 1,
......
...@@ -24,14 +24,14 @@ OCL_PERF_TEST_P(Cascade_Image_MinSize, CascadeClassifier, ...@@ -24,14 +24,14 @@ OCL_PERF_TEST_P(Cascade_Image_MinSize, CascadeClassifier,
string("cv/cascadeandhog/images/class57.png") ), string("cv/cascadeandhog/images/class57.png") ),
testing::Values(30, 64, 90) ) ) testing::Values(30, 64, 90) ) )
{ {
const string cascasePath = get<0>(GetParam()); const string cascadePath = get<0>(GetParam());
const string imagePath = get<1>(GetParam()); const string imagePath = get<1>(GetParam());
int min_size = get<2>(GetParam()); int min_size = get<2>(GetParam());
Size minSize(min_size, min_size); Size minSize(min_size, min_size);
CascadeClassifier cc( getDataPath(cascasePath) ); CascadeClassifier cc( getDataPath(cascadePath) );
if (cc.empty()) if (cc.empty())
FAIL() << "Can't load cascade file: " << getDataPath(cascasePath); FAIL() << "Can't load cascade file: " << getDataPath(cascadePath);
Mat img = imread(getDataPath(imagePath), IMREAD_GRAYSCALE); Mat img = imread(getDataPath(imagePath), IMREAD_GRAYSCALE);
if (img.empty()) if (img.empty())
......
...@@ -46,71 +46,6 @@ ...@@ -46,71 +46,6 @@
#include "opencv2/objdetect/objdetect_c.h" #include "opencv2/objdetect/objdetect_c.h"
#include "opencl_kernels.hpp" #include "opencl_kernels.hpp"
#if defined (LOG_CASCADE_STATISTIC)
// Debug-only helper, compiled in only when LOG_CASCADE_STATISTIC is defined.
// It builds one wide 8-bit image ("mask") made of STADIES_NUM + 1 tiles placed
// side by side: the left-most tile is a copy of the input image, and setPoint()
// marks pixels in the following tiles. write() saves the mask as a numbered PNG.
// NOTE(review): "stadies" presumably means cascade "stages" - confirm.
struct Logger
{
// Number of marker tiles placed to the right of the image copy.
enum { STADIES_NUM = 20 };
// Running index used for the output file name; incremented by write().
int gid;
// The composite output image.
cv::Mat mask;
// Size of the first image passed to setImage() (recorded while gid == 0).
cv::Size sz0;
// Marker size selector: 2 -> setPoint() writes a 2x2 block, otherwise a single pixel.
int step;
Logger() : gid (0), step(2) {}
// Rebuilds the mask for a new image: allocates and zeroes it, copies the image
// into the left-most tile and draws the vertical separator lines.
void setImage(const cv::Mat& image)
{
if (gid == 0)
sz0 = image.size();
// One tile per image width, plus one separator column per marker tile.
mask.create(image.rows, image.cols * (STADIES_NUM + 1) + STADIES_NUM, CV_8UC1);
mask = cv::Scalar(0);
cv::Mat roi = mask(cv::Rect(cv::Point(0,0), image.size()));
image.copyTo(roi);
printf("%d) Size = (%d, %d)\n", gid, image.cols, image.rows);
for(int i = 0; i < STADIES_NUM; ++i)
{
// Column right after the i-th tile.
int x = image.cols + i * (image.cols + 1);
cv::line(mask, cv::Point(x, 0), cv::Point(x, mask.rows-1), cv::Scalar(255));
}
// Switch to single-pixel markers once the first image is more than twice
// as large as the current one in both dimensions (integer division).
if (sz0.width/image.cols > 2 && sz0.height/image.rows > 2)
step = 1;
}
// Marks point p in the first N marker tiles, where N is derived from
// passed_stadies: the value is negated, and a negated value of -1
// (i.e. an input of 1) is mapped to STADIES_NUM.
// NOTE(review): no bounds checks are performed here; with step == 2 the
// writes to ptr[1] / ptr[mask.step] assume p is not on the last column/row
// of the mask - confirm against the caller.
void setPoint(const cv::Point& p, int passed_stadies)
{
// Width of one tile (the separator columns are dropped by the integer division).
int cols = mask.cols / (STADIES_NUM + 1);
passed_stadies = -passed_stadies;
passed_stadies = (passed_stadies == -1) ? STADIES_NUM : passed_stadies;
// Start at p inside the first marker tile (skip the image tile and one separator).
unsigned char* ptr = mask.ptr<unsigned char>(p.y) + cols + 1 + p.x;
// Advance by one tile plus one separator column per iteration.
for(int i = 0; i < passed_stadies; ++i, ptr += cols + 1)
{
*ptr = 255;
if (step == 2)
{
ptr[1] = 255;
ptr[mask.step] = 255;
ptr[mask.step + 1] = 255;
}
}
};
// Saves the mask as "<gid, zero-padded to 4 digits>.png" and advances gid.
void write()
{
char buf[4096];
sprintf(buf, "%04d.png", gid++);
cv::imwrite(buf, mask);
}
} logger;
#endif
namespace cv namespace cv
{ {
...@@ -121,7 +56,8 @@ template<typename _Tp> void copyVectorToUMat(const std::vector<_Tp>& v, UMat& um ...@@ -121,7 +56,8 @@ template<typename _Tp> void copyVectorToUMat(const std::vector<_Tp>& v, UMat& um
Mat(1, (int)(v.size()*sizeof(v[0])), CV_8U, (void*)&v[0]).copyTo(um); Mat(1, (int)(v.size()*sizeof(v[0])), CV_8U, (void*)&v[0]).copyTo(um);
} }
void groupRectangles(std::vector<Rect>& rectList, int groupThreshold, double eps, std::vector<int>* weights, std::vector<double>* levelWeights) void groupRectangles(std::vector<Rect>& rectList, int groupThreshold, double eps,
std::vector<int>* weights, std::vector<double>* levelWeights)
{ {
if( groupThreshold <= 0 || rectList.empty() ) if( groupThreshold <= 0 || rectList.empty() )
{ {
...@@ -426,7 +362,8 @@ void groupRectangles(std::vector<Rect>& rectList, std::vector<int>& weights, int ...@@ -426,7 +362,8 @@ void groupRectangles(std::vector<Rect>& rectList, std::vector<int>& weights, int
groupRectangles(rectList, groupThreshold, eps, &weights, 0); groupRectangles(rectList, groupThreshold, eps, &weights, 0);
} }
//used for cascade detection algorithm for ROC-curve calculating //used for cascade detection algorithm for ROC-curve calculating
void groupRectangles(std::vector<Rect>& rectList, std::vector<int>& rejectLevels, std::vector<double>& levelWeights, int groupThreshold, double eps) void groupRectangles(std::vector<Rect>& rectList, std::vector<int>& rejectLevels,
std::vector<double>& levelWeights, int groupThreshold, double eps)
{ {
groupRectangles(rectList, groupThreshold, eps, &rejectLevels, &levelWeights); groupRectangles(rectList, groupThreshold, eps, &rejectLevels, &levelWeights);
} }
...@@ -439,14 +376,138 @@ void groupRectangles_meanshift(std::vector<Rect>& rectList, std::vector<double>& ...@@ -439,14 +376,138 @@ void groupRectangles_meanshift(std::vector<Rect>& rectList, std::vector<double>&
FeatureEvaluator::~FeatureEvaluator() {} FeatureEvaluator::~FeatureEvaluator() {}
bool FeatureEvaluator::read(const FileNode&) {return true;}
// Resets the evaluator state that is common to all feature types.
// The file node is not used here; _origWinSize is stored as origWinSize,
// localSize and lbufSize are zeroed, and the per-scale table is emptied
// (allocated first if it does not exist yet). Always returns true.
bool FeatureEvaluator::read(const FileNode&, Size _origWinSize)
{
    // Reuse the existing scale table when there is one, otherwise allocate it.
    if (!scaleData.empty())
        scaleData->clear();
    else
        scaleData = makePtr<std::vector<ScaleData> >();

    lbufSize = Size(0, 0);
    localSize = lbufSize;
    origWinSize = _origWinSize;
    return true;
}
Ptr<FeatureEvaluator> FeatureEvaluator::clone() const { return Ptr<FeatureEvaluator>(); } Ptr<FeatureEvaluator> FeatureEvaluator::clone() const { return Ptr<FeatureEvaluator>(); }
int FeatureEvaluator::getFeatureType() const {return -1;} int FeatureEvaluator::getFeatureType() const {return -1;}
bool FeatureEvaluator::setImage(InputArray, Size, Size) {return true;} bool FeatureEvaluator::setWindow(Point, int) { return true; }
bool FeatureEvaluator::setWindow(Point) { return true; } void FeatureEvaluator::getUMats(std::vector<UMat>& bufs)
double FeatureEvaluator::calcOrd(int) const { return 0.; } {
if (!(sbufFlag & USBUF_VALID))
{
sbuf.copyTo(usbuf);
sbufFlag |= USBUF_VALID;
}
bufs.clear();
bufs.push_back(uscaleData);
bufs.push_back(usbuf);
bufs.push_back(ufbuf);
}
// Makes sure sbuf is marked valid: when the SBUF_VALID bit of sbufFlag is
// not set, usbuf is copied into sbuf and the bit is set.
void FeatureEvaluator::getMats()
{
    if (sbufFlag & SBUF_VALID)
        return;  // sbuf is already flagged as valid

    usbuf.copyTo(sbuf);
    sbufFlag |= SBUF_VALID;
}
// Base-class implementation: ignores the feature index and returns zero.
float FeatureEvaluator::calcOrd(int) const
{
    return 0.f;
}
int FeatureEvaluator::calcCat(int) const { return 0; } int FeatureEvaluator::calcCat(int) const { return 0; }
// Recomputes the per-scale layout of the shared buffer for an image of size
// imgsz processed at the given scales.
//
// For every scale the function stores, in scaleData:
//   - scale     : the scale factor itself;
//   - ystep     : 1 when scale >= 2, otherwise 2;
//   - szi       : the rounded scaled image size plus one in each dimension
//                 (the size used for the integral images of that scale);
//   - layer_ofs : offset (in elements) of the layer inside a buffer that is
//                 sbufSize.width elements wide.
// Layers are packed left to right; when a layer does not fit into the current
// row, a new row is started below the tallest layer of the current row.
// sbufSize only ever grows.
//
// Returns true when the optimized features have to be recomputed: the number
// of scales, any scale value, or the buffer size has changed.
bool FeatureEvaluator::updateScaleData( Size imgsz, const std::vector<float>& _scales )
{
    if( scaleData.empty() )
        scaleData = makePtr<std::vector<ScaleData> >();

    size_t i, nscales = _scales.size();
    bool recalcOptFeatures = nscales != scaleData->size();
    scaleData->resize(nscales);

    // Nothing to lay out; this also avoids reading _scales[0] of an empty vector.
    if( nscales == 0 )
        return recalcOptFeatures;

    int layer_dy = 0;   // height of the tallest layer in the current row
    Point layer_ofs(0,0);
    Size prevBufSize = sbufSize;
    sbufSize.width = std::max(sbufSize.width, (int)alignSize(cvRound(imgsz.width/_scales[0]) + 31, 32));
    recalcOptFeatures = recalcOptFeatures || sbufSize.width != prevBufSize.width;

    for( i = 0; i < nscales; i++ )
    {
        FeatureEvaluator::ScaleData& s = scaleData->at(i);
        // Relative comparison with a small tolerance against the previous value.
        if( !recalcOptFeatures && fabs(s.scale - _scales[i]) > FLT_EPSILON*100*_scales[i] )
            recalcOptFeatures = true;
        float sc = _scales[i];
        Size sz;
        sz.width = cvRound(imgsz.width/sc);
        sz.height = cvRound(imgsz.height/sc);
        s.ystep = sc >= 2 ? 1 : 2;
        s.scale = sc;
        s.szi = Size(sz.width+1, sz.height+1);

        if( layer_ofs.x + s.szi.width > sbufSize.width )
        {
            // Start a new row below the current one.
            layer_ofs = Point(0, layer_ofs.y + layer_dy);
            layer_dy = 0;
        }
        // Track the row height for every layer, including those of the first
        // row. Previously the height was only recorded on a wrap, so the first
        // wrap advanced by 0 and the second row overlapped the first one.
        layer_dy = std::max(layer_dy, s.szi.height);

        s.layer_ofs = layer_ofs.y*sbufSize.width + layer_ofs.x;
        layer_ofs.x += s.szi.width;
    }

    // Account for the last (possibly only) row.
    layer_ofs.y += layer_dy;
    sbufSize.height = std::max(sbufSize.height, layer_ofs.y);
    recalcOptFeatures = recalcOptFeatures || sbufSize.height != prevBufSize.height;
    return recalcOptFeatures;
}
// Prepares the evaluator for a new image processed at the given scales.
//
// The per-scale layout is refreshed with updateScaleData(); when it reports a
// change, computeOptFeatures() is called and the scale table is copied into
// uscaleData. Then, for each scale, the image is resized into a scratch buffer
// and computeChannels() is called on the resized image.
//
// Two paths exist:
//   - the image is a UMat and localSize has a non-zero area: usbuf / urbuf are
//     used and sbufFlag is set to USBUF_VALID;
//   - otherwise: sbuf / rbuf are used and sbufFlag is set to SBUF_VALID.
//
// Returns false when _scales is empty (nothing can be computed), true otherwise.
bool FeatureEvaluator::setImage( InputArray _image, const std::vector<float>& _scales )
{
    // With no scales both _scales[0] in updateScaleData() and
    // scaleData->at(0) below would be invalid accesses.
    if (_scales.empty())
        return false;

    Size imgsz = _image.size();
    bool recalcOptFeatures = updateScaleData(imgsz, _scales);

    size_t i, nscales = scaleData->size();
    // Scratch buffer size: large enough for the first scale (width aligned
    // to 16) and never smaller than the current rbuf.
    Size sz0 = scaleData->at(0).szi;
    sz0 = Size(std::max(rbuf.cols, (int)alignSize(sz0.width, 16)), std::max(rbuf.rows, sz0.height));

    if (recalcOptFeatures)
    {
        computeOptFeatures();
        copyVectorToUMat(*scaleData, uscaleData);
    }

    if (_image.isUMat() && localSize.area() > 0)
    {
        usbuf.create(sbufSize.height*nchannels, sbufSize.width, CV_32S);
        urbuf.create(sz0, CV_8U);

        for (i = 0; i < nscales; i++)
        {
            const ScaleData& s = scaleData->at(i);
            // szi is one larger than the resized image in each dimension.
            UMat dst(urbuf, Rect(0, 0, s.szi.width - 1, s.szi.height - 1));
            resize(_image, dst, dst.size(), 1. / s.scale, 1. / s.scale, INTER_LINEAR);
            computeChannels((int)i, dst);
        }
        sbufFlag = USBUF_VALID;
    }
    else
    {
        Mat image = _image.getMat();
        sbuf.create(sbufSize.height*nchannels, sbufSize.width, CV_32S);
        rbuf.create(sz0, CV_8U);

        for (i = 0; i < nscales; i++)
        {
            const ScaleData& s = scaleData->at(i);
            // Header over the start of rbuf's data, sized for this scale.
            Mat dst(s.szi.height - 1, s.szi.width - 1, CV_8U, rbuf.data);
            resize(image, dst, dst.size(), 1. / s.scale, 1. / s.scale, INTER_LINEAR);
            computeChannels((int)i, dst);
        }
        sbufFlag = SBUF_VALID;
    }

    return true;
}
//---------------------------------------------- HaarEvaluator --------------------------------------- //---------------------------------------------- HaarEvaluator ---------------------------------------
bool HaarEvaluator::Feature :: read( const FileNode& node ) bool HaarEvaluator::Feature :: read( const FileNode& node )
...@@ -476,24 +537,32 @@ HaarEvaluator::HaarEvaluator() ...@@ -476,24 +537,32 @@ HaarEvaluator::HaarEvaluator()
{ {
optfeaturesPtr = 0; optfeaturesPtr = 0;
pwin = 0; pwin = 0;
localSize = Size(4, 2);
lbufSize = Size(0, 0);
nchannels = 0;
} }
HaarEvaluator::~HaarEvaluator() HaarEvaluator::~HaarEvaluator()
{ {
} }
bool HaarEvaluator::read(const FileNode& node) bool HaarEvaluator::read(const FileNode& node, Size _origWinSize)
{ {
if (!FeatureEvaluator::read(node, _origWinSize))
return false;
size_t i, n = node.size(); size_t i, n = node.size();
CV_Assert(n > 0); CV_Assert(n > 0);
if(features.empty()) if(features.empty())
features = makePtr<std::vector<Feature> >(); features = makePtr<std::vector<Feature> >();
if(optfeatures.empty()) if(optfeatures.empty())
optfeatures = makePtr<std::vector<OptFeature> >(); optfeatures = makePtr<std::vector<OptFeature> >();
if (optfeatures_lbuf.empty())
optfeatures_lbuf = makePtr<std::vector<OptFeature> >();
features->resize(n); features->resize(n);
FileNodeIterator it = node.begin(); FileNodeIterator it = node.begin();
hasTiltedFeatures = false; hasTiltedFeatures = false;
std::vector<Feature>& ff = *features; std::vector<Feature>& ff = *features;
sumSize0 = Size(); sbufSize = Size();
ufbuf.release(); ufbuf.release();
for(i = 0; i < n; i++, ++it) for(i = 0; i < n; i++, ++it)
...@@ -503,143 +572,148 @@ bool HaarEvaluator::read(const FileNode& node) ...@@ -503,143 +572,148 @@ bool HaarEvaluator::read(const FileNode& node)
if( ff[i].tilted ) if( ff[i].tilted )
hasTiltedFeatures = true; hasTiltedFeatures = true;
} }
nchannels = hasTiltedFeatures ? 3 : 2;
normrect = Rect(1, 1, origWinSize.width - 2, origWinSize.height - 2);
if (ocl::haveOpenCL())
{
String vname = ocl::Device::getDefault().vendor();
if (vname == "Advanced Micro Devices, Inc." ||
vname == "AMD")
localSize = Size(8, 8);
lbufSize = Size(origWinSize.width + localSize.width,
origWinSize.height + localSize.height);
if (lbufSize.area() > 1024)
lbufSize = Size(0, 0);
}
return true; return true;
} }
Ptr<FeatureEvaluator> HaarEvaluator::clone() const Ptr<FeatureEvaluator> HaarEvaluator::clone() const
{ {
Ptr<HaarEvaluator> ret = makePtr<HaarEvaluator>(); Ptr<HaarEvaluator> ret = makePtr<HaarEvaluator>();
ret->origWinSize = origWinSize; *ret = *this;
ret->features = features;
ret->optfeatures = optfeatures;
ret->optfeaturesPtr = optfeatures->empty() ? 0 : &(*(ret->optfeatures))[0];
ret->hasTiltedFeatures = hasTiltedFeatures;
ret->sum0 = sum0; ret->sqsum0 = sqsum0;
ret->sum = sum; ret->sqsum = sqsum;
ret->usum0 = usum0; ret->usqsum0 = usqsum0; ret->ufbuf = ufbuf;
ret->normrect = normrect;
memcpy( ret->nofs, nofs, 4*sizeof(nofs[0]) );
ret->pwin = pwin;
ret->varianceNormFactor = varianceNormFactor;
return ret; return ret;
} }
bool HaarEvaluator::setImage( InputArray _image, Size _origWinSize, Size _sumSize )
{
Size imgsz = _image.size();
int cols = imgsz.width, rows = imgsz.height;
if (imgsz.width < origWinSize.width || imgsz.height < origWinSize.height)
return false;
origWinSize = _origWinSize;
normrect = Rect(1, 1, origWinSize.width-2, origWinSize.height-2);
int rn = _sumSize.height, cn = _sumSize.width, rn_scale = hasTiltedFeatures ? 2 : 1; void HaarEvaluator::computeChannels(int scaleIdx, InputArray img)
int sumStep, tofs = 0; {
CV_Assert(rn >= rows+1 && cn >= cols+1); const ScaleData& s = scaleData->at(scaleIdx);
tofs = (int)sbufSize.area();
sqofs = hasTiltedFeatures ? tofs*2 : tofs;
if( _image.isUMat() ) if (img.isUMat())
{ {
usum0.create(rn*rn_scale, cn, CV_32S); int sx = s.layer_ofs % sbufSize.width;
usqsum0.create(rn, cn, CV_32S); int sy = s.layer_ofs / sbufSize.width;
usum = UMat(usum0, Rect(0, 0, cols+1, rows+1)); int sqy = sy + (sqofs / sbufSize.width);
usqsum = UMat(usqsum0, Rect(0, 0, cols, rows)); UMat sum(usbuf, Rect(sx, sy, s.szi.width, s.szi.height));
UMat sqsum(usbuf, Rect(sx, sqy, s.szi.width, s.szi.height));
sqsum.flags = (sqsum.flags & ~UMat::DEPTH_MASK) | CV_32F;
if( hasTiltedFeatures ) if (hasTiltedFeatures)
{ {
UMat utilted(usum0, Rect(0, _sumSize.height, cols+1, rows+1)); int sty = sy + (tofs / sbufSize.width);
integral(_image, usum, noArray(), utilted, CV_32S); UMat tilted(usbuf, Rect(sx, sty, s.szi.width, s.szi.height));
tofs = (int)((utilted.offset - usum.offset)/sizeof(int)); integral(img, sum, sqsum, tilted, CV_32S, CV_32F);
} }
else else
{ {
integral(_image, usum, noArray(), noArray(), CV_32S); UMatData* u = sqsum.u;
integral(img, sum, sqsum, noArray(), CV_32S, CV_32F);
CV_Assert(sqsum.u == u && sqsum.size() == s.szi && sqsum.type()==CV_32F);
} }
sqrBoxFilter(_image, usqsum, CV_32S,
Size(normrect.width, normrect.height),
Point(0, 0), false);
/*sqrBoxFilter(_image.getMat(), sqsum, CV_32S,
Size(normrect.width, normrect.height),
Point(0, 0), false);
sqsum.copyTo(usqsum);*/
sumStep = (int)(usum.step/usum.elemSize());
} }
else else
{ {
sum0.create(rn*rn_scale, cn, CV_32S); Mat sum(s.szi, CV_32S, sbuf.ptr<int>() + s.layer_ofs, sbuf.step);
sqsum0.create(rn, cn, CV_32S); Mat sqsum(s.szi, CV_32F, sum.ptr<int>() + sqofs, sbuf.step);
sum = sum0(Rect(0, 0, cols+1, rows+1));
sqsum = sqsum0(Rect(0, 0, cols, rows));
if( hasTiltedFeatures ) if (hasTiltedFeatures)
{ {
Mat tilted = sum0(Rect(0, _sumSize.height, cols+1, rows+1)); Mat tilted(s.szi, CV_32S, sum.ptr<int>() + tofs, sbuf.step);
integral(_image, sum, noArray(), tilted, CV_32S); integral(img, sum, sqsum, tilted, CV_32S, CV_32F);
tofs = (int)((tilted.data - sum.data)/sizeof(int));
} }
else else
integral(_image, sum, noArray(), noArray(), CV_32S); integral(img, sum, sqsum, noArray(), CV_32S, CV_32F);
sqrBoxFilter(_image, sqsum, CV_32S,
Size(normrect.width, normrect.height),
Point(0, 0), false);
sumStep = (int)(sum.step/sum.elemSize());
} }
}
CV_SUM_OFS( nofs[0], nofs[1], nofs[2], nofs[3], 0, normrect, sumStep ); void HaarEvaluator::computeOptFeatures()
{
int sstep = sbufSize.width;
CV_SUM_OFS( nofs[0], nofs[1], nofs[2], nofs[3], 0, normrect, sstep );
size_t fi, nfeatures = features->size(); size_t fi, nfeatures = features->size();
const std::vector<Feature>& ff = *features; const std::vector<Feature>& ff = *features;
if( sumSize0 != _sumSize )
{
optfeatures->resize(nfeatures); optfeatures->resize(nfeatures);
optfeaturesPtr = &(*optfeatures)[0]; optfeaturesPtr = &(*optfeatures)[0];
for( fi = 0; fi < nfeatures; fi++ ) for( fi = 0; fi < nfeatures; fi++ )
optfeaturesPtr[fi].setOffsets( ff[fi], sumStep, tofs ); optfeaturesPtr[fi].setOffsets( ff[fi], sstep, tofs );
} optfeatures_lbuf->resize(nfeatures);
if( _image.isUMat() && (sumSize0 != _sumSize || ufbuf.empty()) )
copyVectorToUMat(*optfeatures, ufbuf);
sumSize0 = _sumSize;
return true; for( fi = 0; fi < nfeatures; fi++ )
optfeatures_lbuf->at(fi).setOffsets(ff[fi], lbufSize.width > 0 ? lbufSize.width : sstep, tofs);
copyVectorToUMat(*optfeatures_lbuf, ufbuf);
} }
bool HaarEvaluator::setWindow( Point pt ) bool HaarEvaluator::setWindow( Point pt, int scaleIdx )
{ {
const ScaleData& s = getScaleData(scaleIdx);
if( pt.x < 0 || pt.y < 0 || if( pt.x < 0 || pt.y < 0 ||
pt.x + origWinSize.width >= sum.cols || pt.x + origWinSize.width >= s.szi.width ||
pt.y + origWinSize.height >= sum.rows ) pt.y + origWinSize.height >= s.szi.height )
return false; return false;
const int* p = &sum.at<int>(pt); pwin = &sbuf.at<int>(pt) + s.layer_ofs;
int valsum = CALC_SUM_OFS(nofs, p); const float* pq = (const float*)(pwin + sqofs);
double valsqsum = sqsum.at<int>(pt.y + normrect.y, pt.x + normrect.x); int valsum = CALC_SUM_OFS(nofs, pwin);
float valsqsum = CALC_SUM_OFS(nofs, pq);
double nf = (double)normrect.area() * valsqsum - (double)valsum * valsum; double nf = (double)normrect.area() * valsqsum - (double)valsum * valsum;
if( nf > 0. ) if( nf > 0. )
nf = std::sqrt(nf); nf = std::sqrt(nf);
else else
nf = 1.; nf = 1.;
varianceNormFactor = 1./nf; varianceNormFactor = (float)(1./nf);
pwin = p;
return true; return true;
} }
// Fills the optimized feature from its description _f: copies the weights of
// the three rectangles and computes the four corner offsets of each rectangle
// for a buffer with the given row step. Tilted features use CV_TILTED_OFS with
// base offset _tofs; upright ones use CV_SUM_OFS with base offset 0.
void HaarEvaluator::OptFeature::setOffsets( const Feature& _f, int step, int _tofs )
{
    for( int k = 0; k < 3; k++ )
    {
        weight[k] = _f.rect[k].weight;

        if( _f.tilted )
        {
            CV_TILTED_OFS( ofs[k][0], ofs[k][1], ofs[k][2], ofs[k][3], _tofs, _f.rect[k].r, step );
        }
        else
        {
            CV_SUM_OFS( ofs[k][0], ofs[k][1], ofs[k][2], ofs[k][3], 0, _f.rect[k].r, step );
        }
    }
}
Rect HaarEvaluator::getNormRect() const Rect HaarEvaluator::getNormRect() const
{ {
return normrect; return normrect;
} }
void HaarEvaluator::getUMats(std::vector<UMat>& bufs) int HaarEvaluator::getSquaresOffset() const
{ {
bufs.clear(); return sqofs;
bufs.push_back(usum);
bufs.push_back(usqsum);
bufs.push_back(ufbuf);
} }
//---------------------------------------------- LBPEvaluator ------------------------------------- //---------------------------------------------- LBPEvaluator -------------------------------------
...@@ -655,15 +729,26 @@ LBPEvaluator::LBPEvaluator() ...@@ -655,15 +729,26 @@ LBPEvaluator::LBPEvaluator()
{ {
features = makePtr<std::vector<Feature> >(); features = makePtr<std::vector<Feature> >();
optfeatures = makePtr<std::vector<OptFeature> >(); optfeatures = makePtr<std::vector<OptFeature> >();
scaleData = makePtr<std::vector<ScaleData> >();
} }
LBPEvaluator::~LBPEvaluator() LBPEvaluator::~LBPEvaluator()
{ {
} }
bool LBPEvaluator::read( const FileNode& node ) bool LBPEvaluator::read( const FileNode& node, Size _origWinSize )
{ {
if (!FeatureEvaluator::read(node, _origWinSize))
return false;
if(features.empty())
features = makePtr<std::vector<Feature> >();
if(optfeatures.empty())
optfeatures = makePtr<std::vector<OptFeature> >();
if (optfeatures_lbuf.empty())
optfeatures_lbuf = makePtr<std::vector<OptFeature> >();
features->resize(node.size()); features->resize(node.size());
optfeaturesPtr = &(*optfeatures)[0]; optfeaturesPtr = 0;
FileNodeIterator it = node.begin(), it_end = node.end(); FileNodeIterator it = node.begin(), it_end = node.end();
std::vector<Feature>& ff = *features; std::vector<Feature>& ff = *features;
for(int i = 0; it != it_end; ++it, i++) for(int i = 0; it != it_end; ++it, i++)
...@@ -671,274 +756,92 @@ bool LBPEvaluator::read( const FileNode& node ) ...@@ -671,274 +756,92 @@ bool LBPEvaluator::read( const FileNode& node )
if(!ff[i].read(*it)) if(!ff[i].read(*it))
return false; return false;
} }
nchannels = 1;
if (ocl::haveOpenCL())
{
const ocl::Device& device = ocl::Device::getDefault();
String vname = device.vendor();
if ((vname == "Advanced Micro Devices, Inc." ||
vname == "AMD") && !device.hostUnifiedMemory())
localSize = Size(8, 8);
}
return true; return true;
} }
Ptr<FeatureEvaluator> LBPEvaluator::clone() const Ptr<FeatureEvaluator> LBPEvaluator::clone() const
{ {
Ptr<LBPEvaluator> ret = makePtr<LBPEvaluator>(); Ptr<LBPEvaluator> ret = makePtr<LBPEvaluator>();
ret->origWinSize = origWinSize; *ret = *this;
ret->features = features;
ret->optfeatures = optfeatures;
ret->optfeaturesPtr = ret->optfeatures.empty() ? 0 : &(*ret->optfeatures)[0];
ret->sum0 = sum0, ret->sum = sum;
ret->pwin = pwin;
return ret; return ret;
} }
bool LBPEvaluator::setImage( InputArray _image, Size _origWinSize, Size _sumSize ) void LBPEvaluator::computeChannels(int scaleIdx, InputArray _img)
{ {
Size imgsz = _image.size(); const ScaleData& s = scaleData->at(scaleIdx);
int cols = imgsz.width, rows = imgsz.height;
if (imgsz.width < origWinSize.width || imgsz.height < origWinSize.height)
return false;
origWinSize = _origWinSize;
int rn = _sumSize.height, cn = _sumSize.width;
int sumStep;
CV_Assert(rn >= rows+1 && cn >= cols+1);
if( _image.isUMat() ) if (_img.isUMat())
{ {
usum0.create(rn, cn, CV_32S); int sx = s.layer_ofs % sbufSize.width;
usum = UMat(usum0, Rect(0, 0, cols+1, rows+1)); int sy = s.layer_ofs / sbufSize.width;
UMat sum(usbuf, Rect(sx, sy, s.szi.width, s.szi.height));
integral(_image, usum, noArray(), noArray(), CV_32S); integral(_img, sum, noArray(), noArray(), CV_32S);
sumStep = (int)(usum.step/usum.elemSize());
} }
else else
{ {
sum0.create(rn, cn, CV_32S); Mat sum(s.szi, CV_32S, sbuf.ptr<int>() + s.layer_ofs, sbuf.step);
sum = sum0(Rect(0, 0, cols+1, rows+1)); integral(_img, sum, noArray(), noArray(), CV_32S);
integral(_image, sum, noArray(), noArray(), CV_32S);
sumStep = (int)(sum.step/sum.elemSize());
} }
}
void LBPEvaluator::computeOptFeatures()
{
int sstep = sbufSize.width;
size_t fi, nfeatures = features->size(); size_t fi, nfeatures = features->size();
const std::vector<Feature>& ff = *features; const std::vector<Feature>& ff = *features;
if( sumSize0 != _sumSize )
{
optfeatures->resize(nfeatures); optfeatures->resize(nfeatures);
optfeaturesPtr = &(*optfeatures)[0]; optfeaturesPtr = &(*optfeatures)[0];
for( fi = 0; fi < nfeatures; fi++ ) for( fi = 0; fi < nfeatures; fi++ )
optfeaturesPtr[fi].setOffsets( ff[fi], sumStep ); optfeaturesPtr[fi].setOffsets( ff[fi], sstep );
}
if( _image.isUMat() && (sumSize0 != _sumSize || ufbuf.empty()) )
copyVectorToUMat(*optfeatures, ufbuf); copyVectorToUMat(*optfeatures, ufbuf);
sumSize0 = _sumSize;
return true;
}
// Selects the detection window whose upper-left corner is pt.
// Returns false (leaving pwin untouched) when pt has a negative coordinate or
// when pt plus the original window size reaches or exceeds the size of sum;
// otherwise points pwin at the element of sum located at pt and returns true.
bool LBPEvaluator::setWindow( Point pt )
{
    if( pt.x < 0 || pt.y < 0 )
        return false;

    if( pt.x + origWinSize.width >= sum.cols )
        return false;

    if( pt.y + origWinSize.height >= sum.rows )
        return false;

    pwin = &sum.at<int>(pt);
    return true;
}
void LBPEvaluator::getUMats(std::vector<UMat>& bufs) void LBPEvaluator::OptFeature::setOffsets( const Feature& _f, int step )
{ {
bufs.clear(); Rect tr = _f.rect;
bufs.push_back(usum); int w0 = tr.width;
bufs.push_back(ufbuf); int h0 = tr.height;
}
//---------------------------------------------- HOGEvaluator --------------------------------------- CV_SUM_OFS( ofs[0], ofs[1], ofs[4], ofs[5], 0, tr, step );
bool HOGEvaluator::Feature :: read( const FileNode& node ) tr.x += 2*w0;
{ CV_SUM_OFS( ofs[2], ofs[3], ofs[6], ofs[7], 0, tr, step );
FileNode rnode = node[CC_RECT]; tr.y += 2*h0;
FileNodeIterator it = rnode.begin(); CV_SUM_OFS( ofs[10], ofs[11], ofs[14], ofs[15], 0, tr, step );
it >> rect[0].x >> rect[0].y >> rect[0].width >> rect[0].height >> featComponent; tr.x -= 2*w0;
rect[1].x = rect[0].x + rect[0].width; CV_SUM_OFS( ofs[8], ofs[9], ofs[12], ofs[13], 0, tr, step );
rect[1].y = rect[0].y;
rect[2].x = rect[0].x;
rect[2].y = rect[0].y + rect[0].height;
rect[3].x = rect[0].x + rect[0].width;
rect[3].y = rect[0].y + rect[0].height;
rect[1].width = rect[2].width = rect[3].width = rect[0].width;
rect[1].height = rect[2].height = rect[3].height = rect[0].height;
return true;
}
HOGEvaluator::HOGEvaluator()
{
features = makePtr<std::vector<Feature> >();
} }
// Destructor; the body is intentionally empty.
HOGEvaluator::~HOGEvaluator()
{
}
// Reads the HOG features from the given file node: the feature vector is
// resized to the number of child nodes and each child is parsed with
// Feature::read(). Returns false as soon as one feature fails to read,
// true otherwise (including when the node has no children).
bool HOGEvaluator::read( const FileNode& node )
{
    features->resize(node.size());
    // Taking &v[0] of an empty vector is undefined, so keep a null pointer
    // when the node contains no features.
    featuresPtr = features->empty() ? 0 : &(*features)[0];
    FileNodeIterator it = node.begin(), it_end = node.end();
    for(int i = 0; it != it_end; ++it, i++)
    {
        if(!featuresPtr[i].read(*it))
            return false;
    }
    return true;
}
Ptr<FeatureEvaluator> HOGEvaluator::clone() const bool LBPEvaluator::setWindow( Point pt, int scaleIdx )
{ {
Ptr<HOGEvaluator> ret = makePtr<HOGEvaluator>(); CV_Assert(0 <= scaleIdx && scaleIdx < (int)scaleData->size());
ret->origWinSize = origWinSize; const ScaleData& s = scaleData->at(scaleIdx);
ret->features = features;
ret->featuresPtr = &(*ret->features)[0];
ret->offset = offset;
ret->hist = hist;
ret->normSum = normSum;
return ret;
}
// Assigns a new image to the evaluator.
// Stores winSize as origWinSize and returns false when the image is smaller
// than that window in either dimension. Otherwise re-creates the
// Feature::BIN_NUM histogram planes and normSum (each one row and one column
// larger than the image, CV_32FC1), fills them via integralHistogram(), lets
// every feature refresh its pointers with updatePtrs(), and returns true.
// The third (unnamed) Size argument is ignored.
bool HOGEvaluator::setImage( InputArray _image, Size winSize, Size )
{
    Mat image = _image.getMat();

    origWinSize = winSize;
    if( image.cols < origWinSize.width || image.rows < origWinSize.height )
        return false;

    const int planeRows = image.rows + 1;
    const int planeCols = image.cols + 1;

    hist.clear();
    int bin = 0;
    while( bin < Feature::BIN_NUM )
    {
        hist.push_back( Mat(planeRows, planeCols, CV_32FC1) );
        ++bin;
    }
    normSum.create( planeRows, planeCols, CV_32FC1 );

    integralHistogram( image, hist, normSum, Feature::BIN_NUM );

    const size_t featCount = features->size();
    for( size_t featIdx = 0; featIdx != featCount; ++featIdx )
        featuresPtr[featIdx].updatePtrs( hist, normSum );

    return true;
}
bool HOGEvaluator::setWindow(Point pt)
{
if( pt.x < 0 || pt.y < 0 || if( pt.x < 0 || pt.y < 0 ||
pt.x + origWinSize.width >= hist[0].cols-2 || pt.x + origWinSize.width >= s.szi.width ||
pt.y + origWinSize.height >= hist[0].rows-2 ) pt.y + origWinSize.height >= s.szi.height )
return false; return false;
offset = pt.y * ((int)hist[0].step/sizeof(float)) + pt.x;
pwin = &sbuf.at<int>(pt) + s.layer_ofs;
return true; return true;
} }
// Computes, for image `img`, one integral image of gradient magnitude per
// orientation bin (`histogram[0..nbins-1]`) plus an integral image of the
// gradient magnitude over all pixels (`norm`).
//
// Parameters:
//   img       - input image; its type is asserted below.
//   histogram - nbins float planes, written through float pointers. The write
//               pattern (start at row 1 / column 1, then img.rows x img.cols
//               values) needs each plane to be at least (rows+1) x (cols+1).
//   norm      - receives integral(grad), computed with grad's depth.
//   nbins     - number of orientation bins.
void HOGEvaluator::integralHistogram(const Mat &img, std::vector<Mat> &histogram, Mat &norm, int nbins) const
{
// NOTE(review): CV_8UC3 passes this assert, but the pixel reads below index
// one byte per x with no channel stride, which looks single-channel only --
// confirm before relying on 3-channel input.
CV_Assert( img.type() == CV_8U || img.type() == CV_8UC3 );
int x, y, binIdx;
Size gradSize(img.size());
Size histSize(histogram[0].size());
// Per-pixel gradient magnitude and quantized orientation bin.
Mat grad(gradSize, CV_32F);
Mat qangle(gradSize, CV_8U);
// Border lookup tables. xmap and ymap are each valid for indices -1..size,
// hence the "+ 1" offset on xmap and the "+ 2" gap before ymap.
AutoBuffer<int> mapbuf(gradSize.width + gradSize.height + 4);
int* xmap = (int*)mapbuf + 1;
int* ymap = xmap + gradSize.width + 2;
const int borderType = (int)BORDER_REPLICATE;
for( x = -1; x < gradSize.width + 1; x++ )
xmap[x] = borderInterpolate(x, gradSize.width, borderType);
for( y = -1; y < gradSize.height + 1; y++ )
ymap[y] = borderInterpolate(y, gradSize.height, borderType);
int width = gradSize.width;
// One scratch buffer holding four consecutive rows of `width` floats:
// dx, dy, magnitude, angle. The Mat objects below are constructed directly
// on top of that buffer, so cartToPolar's outputs are read back via dbuf.
AutoBuffer<float> _dbuf(width*4);
float* dbuf = _dbuf;
Mat Dx(1, width, CV_32F, dbuf);
Mat Dy(1, width, CV_32F, dbuf + width);
Mat Mag(1, width, CV_32F, dbuf + width*2);
Mat Angle(1, width, CV_32F, dbuf + width*3);
// Converts an angle to bin units: nbins bins per pi.
float angleScale = (float)(nbins/CV_PI);
for( y = 0; y < gradSize.height; y++ )
{
// Current row and its vertical neighbours, with borders resolved via ymap.
const uchar* currPtr = img.data + img.step*ymap[y];
const uchar* prevPtr = img.data + img.step*ymap[y-1];
const uchar* nextPtr = img.data + img.step*ymap[y+1];
float* gradPtr = (float*)grad.ptr(y);
uchar* qanglePtr = (uchar*)qangle.ptr(y);
// Central differences: horizontal into the dx row, vertical into the dy row.
for( x = 0; x < width; x++ )
{
dbuf[x] = (float)(currPtr[xmap[x+1]] - currPtr[xmap[x-1]]);
dbuf[width + x] = (float)(nextPtr[xmap[x]] - prevPtr[xmap[x]]);
}
// Last argument false: angle is not requested in degrees (angleScale above
// divides by CV_PI accordingly).
cartToPolar( Dx, Dy, Mag, Angle, false );
for( x = 0; x < width; x++ )
{
float mag = dbuf[x+width*2];
float angle = dbuf[x+width*3];
// Shift by half a bin, take the integer bin index, then wrap it once into
// [0, nbins); an index of bidx + nbins maps back onto bidx.
angle = angle*angleScale - 0.5f;
int bidx = cvFloor(angle);
angle -= bidx;
if( bidx < 0 )
bidx += nbins;
else if( bidx >= nbins )
bidx -= nbins;
qanglePtr[x] = (uchar)bidx;
gradPtr[x] = mag;
}
}
// Integral image of the gradient magnitude.
integral(grad, norm, grad.depth());
float* histBuf;
const float* magBuf;
const uchar* binsBuf;
// Row strides expressed in elements rather than bytes.
int binsStep = (int)( qangle.step / sizeof(uchar) );
int histStep = (int)( histogram[0].step / sizeof(float) );
int magStep = (int)( grad.step / sizeof(float) );
// Build one integral image per bin, counting only magnitudes of pixels whose
// quantized orientation equals that bin.
for( binIdx = 0; binIdx < nbins; binIdx++ )
{
histBuf = (float*)histogram[binIdx].data;
magBuf = (const float*)grad.data;
binsBuf = (const uchar*)qangle.data;
// Zero the first row, then move to row 1 / column 1 where the sums start.
memset( histBuf, 0, histSize.width * sizeof(histBuf[0]) );
histBuf += histStep + 1;
for( y = 0; y < qangle.rows; y++ )
{
// Zero the first column of this row.
histBuf[-1] = 0.f;
// Running sum along the current row for this bin.
float strSum = 0.f;
for( x = 0; x < qangle.cols; x++ )
{
if( binsBuf[x] == binIdx )
strSum += magBuf[x];
// Integral value = value directly above + running sum of this row.
histBuf[x] = histBuf[-histStep + x] + strSum;
}
histBuf += histStep;
binsBuf += binsStep;
magBuf += magStep;
}
}
}
Ptr<FeatureEvaluator> FeatureEvaluator::create( int featureType ) Ptr<FeatureEvaluator> FeatureEvaluator::create( int featureType )
{ {
return featureType == HAAR ? Ptr<FeatureEvaluator>(new HaarEvaluator) : return featureType == HAAR ? Ptr<FeatureEvaluator>(new HaarEvaluator) :
featureType == LBP ? Ptr<FeatureEvaluator>(new LBPEvaluator) : featureType == LBP ? Ptr<FeatureEvaluator>(new LBPEvaluator) :
featureType == HOG ? Ptr<FeatureEvaluator>(new HOGEvaluator) :
Ptr<FeatureEvaluator>(); Ptr<FeatureEvaluator>();
} }
...@@ -981,24 +884,21 @@ void CascadeClassifierImpl::read(const FileNode& node) ...@@ -981,24 +884,21 @@ void CascadeClassifierImpl::read(const FileNode& node)
read_(node); read_(node);
} }
int CascadeClassifierImpl::runAt( Ptr<FeatureEvaluator>& evaluator, Point pt, double& weight ) int CascadeClassifierImpl::runAt( Ptr<FeatureEvaluator>& evaluator, Point pt, int scaleIdx, double& weight )
{ {
CV_Assert( !oldCascade ); assert( !oldCascade &&
(data.featureType == FeatureEvaluator::HAAR ||
assert( data.featureType == FeatureEvaluator::HAAR ||
data.featureType == FeatureEvaluator::LBP || data.featureType == FeatureEvaluator::LBP ||
data.featureType == FeatureEvaluator::HOG ); data.featureType == FeatureEvaluator::HOG) );
if( !evaluator->setWindow(pt) ) if( !evaluator->setWindow(pt, scaleIdx) )
return -1; return -1;
if( data.isStumpBased() ) if( data.maxNodesPerTree == 1 )
{ {
if( data.featureType == FeatureEvaluator::HAAR ) if( data.featureType == FeatureEvaluator::HAAR )
return predictOrderedStump<HaarEvaluator>( *this, evaluator, weight ); return predictOrderedStump<HaarEvaluator>( *this, evaluator, weight );
else if( data.featureType == FeatureEvaluator::LBP ) else if( data.featureType == FeatureEvaluator::LBP )
return predictCategoricalStump<LBPEvaluator>( *this, evaluator, weight ); return predictCategoricalStump<LBPEvaluator>( *this, evaluator, weight );
else if( data.featureType == FeatureEvaluator::HOG )
return predictOrderedStump<HOGEvaluator>( *this, evaluator, weight );
else else
return -2; return -2;
} }
...@@ -1008,8 +908,6 @@ int CascadeClassifierImpl::runAt( Ptr<FeatureEvaluator>& evaluator, Point pt, do ...@@ -1008,8 +908,6 @@ int CascadeClassifierImpl::runAt( Ptr<FeatureEvaluator>& evaluator, Point pt, do
return predictOrdered<HaarEvaluator>( *this, evaluator, weight ); return predictOrdered<HaarEvaluator>( *this, evaluator, weight );
else if( data.featureType == FeatureEvaluator::LBP ) else if( data.featureType == FeatureEvaluator::LBP )
return predictCategorical<LBPEvaluator>( *this, evaluator, weight ); return predictCategorical<LBPEvaluator>( *this, evaluator, weight );
else if( data.featureType == FeatureEvaluator::HOG )
return predictOrdered<HOGEvaluator>( *this, evaluator, weight );
else else
return -2; return -2;
} }
...@@ -1036,14 +934,17 @@ Ptr<BaseCascadeClassifier::MaskGenerator> createFaceDetectionMaskGenerator() ...@@ -1036,14 +934,17 @@ Ptr<BaseCascadeClassifier::MaskGenerator> createFaceDetectionMaskGenerator()
class CascadeClassifierInvoker : public ParallelLoopBody class CascadeClassifierInvoker : public ParallelLoopBody
{ {
public: public:
CascadeClassifierInvoker( CascadeClassifierImpl& _cc, Size _sz1, int _stripSize, int _yStep, double _factor, CascadeClassifierInvoker( CascadeClassifierImpl& _cc, int _nscales, int _nstripes,
std::vector<Rect>& _vec, std::vector<int>& _levels, std::vector<double>& _weights, bool outputLevels, const Mat& _mask, Mutex* _mtx) const FeatureEvaluator::ScaleData* _scaleData,
const int* _stripeSizes, std::vector<Rect>& _vec,
std::vector<int>& _levels, std::vector<double>& _weights,
bool outputLevels, const Mat& _mask, Mutex* _mtx)
{ {
classifier = &_cc; classifier = &_cc;
processingRectSize = _sz1; nscales = _nscales;
stripSize = _stripSize; nstripes = _nstripes;
yStep = _yStep; scaleData = _scaleData;
scalingFactor = _factor; stripeSizes = _stripeSizes;
rectangles = &_vec; rectangles = &_vec;
rejectLevels = outputLevels ? &_levels : 0; rejectLevels = outputLevels ? &_levels : 0;
levelWeights = outputLevels ? &_weights : 0; levelWeights = outputLevels ? &_weights : 0;
...@@ -1054,27 +955,26 @@ public: ...@@ -1054,27 +955,26 @@ public:
void operator()(const Range& range) const void operator()(const Range& range) const
{ {
Ptr<FeatureEvaluator> evaluator = classifier->featureEvaluator->clone(); Ptr<FeatureEvaluator> evaluator = classifier->featureEvaluator->clone();
double gypWeight = 0.;
Size origWinSize = classifier->data.origWinSize;
Size winSize(cvRound(classifier->data.origWinSize.width * scalingFactor), for( int scaleIdx = 0; scaleIdx < nscales; scaleIdx++ )
cvRound(classifier->data.origWinSize.height * scalingFactor)); {
const FeatureEvaluator::ScaleData& s = scaleData[scaleIdx];
float scalingFactor = s.scale;
int yStep = s.ystep;
int stripeSize = stripeSizes[scaleIdx];
int y0 = range.start*stripeSize;
Size szw = s.getWorkingSize(origWinSize);
int y1 = std::min(range.end*stripeSize, szw.height);
Size winSize(cvRound(origWinSize.width * scalingFactor),
cvRound(origWinSize.height * scalingFactor));
int y1 = range.start * stripSize; for( int y = y0; y < y1; y += yStep )
int y2 = std::min(range.end * stripSize, processingRectSize.height);
for( int y = y1; y < y2; y += yStep )
{ {
for( int x = 0; x < processingRectSize.width; x += yStep ) for( int x = 0; x < szw.width; x += yStep )
{ {
if ( (!mask.empty()) && (mask.at<uchar>(Point(x,y))==0)) { int result = classifier->runAt(evaluator, Point(x, y), scaleIdx, gypWeight);
continue;
}
double gypWeight;
int result = classifier->runAt(evaluator, Point(x, y), gypWeight);
#if defined (LOG_CASCADE_STATISTIC)
logger.setPoint(Point(x, y), result);
#endif
if( rejectLevels ) if( rejectLevels )
{ {
if( result == 1 ) if( result == 1 )
...@@ -1082,7 +982,9 @@ public: ...@@ -1082,7 +982,9 @@ public:
if( classifier->data.stages.size() + result == 0 ) if( classifier->data.stages.size() + result == 0 )
{ {
mtx->lock(); mtx->lock();
rectangles->push_back(Rect(cvRound(x*scalingFactor), cvRound(y*scalingFactor), winSize.width, winSize.height)); rectangles->push_back(Rect(cvRound(x*scalingFactor),
cvRound(y*scalingFactor),
winSize.width, winSize.height));
rejectLevels->push_back(-result); rejectLevels->push_back(-result);
levelWeights->push_back(gypWeight); levelWeights->push_back(gypWeight);
mtx->unlock(); mtx->unlock();
...@@ -1091,7 +993,8 @@ public: ...@@ -1091,7 +993,8 @@ public:
else if( result > 0 ) else if( result > 0 )
{ {
mtx->lock(); mtx->lock();
rectangles->push_back(Rect(cvRound(x*scalingFactor), cvRound(y*scalingFactor), rectangles->push_back(Rect(cvRound(x*scalingFactor),
cvRound(y*scalingFactor),
winSize.width, winSize.height)); winSize.width, winSize.height));
mtx->unlock(); mtx->unlock();
} }
...@@ -1100,155 +1003,155 @@ public: ...@@ -1100,155 +1003,155 @@ public:
} }
} }
} }
}
CascadeClassifierImpl* classifier; CascadeClassifierImpl* classifier;
std::vector<Rect>* rectangles; std::vector<Rect>* rectangles;
Size processingRectSize; int nscales, nstripes;
int stripSize, yStep; const FeatureEvaluator::ScaleData* scaleData;
double scalingFactor; const int* stripeSizes;
std::vector<int> *rejectLevels; std::vector<int> *rejectLevels;
std::vector<double> *levelWeights; std::vector<double> *levelWeights;
std::vector<float> scales;
Mat mask; Mat mask;
Mutex* mtx; Mutex* mtx;
}; };
struct getRect { Rect operator ()(const CvAvgComp& e) const { return e.rect; } }; struct getRect { Rect operator ()(const CvAvgComp& e) const { return e.rect; } };
struct getNeighbors { int operator ()(const CvAvgComp& e) const { return e.neighbors; } }; struct getNeighbors { int operator ()(const CvAvgComp& e) const { return e.neighbors; } };
bool CascadeClassifierImpl::detectSingleScale( InputArray _image, Size processingRectSize, bool CascadeClassifierImpl::ocl_detectMultiScaleNoGrouping( const std::vector<float>& scales,
int yStep, double factor, std::vector<Rect>& candidates, std::vector<Rect>& candidates )
std::vector<int>& levels, std::vector<double>& weights,
Size sumSize0, bool outputRejectLevels )
{
if( !featureEvaluator->setImage(_image, data.origWinSize, sumSize0) )
return false;
#if defined (LOG_CASCADE_STATISTIC)
logger.setImage(image);
#endif
Mat currentMask;
if (maskGenerator) {
Mat image = _image.getMat();
currentMask=maskGenerator->generateMask(image);
}
std::vector<Rect> candidatesVector;
std::vector<int> rejectLevels;
std::vector<double> levelWeights;
int stripCount, stripSize;
const int PTS_PER_THREAD = 1000;
stripCount = ((processingRectSize.width/yStep)*(processingRectSize.height + yStep-1)/yStep + PTS_PER_THREAD/2)/PTS_PER_THREAD;
stripCount = std::min(std::max(stripCount, 1), 100);
stripSize = (((processingRectSize.height + stripCount - 1)/stripCount + yStep-1)/yStep)*yStep;
if( outputRejectLevels )
{
parallel_for_(Range(0, stripCount), CascadeClassifierInvoker( *this, processingRectSize, stripSize, yStep, factor,
candidatesVector, rejectLevels, levelWeights, true, currentMask, &mtx));
levels.insert( levels.end(), rejectLevels.begin(), rejectLevels.end() );
weights.insert( weights.end(), levelWeights.begin(), levelWeights.end() );
}
else
{
parallel_for_(Range(0, stripCount), CascadeClassifierInvoker( *this, processingRectSize, stripSize, yStep, factor,
candidatesVector, rejectLevels, levelWeights, false, currentMask, &mtx));
}
candidates.insert( candidates.end(), candidatesVector.begin(), candidatesVector.end() );
#if defined (LOG_CASCADE_STATISTIC)
logger.write();
#endif
return true;
}
bool CascadeClassifierImpl::ocl_detectSingleScale( InputArray _image, Size processingRectSize,
int yStep, double factor, Size sumSize0 )
{ {
int featureType = getFeatureType(); int featureType = getFeatureType();
std::vector<UMat> bufs; std::vector<UMat> bufs;
size_t globalsize[] = { processingRectSize.width/yStep, processingRectSize.height/yStep }; featureEvaluator->getUMats(bufs);
Size localsz = featureEvaluator->getLocalSize();
if( localsz.area() == 0 )
return false;
Size lbufSize = featureEvaluator->getLocalBufSize();
size_t localsize[] = { localsz.width, localsz.height };
const int grp_per_CU = 12;
size_t globalsize[] = { grp_per_CU*ocl::Device::getDefault().maxComputeUnits()*localsize[0], localsize[1] };
bool ok = false; bool ok = false;
ufacepos.create(1, MAX_FACES*3+1, CV_32S);
UMat ufacepos_count(ufacepos, Rect(0, 0, 1, 1));
ufacepos_count.setTo(Scalar::all(0));
if( ustages.empty() ) if( ustages.empty() )
{ {
copyVectorToUMat(data.stages, ustages); copyVectorToUMat(data.stages, ustages);
copyVectorToUMat(data.stumps, ustumps); if (!data.stumps.empty())
copyVectorToUMat(data.stumps, unodes);
else
copyVectorToUMat(data.nodes, unodes);
copyVectorToUMat(data.leaves, uleaves);
if( !data.subsets.empty() ) if( !data.subsets.empty() )
copyVectorToUMat(data.subsets, usubsets); copyVectorToUMat(data.subsets, usubsets);
} }
int nstages = (int)data.stages.size();
if( featureType == FeatureEvaluator::HAAR ) if( featureType == FeatureEvaluator::HAAR )
{ {
Ptr<HaarEvaluator> haar = featureEvaluator.dynamicCast<HaarEvaluator>(); Ptr<HaarEvaluator> haar = featureEvaluator.dynamicCast<HaarEvaluator>();
if( haar.empty() ) if( haar.empty() )
return false; return false;
haar->setImage(_image, data.origWinSize, sumSize0);
if( haarKernel.empty() ) if( haarKernel.empty() )
{ {
haarKernel.create("runHaarClassifierStump", ocl::objdetect::cascadedetect_oclsrc, ""); String opts;
if (lbufSize.area())
opts = format("-D LOCAL_SIZE_X=%d -D LOCAL_SIZE_Y=%d -D SUM_BUF_SIZE=%d -D SUM_BUF_STEP=%d -D NODE_COUNT=%d",
localsz.width, localsz.height, lbufSize.area(), lbufSize.width, data.maxNodesPerTree);
else
opts = format("-D LOCAL_SIZE_X=%d -D LOCAL_SIZE_Y=%d -D NODE_COUNT=%d",
localsz.width, localsz.height, data.maxNodesPerTree);
haarKernel.create("runHaarClassifier", ocl::objdetect::cascadedetect_oclsrc, opts);
if( haarKernel.empty() ) if( haarKernel.empty() )
return false; return false;
} }
haar->getUMats(bufs);
Rect normrect = haar->getNormRect(); Rect normrect = haar->getNormRect();
int sqofs = haar->getSquaresOffset();
int splitstage_ocl = 1;
haarKernel.args(ocl::KernelArg::ReadOnlyNoSize(bufs[0]), // sum haarKernel.args((int)scales.size(),
ocl::KernelArg::ReadOnlyNoSize(bufs[1]), // sqsum ocl::KernelArg::PtrReadOnly(bufs[0]), // scaleData
ocl::KernelArg::ReadOnlyNoSize(bufs[1]), // sum
ocl::KernelArg::PtrReadOnly(bufs[2]), // optfeatures ocl::KernelArg::PtrReadOnly(bufs[2]), // optfeatures
// cascade classifier // cascade classifier
(int)data.stages.size(), splitstage_ocl, nstages,
ocl::KernelArg::PtrReadOnly(ustages), ocl::KernelArg::PtrReadOnly(ustages),
ocl::KernelArg::PtrReadOnly(ustumps), ocl::KernelArg::PtrReadOnly(unodes),
ocl::KernelArg::PtrReadOnly(uleaves),
ocl::KernelArg::PtrWriteOnly(ufacepos), // positions ocl::KernelArg::PtrWriteOnly(ufacepos), // positions
processingRectSize, normrect, sqofs, data.origWinSize, (int)MAX_FACES);
yStep, (float)factor, ok = haarKernel.run(2, globalsize, localsize, true);
normrect, data.origWinSize, (int)MAX_FACES);
ok = haarKernel.run(2, globalsize, 0, true);
} }
else if( featureType == FeatureEvaluator::LBP ) else if( featureType == FeatureEvaluator::LBP )
{ {
if (data.maxNodesPerTree > 1)
return false;
Ptr<LBPEvaluator> lbp = featureEvaluator.dynamicCast<LBPEvaluator>(); Ptr<LBPEvaluator> lbp = featureEvaluator.dynamicCast<LBPEvaluator>();
if( lbp.empty() ) if( lbp.empty() )
return false; return false;
lbp->setImage(_image, data.origWinSize, sumSize0);
if( lbpKernel.empty() ) if( lbpKernel.empty() )
{ {
lbpKernel.create("runLBPClassifierStump", ocl::objdetect::cascadedetect_oclsrc, ""); String opts;
if (lbufSize.area())
opts = format("-D LOCAL_SIZE_X=%d -D LOCAL_SIZE_Y=%d -D SUM_BUF_SIZE=%d -D SUM_BUF_STEP=%d",
localsz.width, localsz.height, lbufSize.area(), lbufSize.width);
else
opts = format("-D LOCAL_SIZE_X=%d -D LOCAL_SIZE_Y=%d", localsz.width, localsz.height);
lbpKernel.create("runLBPClassifierStumpSimple", ocl::objdetect::cascadedetect_oclsrc, opts);
if( lbpKernel.empty() ) if( lbpKernel.empty() )
return false; return false;
} }
lbp->getUMats(bufs); int splitstage_ocl = 1;
int subsetSize = (data.ncategories + 31)/32; int subsetSize = (data.ncategories + 31)/32;
lbpKernel.args(ocl::KernelArg::ReadOnlyNoSize(bufs[0]), // sum lbpKernel.args((int)scales.size(),
ocl::KernelArg::PtrReadOnly(bufs[1]), // optfeatures ocl::KernelArg::PtrReadOnly(bufs[0]), // scaleData
ocl::KernelArg::ReadOnlyNoSize(bufs[1]), // sum
ocl::KernelArg::PtrReadOnly(bufs[2]), // optfeatures
// cascade classifier // cascade classifier
(int)data.stages.size(), splitstage_ocl, nstages,
ocl::KernelArg::PtrReadOnly(ustages), ocl::KernelArg::PtrReadOnly(ustages),
ocl::KernelArg::PtrReadOnly(ustumps), ocl::KernelArg::PtrReadOnly(unodes),
ocl::KernelArg::PtrReadOnly(usubsets), ocl::KernelArg::PtrReadOnly(usubsets),
subsetSize, subsetSize,
ocl::KernelArg::PtrWriteOnly(ufacepos), // positions ocl::KernelArg::PtrWriteOnly(ufacepos), // positions
processingRectSize,
yStep, (float)factor,
data.origWinSize, (int)MAX_FACES); data.origWinSize, (int)MAX_FACES);
ok = lbpKernel.run(2, globalsize, 0, true);
ok = lbpKernel.run(2, globalsize, localsize, true);
}
if( ok )
{
Mat facepos = ufacepos.getMat(ACCESS_READ);
const int* fptr = facepos.ptr<int>();
int nfaces = fptr[0];
nfaces = std::min(nfaces, (int)MAX_FACES);
for( int i = 0; i < nfaces; i++ )
{
const FeatureEvaluator::ScaleData& s = featureEvaluator->getScaleData(fptr[i*3 + 1]);
candidates.push_back(Rect(cvRound(fptr[i*3 + 2]*s.scale),
cvRound(fptr[i*3 + 3]*s.scale),
cvRound(data.origWinSize.width*s.scale),
cvRound(data.origWinSize.height*s.scale)));
}
} }
//CV_Assert(ok);
return ok; return ok;
} }
...@@ -1296,11 +1199,11 @@ void CascadeClassifierImpl::detectMultiScaleNoGrouping( InputArray _image, std:: ...@@ -1296,11 +1199,11 @@ void CascadeClassifierImpl::detectMultiScaleNoGrouping( InputArray _image, std::
double scaleFactor, Size minObjectSize, Size maxObjectSize, double scaleFactor, Size minObjectSize, Size maxObjectSize,
bool outputRejectLevels ) bool outputRejectLevels )
{ {
int featureType = getFeatureType();
Size imgsz = _image.size(); Size imgsz = _image.size();
int imgtype = _image.type();
Mat grayImage, imageBuffer; Mat grayImage;
UMat ugrayImage;
_InputArray gray;
candidates.clear(); candidates.clear();
rejectLevels.clear(); rejectLevels.clear();
...@@ -1309,120 +1212,86 @@ void CascadeClassifierImpl::detectMultiScaleNoGrouping( InputArray _image, std:: ...@@ -1309,120 +1212,86 @@ void CascadeClassifierImpl::detectMultiScaleNoGrouping( InputArray _image, std::
if( maxObjectSize.height == 0 || maxObjectSize.width == 0 ) if( maxObjectSize.height == 0 || maxObjectSize.width == 0 )
maxObjectSize = imgsz; maxObjectSize = imgsz;
bool use_ocl = ocl::useOpenCL() && bool use_ocl = tryOpenCL && ocl::useOpenCL() &&
(featureType == FeatureEvaluator::HAAR || featureEvaluator->getLocalSize().area() > 0 &&
featureType == FeatureEvaluator::LBP) &&
ocl::Device::getDefault().type() != ocl::Device::TYPE_CPU && ocl::Device::getDefault().type() != ocl::Device::TYPE_CPU &&
(data.minNodesPerTree == data.maxNodesPerTree) &&
!isOldFormatCascade() && !isOldFormatCascade() &&
data.isStumpBased() &&
maskGenerator.empty() && maskGenerator.empty() &&
!outputRejectLevels && !outputRejectLevels;
tryOpenCL;
if( !use_ocl ) /*if( use_ocl )
{ {
Mat image = _image.getMat(); if (_image.channels() > 1)
if (maskGenerator) cvtColor(_image, ugrayImage, COLOR_BGR2GRAY);
maskGenerator->initializeMask(image); else if (_image.isUMat())
ugrayImage = _image.getUMat();
grayImage = image;
if( CV_MAT_CN(imgtype) > 1 )
{
Mat temp;
cvtColor(grayImage, temp, COLOR_BGR2GRAY);
grayImage = temp;
}
imageBuffer.create(imgsz.height + 1, imgsz.width + 1, CV_8U);
}
else else
_image.copyTo(ugrayImage);
gray = ugrayImage;
}
else*/
{ {
UMat uimage = _image.getUMat(); if (_image.channels() > 1)
if( CV_MAT_CN(imgtype) > 1 ) cvtColor(_image, grayImage, COLOR_BGR2GRAY);
cvtColor(uimage, ugrayImage, COLOR_BGR2GRAY); else if (_image.isMat())
grayImage = _image.getMat();
else else
uimage.copyTo(ugrayImage); _image.copyTo(grayImage);
uimageBuffer.create(imgsz.height + 1, imgsz.width + 1, CV_8U); gray = grayImage;
} }
Size sumSize0((imgsz.width + SUM_ALIGN) & -SUM_ALIGN, imgsz.height+1); std::vector<float> scales;
scales.reserve(1024);
if( use_ocl )
{
ufacepos.create(1, MAX_FACES*4 + 1, CV_32S);
UMat ufacecount(ufacepos, Rect(0,0,1,1));
ufacecount.setTo(Scalar::all(0));
}
for( double factor = 1; ; factor *= scaleFactor ) for( double factor = 1; ; factor *= scaleFactor )
{ {
Size originalWindowSize = getOriginalWindowSize(); Size originalWindowSize = getOriginalWindowSize();
Size windowSize( cvRound(originalWindowSize.width*factor), cvRound(originalWindowSize.height*factor) ); Size windowSize( cvRound(originalWindowSize.width*factor), cvRound(originalWindowSize.height*factor) );
Size scaledImageSize( cvRound( imgsz.width/factor ), cvRound( imgsz.height/factor ) ); if( windowSize.width > maxObjectSize.width || windowSize.height > maxObjectSize.height ||
Size processingRectSize( scaledImageSize.width - originalWindowSize.width, windowSize.width > imgsz.width || windowSize.height > imgsz.height )
scaledImageSize.height - originalWindowSize.height );
if( processingRectSize.width <= 0 || processingRectSize.height <= 0 )
break;
if( windowSize.width > maxObjectSize.width || windowSize.height > maxObjectSize.height )
break; break;
if( windowSize.width < minObjectSize.width || windowSize.height < minObjectSize.height ) if( windowSize.width < minObjectSize.width || windowSize.height < minObjectSize.height )
continue; continue;
scales.push_back((float)factor);
int yStep;
if( getFeatureType() == cv::FeatureEvaluator::HOG )
{
yStep = 4;
} }
else
{
yStep = factor > 2. ? 1 : 2;
}
if( use_ocl )
{
UMat uscaledImage(uimageBuffer, Rect(0, 0, scaledImageSize.width, scaledImageSize.height));
resize( ugrayImage, uscaledImage, scaledImageSize, 0, 0, INTER_LINEAR );
if( ocl_detectSingleScale( uscaledImage, processingRectSize, yStep, factor, sumSize0 ) ) if( !featureEvaluator->setImage(gray, scales) )
continue;
/////// if the OpenCL branch has been executed but failed, fall back to CPU: /////
tryOpenCL = false; // for this cascade do not try OpenCL anymore
// since we may already have some partial results from OpenCL code (unlikely, but still),
// we just recursively call the function again, but with tryOpenCL==false it will
// go with CPU route, so there is no infinite recursion
detectMultiScaleNoGrouping( _image, candidates, rejectLevels, levelWeights,
scaleFactor, minObjectSize, maxObjectSize,
outputRejectLevels);
return; return;
}
else
{
Mat scaledImage( scaledImageSize, CV_8U, imageBuffer.data );
resize( grayImage, scaledImage, scaledImageSize, 0, 0, INTER_LINEAR );
if( !detectSingleScale( scaledImage, processingRectSize, yStep, factor, candidates, // OpenCL code
rejectLevels, levelWeights, sumSize0, outputRejectLevels ) ) if( use_ocl && ocl_detectMultiScaleNoGrouping( scales, candidates ))
break; return;
} tryOpenCL = false;
}
if( use_ocl && tryOpenCL ) // CPU code
featureEvaluator->getMats();
{ {
Mat facepos = ufacepos.getMat(ACCESS_READ); Mat currentMask;
const int* fptr = facepos.ptr<int>(); if (maskGenerator)
int i, nfaces = fptr[0]; currentMask = maskGenerator->generateMask(gray.getMat());
for( i = 0; i < nfaces; i++ )
size_t i, nscales = scales.size();
cv::AutoBuffer<int> stripeSizeBuf(nscales);
int* stripeSizes = stripeSizeBuf;
const FeatureEvaluator::ScaleData* s = &featureEvaluator->getScaleData(0);
Size szw = s->getWorkingSize(data.origWinSize);
int nstripes = cvCeil(szw.width/32.);
for( i = 0; i < nscales; i++ )
{ {
candidates.push_back(Rect(fptr[i*4+1], fptr[i*4+2], fptr[i*4+3], fptr[i*4+4])); szw = s[i].getWorkingSize(data.origWinSize);
stripeSizes[i] = std::max((szw.height/s[i].ystep + nstripes-1)/nstripes, 1)*s[i].ystep;
} }
CascadeClassifierInvoker invoker(*this, (int)nscales, nstripes, s, stripeSizes,
candidates, rejectLevels, levelWeights,
outputRejectLevels, currentMask, &mtx);
parallel_for_(Range(0, nstripes), invoker);
} }
} }
void CascadeClassifierImpl::detectMultiScale( InputArray _image, std::vector<Rect>& objects, void CascadeClassifierImpl::detectMultiScale( InputArray _image, std::vector<Rect>& objects,
std::vector<int>& rejectLevels, std::vector<int>& rejectLevels,
std::vector<double>& levelWeights, std::vector<double>& levelWeights,
...@@ -1462,10 +1331,9 @@ void CascadeClassifierImpl::detectMultiScale( InputArray _image, std::vector<Rec ...@@ -1462,10 +1331,9 @@ void CascadeClassifierImpl::detectMultiScale( InputArray _image, std::vector<Rec
double scaleFactor, int minNeighbors, double scaleFactor, int minNeighbors,
int flags, Size minObjectSize, Size maxObjectSize) int flags, Size minObjectSize, Size maxObjectSize)
{ {
Mat image = _image.getMat();
std::vector<int> fakeLevels; std::vector<int> fakeLevels;
std::vector<double> fakeWeights; std::vector<double> fakeWeights;
detectMultiScale( image, objects, fakeLevels, fakeWeights, scaleFactor, detectMultiScale( _image, objects, fakeLevels, fakeWeights, scaleFactor,
minNeighbors, flags, minObjectSize, maxObjectSize ); minNeighbors, flags, minObjectSize, maxObjectSize );
} }
...@@ -1550,6 +1418,7 @@ bool CascadeClassifierImpl::Data::read(const FileNode &root) ...@@ -1550,6 +1418,7 @@ bool CascadeClassifierImpl::Data::read(const FileNode &root)
stumps.clear(); stumps.clear();
FileNodeIterator it = fn.begin(), it_end = fn.end(); FileNodeIterator it = fn.begin(), it_end = fn.end();
minNodesPerTree = INT_MAX;
maxNodesPerTree = 0; maxNodesPerTree = 0;
for( int si = 0; it != it_end; si++, ++it ) for( int si = 0; it != it_end; si++, ++it )
...@@ -1576,6 +1445,7 @@ bool CascadeClassifierImpl::Data::read(const FileNode &root) ...@@ -1576,6 +1445,7 @@ bool CascadeClassifierImpl::Data::read(const FileNode &root)
DTree tree; DTree tree;
tree.nodeCount = (int)internalNodes.size()/nodeStep; tree.nodeCount = (int)internalNodes.size()/nodeStep;
minNodesPerTree = std::min(minNodesPerTree, tree.nodeCount);
maxNodesPerTree = std::max(maxNodesPerTree, tree.nodeCount); maxNodesPerTree = std::max(maxNodesPerTree, tree.nodeCount);
classifiers.push_back(tree); classifiers.push_back(tree);
...@@ -1613,7 +1483,7 @@ bool CascadeClassifierImpl::Data::read(const FileNode &root) ...@@ -1613,7 +1483,7 @@ bool CascadeClassifierImpl::Data::read(const FileNode &root)
} }
} }
if( isStumpBased() ) if( maxNodesPerTree == 1 )
{ {
int nodeOfs = 0, leafOfs = 0; int nodeOfs = 0, leafOfs = 0;
size_t nstages = stages.size(); size_t nstages = stages.size();
...@@ -1641,7 +1511,8 @@ bool CascadeClassifierImpl::read_(const FileNode& root) ...@@ -1641,7 +1511,8 @@ bool CascadeClassifierImpl::read_(const FileNode& root)
haarKernel = ocl::Kernel(); haarKernel = ocl::Kernel();
lbpKernel = ocl::Kernel(); lbpKernel = ocl::Kernel();
ustages.release(); ustages.release();
ustumps.release(); unodes.release();
uleaves.release();
if( !data.read(root) ) if( !data.read(root) )
return false; return false;
...@@ -1651,7 +1522,7 @@ bool CascadeClassifierImpl::read_(const FileNode& root) ...@@ -1651,7 +1522,7 @@ bool CascadeClassifierImpl::read_(const FileNode& root)
if( fn.empty() ) if( fn.empty() )
return false; return false;
return featureEvaluator->read(fn); return featureEvaluator->read(fn, data.origWinSize);
} }
template<> void DefaultDeleter<CvHaarClassifierCascade>::operator ()(CvHaarClassifierCascade* obj) const template<> void DefaultDeleter<CvHaarClassifierCascade>::operator ()(CvHaarClassifierCascade* obj) const
......
...@@ -3,6 +3,72 @@ ...@@ -3,6 +3,72 @@
namespace cv namespace cv
{ {
// Base class for the feature evaluators used by the cascade classifier.
// It declares the virtual interface (read / clone / setImage / setWindow /
// calcOrd / calcCat) and holds the per-scale bookkeeping (ScaleData) together
// with the Mat / UMat buffers shared by derived evaluators.
class FeatureEvaluator
{
public:
// Feature type identifiers, as passed to create() and returned by
// getFeatureType().
enum
{
HAAR = 0,
LBP = 1,
HOG = 2
};
// Per-scale parameters.
struct ScaleData
{
// Zeroes scale, layer_ofs and ystep; szi is left default-constructed.
ScaleData() { scale = 0.f; layer_ofs = ystep = 0; }
// Returns szi reduced by winSize in each dimension, clamped at 0.
Size getWorkingSize(Size winSize) const
{
return Size(std::max(szi.width - winSize.width, 0),
std::max(szi.height - winSize.height, 0));
}
// Scale factor; callers in this file multiply window coordinates and the
// original window size by it to produce output rectangles.
float scale;
// Size from which getWorkingSize() subtracts the window size
// (presumably the image size at this scale -- TODO confirm).
Size szi;
// layer_ofs: offset added to the window pointer into sbuf in setWindow().
// ystep: step used by callers when scanning window positions.
int layer_ofs, ystep;
};
virtual ~FeatureEvaluator();
virtual bool read(const FileNode& node, Size origWinSize);
virtual Ptr<FeatureEvaluator> clone() const;
virtual int getFeatureType() const;
// Returns the stored nchannels value.
int getNumChannels() const { return nchannels; }
virtual bool setImage(InputArray img, const std::vector<float>& scales);
virtual bool setWindow(Point p, int scaleIdx);
// Returns the ScaleData entry for scaleIdx; asserts that the index is
// within [0, scaleData->size()).
const ScaleData& getScaleData(int scaleIdx) const
{
CV_Assert( 0 <= scaleIdx && scaleIdx < (int)scaleData->size());
return scaleData->at(scaleIdx);
}
virtual void getUMats(std::vector<UMat>& bufs);
virtual void getMats();
// Accessors for localSize and lbufSize. The OpenCL detection path in this
// file uses them as the kernel local size and the local sum-buffer size.
Size getLocalSize() const { return localSize; }
Size getLocalBufSize() const { return lbufSize; }
virtual float calcOrd(int featureIdx) const;
virtual int calcCat(int featureIdx) const;
// Factory: creates an evaluator for the given feature type id.
static Ptr<FeatureEvaluator> create(int type);
protected:
// Bit flags stored in sbufFlag (presumably marking which of sbuf / usbuf
// holds current data -- TODO confirm against the implementation).
enum { SBUF_VALID=1, USBUF_VALID=2 };
int sbufFlag;
bool updateScaleData( Size imgsz, const std::vector<float>& _scales );
// Hooks with empty default bodies; derived classes may override them.
virtual void computeChannels( int, InputArray ) {}
virtual void computeOptFeatures() {}
Size origWinSize, sbufSize, localSize, lbufSize;
int nchannels;
Mat sbuf, rbuf;
UMat urbuf, usbuf, ufbuf, uscaleData;
Ptr<std::vector<ScaleData> > scaleData;
};
class CascadeClassifierImpl : public BaseCascadeClassifier class CascadeClassifierImpl : public BaseCascadeClassifier
{ {
public: public:
...@@ -54,9 +120,8 @@ protected: ...@@ -54,9 +120,8 @@ protected:
int yStep, double factor, std::vector<Rect>& candidates, int yStep, double factor, std::vector<Rect>& candidates,
std::vector<int>& rejectLevels, std::vector<double>& levelWeights, std::vector<int>& rejectLevels, std::vector<double>& levelWeights,
Size sumSize0, bool outputRejectLevels = false ); Size sumSize0, bool outputRejectLevels = false );
bool ocl_detectSingleScale( InputArray image, Size processingRectSize, bool ocl_detectMultiScaleNoGrouping( const std::vector<float>& scales,
int yStep, double factor, Size sumSize0 ); std::vector<Rect>& candidates );
void detectMultiScaleNoGrouping( InputArray image, std::vector<Rect>& candidates, void detectMultiScaleNoGrouping( InputArray image, std::vector<Rect>& candidates,
std::vector<int>& rejectLevels, std::vector<double>& levelWeights, std::vector<int>& rejectLevels, std::vector<double>& levelWeights,
...@@ -72,6 +137,7 @@ protected: ...@@ -72,6 +137,7 @@ protected:
}; };
friend class CascadeClassifierInvoker; friend class CascadeClassifierInvoker;
friend class SparseCascadeClassifierInvoker;
template<class FEval> template<class FEval>
friend int predictOrdered( CascadeClassifierImpl& cascade, Ptr<FeatureEvaluator> &featureEvaluator, double& weight); friend int predictOrdered( CascadeClassifierImpl& cascade, Ptr<FeatureEvaluator> &featureEvaluator, double& weight);
...@@ -85,7 +151,7 @@ protected: ...@@ -85,7 +151,7 @@ protected:
template<class FEval> template<class FEval>
friend int predictCategoricalStump( CascadeClassifierImpl& cascade, Ptr<FeatureEvaluator> &featureEvaluator, double& weight); friend int predictCategoricalStump( CascadeClassifierImpl& cascade, Ptr<FeatureEvaluator> &featureEvaluator, double& weight);
int runAt( Ptr<FeatureEvaluator>& feval, Point pt, double& weight ); int runAt( Ptr<FeatureEvaluator>& feval, Point pt, int scaleIdx, double& weight );
class Data class Data
{ {
...@@ -126,12 +192,10 @@ protected: ...@@ -126,12 +192,10 @@ protected:
bool read(const FileNode &node); bool read(const FileNode &node);
bool isStumpBased() const { return maxNodesPerTree == 1; }
int stageType; int stageType;
int featureType; int featureType;
int ncategories; int ncategories;
int maxNodesPerTree; int minNodesPerTree, maxNodesPerTree;
Size origWinSize; Size origWinSize;
std::vector<Stage> stages; std::vector<Stage> stages;
...@@ -148,7 +212,7 @@ protected: ...@@ -148,7 +212,7 @@ protected:
Ptr<MaskGenerator> maskGenerator; Ptr<MaskGenerator> maskGenerator;
UMat ugrayImage, uimageBuffer; UMat ugrayImage, uimageBuffer;
UMat ufacepos, ustages, ustumps, usubsets; UMat ufacepos, ustages, unodes, uleaves, usubsets;
ocl::Kernel haarKernel, lbpKernel; ocl::Kernel haarKernel, lbpKernel;
bool tryOpenCL; bool tryOpenCL;
...@@ -268,7 +332,6 @@ public: ...@@ -268,7 +332,6 @@ public:
enum { RECT_NUM = Feature::RECT_NUM }; enum { RECT_NUM = Feature::RECT_NUM };
float calc( const int* pwin ) const; float calc( const int* pwin ) const;
void setOffsets( const Feature& _f, int step, int tofs ); void setOffsets( const Feature& _f, int step, int tofs );
int ofs[RECT_NUM][4]; int ofs[RECT_NUM][4];
...@@ -278,35 +341,34 @@ public: ...@@ -278,35 +341,34 @@ public:
HaarEvaluator(); HaarEvaluator();
virtual ~HaarEvaluator(); virtual ~HaarEvaluator();
virtual bool read( const FileNode& node ); virtual bool read( const FileNode& node, Size origWinSize);
virtual Ptr<FeatureEvaluator> clone() const; virtual Ptr<FeatureEvaluator> clone() const;
virtual int getFeatureType() const { return FeatureEvaluator::HAAR; } virtual int getFeatureType() const { return FeatureEvaluator::HAAR; }
virtual bool setImage(InputArray, Size origWinSize, Size sumSize); virtual bool setWindow(Point p, int scaleIdx);
virtual bool setWindow(Point pt); Rect getNormRect() const;
virtual Rect getNormRect() const; int getSquaresOffset() const;
virtual void getUMats(std::vector<UMat>& bufs);
double operator()(int featureIdx) const float operator()(int featureIdx) const
{ return optfeaturesPtr[featureIdx].calc(pwin) * varianceNormFactor; } { return optfeaturesPtr[featureIdx].calc(pwin) * varianceNormFactor; }
virtual double calcOrd(int featureIdx) const virtual float calcOrd(int featureIdx) const
{ return (*this)(featureIdx); } { return (*this)(featureIdx); }
protected: protected:
Size origWinSize, sumSize0; virtual void computeChannels( int i, InputArray img );
virtual void computeOptFeatures();
Ptr<std::vector<Feature> > features; Ptr<std::vector<Feature> > features;
Ptr<std::vector<OptFeature> > optfeatures; Ptr<std::vector<OptFeature> > optfeatures;
OptFeature* optfeaturesPtr; // optimization Ptr<std::vector<OptFeature> > optfeatures_lbuf;
bool hasTiltedFeatures; bool hasTiltedFeatures;
Mat sum0, sum, sqsum0, sqsum; int tofs, sqofs;
UMat usum0, usum, usqsum0, usqsum, ufbuf; Vec4i nofs;
Rect normrect; Rect normrect;
int nofs[4];
const int* pwin; const int* pwin;
double varianceNormFactor; OptFeature* optfeaturesPtr; // optimization
float varianceNormFactor;
}; };
inline HaarEvaluator::Feature :: Feature() inline HaarEvaluator::Feature :: Feature()
...@@ -336,28 +398,6 @@ inline float HaarEvaluator::OptFeature :: calc( const int* ptr ) const ...@@ -336,28 +398,6 @@ inline float HaarEvaluator::OptFeature :: calc( const int* ptr ) const
return ret; return ret;
} }
/**
 * Fills this optimized feature from the parsed feature description `_f`.
 *
 * Copies the three rectangle weights and, for every rectangle, stores four
 * integer offsets in ofs[k][0..3] via the CV_*_OFS macros.
 *
 * @param _f    source feature (rectangles, weights and the `tilted` flag)
 * @param step  row stride forwarded to the offset macros
 * @param tofs  base offset used for tilted features; upright features use 0
 */
inline void HaarEvaluator::OptFeature :: setOffsets( const Feature& _f, int step, int tofs )
{
    weight[0] = _f.rect[0].weight;
    weight[1] = _f.rect[1].weight;
    weight[2] = _f.rect[2].weight;

    // The third rectangle only contributes when its weight is positive;
    // otherwise an empty rectangle is used so its offsets stay well defined.
    Rect r2 = weight[2] > 0 ? _f.rect[2].r : Rect(0,0,0,0);
    if (_f.tilted)
    {
        // ofs[][] holds integer offsets, so every rectangle goes through the
        // *_OFS macro (the third one previously used the *_PTRS variant).
        CV_TILTED_OFS( ofs[0][0], ofs[0][1], ofs[0][2], ofs[0][3], tofs, _f.rect[0].r, step );
        CV_TILTED_OFS( ofs[1][0], ofs[1][1], ofs[1][2], ofs[1][3], tofs, _f.rect[1].r, step );
        CV_TILTED_OFS( ofs[2][0], ofs[2][1], ofs[2][2], ofs[2][3], tofs, r2, step );
    }
    else
    {
        CV_SUM_OFS( ofs[0][0], ofs[0][1], ofs[0][2], ofs[0][3], 0, _f.rect[0].r, step );
        CV_SUM_OFS( ofs[1][0], ofs[1][1], ofs[1][2], ofs[1][3], 0, _f.rect[1].r, step );
        CV_SUM_OFS( ofs[2][0], ofs[2][1], ofs[2][2], ofs[2][3], 0, r2, step );
    }
}
//---------------------------------------------- LBPEvaluator ------------------------------------- //---------------------------------------------- LBPEvaluator -------------------------------------
class LBPEvaluator : public FeatureEvaluator class LBPEvaluator : public FeatureEvaluator
...@@ -386,27 +426,25 @@ public: ...@@ -386,27 +426,25 @@ public:
LBPEvaluator(); LBPEvaluator();
virtual ~LBPEvaluator(); virtual ~LBPEvaluator();
virtual bool read( const FileNode& node ); virtual bool read( const FileNode& node, Size origWinSize );
virtual Ptr<FeatureEvaluator> clone() const; virtual Ptr<FeatureEvaluator> clone() const;
virtual int getFeatureType() const { return FeatureEvaluator::LBP; } virtual int getFeatureType() const { return FeatureEvaluator::LBP; }
virtual bool setImage(InputArray image, Size _origWinSize, Size); virtual bool setWindow(Point p, int scaleIdx);
virtual bool setWindow(Point pt);
virtual void getUMats(std::vector<UMat>& bufs);
int operator()(int featureIdx) const int operator()(int featureIdx) const
{ return optfeaturesPtr[featureIdx].calc(pwin); } { return optfeaturesPtr[featureIdx].calc(pwin); }
virtual int calcCat(int featureIdx) const virtual int calcCat(int featureIdx) const
{ return (*this)(featureIdx); } { return (*this)(featureIdx); }
protected: protected:
Size origWinSize, sumSize0; virtual void computeChannels( int i, InputArray img );
virtual void computeOptFeatures();
Ptr<std::vector<Feature> > features; Ptr<std::vector<Feature> > features;
Ptr<std::vector<OptFeature> > optfeatures; Ptr<std::vector<OptFeature> > optfeatures;
Ptr<std::vector<OptFeature> > optfeatures_lbuf;
OptFeature* optfeaturesPtr; // optimization OptFeature* optfeaturesPtr; // optimization
Mat sum0, sum;
UMat usum0, usum, ufbuf;
const int* pwin; const int* pwin;
}; };
...@@ -436,98 +474,6 @@ inline int LBPEvaluator::OptFeature :: calc( const int* p ) const ...@@ -436,98 +474,6 @@ inline int LBPEvaluator::OptFeature :: calc( const int* p ) const
(CALC_SUM_OFS_( ofs[4], ofs[5], ofs[8], ofs[9], p ) >= cval ? 1 : 0); (CALC_SUM_OFS_( ofs[4], ofs[5], ofs[8], ofs[9], p ) >= cval ? 1 : 0);
} }
inline void LBPEvaluator::OptFeature :: setOffsets( const Feature& _f, int step )
{
Rect tr = _f.rect;
CV_SUM_OFS( ofs[0], ofs[1], ofs[4], ofs[5], 0, tr, step );
tr.x += 2*_f.rect.width;
CV_SUM_OFS( ofs[2], ofs[3], ofs[6], ofs[7], 0, tr, step );
tr.y += 2*_f.rect.height;
CV_SUM_OFS( ofs[10], ofs[11], ofs[14], ofs[15], 0, tr, step );
tr.x -= 2*_f.rect.width;
CV_SUM_OFS( ofs[8], ofs[9], ofs[12], ofs[13], 0, tr, step );
}
//---------------------------------------------- HOGEvaluator -------------------------------------------
// FeatureEvaluator implementation for HOG-type cascade features.
// Holds the parsed feature list plus the per-bin histogram images and the
// normalization image that the features point into.
class HOGEvaluator : public FeatureEvaluator
{
public:
// A single HOG feature: four cell rectangles and one selected component.
struct Feature
{
Feature();
// Returns the feature response at the given element offset
// (shared by all pF/pN pointers).
float calc( int offset ) const;
// Re-points pF/pN into the supplied histogram and normalization images.
void updatePtrs( const std::vector<Mat>& _hist, const Mat &_normSum );
// Loads the feature description from a file node.
bool read( const FileNode& node );
// CELL_NUM cells per feature, BIN_NUM histogram bins per cell.
enum { CELL_NUM = 4, BIN_NUM = 9 };
Rect rect[CELL_NUM];
// Component index in [0, CELL_NUM*BIN_NUM); updatePtrs() splits it into
// bin (featComponent % BIN_NUM) and cell (featComponent / BIN_NUM).
int featComponent; //component index from 0 to 35
// Corner pointers used to compute the feature sum.
const float* pF[4]; //for feature calculation
// Corner pointers used to compute the normalization sum.
const float* pN[4]; //for normalization calculation
};
HOGEvaluator();
virtual ~HOGEvaluator();
virtual bool read( const FileNode& node );
virtual Ptr<FeatureEvaluator> clone() const;
// Identifies this evaluator as the HOG feature type.
virtual int getFeatureType() const { return FeatureEvaluator::HOG; }
virtual bool setImage( InputArray image, Size winSize, Size );
virtual bool setWindow( Point pt );
// Evaluates feature `featureIdx` at the current window offset.
double operator()(int featureIdx) const
{
return featuresPtr[featureIdx].calc(offset);
}
// Ordered-feature entry point; forwards to operator().
virtual double calcOrd( int featureIdx ) const
{
return (*this)(featureIdx);
}
private:
// Builds `nbins` histogram images and a normalization image from srcImage.
// NOTE(review): exact contents depend on the implementation, which is not visible here.
virtual void integralHistogram( const Mat& srcImage, std::vector<Mat> &histogram, Mat &norm, int nbins ) const;
Size origWinSize;
Ptr<std::vector<Feature> > features;
// Raw pointer used by operator(); presumably points at the first element of *features — confirm in setImage/read.
Feature* featuresPtr;
std::vector<Mat> hist;
Mat normSum;
// Element offset of the current window, passed to Feature::calc.
int offset;
};
/**
 * Default-constructs a feature: all four cell rectangles are empty, all
 * feature/normalization pointers are null and the component index is 0.
 */
inline HOGEvaluator::Feature :: Feature()
{
    for( int k = 0; k < 4; k++ )
    {
        rect[k] = Rect();
        pF[k] = 0;
        pN[k] = 0;
    }
    featComponent = 0;
}
/**
 * Computes the normalized feature response at element offset `_offset`.
 *
 * The raw response is CALC_SUM over pF and the normalizer is CALC_SUM over pN.
 * Responses not exceeding 0.001 are reported as 0; otherwise the response is
 * divided by (normalizer + 0.001).
 */
inline float HOGEvaluator::Feature :: calc( int _offset ) const
{
    const float rawSum = CALC_SUM(pF, _offset);
    const float normSumVal = CALC_SUM(pN, _offset);

    if( rawSum > 0.001f )
        return rawSum / ( normSumVal + 0.001f );
    return 0.f;
}
inline void HOGEvaluator::Feature :: updatePtrs( const std::vector<Mat> &_hist, const Mat &_normSum )
{
int binIdx = featComponent % BIN_NUM;
int cellIdx = featComponent / BIN_NUM;
Rect normRect = Rect( rect[0].x, rect[0].y, 2*rect[0].width, 2*rect[0].height );
const float* featBuf = (const float*)_hist[binIdx].data;
size_t featStep = _hist[0].step / sizeof(featBuf[0]);
const float* normBuf = (const float*)_normSum.data;
size_t normStep = _normSum.step / sizeof(normBuf[0]);
CV_SUM_PTRS( pF[0], pF[1], pF[2], pF[3], featBuf, rect[cellIdx], featStep );
CV_SUM_PTRS( pN[0], pN[1], pN[2], pN[3], normBuf, normRect, normStep );
}
//---------------------------------------------- predictor functions ------------------------------------- //---------------------------------------------- predictor functions -------------------------------------
...@@ -662,11 +608,7 @@ inline int predictCategoricalStump( CascadeClassifierImpl& cascade, ...@@ -662,11 +608,7 @@ inline int predictCategoricalStump( CascadeClassifierImpl& cascade,
const CascadeClassifierImpl::Data::Stump* cascadeStumps = &cascade.data.stumps[0]; const CascadeClassifierImpl::Data::Stump* cascadeStumps = &cascade.data.stumps[0];
const CascadeClassifierImpl::Data::Stage* cascadeStages = &cascade.data.stages[0]; const CascadeClassifierImpl::Data::Stage* cascadeStages = &cascade.data.stages[0];
#ifdef HAVE_TEGRA_OPTIMIZATION float tmp = 0;
float tmp = 0; // float accumulator -- float operations are quicker
#else
double tmp = 0;
#endif
for( int si = 0; si < nstages; si++ ) for( int si = 0; si < nstages; si++ )
{ {
const CascadeClassifierImpl::Data::Stage& stage = cascadeStages[si]; const CascadeClassifierImpl::Data::Stage& stage = cascadeStages[si];
......
///////////////////////////// OpenCL kernels for face detection ////////////////////////////// ///////////////////////////// OpenCL kernels for face detection //////////////////////////////
////////////////////////////// see the opencv/doc/license.txt /////////////////////////////// ////////////////////////////// see the opencv/doc/license.txt ///////////////////////////////
//
// the code has been derived from the OpenCL Haar cascade kernel by
//
// Niko Li, newlife20080214@gmail.com
// Wang Weiyan, wangweiyanster@gmail.com
// Jia Haipeng, jiahaipeng95@gmail.com
// Nathan, liujun@multicorewareinc.com
// Peng Xiao, pengxiao@outlook.com
// Erping Pang, erping@multicorewareinc.com
//
typedef struct __attribute__((aligned(4))) OptHaarFeature typedef struct __attribute__((aligned(4))) OptHaarFeature
{ {
int4 ofs[3] __attribute__((aligned (4))); int4 ofs[3] __attribute__((aligned (4)));
...@@ -20,6 +32,12 @@ typedef struct __attribute__((aligned(4))) Stump ...@@ -20,6 +32,12 @@ typedef struct __attribute__((aligned(4))) Stump
} }
Stump; Stump;
// One internal decision-tree node as read by runHaarClassifier when
// NODE_COUNT != 1. The kernel interprets the four packed ints as:
//   n.x - index into optfeatures of the feature evaluated at this node
//   n.y - bit pattern of a float threshold (read with as_float, scaled by nf)
//   n.z - next index taken when the feature value is below the threshold
//   n.w - next index taken otherwise
// A next index > 0 continues the walk at node[idx]; an index <= 0 stops it and
// -idx selects the entry of the leaves array that is added to the stage sum.
typedef struct __attribute__((aligned(4))) Node
{
int4 n __attribute__((aligned (4)));
}
Node;
typedef struct __attribute__((aligned (4))) Stage typedef struct __attribute__((aligned (4))) Stage
{ {
int first __attribute__((aligned (4))); int first __attribute__((aligned (4)));
...@@ -28,48 +46,133 @@ typedef struct __attribute__((aligned (4))) Stage ...@@ -28,48 +46,133 @@ typedef struct __attribute__((aligned (4))) Stage
} }
Stage; Stage;
__kernel void runHaarClassifierStump( typedef struct __attribute__((aligned (4))) ScaleData
{
float scale __attribute__((aligned (4)));
int szi_width __attribute__((aligned (4)));
int szi_height __attribute__((aligned (4)));
int layer_ofs __attribute__((aligned (4)));
int ystep __attribute__((aligned (4)));
}
ScaleData;
#ifndef SUM_BUF_SIZE
#define SUM_BUF_SIZE 0
#endif
#ifndef NODE_COUNT
#define NODE_COUNT 1
#endif
__kernel __attribute__((reqd_work_group_size(LOCAL_SIZE_X,LOCAL_SIZE_Y,1)))
void runHaarClassifier(
int nscales, __global const ScaleData* scaleData,
__global const int* sum, __global const int* sum,
int sumstep, int sumoffset, int _sumstep, int sumoffset,
__global const int* sqsum,
int sqsumstep, int sqsumoffset,
__global const OptHaarFeature* optfeatures, __global const OptHaarFeature* optfeatures,
int nstages, int splitstage, int nstages,
__global const Stage* stages, __global const Stage* stages,
__global const Stump* stumps, __global const Node* nodes,
__global const float* leaves0,
volatile __global int* facepos, volatile __global int* facepos,
int2 imgsize, int xyscale, float factor, int4 normrect, int sqofs, int2 windowsize, int maxFaces)
int4 normrect, int2 windowsize, int maxFaces)
{ {
int ix = get_global_id(0)*xyscale; int lx = get_local_id(0);
int iy = get_global_id(1)*xyscale; int ly = get_local_id(1);
sumstep /= sizeof(int); int groupIdx = get_group_id(0);
sqsumstep /= sizeof(int); int i, ngroups = get_global_size(0)/LOCAL_SIZE_X;
int scaleIdx, tileIdx, stageIdx;
int sumstep = (int)(_sumstep/sizeof(int));
int4 nofs0 = (int4)(mad24(normrect.y, sumstep, normrect.x),
mad24(normrect.y, sumstep, normrect.x + normrect.z),
mad24(normrect.y + normrect.w, sumstep, normrect.x),
mad24(normrect.y + normrect.w, sumstep, normrect.x + normrect.z));
int normarea = normrect.z * normrect.w;
float invarea = 1.f/normarea;
int lidx = ly*LOCAL_SIZE_X + lx;
#if SUM_BUF_SIZE > 0
int4 nofs = (int4)(mad24(normrect.y, SUM_BUF_STEP, normrect.x),
mad24(normrect.y, SUM_BUF_STEP, normrect.x + normrect.z),
mad24(normrect.y + normrect.w, SUM_BUF_STEP, normrect.x),
mad24(normrect.y + normrect.w, SUM_BUF_STEP, normrect.x + normrect.z));
#else
int4 nofs = nofs0;
#endif
#define LOCAL_SIZE (LOCAL_SIZE_X*LOCAL_SIZE_Y)
__local int lstore[SUM_BUF_SIZE + LOCAL_SIZE*5/2+1];
#if SUM_BUF_SIZE > 0
__local int* ibuf = lstore;
__local int* lcount = ibuf + SUM_BUF_SIZE;
#else
__local int* lcount = lstore;
#endif
__local float* lnf = (__local float*)(lcount + 1);
__local float* lpartsum = lnf + LOCAL_SIZE;
__local short* lbuf = (__local short*)(lpartsum + LOCAL_SIZE);
if( ix < imgsize.x && iy < imgsize.y ) for( scaleIdx = nscales-1; scaleIdx >= 0; scaleIdx-- )
{ {
int stageIdx; __global const ScaleData* s = scaleData + scaleIdx;
__global const Stump* stump = stumps; int ystep = s->ystep;
int2 worksize = (int2)(max(s->szi_width - windowsize.x, 0), max(s->szi_height - windowsize.y, 0));
int2 ntiles = (int2)((worksize.x + LOCAL_SIZE_X-1)/LOCAL_SIZE_X,
(worksize.y + LOCAL_SIZE_Y-1)/LOCAL_SIZE_Y);
int totalTiles = ntiles.x*ntiles.y;
__global const int* psum = sum + mad24(iy, sumstep, ix); for( tileIdx = groupIdx; tileIdx < totalTiles; tileIdx += ngroups )
__global const int* pnsum = psum + mad24(normrect.y, sumstep, normrect.x); {
int normarea = normrect.z * normrect.w; int ix0 = (tileIdx % ntiles.x)*LOCAL_SIZE_X;
float invarea = 1.f/normarea; int iy0 = (tileIdx / ntiles.x)*LOCAL_SIZE_Y;
float sval = (pnsum[0] - pnsum[normrect.z] - pnsum[mul24(normrect.w, sumstep)] + int ix = lx, iy = ly;
pnsum[mad24(normrect.w, sumstep, normrect.z)])*invarea; __global const int* psum0 = sum + mad24(iy0, sumstep, ix0) + s->layer_ofs;
float sqval = (sqsum[mad24(iy + normrect.y, sqsumstep, ix + normrect.x)])*invarea; __global const int* psum1 = psum0 + mad24(iy, sumstep, ix);
if( ix0 >= worksize.x || iy0 >= worksize.y )
continue;
#if SUM_BUF_SIZE > 0
for( i = lidx*4; i < SUM_BUF_SIZE; i += LOCAL_SIZE_X*LOCAL_SIZE_Y*4 )
{
int dy = i/SUM_BUF_STEP, dx = i - dy*SUM_BUF_STEP;
vstore4(vload4(0, psum0 + mad24(dy, sumstep, dx)), 0, ibuf+i);
}
barrier(CLK_LOCAL_MEM_FENCE);
#endif
if( lidx == 0 )
lcount[0] = 0;
barrier(CLK_LOCAL_MEM_FENCE);
if( ix0 + ix < worksize.x && iy0 + iy < worksize.y )
{
#if NODE_COUNT==1
__global const Stump* stump = (__global const Stump*)nodes;
#else
__global const Node* node = nodes;
__global const float* leaves = leaves0;
#endif
#if SUM_BUF_SIZE > 0
__local const int* psum = ibuf + mad24(iy, SUM_BUF_STEP, ix);
#else
__global const int* psum = psum1;
#endif
__global const float* psqsum = (__global const float*)(psum1 + sqofs);
float sval = (psum[nofs.x] - psum[nofs.y] - psum[nofs.z] + psum[nofs.w])*invarea;
float sqval = (psqsum[nofs0.x] - psqsum[nofs0.y] - psqsum[nofs0.z] + psqsum[nofs0.w])*invarea;
float nf = (float)normarea * sqrt(max(sqval - sval * sval, 0.f)); float nf = (float)normarea * sqrt(max(sqval - sval * sval, 0.f));
nf = nf > 0 ? nf : 1.f; nf = nf > 0 ? nf : 1.f;
for( stageIdx = 0; stageIdx < nstages; stageIdx++ ) for( stageIdx = 0; stageIdx < splitstage; stageIdx++ )
{ {
int i, ntrees = stages[stageIdx].ntrees; int ntrees = stages[stageIdx].ntrees;
float s = 0.f; float s = 0.f;
for( i = 0; i < ntrees; i++, stump++ ) #if NODE_COUNT==1
for( i = 0; i < ntrees; i++ )
{ {
float4 st = stump->st; float4 st = stump[i].st;
__global const OptHaarFeature* f = optfeatures + as_int(st.x); __global const OptHaarFeature* f = optfeatures + as_int(st.x);
float4 weight = f->weight; float4 weight = f->weight;
...@@ -85,64 +188,352 @@ __kernel void runHaarClassifierStump( ...@@ -85,64 +188,352 @@ __kernel void runHaarClassifierStump(
s += (sval < st.y*nf) ? st.z : st.w; s += (sval < st.y*nf) ? st.z : st.w;
} }
stump += ntrees;
#else
for( i = 0; i < ntrees; i++, node += NODE_COUNT, leaves += NODE_COUNT+1 )
{
int idx = 0;
do
{
int4 n = node[idx].n;
__global const OptHaarFeature* f = optfeatures + n.x;
float4 weight = f->weight;
int4 ofs = f->ofs[0];
sval = (psum[ofs.x] - psum[ofs.y] - psum[ofs.z] + psum[ofs.w])*weight.x;
ofs = f->ofs[1];
sval += (psum[ofs.x] - psum[ofs.y] - psum[ofs.z] + psum[ofs.w])*weight.y;
if( weight.z > 0 )
{
ofs = f->ofs[2];
sval += (psum[ofs.x] - psum[ofs.y] - psum[ofs.z] + psum[ofs.w])*weight.z;
}
idx = (sval < as_float(n.y)*nf) ? n.z : n.w;
}
while(idx > 0);
s += leaves[-idx];
}
#endif
if( s < stages[stageIdx].threshold ) if( s < stages[stageIdx].threshold )
break; break;
} }
if( stageIdx == splitstage && (ystep == 1 || ((ix | iy) & 1) == 0) )
{
int count = atomic_inc(lcount);
lbuf[count] = (int)(ix | (iy << 8));
lnf[count] = nf;
}
}
for( stageIdx = splitstage; stageIdx < nstages; stageIdx++ )
{
int nrects = lcount[0];
barrier(CLK_LOCAL_MEM_FENCE);
if( nrects == 0 )
break;
if( lidx == 0 )
lcount[0] = 0;
{
#if NODE_COUNT == 1
__global const Stump* stump = (__global const Stump*)nodes + stages[stageIdx].first;
#else
__global const Node* node = nodes + stages[stageIdx].first*NODE_COUNT;
__global const float* leaves = leaves0 + stages[stageIdx].first*(NODE_COUNT+1);
#endif
int nparts = LOCAL_SIZE / nrects;
int ntrees = stages[stageIdx].ntrees;
int ntrees_p = (ntrees + nparts - 1)/nparts;
int nr = lidx / nparts;
int partidx = -1, idxval = 0;
float partsum = 0.f, nf = 0.f;
if( nr < nrects )
{
partidx = lidx % nparts;
idxval = lbuf[nr];
nf = lnf[nr];
{
int ntrees0 = ntrees_p*partidx;
int ntrees1 = min(ntrees0 + ntrees_p, ntrees);
int ix1 = idxval & 255, iy1 = idxval >> 8;
#if SUM_BUF_SIZE > 0
__local const int* psum = ibuf + mad24(iy1, SUM_BUF_STEP, ix1);
#else
__global const int* psum = psum0 + mad24(iy1, sumstep, ix1);
#endif
#if NODE_COUNT == 1
for( i = ntrees0; i < ntrees1; i++ )
{
float4 st = stump[i].st;
__global const OptHaarFeature* f = optfeatures + as_int(st.x);
float4 weight = f->weight;
int4 ofs = f->ofs[0];
float sval = (psum[ofs.x] - psum[ofs.y] - psum[ofs.z] + psum[ofs.w])*weight.x;
ofs = f->ofs[1];
sval += (psum[ofs.x] - psum[ofs.y] - psum[ofs.z] + psum[ofs.w])*weight.y;
//if( weight.z > 0 )
{
ofs = f->ofs[2];
sval += (psum[ofs.x] - psum[ofs.y] - psum[ofs.z] + psum[ofs.w])*weight.z;
}
partsum += (sval < st.y*nf) ? st.z : st.w;
}
#else
for( i = ntrees0; i < ntrees1; i++ )
{
int idx = 0;
do
{
int4 n = node[i*2 + idx].n;
__global const OptHaarFeature* f = optfeatures + n.x;
float4 weight = f->weight;
int4 ofs = f->ofs[0];
float sval = (psum[ofs.x] - psum[ofs.y] - psum[ofs.z] + psum[ofs.w])*weight.x;
ofs = f->ofs[1];
sval += (psum[ofs.x] - psum[ofs.y] - psum[ofs.z] + psum[ofs.w])*weight.y;
if( weight.z > 0 )
{
ofs = f->ofs[2];
sval += (psum[ofs.x] - psum[ofs.y] - psum[ofs.z] + psum[ofs.w])*weight.z;
}
idx = (sval < as_float(n.y)*nf) ? n.z : n.w;
}
while(idx > 0);
partsum += leaves[i*3-idx];
}
#endif
}
}
lpartsum[lidx] = partsum;
barrier(CLK_LOCAL_MEM_FENCE);
if( partidx == 0 )
{
float s = lpartsum[nr*nparts];
for( i = 1; i < nparts; i++ )
s += lpartsum[i + nr*nparts];
if( s >= stages[stageIdx].threshold )
{
int count = atomic_inc(lcount);
lbuf[count] = idxval;
lnf[count] = nf;
}
}
}
}
barrier(CLK_LOCAL_MEM_FENCE);
if( stageIdx == nstages ) if( stageIdx == nstages )
{
int nrects = lcount[0];
if( lidx < nrects )
{ {
int nfaces = atomic_inc(facepos); int nfaces = atomic_inc(facepos);
if( nfaces < maxFaces ) if( nfaces < maxFaces )
{ {
volatile __global int* face = facepos + 1 + nfaces*4; volatile __global int* face = facepos + 1 + nfaces*3;
face[0] = convert_int_rte(ix*factor); int val = lbuf[lidx];
face[1] = convert_int_rte(iy*factor); face[0] = scaleIdx;
face[2] = convert_int_rte(windowsize.x*factor); face[1] = ix0 + (val & 255);
face[3] = convert_int_rte(windowsize.y*factor); face[2] = iy0 + (val >> 8);
}
}
} }
} }
} }
} }
#undef CALC_SUM_OFS_
#define CALC_SUM_OFS_(p0, p1, p2, p3, ptr) \
((ptr)[p0] - (ptr)[p1] - (ptr)[p2] + (ptr)[p3])
__kernel void runLBPClassifierStump( __kernel void runLBPClassifierStumpSimple(
int nscales, __global const ScaleData* scaleData,
__global const int* sum, __global const int* sum,
int sumstep, int sumoffset, int _sumstep, int sumoffset,
__global const OptLBPFeature* optfeatures, __global const OptLBPFeature* optfeatures,
int nstages, int splitstage, int nstages,
__global const Stage* stages, __global const Stage* stages,
__global const Stump* stumps, __global const Stump* stumps,
__global const int* bitsets, __global const int* bitsets,
int bitsetSize, int bitsetSize,
volatile __global int* facepos, volatile __global int* facepos,
int2 imgsize, int xyscale, float factor,
int2 windowsize, int maxFaces) int2 windowsize, int maxFaces)
{ {
int ix = get_global_id(0)*xyscale; int lx = get_local_id(0);
int iy = get_global_id(1)*xyscale; int ly = get_local_id(1);
sumstep /= sizeof(int); int local_size_x = get_local_size(0);
int local_size_y = get_local_size(1);
int groupIdx = get_group_id(1)*get_num_groups(0) + get_group_id(0);
int ngroups = get_num_groups(0)*get_num_groups(1);
int scaleIdx, tileIdx, stageIdx;
int startStage = 0, endStage = nstages;
int sumstep = (int)(_sumstep/sizeof(int));
if( ix < imgsize.x && iy < imgsize.y ) for( scaleIdx = nscales-1; scaleIdx >= 0; scaleIdx-- )
{ {
int stageIdx; __global const ScaleData* s = scaleData + scaleIdx;
int ystep = s->ystep;
int2 worksize = (int2)(max(s->szi_width - windowsize.x, 0), max(s->szi_height - windowsize.y, 0));
int2 ntiles = (int2)((worksize.x/ystep + local_size_x-1)/local_size_x,
(worksize.y/ystep + local_size_y-1)/local_size_y);
int totalTiles = ntiles.x*ntiles.y;
for( tileIdx = groupIdx; tileIdx < totalTiles; tileIdx += ngroups )
{
int iy = ((tileIdx / ntiles.x)*local_size_y + ly)*ystep;
int ix = ((tileIdx % ntiles.x)*local_size_x + lx)*ystep;
if( ix < worksize.x && iy < worksize.y )
{
__global const int* p = sum + mad24(iy, sumstep, ix) + s->layer_ofs;
__global const Stump* stump = stumps; __global const Stump* stump = stumps;
__global const int* p = sum + mad24(iy, sumstep, ix); __global const int* bitset = bitsets;
for( stageIdx = 0; stageIdx < nstages; stageIdx++ ) for( stageIdx = 0; stageIdx < endStage; stageIdx++ )
{ {
int i, ntrees = stages[stageIdx].ntrees; int i, ntrees = stages[stageIdx].ntrees;
float s = 0.f; float s = 0.f;
for( i = 0; i < ntrees; i++, stump++, bitsets += bitsetSize ) for( i = 0; i < ntrees; i++, stump++, bitset += bitsetSize )
{ {
float4 st = stump->st; float4 st = stump->st;
__global const OptLBPFeature* f = optfeatures + as_int(st.x); __global const OptLBPFeature* f = optfeatures + as_int(st.x);
int16 ofs = f->ofs; int16 ofs = f->ofs;
#define CALC_SUM_OFS_(p0, p1, p2, p3, ptr) \ int cval = CALC_SUM_OFS_( ofs.s5, ofs.s6, ofs.s9, ofs.sa, p );
((ptr)[p0] - (ptr)[p1] - (ptr)[p2] + (ptr)[p3])
int mask, idx = (CALC_SUM_OFS_( ofs.s0, ofs.s1, ofs.s4, ofs.s5, p ) >= cval ? 4 : 0); // 0
idx |= (CALC_SUM_OFS_( ofs.s1, ofs.s2, ofs.s5, ofs.s6, p ) >= cval ? 2 : 0); // 1
idx |= (CALC_SUM_OFS_( ofs.s2, ofs.s3, ofs.s6, ofs.s7, p ) >= cval ? 1 : 0); // 2
mask = (CALC_SUM_OFS_( ofs.s6, ofs.s7, ofs.sa, ofs.sb, p ) >= cval ? 16 : 0); // 5
mask |= (CALC_SUM_OFS_( ofs.sa, ofs.sb, ofs.se, ofs.sf, p ) >= cval ? 8 : 0); // 8
mask |= (CALC_SUM_OFS_( ofs.s9, ofs.sa, ofs.sd, ofs.se, p ) >= cval ? 4 : 0); // 7
mask |= (CALC_SUM_OFS_( ofs.s8, ofs.s9, ofs.sc, ofs.sd, p ) >= cval ? 2 : 0); // 6
mask |= (CALC_SUM_OFS_( ofs.s4, ofs.s5, ofs.s8, ofs.s9, p ) >= cval ? 1 : 0); // 7
s += (bitset[idx] & (1 << mask)) ? st.z : st.w;
}
if( s < stages[stageIdx].threshold )
break;
}
if( stageIdx == nstages )
{
int nfaces = atomic_inc(facepos);
if( nfaces < maxFaces )
{
volatile __global int* face = facepos + 1 + nfaces*3;
face[0] = scaleIdx;
face[1] = ix;
face[2] = iy;
}
}
}
}
}
}
__kernel __attribute__((reqd_work_group_size(LOCAL_SIZE_X,LOCAL_SIZE_Y,1)))
void runLBPClassifierStump(
int nscales, __global const ScaleData* scaleData,
__global const int* sum,
int _sumstep, int sumoffset,
__global const OptLBPFeature* optfeatures,
int splitstage, int nstages,
__global const Stage* stages,
__global const Stump* stumps,
__global const int* bitsets,
int bitsetSize,
volatile __global int* facepos,
int2 windowsize, int maxFaces)
{
int lx = get_local_id(0);
int ly = get_local_id(1);
int groupIdx = get_group_id(0);
int i, ngroups = get_global_size(0)/LOCAL_SIZE_X;
int scaleIdx, tileIdx, stageIdx;
int sumstep = (int)(_sumstep/sizeof(int));
int lidx = ly*LOCAL_SIZE_X + lx;
#define LOCAL_SIZE (LOCAL_SIZE_X*LOCAL_SIZE_Y)
__local int lstore[SUM_BUF_SIZE + LOCAL_SIZE*3/2+1];
#if SUM_BUF_SIZE > 0
__local int* ibuf = lstore;
__local int* lcount = ibuf + SUM_BUF_SIZE;
#else
__local int* lcount = lstore;
#endif
__local float* lpartsum = (__local float*)(lcount + 1);
__local short* lbuf = (__local short*)(lpartsum + LOCAL_SIZE);
for( scaleIdx = nscales-1; scaleIdx >= 0; scaleIdx-- )
{
__global const ScaleData* s = scaleData + scaleIdx;
int ystep = s->ystep;
int2 worksize = (int2)(max(s->szi_width - windowsize.x, 0), max(s->szi_height - windowsize.y, 0));
int2 ntiles = (int2)((worksize.x + LOCAL_SIZE_X-1)/LOCAL_SIZE_X,
(worksize.y + LOCAL_SIZE_Y-1)/LOCAL_SIZE_Y);
int totalTiles = ntiles.x*ntiles.y;
for( tileIdx = groupIdx; tileIdx < totalTiles; tileIdx += ngroups )
{
int ix0 = (tileIdx % ntiles.x)*LOCAL_SIZE_X;
int iy0 = (tileIdx / ntiles.x)*LOCAL_SIZE_Y;
int ix = lx, iy = ly;
__global const int* psum0 = sum + mad24(iy0, sumstep, ix0) + s->layer_ofs;
if( ix0 >= worksize.x || iy0 >= worksize.y )
continue;
#if SUM_BUF_SIZE > 0
for( i = lidx*4; i < SUM_BUF_SIZE; i += LOCAL_SIZE_X*LOCAL_SIZE_Y*4 )
{
int dy = i/SUM_BUF_STEP, dx = i - dy*SUM_BUF_STEP;
vstore4(vload4(0, psum0 + mad24(dy, sumstep, dx)), 0, ibuf+i);
}
barrier(CLK_LOCAL_MEM_FENCE);
#endif
if( lidx == 0 )
lcount[0] = 0;
barrier(CLK_LOCAL_MEM_FENCE);
if( ix0 + ix < worksize.x && iy0 + iy < worksize.y )
{
__global const Stump* stump = stumps;
__global const int* bitset = bitsets;
#if SUM_BUF_SIZE > 0
__local const int* p = ibuf + mad24(iy, SUM_BUF_STEP, ix);
#else
__global const int* p = psum0 + mad24(iy, sumstep, ix);
#endif
for( stageIdx = 0; stageIdx < splitstage; stageIdx++ )
{
int ntrees = stages[stageIdx].ntrees;
float s = 0.f;
for( i = 0; i < ntrees; i++, stump++, bitset += bitsetSize )
{
float4 st = stump->st;
__global const OptLBPFeature* f = optfeatures + as_int(st.x);
int16 ofs = f->ofs;
int cval = CALC_SUM_OFS_( ofs.s5, ofs.s6, ofs.s9, ofs.sa, p ); int cval = CALC_SUM_OFS_( ofs.s5, ofs.s6, ofs.s9, ofs.sa, p );
...@@ -156,23 +547,113 @@ __kernel void runLBPClassifierStump( ...@@ -156,23 +547,113 @@ __kernel void runLBPClassifierStump(
mask |= (CALC_SUM_OFS_( ofs.s8, ofs.s9, ofs.sc, ofs.sd, p ) >= cval ? 2 : 0); // 6 mask |= (CALC_SUM_OFS_( ofs.s8, ofs.s9, ofs.sc, ofs.sd, p ) >= cval ? 2 : 0); // 6
mask |= (CALC_SUM_OFS_( ofs.s4, ofs.s5, ofs.s8, ofs.s9, p ) >= cval ? 1 : 0); // 7 mask |= (CALC_SUM_OFS_( ofs.s4, ofs.s5, ofs.s8, ofs.s9, p ) >= cval ? 1 : 0); // 7
s += (bitsets[idx] & (1 << mask)) ? st.z : st.w; s += (bitset[idx] & (1 << mask)) ? st.z : st.w;
} }
if( s < stages[stageIdx].threshold ) if( s < stages[stageIdx].threshold )
break; break;
} }
if( stageIdx == splitstage && (ystep == 1 || ((ix | iy) & 1) == 0) )
{
int count = atomic_inc(lcount);
lbuf[count] = (int)(ix | (iy << 8));
}
}
for( stageIdx = splitstage; stageIdx < nstages; stageIdx++ )
{
int nrects = lcount[0];
barrier(CLK_LOCAL_MEM_FENCE);
if( nrects == 0 )
break;
if( lidx == 0 )
lcount[0] = 0;
{
__global const Stump* stump = stumps + stages[stageIdx].first;
__global const int* bitset = bitsets + stages[stageIdx].first*bitsetSize;
int nparts = LOCAL_SIZE / nrects;
int ntrees = stages[stageIdx].ntrees;
int ntrees_p = (ntrees + nparts - 1)/nparts;
int nr = lidx / nparts;
int partidx = -1, idxval = 0;
float partsum = 0.f, nf = 0.f;
if( nr < nrects )
{
partidx = lidx % nparts;
idxval = lbuf[nr];
{
int ntrees0 = ntrees_p*partidx;
int ntrees1 = min(ntrees0 + ntrees_p, ntrees);
int ix1 = idxval & 255, iy1 = idxval >> 8;
#if SUM_BUF_SIZE > 0
__local const int* p = ibuf + mad24(iy1, SUM_BUF_STEP, ix1);
#else
__global const int* p = psum0 + mad24(iy1, sumstep, ix1);
#endif
for( i = ntrees0; i < ntrees1; i++ )
{
float4 st = stump[i].st;
__global const OptLBPFeature* f = optfeatures + as_int(st.x);
int16 ofs = f->ofs;
#define CALC_SUM_OFS_(p0, p1, p2, p3, ptr) \
((ptr)[p0] - (ptr)[p1] - (ptr)[p2] + (ptr)[p3])
int cval = CALC_SUM_OFS_( ofs.s5, ofs.s6, ofs.s9, ofs.sa, p );
int mask, idx = (CALC_SUM_OFS_( ofs.s0, ofs.s1, ofs.s4, ofs.s5, p ) >= cval ? 4 : 0); // 0
idx |= (CALC_SUM_OFS_( ofs.s1, ofs.s2, ofs.s5, ofs.s6, p ) >= cval ? 2 : 0); // 1
idx |= (CALC_SUM_OFS_( ofs.s2, ofs.s3, ofs.s6, ofs.s7, p ) >= cval ? 1 : 0); // 2
mask = (CALC_SUM_OFS_( ofs.s6, ofs.s7, ofs.sa, ofs.sb, p ) >= cval ? 16 : 0); // 5
mask |= (CALC_SUM_OFS_( ofs.sa, ofs.sb, ofs.se, ofs.sf, p ) >= cval ? 8 : 0); // 8
mask |= (CALC_SUM_OFS_( ofs.s9, ofs.sa, ofs.sd, ofs.se, p ) >= cval ? 4 : 0); // 7
mask |= (CALC_SUM_OFS_( ofs.s8, ofs.s9, ofs.sc, ofs.sd, p ) >= cval ? 2 : 0); // 6
mask |= (CALC_SUM_OFS_( ofs.s4, ofs.s5, ofs.s8, ofs.s9, p ) >= cval ? 1 : 0); // 7
partsum += (bitset[i*bitsetSize + idx] & (1 << mask)) ? st.z : st.w;
}
}
}
lpartsum[lidx] = partsum;
barrier(CLK_LOCAL_MEM_FENCE);
if( partidx == 0 )
{
float s = lpartsum[nr*nparts];
for( i = 1; i < nparts; i++ )
s += lpartsum[i + nr*nparts];
if( s >= stages[stageIdx].threshold )
{
int count = atomic_inc(lcount);
lbuf[count] = idxval;
}
}
}
}
barrier(CLK_LOCAL_MEM_FENCE);
if( stageIdx == nstages ) if( stageIdx == nstages )
{
int nrects = lcount[0];
if( lidx < nrects )
{ {
int nfaces = atomic_inc(facepos); int nfaces = atomic_inc(facepos);
if( nfaces < maxFaces ) if( nfaces < maxFaces )
{ {
volatile __global int* face = facepos + 1 + nfaces*4; volatile __global int* face = facepos + 1 + nfaces*3;
face[0] = convert_int_rte(ix*factor); int val = lbuf[lidx];
face[1] = convert_int_rte(iy*factor); face[0] = scaleIdx;
face[2] = convert_int_rte(windowsize.x*factor); face[1] = ix0 + (val & 255);
face[3] = convert_int_rte(windowsize.y*factor); face[2] = iy0 + (val >> 8);
}
}
} }
} }
} }
......
...@@ -257,6 +257,7 @@ int CV_DetectorTest::runTestCase( int detectorIdx, vector<vector<Rect> >& object ...@@ -257,6 +257,7 @@ int CV_DetectorTest::runTestCase( int detectorIdx, vector<vector<Rect> >& object
string dataPath = ts->get_data_path(), detectorFilename; string dataPath = ts->get_data_path(), detectorFilename;
if( !detectorFilenames[detectorIdx].empty() ) if( !detectorFilenames[detectorIdx].empty() )
detectorFilename = dataPath + detectorFilenames[detectorIdx]; detectorFilename = dataPath + detectorFilenames[detectorIdx];
printf("detector %s\n", detectorFilename.c_str());
for( int ii = 0; ii < (int)imageFilenames.size(); ++ii ) for( int ii = 0; ii < (int)imageFilenames.size(); ++ii )
{ {
......
...@@ -231,9 +231,14 @@ void detectAndDraw( UMat& img, Mat& canvas, CascadeClassifier& cascade, ...@@ -231,9 +231,14 @@ void detectAndDraw( UMat& img, Mat& canvas, CascadeClassifier& cascade,
smallImg.copyTo(canvas); smallImg.copyTo(canvas);
double fps = getTickFrequency()/t; double fps = getTickFrequency()/t;
static double avgfps = 0;
static int nframes = 0;
nframes++;
double alpha = nframes > 50 ? 0.01 : 1./nframes;
avgfps = avgfps*(1-alpha) + fps*alpha;
putText(canvas, format("OpenCL: %s, fps: %.1f", ocl::useOpenCL() ? "ON" : "OFF", fps), Point(250, 50), putText(canvas, format("OpenCL: %s, fps: %.1f", ocl::useOpenCL() ? "ON" : "OFF", avgfps), Point(50, 30),
FONT_HERSHEY_SIMPLEX, 1, Scalar(0,255,0), 3); FONT_HERSHEY_SIMPLEX, 0.8, Scalar(0,255,0), 2);
for( vector<Rect>::const_iterator r = faces.begin(); r != faces.end(); r++, i++ ) for( vector<Rect>::const_iterator r = faces.begin(); r != faces.end(); r++, i++ )
{ {
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment