Commit a1784b73 authored by Vadim Pisarevsky

converted old haar cascades to the new format; added the conversion function; added OpenCL optimization to CascadeClassifier; optimized the data structures and CPU code for the stump case.
parents 2431c72d ec3f22ce
......@@ -426,6 +426,61 @@ public:
int i = set(0, a0); i = set(i, a1); i = set(i, a2); i = set(i, a3); i = set(i, a4); i = set(i, a5);
i = set(i, a6); i = set(i, a7); i = set(i, a8); i = set(i, a9); i = set(i, a10); set(i, a11); return *this;
}
template<typename _Tp0, typename _Tp1, typename _Tp2, typename _Tp3,
typename _Tp4, typename _Tp5, typename _Tp6, typename _Tp7,
typename _Tp8, typename _Tp9, typename _Tp10, typename _Tp11, typename _Tp12>
Kernel& args(const _Tp0& a0, const _Tp1& a1, const _Tp2& a2, const _Tp3& a3,
const _Tp4& a4, const _Tp5& a5, const _Tp6& a6, const _Tp7& a7,
const _Tp8& a8, const _Tp9& a9, const _Tp10& a10, const _Tp11& a11,
const _Tp12& a12)
{
int i = set(0, a0); i = set(i, a1); i = set(i, a2); i = set(i, a3); i = set(i, a4); i = set(i, a5);
i = set(i, a6); i = set(i, a7); i = set(i, a8); i = set(i, a9); i = set(i, a10); i = set(i, a11);
set(i, a12); return *this;
}
template<typename _Tp0, typename _Tp1, typename _Tp2, typename _Tp3,
typename _Tp4, typename _Tp5, typename _Tp6, typename _Tp7,
typename _Tp8, typename _Tp9, typename _Tp10, typename _Tp11, typename _Tp12,
typename _Tp13>
Kernel& args(const _Tp0& a0, const _Tp1& a1, const _Tp2& a2, const _Tp3& a3,
const _Tp4& a4, const _Tp5& a5, const _Tp6& a6, const _Tp7& a7,
const _Tp8& a8, const _Tp9& a9, const _Tp10& a10, const _Tp11& a11,
const _Tp12& a12, const _Tp13& a13)
{
int i = set(0, a0); i = set(i, a1); i = set(i, a2); i = set(i, a3); i = set(i, a4); i = set(i, a5);
i = set(i, a6); i = set(i, a7); i = set(i, a8); i = set(i, a9); i = set(i, a10); i = set(i, a11);
i = set(i, a12); set(i, a13); return *this;
}
template<typename _Tp0, typename _Tp1, typename _Tp2, typename _Tp3,
typename _Tp4, typename _Tp5, typename _Tp6, typename _Tp7,
typename _Tp8, typename _Tp9, typename _Tp10, typename _Tp11, typename _Tp12,
typename _Tp13, typename _Tp14>
Kernel& args(const _Tp0& a0, const _Tp1& a1, const _Tp2& a2, const _Tp3& a3,
const _Tp4& a4, const _Tp5& a5, const _Tp6& a6, const _Tp7& a7,
const _Tp8& a8, const _Tp9& a9, const _Tp10& a10, const _Tp11& a11,
const _Tp12& a12, const _Tp13& a13, const _Tp14& a14)
{
int i = set(0, a0); i = set(i, a1); i = set(i, a2); i = set(i, a3); i = set(i, a4); i = set(i, a5);
i = set(i, a6); i = set(i, a7); i = set(i, a8); i = set(i, a9); i = set(i, a10); i = set(i, a11);
i = set(i, a12); i = set(i, a13); set(i, a14); return *this;
}
template<typename _Tp0, typename _Tp1, typename _Tp2, typename _Tp3,
typename _Tp4, typename _Tp5, typename _Tp6, typename _Tp7,
typename _Tp8, typename _Tp9, typename _Tp10, typename _Tp11, typename _Tp12,
typename _Tp13, typename _Tp14, typename _Tp15>
Kernel& args(const _Tp0& a0, const _Tp1& a1, const _Tp2& a2, const _Tp3& a3,
const _Tp4& a4, const _Tp5& a5, const _Tp6& a6, const _Tp7& a7,
const _Tp8& a8, const _Tp9& a9, const _Tp10& a10, const _Tp11& a11,
const _Tp12& a12, const _Tp13& a13, const _Tp14& a14, const _Tp15& a15)
{
int i = set(0, a0); i = set(i, a1); i = set(i, a2); i = set(i, a3); i = set(i, a4); i = set(i, a5);
i = set(i, a6); i = set(i, a7); i = set(i, a8); i = set(i, a9); i = set(i, a10); i = set(i, a11);
i = set(i, a12); i = set(i, a13); i = set(i, a14); set(i, a15); return *this;
}
bool run(int dims, size_t globalsize[],
size_t localsize[], bool sync, const Queue& q=Queue());
......
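The extra args() overloads above just extend the existing set()-chaining pattern to kernels with up to 16 arguments, presumably to accommodate the longer argument lists of the new detection kernels. A minimal usage sketch, assuming a hypothetical kernel name and program source that are not part of this commit:

#include <opencv2/core.hpp>
#include <opencv2/core/ocl.hpp>

// hedged sketch: pass buffers and scalars to an OpenCL kernel through the T-API wrappers
static void runHypotheticalKernel(const cv::UMat& src, cv::UMat& dst)
{
    static const char* kernelSrc = "/* ... OpenCL C source ... */"; // placeholder
    cv::String errmsg;
    cv::ocl::ProgramSource source(kernelSrc);
    cv::ocl::Kernel k("my_kernel", source, "", &errmsg);  // hypothetical kernel name
    if( k.empty() )
        return;
    k.args(cv::ocl::KernelArg::ReadOnlyNoSize(src),
           cv::ocl::KernelArg::WriteOnly(dst),
           12.5f);                                         // plain scalars are accepted too
    size_t globalsize[] = { (size_t)dst.cols, (size_t)dst.rows };
    k.run(2, globalsize, 0, false);                        // run(dims, globalsize, localsize, sync)
}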
......@@ -1075,6 +1075,11 @@ CV_EXPORTS_W void boxFilter( InputArray src, OutputArray dst, int ddepth,
Size ksize, Point anchor = Point(-1,-1),
bool normalize = true,
int borderType = BORDER_DEFAULT );
CV_EXPORTS_W void sqrBoxFilter( InputArray _src, OutputArray _dst, int ddepth,
Size ksize, Point anchor = Point(-1, -1),
bool normalize = true,
int borderType = BORDER_DEFAULT );
//! a synonym for normalized box filter
CV_EXPORTS_W void blur( InputArray src, OutputArray dst,
......
......@@ -895,6 +895,114 @@ void cv::blur( InputArray src, OutputArray dst,
boxFilter( src, dst, -1, ksize, anchor, true, borderType );
}
/****************************************************************************************\
Squared Box Filter
\****************************************************************************************/
namespace cv
{
template<typename T, typename ST> struct SqrRowSum : public BaseRowFilter
{
SqrRowSum( int _ksize, int _anchor )
{
ksize = _ksize;
anchor = _anchor;
}
void operator()(const uchar* src, uchar* dst, int width, int cn)
{
const T* S = (const T*)src;
ST* D = (ST*)dst;
int i = 0, k, ksz_cn = ksize*cn;
width = (width - 1)*cn;
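// for each channel, seed the sum of squares over the first ksize samples, then slide the
// window one pixel at a time: add the entering squared value, subtract the leaving one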
for( k = 0; k < cn; k++, S++, D++ )
{
ST s = 0;
for( i = 0; i < ksz_cn; i += cn )
{
ST val = (ST)S[i];
s += val*val;
}
D[0] = s;
for( i = 0; i < width; i += cn )
{
ST val0 = (ST)S[i], val1 = (ST)S[i + ksz_cn];
s += val1*val1 - val0*val0;
D[i+cn] = s;
}
}
}
};
static Ptr<BaseRowFilter> getSqrRowSumFilter(int srcType, int sumType, int ksize, int anchor)
{
int sdepth = CV_MAT_DEPTH(srcType), ddepth = CV_MAT_DEPTH(sumType);
CV_Assert( CV_MAT_CN(sumType) == CV_MAT_CN(srcType) );
if( anchor < 0 )
anchor = ksize/2;
if( sdepth == CV_8U && ddepth == CV_32S )
return makePtr<SqrRowSum<uchar, int> >(ksize, anchor);
if( sdepth == CV_8U && ddepth == CV_64F )
return makePtr<SqrRowSum<uchar, double> >(ksize, anchor);
if( sdepth == CV_16U && ddepth == CV_64F )
return makePtr<SqrRowSum<ushort, double> >(ksize, anchor);
if( sdepth == CV_16S && ddepth == CV_64F )
return makePtr<SqrRowSum<short, double> >(ksize, anchor);
if( sdepth == CV_32F && ddepth == CV_64F )
return makePtr<SqrRowSum<float, double> >(ksize, anchor);
if( sdepth == CV_64F && ddepth == CV_64F )
return makePtr<SqrRowSum<double, double> >(ksize, anchor);
CV_Error_( CV_StsNotImplemented,
("Unsupported combination of source format (=%d), and buffer format (=%d)",
srcType, sumType));
return Ptr<BaseRowFilter>();
}
}
void cv::sqrBoxFilter( InputArray _src, OutputArray _dst, int ddepth,
Size ksize, Point anchor,
bool normalize, int borderType )
{
Mat src = _src.getMat();
int sdepth = src.depth(), cn = src.channels();
if( ddepth < 0 )
ddepth = sdepth < CV_32F ? CV_32F : CV_64F;
_dst.create( src.size(), CV_MAKETYPE(ddepth, cn) );
Mat dst = _dst.getMat();
if( borderType != BORDER_CONSTANT && normalize )
{
if( src.rows == 1 )
ksize.height = 1;
if( src.cols == 1 )
ksize.width = 1;
}
int sumType = CV_64F;
if( sdepth == CV_8U )
sumType = CV_32S;
sumType = CV_MAKETYPE( sumType, cn );
int srcType = CV_MAKETYPE(sdepth, cn);
int dstType = CV_MAKETYPE(ddepth, cn);
Ptr<BaseRowFilter> rowFilter = getSqrRowSumFilter(srcType, sumType, ksize.width, anchor.x );
Ptr<BaseColumnFilter> columnFilter = getColumnSumFilter(sumType,
dstType, ksize.height, anchor.y,
normalize ? 1./(ksize.width*ksize.height) : 1);
Ptr<FilterEngine> f = makePtr<FilterEngine>(Ptr<BaseFilter>(), rowFilter, columnFilter,
srcType, dstType, sumType, borderType );
f->apply( src, dst );
}
/****************************************************************************************\
Gaussian Blur
\****************************************************************************************/
......
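sqrBoxFilter computes a (normalized) windowed sum of squared pixel values, so it pairs naturally with boxFilter for local statistics. A minimal sketch, assuming a single-channel input image; the window size is arbitrary:

#include <opencv2/imgproc.hpp>

// hedged sketch: per-pixel local variance = E[x^2] - (E[x])^2 over a 5x5 window
static cv::Mat localVariance(const cv::Mat& img)
{
    cv::Mat mean, sqmean;
    cv::boxFilter(img, mean, CV_32F, cv::Size(5, 5));      // E[x]
    cv::sqrBoxFilter(img, sqmean, CV_32F, cv::Size(5, 5)); // E[x^2], added by this patch
    return sqmean - mean.mul(mean);
}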
......@@ -111,12 +111,15 @@ public:
};
CV_EXPORTS void groupRectangles(std::vector<Rect>& rectList, int groupThreshold, double eps = 0.2);
CV_EXPORTS_W void groupRectangles(CV_IN_OUT std::vector<Rect>& rectList, CV_OUT std::vector<int>& weights, int groupThreshold, double eps = 0.2);
CV_EXPORTS void groupRectangles(std::vector<Rect>& rectList, int groupThreshold, double eps, std::vector<int>* weights, std::vector<double>* levelWeights );
CV_EXPORTS_W void groupRectangles(CV_IN_OUT std::vector<Rect>& rectList, CV_OUT std::vector<int>& weights,
int groupThreshold, double eps = 0.2);
CV_EXPORTS void groupRectangles(std::vector<Rect>& rectList, int groupThreshold,
double eps, std::vector<int>* weights, std::vector<double>* levelWeights );
CV_EXPORTS void groupRectangles(std::vector<Rect>& rectList, std::vector<int>& rejectLevels,
std::vector<double>& levelWeights, int groupThreshold, double eps = 0.2);
CV_EXPORTS void groupRectangles_meanshift(std::vector<Rect>& rectList, std::vector<double>& foundWeights, std::vector<double>& foundScales,
double detectThreshold = 0.0, Size winDetSize = Size(64, 128));
CV_EXPORTS void groupRectangles_meanshift(std::vector<Rect>& rectList, std::vector<double>& foundWeights,
std::vector<double>& foundScales,
double detectThreshold = 0.0, Size winDetSize = Size(64, 128));
class CV_EXPORTS FeatureEvaluator
{
......@@ -132,7 +135,7 @@ public:
virtual Ptr<FeatureEvaluator> clone() const;
virtual int getFeatureType() const;
virtual bool setImage(const Mat& img, Size origWinSize);
virtual bool setImage(InputArray img, Size origWinSize, Size sumSize);
virtual bool setWindow(Point p);
virtual double calcOrd(int featureIdx) const;
......@@ -232,6 +235,8 @@ public:
CV_WRAP int getFeatureType() const;
void* getOldCascade();
CV_WRAP static bool convert(const String& oldcascade, const String& newcascade);
void setMaskGenerator(const Ptr<BaseCascadeClassifier::MaskGenerator>& maskGenerator);
Ptr<BaseCascadeClassifier::MaskGenerator> getMaskGenerator();
......
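The groupRectangles declarations above are only re-wrapped in this hunk; for context, the weighted overload is the one typically used to merge raw sliding-window detections. A small sketch (the threshold and eps values are just illustrative):

#include <opencv2/objdetect.hpp>
#include <vector>

// hedged sketch: merge overlapping candidate windows, keeping the support count of each group
static void groupDetections(std::vector<cv::Rect>& candidates)
{
    std::vector<int> weights;  // number of raw rectangles merged into each group
    cv::groupRectangles(candidates, weights, 3 /* groupThreshold */, 0.2 /* eps */);
}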
/*M///////////////////////////////////////////////////////////////////////////////////////
//
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
//
// By downloading, copying, installing or using the software you agree to this license.
// If you do not agree to this license, do not download, install,
// copy or use the software.
//
//
// License Agreement
// For Open Source Computer Vision Library
//
// Copyright (C) 2013, Itseez Inc, all rights reserved.
// Third party copyrights are property of their respective owners.
//
// Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met:
//
// * Redistribution's of source code must retain the above copyright notice,
// this list of conditions and the following disclaimer.
//
// * Redistribution's in binary form must reproduce the above copyright notice,
// this list of conditions and the following disclaimer in the documentation
// and/or other materials provided with the distribution.
//
// * The name of Intel Corporation may not be used to endorse or promote products
// derived from this software without specific prior written permission.
//
// This software is provided by the copyright holders and contributors "as is" and
// any express or implied warranties, including, but not limited to, the implied
// warranties of merchantability and fitness for a particular purpose are disclaimed.
// In no event shall the Intel Corporation or contributors be liable for any direct,
// indirect, incidental, special, exemplary, or consequential damages
// (including, but not limited to, procurement of substitute goods or services;
// loss of use, data, or profits; or business interruption) however caused
// and on any theory of liability, whether in contract, strict liability,
// or tort (including negligence or otherwise) arising in any way out of
// the use of this software, even if advised of the possibility of such damage.
//
//M*/
/* Haar features calculation */
#include "precomp.hpp"
#include <stdio.h>
namespace cv
{
/* field names */
#define ICV_HAAR_SIZE_NAME "size"
#define ICV_HAAR_STAGES_NAME "stages"
#define ICV_HAAR_TREES_NAME "trees"
#define ICV_HAAR_FEATURE_NAME "feature"
#define ICV_HAAR_RECTS_NAME "rects"
#define ICV_HAAR_TILTED_NAME "tilted"
#define ICV_HAAR_THRESHOLD_NAME "threshold"
#define ICV_HAAR_LEFT_NODE_NAME "left_node"
#define ICV_HAAR_LEFT_VAL_NAME "left_val"
#define ICV_HAAR_RIGHT_NODE_NAME "right_node"
#define ICV_HAAR_RIGHT_VAL_NAME "right_val"
#define ICV_HAAR_STAGE_THRESHOLD_NAME "stage_threshold"
#define ICV_HAAR_PARENT_NAME "parent"
#define ICV_HAAR_NEXT_NAME "next"
namespace haar_cvt
{
struct HaarFeature
{
enum { RECT_NUM = 3 };
HaarFeature()
{
tilted = false;
for( int i = 0; i < RECT_NUM; i++ )
{
rect[i].r = Rect(0,0,0,0);
rect[i].weight = 0.f;
}
}
bool tilted;
struct
{
Rect r;
float weight;
} rect[RECT_NUM];
};
struct HaarClassifierNode
{
HaarClassifierNode()
{
f = left = right = 0;
threshold = 0.f;
}
int f, left, right;
float threshold;
};
struct HaarClassifier
{
std::vector<HaarClassifierNode> nodes;
std::vector<float> leaves;
};
struct HaarStageClassifier
{
double threshold;
std::vector<HaarClassifier> weaks;
};
static bool convert(const String& oldcascade, const String& newcascade)
{
FileStorage oldfs(oldcascade, FileStorage::READ);
if( !oldfs.isOpened() )
return false;
FileNode oldroot = oldfs.getFirstTopLevelNode();
FileNode sznode = oldroot[ICV_HAAR_SIZE_NAME];
if( sznode.empty() )
return false;
int maxdepth = 0;
Size cascadesize;
cascadesize.width = (int)sznode[0];
cascadesize.height = (int)sznode[1];
std::vector<HaarFeature> features;
size_t i, j, k, n;
FileNode stages_seq = oldroot[ICV_HAAR_STAGES_NAME];
size_t nstages = stages_seq.size();
std::vector<HaarStageClassifier> stages(nstages);
for( i = 0; i < nstages; i++ )
{
FileNode stagenode = stages_seq[i];
HaarStageClassifier& stage = stages[i];
stage.threshold = (double)stagenode[ICV_HAAR_STAGE_THRESHOLD_NAME];
FileNode weaks_seq = stagenode[ICV_HAAR_TREES_NAME];
size_t nweaks = weaks_seq.size();
stage.weaks.resize(nweaks);
for( j = 0; j < nweaks; j++ )
{
HaarClassifier& weak = stage.weaks[j];
FileNode weaknode = weaks_seq[j];
size_t nnodes = weaknode.size();
for( n = 0; n < nnodes; n++ )
{
FileNode nnode = weaknode[n];
FileNode fnode = nnode[ICV_HAAR_FEATURE_NAME];
HaarFeature f;
HaarClassifierNode node;
node.f = (int)features.size();
f.tilted = (int)fnode[ICV_HAAR_TILTED_NAME] != 0;
FileNode rects_seq = fnode[ICV_HAAR_RECTS_NAME];
size_t nrects = rects_seq.size();
for( k = 0; k < nrects; k++ )
{
FileNode rnode = rects_seq[k];
f.rect[k].r.x = (int)rnode[0];
f.rect[k].r.y = (int)rnode[1];
f.rect[k].r.width = (int)rnode[2];
f.rect[k].r.height = (int)rnode[3];
f.rect[k].weight = (float)rnode[4];
}
features.push_back(f);
node.threshold = nnode[ICV_HAAR_THRESHOLD_NAME];
FileNode leftValNode = nnode[ICV_HAAR_LEFT_VAL_NAME];
if( !leftValNode.empty() )
{
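// a leaf child is stored as a non-positive value: the negated index into weak.leaves
// (the else branch keeps a positive index that points to another internal node)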
node.left = -(int)weak.leaves.size();
weak.leaves.push_back((float)leftValNode);
}
else
{
node.left = (int)nnode[ICV_HAAR_LEFT_NODE_NAME];
}
FileNode rightValNode = nnode[ICV_HAAR_RIGHT_VAL_NAME];
if( !rightValNode.empty() )
{
node.right = -(int)weak.leaves.size();
weak.leaves.push_back((float)rightValNode);
}
else
{
node.right = (int)nnode[ICV_HAAR_RIGHT_NODE_NAME];
}
weak.nodes.push_back(node);
}
}
}
FileStorage newfs(newcascade, FileStorage::WRITE);
if( !newfs.isOpened() )
return false;
size_t maxWeakCount = 0, nfeatures = features.size();
for( i = 0; i < nstages; i++ )
maxWeakCount = std::max(maxWeakCount, stages[i].weaks.size());
newfs << "cascade" << "{:opencv-cascade-classifier"
<< "stageType" << "BOOST"
<< "featureType" << "HAAR"
<< "height" << cascadesize.width
<< "width" << cascadesize.height
<< "stageParams" << "{"
<< "maxWeakCount" << (int)maxWeakCount
<< "}"
<< "featureParams" << "{"
<< "maxCatCount" << 0
<< "}"
<< "stageNum" << (int)nstages
<< "stages" << "[";
for( i = 0; i < nstages; i++ )
{
size_t nweaks = stages[i].weaks.size();
newfs << "{" << "maxWeakCount" << (int)nweaks
<< "stageThreshold" << stages[i].threshold
<< "weakClassifiers" << "[";
for( j = 0; j < nweaks; j++ )
{
const HaarClassifier& c = stages[i].weaks[j];
newfs << "{" << "internalNodes" << "[";
size_t nnodes = c.nodes.size(), nleaves = c.leaves.size();
for( k = 0; k < nnodes; k++ )
newfs << c.nodes[k].left << c.nodes[k].right
<< c.nodes[k].f << c.nodes[k].threshold;
newfs << "]" << "leafValues" << "[";
for( k = 0; k < nleaves; k++ )
newfs << c.leaves[k];
newfs << "]" << "}";
}
newfs << "]" << "}";
}
newfs << "]"
<< "features" << "[";
for( i = 0; i < nfeatures; i++ )
{
const HaarFeature& f = features[i];
newfs << "{" << "rects" << "[";
for( j = 0; j < (size_t)HaarFeature::RECT_NUM; j++ )
{
if( j >= 2 && fabs(f.rect[j].weight) < FLT_EPSILON )
break;
newfs << "[" << f.rect[j].r.x << f.rect[j].r.y <<
f.rect[j].r.width << f.rect[j].r.height << f.rect[j].weight << "]";
}
newfs << "]";
if( f.tilted )
newfs << "tilted" << 1;
newfs << "}";
}
newfs << "]" << "}";
return true;
}
}
bool CascadeClassifier::convert(const String& oldcascade, const String& newcascade)
{
bool ok = haar_cvt::convert(oldcascade, newcascade);
if( !ok && newcascade.size() > 0 )
remove(newcascade.c_str());
return ok;
}
}
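A minimal way to exercise the new conversion entry point from user code; the file names below are placeholders, not files shipped with this commit:

#include <opencv2/objdetect.hpp>
#include <iostream>

int main()
{
    // convert an old-format haar cascade into the new xml layout, then load it as usual
    if( !cv::CascadeClassifier::convert("old_cascade.xml", "new_cascade.xml") )
    {
        std::cerr << "conversion failed" << std::endl;
        return 1;
    }
    cv::CascadeClassifier cascade("new_cascade.xml");
    std::cout << "converted cascade loaded: " << (cascade.empty() ? "no" : "yes") << std::endl;
    return 0;
}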
///////////////////////////// OpenCL kernels for face detection //////////////////////////////
////////////////////////////// see the opencv/doc/license.txt ///////////////////////////////
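// OptFeature::ofs[k] holds precomputed integral-image offsets of the k-th rectangle's four
// corners, so each rectangle sum below is psum[ofs.x] - psum[ofs.y] - psum[ofs.z] + psum[ofs.w]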
typedef struct __attribute__((aligned(4))) OptFeature
{
int4 ofs[3] __attribute__((aligned (4)));
float4 weight __attribute__((aligned (4)));
}
OptFeature;
typedef struct __attribute__((aligned(4))) Stump
{
int featureIdx __attribute__((aligned (4)));
float threshold __attribute__((aligned (4))); // for ordered features only
float left __attribute__((aligned (4)));
float right __attribute__((aligned (4)));
}
Stump;
typedef struct __attribute__((aligned (4))) Stage
{
int first __attribute__((aligned (4)));
int ntrees __attribute__((aligned (4)));
float threshold __attribute__((aligned (4)));
}
Stage;
__kernel void runHaarClassifierStump(
__global const int* sum,
int sumstep, int sumoffset,
__global const int* sqsum,
int sqsumstep, int sqsumoffset,
__global const OptFeature* optfeatures,
int nstages,
__global const Stage* stages,
__global const Stump* stumps,
volatile __global int* facepos,
int2 imgsize, int xyscale, float factor,
int4 normrect, int2 windowsize, int maxFaces)
{
int ix = get_global_id(0)*xyscale;
int iy = get_global_id(1)*xyscale;
sumstep /= sizeof(int);
sqsumstep /= sizeof(int);
if( ix < imgsize.x && iy < imgsize.y )
{
int ntrees;
int stageIdx, i;
float s = 0.f;
__global const Stump* stump = stumps;
__global const OptFeature* f;
__global const int* psum = sum + mad24(iy, sumstep, ix);
__global const int* pnsum = psum + mad24(normrect.y, sumstep, normrect.x);
int normarea = normrect.z * normrect.w;
float invarea = 1.f/normarea;
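// per-window brightness normalization: sval is the mean over the normalization rectangle,
// and nf (below) is a standard-deviation-derived factor used to scale the stump thresholds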
float sval = (pnsum[0] - pnsum[normrect.z] - pnsum[mul24(normrect.w, sumstep)] +
pnsum[mad24(normrect.w, sumstep, normrect.z)])*invarea;
float sqval = (sqsum[mad24(iy + normrect.y, sqsumstep, ix + normrect.x)])*invarea;
float nf = (float)normarea * sqrt(max(sqval - sval * sval, 0.f));
float4 weight, vsval;
int4 ofs, ofs0, ofs1, ofs2;
nf = nf > 0 ? nf : 1.f;
for( stageIdx = 0; stageIdx < nstages; stageIdx++ )
{
ntrees = stages[stageIdx].ntrees;
s = 0.f;
for( i = 0; i < ntrees; i++, stump++ )
{
f = optfeatures + stump->featureIdx;
weight = f->weight;
ofs = f->ofs[0];
sval = (psum[ofs.x] - psum[ofs.y] - psum[ofs.z] + psum[ofs.w])*weight.x;
ofs = f->ofs[1];
sval += (psum[ofs.x] - psum[ofs.y] - psum[ofs.z] + psum[ofs.w])*weight.y;
if( weight.z > 0 )
{
ofs = f->ofs[2];
sval += (psum[ofs.x] - psum[ofs.y] - psum[ofs.z] + psum[ofs.w])*weight.z;
}
s += (sval < stump->threshold*nf) ? stump->left : stump->right;
}
if( s < stages[stageIdx].threshold )
break;
}
if( stageIdx == nstages )
{
int nfaces = atomic_inc(facepos);
if( nfaces < maxFaces )
{
volatile __global int* face = facepos + 1 + nfaces*4;
face[0] = convert_int_rte(ix*factor);
face[1] = convert_int_rte(iy*factor);
face[2] = convert_int_rte(windowsize.x*factor);
face[3] = convert_int_rte(windowsize.y*factor);
}
}
}
}
#if 0
__kernel void runLBPClassifierStump(
__global const int* sum,
int sumstep, int sumoffset,
__global const int* sqsum,
int sqsumstep, int sqsumoffset,
__global const OptFeature* optfeatures,
int nstages,
__global const Stage* stages,
__global const Stump* stumps,
__global const int* bitsets,
int bitsetSize,
volatile __global int* facepos,
int2 imgsize, int xyscale, float factor,
int4 normrect, int2 windowsize, int maxFaces)
{
int ix = get_global_id(0)*xyscale*VECTOR_SIZE;
int iy = get_global_id(1)*xyscale;
sumstep /= sizeof(int);
sqsumstep /= sizeof(int);
if( ix < imgsize.x && iy < imgsize.y )
{
int ntrees;
int stageIdx, i;
float s = 0.f;
__global const Stump* stump = stumps;
__global const int* bitset = bitsets;
__global const OptFeature* f;
__global const int* psum = sum + mad24(iy, sumstep, ix);
__global const int* pnsum = psum + mad24(normrect.y, sumstep, normrect.x);
int normarea = normrect.z * normrect.w;
float invarea = 1.f/normarea;
float sval = (pnsum[0] - pnsum[normrect.z] - pnsum[mul24(normrect.w, sumstep)] +
pnsum[mad24(normrect.w, sumstep, normrect.z)])*invarea;
float sqval = (sqsum[mad24(iy + normrect.y, sqsumstep, ix + normrect.x)])*invarea;
float nf = (float)normarea * sqrt(max(sqval - sval * sval, 0.f));
float4 weight;
int4 ofs;
nf = nf > 0 ? nf : 1.f;
for( stageIdx = 0; stageIdx < nstages; stageIdx++ )
{
ntrees = stages[stageIdx].ntrees;
s = 0.f;
for( i = 0; i < ntrees; i++, stump++, bitset += bitsetSize )
{
f = optfeatures + stump->featureIdx;
weight = f->weight;
// TODO: compute the LBP feature value into 'val' (this kernel is still disabled via #if 0)
s += (bitset[val >> 5] & (1 << (val & 31))) ? stump->left : stump->right;
}
if( s < stages[stageIdx].threshold )
break;
}
if( stageIdx == nstages )
{
int nfaces = atomic_inc(facepos);
if( nfaces < maxFaces )
{
volatile __global int* face = facepos + 1 + nfaces*4;
face[0] = convert_int_rte(ix*factor);
face[1] = convert_int_rte(iy*factor);
face[2] = convert_int_rte(windowsize.x*factor);
face[3] = convert_int_rte(windowsize.y*factor);
}
}
}
}
#endif
......@@ -98,6 +98,8 @@ int main( int argc, const char** argv )
return -1;
}
cout << "old cascade: " << (cascade.isOldFormatCascade() ? "TRUE" : "FALSE") << endl;
if( inputName.empty() || (isdigit(inputName.c_str()[0]) && inputName.c_str()[1] == '\0') )
{
int c = inputName.empty() ? 0 : inputName.c_str()[0] - '0';
......@@ -199,13 +201,12 @@ void detectAndDraw( UMat& img, Mat& canvas, CascadeClassifier& cascade,
t = (double)getTickCount();
cvtColor( img, gray, COLOR_BGR2GRAY );
resize( gray, smallImg, Size(), scale0, scale0, INTER_LINEAR );
cvtColor(smallImg, canvas, COLOR_GRAY2BGR);
equalizeHist( smallImg, smallImg );
resize( img, smallImg, Size(), scale0, scale0, INTER_LINEAR );
cvtColor( smallImg, gray, COLOR_BGR2GRAY );
equalizeHist( gray, gray );
cascade.detectMultiScale( smallImg, faces,
1.1, 2, 0
cascade.detectMultiScale( gray, faces,
1.1, 3, 0
//|CASCADE_FIND_BIGGEST_OBJECT
//|CASCADE_DO_ROUGH_SEARCH
|CASCADE_SCALE_IMAGE
......@@ -213,8 +214,8 @@ void detectAndDraw( UMat& img, Mat& canvas, CascadeClassifier& cascade,
Size(30, 30) );
if( tryflip )
{
flip(smallImg, smallImg, 1);
cascade.detectMultiScale( smallImg, faces2,
flip(gray, gray, 1);
cascade.detectMultiScale( gray, faces2,
1.1, 2, 0
//|CASCADE_FIND_BIGGEST_OBJECT
//|CASCADE_DO_ROUGH_SEARCH
......@@ -227,7 +228,7 @@ void detectAndDraw( UMat& img, Mat& canvas, CascadeClassifier& cascade,
}
}
t = (double)getTickCount() - t;
cvtColor(smallImg, canvas, COLOR_GRAY2BGR);
smallImg.copyTo(canvas);
double fps = getTickFrequency()/t;
......@@ -255,7 +256,7 @@ void detectAndDraw( UMat& img, Mat& canvas, CascadeClassifier& cascade,
color, 3, 8, 0);
if( nestedCascade.empty() )
continue;
UMat smallImgROI = smallImg(*r);
UMat smallImgROI = gray(*r);
nestedCascade.detectMultiScale( smallImgROI, nestedObjects,
1.1, 2, 0
//|CASCADE_FIND_BIGGEST_OBJECT
......