Commit 15409105 authored by Vadim Pisarevsky

started adding OpenCL acceleration of LBP-based object detectors

parent b4bd5bab
...@@ -654,6 +654,7 @@ bool LBPEvaluator::Feature :: read(const FileNode& node ) ...@@ -654,6 +654,7 @@ bool LBPEvaluator::Feature :: read(const FileNode& node )
LBPEvaluator::LBPEvaluator() LBPEvaluator::LBPEvaluator()
{ {
features = makePtr<std::vector<Feature> >(); features = makePtr<std::vector<Feature> >();
optfeatures = makePtr<std::vector<OptFeature> >();
} }
LBPEvaluator::~LBPEvaluator() LBPEvaluator::~LBPEvaluator()
{ {
...@@ -662,11 +663,12 @@ LBPEvaluator::~LBPEvaluator() ...@@ -662,11 +663,12 @@ LBPEvaluator::~LBPEvaluator()
bool LBPEvaluator::read( const FileNode& node ) bool LBPEvaluator::read( const FileNode& node )
{ {
features->resize(node.size()); features->resize(node.size());
featuresPtr = &(*features)[0]; optfeaturesPtr = &(*optfeatures)[0];
FileNodeIterator it = node.begin(), it_end = node.end(); FileNodeIterator it = node.begin(), it_end = node.end();
std::vector<Feature>& ff = *features;
for(int i = 0; it != it_end; ++it, i++) for(int i = 0; it != it_end; ++it, i++)
{ {
if(!featuresPtr[i].read(*it)) if(!ff[i].read(*it))
return false; return false;
} }
return true; return true;
...@@ -677,31 +679,58 @@ Ptr<FeatureEvaluator> LBPEvaluator::clone() const ...@@ -677,31 +679,58 @@ Ptr<FeatureEvaluator> LBPEvaluator::clone() const
Ptr<LBPEvaluator> ret = makePtr<LBPEvaluator>(); Ptr<LBPEvaluator> ret = makePtr<LBPEvaluator>();
ret->origWinSize = origWinSize; ret->origWinSize = origWinSize;
ret->features = features; ret->features = features;
ret->featuresPtr = &(*ret->features)[0]; ret->optfeatures = optfeatures;
ret->optfeaturesPtr = ret->optfeatures.empty() ? 0 : &(*ret->optfeatures)[0];
ret->sum0 = sum0, ret->sum = sum; ret->sum0 = sum0, ret->sum = sum;
ret->normrect = normrect; ret->pwin = pwin;
ret->offset = offset;
return ret; return ret;
} }
bool LBPEvaluator::setImage( InputArray _image, Size _origWinSize, Size ) bool LBPEvaluator::setImage( InputArray _image, Size _origWinSize, Size _sumSize )
{ {
Mat image = _image.getMat(); Size imgsz = _image.size();
int rn = image.rows+1, cn = image.cols+1; int cols = imgsz.width, rows = imgsz.height;
origWinSize = _origWinSize;
if (imgsz.width < origWinSize.width || imgsz.height < origWinSize.height)
if( image.cols < origWinSize.width || image.rows < origWinSize.height )
return false; return false;
if( sum0.rows < rn || sum0.cols < cn ) origWinSize = _origWinSize;
int rn = _sumSize.height, cn = _sumSize.width;
int sumStep;
CV_Assert(rn >= rows+1 && cn >= cols+1);
if( _image.isUMat() )
{
usum0.create(rn, cn, CV_32S);
usum = UMat(usum0, Rect(0, 0, cols+1, rows+1));
integral(_image, usum, noArray(), noArray(), CV_32S);
sumStep = (int)(usum.step/usum.elemSize());
}
else
{
sum0.create(rn, cn, CV_32S); sum0.create(rn, cn, CV_32S);
sum = Mat(rn, cn, CV_32S, sum0.data); sum = sum0(Rect(0, 0, cols+1, rows+1));
integral(image, sum);
integral(_image, sum, noArray(), noArray(), CV_32S);
sumStep = (int)(sum.step/sum.elemSize());
}
size_t fi, nfeatures = features->size(); size_t fi, nfeatures = features->size();
const std::vector<Feature>& ff = *features;
for( fi = 0; fi < nfeatures; fi++ )
featuresPtr[fi].updatePtrs( sum ); if( sumSize0 != _sumSize )
{
optfeatures->resize(nfeatures);
optfeaturesPtr = &(*optfeatures)[0];
for( fi = 0; fi < nfeatures; fi++ )
optfeaturesPtr[fi].setOffsets( ff[fi], sumStep );
}
if( _image.isUMat() && (sumSize0 != _sumSize || ufbuf.empty()) )
copyVectorToUMat(*optfeatures, ufbuf);
sumSize0 = _sumSize;
return true; return true;
} }
...@@ -711,7 +740,7 @@ bool LBPEvaluator::setWindow( Point pt ) ...@@ -711,7 +740,7 @@ bool LBPEvaluator::setWindow( Point pt )
pt.x + origWinSize.width >= sum.cols || pt.x + origWinSize.width >= sum.cols ||
pt.y + origWinSize.height >= sum.rows ) pt.y + origWinSize.height >= sum.rows )
return false; return false;
offset = pt.y * ((int)sum.step/sizeof(int)) + pt.x; pwin = &sum.at<int>(pt);
return true; return true;
} }
......
...@@ -250,13 +250,11 @@ public: ...@@ -250,13 +250,11 @@ public:
struct Feature struct Feature
{ {
Feature(); Feature();
bool read( const FileNode& node ); bool read( const FileNode& node );
bool tilted; bool tilted;
enum { RECT_NUM = 3 }; enum { RECT_NUM = 3 };
struct struct
{ {
Rect r; Rect r;
...@@ -369,14 +367,20 @@ public: ...@@ -369,14 +367,20 @@ public:
{ {
Feature(); Feature();
Feature( int x, int y, int _block_w, int _block_h ) : Feature( int x, int y, int _block_w, int _block_h ) :
rect(x, y, _block_w, _block_h) {} rect(x, y, _block_w, _block_h) {}
int calc( int offset ) const;
void updatePtrs( const Mat& sum );
bool read(const FileNode& node ); bool read(const FileNode& node );
Rect rect; // weight and height for block Rect rect; // weight and height for block
const int* p[16]; // fast };
struct OptFeature
{
OptFeature();
int calc( const int* pwin ) const;
void setOffsets( const Feature& _f, int step );
int ofs[16];
}; };
LBPEvaluator(); LBPEvaluator();
...@@ -390,53 +394,57 @@ public: ...@@ -390,53 +394,57 @@ public:
virtual bool setWindow(Point pt); virtual bool setWindow(Point pt);
int operator()(int featureIdx) const int operator()(int featureIdx) const
{ return featuresPtr[featureIdx].calc(offset); } { return optfeaturesPtr[featureIdx].calc(pwin); }
virtual int calcCat(int featureIdx) const virtual int calcCat(int featureIdx) const
{ return (*this)(featureIdx); } { return (*this)(featureIdx); }
protected: protected:
Size origWinSize; Size origWinSize, sumSize0;
Ptr<std::vector<Feature> > features; Ptr<std::vector<Feature> > features;
Feature* featuresPtr; // optimization Ptr<std::vector<OptFeature> > optfeatures;
OptFeature* optfeaturesPtr; // optimization
Mat sum0, sum; Mat sum0, sum;
Rect normrect; UMat usum0, usum, ufbuf;
int offset; const int* pwin;
}; };
inline LBPEvaluator::Feature :: Feature() inline LBPEvaluator::Feature :: Feature()
{ {
rect = Rect(); rect = Rect();
}
inline LBPEvaluator::OptFeature :: OptFeature()
{
for( int i = 0; i < 16; i++ ) for( int i = 0; i < 16; i++ )
p[i] = 0; ofs[i] = 0;
} }
inline int LBPEvaluator::Feature :: calc( int _offset ) const inline int LBPEvaluator::OptFeature :: calc( const int* p ) const
{ {
int cval = CALC_SUM_( p[5], p[6], p[9], p[10], _offset ); int cval = CALC_SUM_OFS_( ofs[5], ofs[6], ofs[9], ofs[10], p );
return (CALC_SUM_( p[0], p[1], p[4], p[5], _offset ) >= cval ? 128 : 0) | // 0 return (CALC_SUM_OFS_( ofs[0], ofs[1], ofs[4], ofs[5], p ) >= cval ? 128 : 0) | // 0
(CALC_SUM_( p[1], p[2], p[5], p[6], _offset ) >= cval ? 64 : 0) | // 1 (CALC_SUM_OFS_( ofs[1], ofs[2], ofs[5], ofs[6], p ) >= cval ? 64 : 0) | // 1
(CALC_SUM_( p[2], p[3], p[6], p[7], _offset ) >= cval ? 32 : 0) | // 2 (CALC_SUM_OFS_( ofs[2], ofs[3], ofs[6], ofs[7], p ) >= cval ? 32 : 0) | // 2
(CALC_SUM_( p[6], p[7], p[10], p[11], _offset ) >= cval ? 16 : 0) | // 5 (CALC_SUM_OFS_( ofs[6], ofs[7], ofs[10], ofs[11], p ) >= cval ? 16 : 0) | // 5
(CALC_SUM_( p[10], p[11], p[14], p[15], _offset ) >= cval ? 8 : 0)| // 8 (CALC_SUM_OFS_( ofs[10], ofs[11], ofs[14], ofs[15], p ) >= cval ? 8 : 0)| // 8
(CALC_SUM_( p[9], p[10], p[13], p[14], _offset ) >= cval ? 4 : 0)| // 7 (CALC_SUM_OFS_( ofs[9], ofs[10], ofs[13], ofs[14], p ) >= cval ? 4 : 0)| // 7
(CALC_SUM_( p[8], p[9], p[12], p[13], _offset ) >= cval ? 2 : 0)| // 6 (CALC_SUM_OFS_( ofs[8], ofs[9], ofs[12], ofs[13], p ) >= cval ? 2 : 0)| // 6
(CALC_SUM_( p[4], p[5], p[8], p[9], _offset ) >= cval ? 1 : 0); (CALC_SUM_OFS_( ofs[4], ofs[5], ofs[8], ofs[9], p ) >= cval ? 1 : 0);
} }
inline void LBPEvaluator::Feature :: updatePtrs( const Mat& _sum ) inline void LBPEvaluator::OptFeature :: setOffsets( const Feature& _f, int step )
{ {
const int* ptr = (const int*)_sum.data; Rect tr = _f.rect;
size_t step = _sum.step/sizeof(ptr[0]); CV_SUM_OFS( ofs[0], ofs[1], ofs[4], ofs[5], 0, tr, step );
Rect tr = rect; tr.x += 2*_f.rect.width;
CV_SUM_PTRS( p[0], p[1], p[4], p[5], ptr, tr, step ); CV_SUM_OFS( ofs[2], ofs[3], ofs[6], ofs[7], 0, tr, step );
tr.x += 2*rect.width; tr.y += 2*_f.rect.height;
CV_SUM_PTRS( p[2], p[3], p[6], p[7], ptr, tr, step ); CV_SUM_OFS( ofs[10], ofs[11], ofs[14], ofs[15], 0, tr, step );
tr.y += 2*rect.height; tr.x -= 2*_f.rect.width;
CV_SUM_PTRS( p[10], p[11], p[14], p[15], ptr, tr, step ); CV_SUM_OFS( ofs[8], ofs[9], ofs[12], ofs[13], 0, tr, step );
tr.x -= 2*rect.width;
CV_SUM_PTRS( p[8], p[9], p[12], p[13], ptr, tr, step );
} }
//---------------------------------------------- HOGEvaluator ------------------------------------------- //---------------------------------------------- HOGEvaluator -------------------------------------------
......
///////////////////////////// OpenCL kernels for face detection ////////////////////////////// ///////////////////////////// OpenCL kernels for face detection //////////////////////////////
////////////////////////////// see the opencv/doc/license.txt /////////////////////////////// ////////////////////////////// see the opencv/doc/license.txt ///////////////////////////////
typedef struct __attribute__((aligned(4))) OptFeature typedef struct __attribute__((aligned(4))) OptHaarFeature
{ {
int4 ofs[3] __attribute__((aligned (4))); int4 ofs[3] __attribute__((aligned (4)));
float4 weight __attribute__((aligned (4))); float4 weight __attribute__((aligned (4)));
} }
OptFeature; OptHaarFeature;
typedef struct __attribute__((aligned(4))) OptLBPFeature
{
int16 ofs __attribute__((aligned (4)));
}
OptLBPFeature;
typedef struct __attribute__((aligned(4))) Stump typedef struct __attribute__((aligned(4))) Stump
{ {
int featureIdx __attribute__((aligned (4))); float4 st __attribute__((aligned (4)));
float threshold __attribute__((aligned (4))); // for ordered features only
float left __attribute__((aligned (4)));
float right __attribute__((aligned (4)));
} }
Stump; Stump;
...@@ -30,7 +33,7 @@ __kernel void runHaarClassifierStump( ...@@ -30,7 +33,7 @@ __kernel void runHaarClassifierStump(
int sumstep, int sumoffset, int sumstep, int sumoffset,
__global const int* sqsum, __global const int* sqsum,
int sqsumstep, int sqsumoffset, int sqsumstep, int sqsumoffset,
__global const OptFeature* optfeatures, __global const OptHaarFeature* optfeatures,
int nstages, int nstages,
__global const Stage* stages, __global const Stage* stages,
...@@ -47,11 +50,8 @@ __kernel void runHaarClassifierStump( ...@@ -47,11 +50,8 @@ __kernel void runHaarClassifierStump(
if( ix < imgsize.x && iy < imgsize.y ) if( ix < imgsize.x && iy < imgsize.y )
{ {
int ntrees; int stageIdx;
int stageIdx, i;
float s = 0.f;
__global const Stump* stump = stumps; __global const Stump* stump = stumps;
__global const OptFeature* f;
__global const int* psum = sum + mad24(iy, sumstep, ix); __global const int* psum = sum + mad24(iy, sumstep, ix);
__global const int* pnsum = psum + mad24(normrect.y, sumstep, normrect.x); __global const int* pnsum = psum + mad24(normrect.y, sumstep, normrect.x);
...@@ -61,20 +61,19 @@ __kernel void runHaarClassifierStump( ...@@ -61,20 +61,19 @@ __kernel void runHaarClassifierStump(
pnsum[mad24(normrect.w, sumstep, normrect.z)])*invarea; pnsum[mad24(normrect.w, sumstep, normrect.z)])*invarea;
float sqval = (sqsum[mad24(iy + normrect.y, sqsumstep, ix + normrect.x)])*invarea; float sqval = (sqsum[mad24(iy + normrect.y, sqsumstep, ix + normrect.x)])*invarea;
float nf = (float)normarea * sqrt(max(sqval - sval * sval, 0.f)); float nf = (float)normarea * sqrt(max(sqval - sval * sval, 0.f));
float4 weight, vsval;
int4 ofs, ofs0, ofs1, ofs2;
nf = nf > 0 ? nf : 1.f; nf = nf > 0 ? nf : 1.f;
for( stageIdx = 0; stageIdx < nstages; stageIdx++ ) for( stageIdx = 0; stageIdx < nstages; stageIdx++ )
{ {
ntrees = stages[stageIdx].ntrees; int i, ntrees = stages[stageIdx].ntrees;
s = 0.f; float s = 0.f;
for( i = 0; i < ntrees; i++, stump++ ) for( i = 0; i < ntrees; i++, stump++ )
{ {
f = optfeatures + stump->featureIdx; float4 st = stump->st;
weight = f->weight; __global const OptHaarFeature* f = optfeatures + as_int(st.x);
float4 weight = f->weight;
ofs = f->ofs[0]; int4 ofs = f->ofs[0];
sval = (psum[ofs.x] - psum[ofs.y] - psum[ofs.z] + psum[ofs.w])*weight.x; sval = (psum[ofs.x] - psum[ofs.y] - psum[ofs.z] + psum[ofs.w])*weight.x;
ofs = f->ofs[1]; ofs = f->ofs[1];
sval += (psum[ofs.x] - psum[ofs.y] - psum[ofs.z] + psum[ofs.w])*weight.y; sval += (psum[ofs.x] - psum[ofs.y] - psum[ofs.z] + psum[ofs.w])*weight.y;
...@@ -84,7 +83,7 @@ __kernel void runHaarClassifierStump( ...@@ -84,7 +83,7 @@ __kernel void runHaarClassifierStump(
sval += (psum[ofs.x] - psum[ofs.y] - psum[ofs.z] + psum[ofs.w])*weight.z; sval += (psum[ofs.x] - psum[ofs.y] - psum[ofs.z] + psum[ofs.w])*weight.z;
} }
s += (sval < stump->threshold*nf) ? stump->left : stump->right; s += (sval < st.y*nf) ? st.z : st.w;
} }
if( s < stages[stageIdx].threshold ) if( s < stages[stageIdx].threshold )
...@@ -110,9 +109,7 @@ __kernel void runHaarClassifierStump( ...@@ -110,9 +109,7 @@ __kernel void runHaarClassifierStump(
__kernel void runLBPClassifierStump( __kernel void runLBPClassifierStump(
__global const int* sum, __global const int* sum,
int sumstep, int sumoffset, int sumstep, int sumoffset,
__global const int* sqsum, __global const OptLBPFeature* optfeatures,
int sqsumstep, int sqsumoffset,
__global const OptFeature* optfeatures,
int nstages, int nstages,
__global const Stage* stages, __global const Stage* stages,
...@@ -124,50 +121,45 @@ __kernel void runLBPClassifierStump( ...@@ -124,50 +121,45 @@ __kernel void runLBPClassifierStump(
int2 imgsize, int xyscale, float factor, int2 imgsize, int xyscale, float factor,
int4 normrect, int2 windowsize, int maxFaces) int4 normrect, int2 windowsize, int maxFaces)
{ {
int ix = get_global_id(0)*xyscale*VECTOR_SIZE; int ix = get_global_id(0)*xyscale;
int iy = get_global_id(1)*xyscale; int iy = get_global_id(1)*xyscale;
sumstep /= sizeof(int); sumstep /= sizeof(int);
sqsumstep /= sizeof(int); sqsumstep /= sizeof(int);
if( ix < imgsize.x && iy < imgsize.y ) if( ix < imgsize.x && iy < imgsize.y )
{ {
int ntrees; int stageIdx;
int stageIdx, i;
float s = 0.f;
__global const Stump* stump = stumps; __global const Stump* stump = stumps;
__global const int* bitset = bitsets;
__global const OptFeature* f;
__global const int* psum = sum + mad24(iy, sumstep, ix);
__global const int* pnsum = psum + mad24(normrect.y, sumstep, normrect.x);
int normarea = normrect.z * normrect.w;
float invarea = 1.f/normarea;
float sval = (pnsum[0] - pnsum[normrect.z] - pnsum[mul24(normrect.w, sumstep)] +
pnsum[mad24(normrect.w, sumstep, normrect.z)])*invarea;
float sqval = (sqsum[mad24(iy + normrect.y, sqsumstep, ix + normrect.x)])*invarea;
float nf = (float)normarea * sqrt(max(sqval - sval * sval, 0.f));
float4 weight;
int4 ofs;
nf = nf > 0 ? nf : 1.f;
for( stageIdx = 0; stageIdx < nstages; stageIdx++ ) for( stageIdx = 0; stageIdx < nstages; stageIdx++ )
{ {
ntrees = stages[stageIdx].ntrees; int i, ntrees = stages[stageIdx].ntrees;
s = 0.f; float s = 0.f;
for( i = 0; i < ntrees; i++, stump++, bitset += bitsetSize ) for( i = 0; i < ntrees; i++, stump++ )
{ {
f = optfeatures + stump->featureIdx; float4 st = stump->st;
__global const OptLBPFeature* f = optfeatures + as_int(st.x);
weight = f->weight; int16 ofs = f->ofs;
// compute LBP feature to val
s += (bitset[val >> 5] & (1 << (val & 31))) ? stump->left : stump->right;
int4 ofs = f->ofs[0];
sval = (psum[ofs.x] - psum[ofs.y] - psum[ofs.z] + psum[ofs.w])*weight.x;
ofs = f->ofs[1];
sval += (psum[ofs.x] - psum[ofs.y] - psum[ofs.z] + psum[ofs.w])*weight.y;
if( weight.z > 0 )
{
ofs = f->ofs[2];
sval += (psum[ofs.x] - psum[ofs.y] - psum[ofs.z] + psum[ofs.w])*weight.z;
}
s += (sval < st.y*nf) ? st.z : st.w;
} }
if( s < stages[stageIdx].threshold ) if( s < stages[stageIdx].threshold )
break; break;
} }
if( stageIdx == nstages ) if( stageIdx == nstages )
{ {
int nfaces = atomic_inc(facepos); int nfaces = atomic_inc(facepos);
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment