Commit 6309b2d0 authored by Vladimir Dudnik

added OpenCVFindIPP.cmake script, which will look for IPP installation at CMake…

Added the OpenCVFindIPP.cmake script, which looks for an IPP installation at CMake configuration time. The IPPROOT environment variable is checked first; if it is not found, the script searches the default install locations.
The script should support IPP versions 5.3 through 7.x (although it has been tested only on Windows with IPP 6.1 and IPP 7.0).

A preliminary optimization of HOG with IPP is added too. It is not yet very efficient; the code for the CPU branch should be redesigned to achieve better performance.
parent 1a34fa30
This diff is collapsed.
...@@ -41,5 +41,4 @@ ...@@ -41,5 +41,4 @@
#include "precomp.hpp" #include "precomp.hpp"
/* End of file. */ /* End of file. */
...@@ -2229,7 +2229,7 @@ void dct( const Mat& src0, Mat& dst, int flags ) ...@@ -2229,7 +2229,7 @@ void dct( const Mat& src0, Mat& dst, int flags )
Mat src = src0; Mat src = src0;
int type = src.type(), depth = src.depth(); int type = src.type(), depth = src.depth();
void /* *spec_dft = 0, */ *spec = 0; void /* *spec_dft = 0, */ *spec = 0;
double scale = 1.; double scale = 1.;
int prev_len = 0, nf = 0, stage, end_stage; int prev_len = 0, nf = 0, stage, end_stage;
uchar *src_dft_buf = 0, *dst_dft_buf = 0; uchar *src_dft_buf = 0, *dst_dft_buf = 0;
......
...@@ -278,7 +278,7 @@ binaryOpC1_( const Mat& srcmat1, const Mat& srcmat2, Mat& dstmat ) ...@@ -278,7 +278,7 @@ binaryOpC1_( const Mat& srcmat1, const Mat& srcmat2, Mat& dstmat )
DT* dst = (DT*)dstmat.data; DT* dst = (DT*)dstmat.data;
size_t step1 = srcmat1.step/sizeof(src1[0]); size_t step1 = srcmat1.step/sizeof(src1[0]);
size_t step2 = srcmat2.step/sizeof(src2[0]); size_t step2 = srcmat2.step/sizeof(src2[0]);
size_t step = dstmat.step/sizeof(dst[0]); size_t step = dstmat.step/sizeof(dst[0]);
Size size = getContinuousSize( srcmat1, srcmat2, dstmat, dstmat.channels() ); Size size = getContinuousSize( srcmat1, srcmat2, dstmat, dstmat.channels() );
if( size.width == 1 ) if( size.width == 1 )
...@@ -290,7 +290,8 @@ binaryOpC1_( const Mat& srcmat1, const Mat& srcmat2, Mat& dstmat ) ...@@ -290,7 +290,8 @@ binaryOpC1_( const Mat& srcmat1, const Mat& srcmat2, Mat& dstmat )
for( ; size.height--; src1 += step1, src2 += step2, dst += step ) for( ; size.height--; src1 += step1, src2 += step2, dst += step )
{ {
int x = vecOp(src1, src2, dst, size.width); int x;
x = vecOp(src1, src2, dst, size.width);
for( ; x <= size.width - 4; x += 4 ) for( ; x <= size.width - 4; x += 4 )
{ {
DT f0, f1; DT f0, f1;
......
...@@ -434,7 +434,6 @@ private: ...@@ -434,7 +434,6 @@ private:
void mousePressEvent(QMouseEvent *event); void mousePressEvent(QMouseEvent *event);
void mouseReleaseEvent(QMouseEvent *event); void mouseReleaseEvent(QMouseEvent *event);
void mouseDoubleClickEvent(QMouseEvent *event); void mouseDoubleClickEvent(QMouseEvent *event);
//void dragMoveEvent(QDragMoveEvent *event);
void drawInstructions(QPainter *painter); void drawInstructions(QPainter *painter);
void drawViewOverview(QPainter *painter); void drawViewOverview(QPainter *painter);
void drawImgRegion(QPainter *painter); void drawImgRegion(QPainter *painter);
......
...@@ -42,7 +42,9 @@ ...@@ -42,7 +42,9 @@
#include "precomp.hpp" #include "precomp.hpp"
#include <iterator> #include <iterator>
#ifdef HAVE_IPP
#include "ipp.h"
#endif
/****************************************************************************************\ /****************************************************************************************\
The code below is implementation of HOG (Histogram-of-Oriented Gradients) The code below is implementation of HOG (Histogram-of-Oriented Gradients)
descriptor and object detection, introduced by Navneet Dalal and Bill Triggs. descriptor and object detection, introduced by Navneet Dalal and Bill Triggs.
...@@ -216,12 +218,39 @@ void HOGDescriptor::computeGradient(const Mat& img, Mat& grad, Mat& qangle, ...@@ -216,12 +218,39 @@ void HOGDescriptor::computeGradient(const Mat& img, Mat& grad, Mat& qangle,
int _nbins = nbins; int _nbins = nbins;
float angleScale = (float)(_nbins/CV_PI); float angleScale = (float)(_nbins/CV_PI);
#ifdef HAVE_IPP
Mat lutimg(img.rows,img.cols,CV_MAKETYPE(CV_32F,cn));
Mat hidxs(1, width, CV_32F);
Ipp32f *pHidxs = (Ipp32f*)hidxs.data;
Ipp32f *pAngles = (Ipp32f*)Angle.data;
IppiSize roiSize;
roiSize.width = img.cols;
roiSize.height = img.rows;
for( y = 0; y < roiSize.height; y++ )
{
const uchar* imgPtr = img.data + y*img.step;
float* imglutPtr = (float*)(lutimg.data + y*lutimg.step);
for( x = 0; x < roiSize.width*cn; x++ )
{
imglutPtr[x] = lut[imgPtr[x]];
}
}
#endif
for( y = 0; y < gradsize.height; y++ ) for( y = 0; y < gradsize.height; y++ )
{ {
#ifdef HAVE_IPP
const float* imgPtr = (float*)(lutimg.data + lutimg.step*ymap[y]);
const float* prevPtr = (float*)(lutimg.data + lutimg.step*ymap[y-1]);
const float* nextPtr = (float*)(lutimg.data + lutimg.step*ymap[y+1]);
#else
const uchar* imgPtr = img.data + img.step*ymap[y]; const uchar* imgPtr = img.data + img.step*ymap[y];
const uchar* prevPtr = img.data + img.step*ymap[y-1]; const uchar* prevPtr = img.data + img.step*ymap[y-1];
const uchar* nextPtr = img.data + img.step*ymap[y+1]; const uchar* nextPtr = img.data + img.step*ymap[y+1];
#endif
float* gradPtr = (float*)grad.ptr(y); float* gradPtr = (float*)grad.ptr(y);
uchar* qanglePtr = (uchar*)qangle.ptr(y); uchar* qanglePtr = (uchar*)qangle.ptr(y);
...@@ -230,8 +259,13 @@ void HOGDescriptor::computeGradient(const Mat& img, Mat& grad, Mat& qangle, ...@@ -230,8 +259,13 @@ void HOGDescriptor::computeGradient(const Mat& img, Mat& grad, Mat& qangle,
for( x = 0; x < width; x++ ) for( x = 0; x < width; x++ )
{ {
int x1 = xmap[x]; int x1 = xmap[x];
#ifdef HAVE_IPP
dbuf[x] = (float)(imgPtr[xmap[x+1]] - imgPtr[xmap[x-1]]);
dbuf[width + x] = (float)(nextPtr[x1] - prevPtr[x1]);
#else
dbuf[x] = (float)(lut[imgPtr[xmap[x+1]]] - lut[imgPtr[xmap[x-1]]]); dbuf[x] = (float)(lut[imgPtr[xmap[x+1]]] - lut[imgPtr[xmap[x-1]]]);
dbuf[width + x] = (float)(lut[nextPtr[x1]] - lut[prevPtr[x1]]); dbuf[width + x] = (float)(lut[nextPtr[x1]] - lut[prevPtr[x1]]);
#endif
} }
} }
else else
...@@ -239,9 +273,32 @@ void HOGDescriptor::computeGradient(const Mat& img, Mat& grad, Mat& qangle, ...@@ -239,9 +273,32 @@ void HOGDescriptor::computeGradient(const Mat& img, Mat& grad, Mat& qangle,
for( x = 0; x < width; x++ ) for( x = 0; x < width; x++ )
{ {
int x1 = xmap[x]*3; int x1 = xmap[x]*3;
float dx0, dy0, dx, dy, mag0, mag;
#ifdef HAVE_IPP
const float* p2 = imgPtr + xmap[x+1]*3;
const float* p0 = imgPtr + xmap[x-1]*3;
dx0 = p2[2] - p0[2];
dy0 = nextPtr[x1+2] - prevPtr[x1+2];
mag0 = dx0*dx0 + dy0*dy0;
dx = p2[1] - p0[1];
dy = nextPtr[x1+1] - prevPtr[x1+1];
mag = dx*dx + dy*dy;
if( mag0 < mag )
{
dx0 = dx;
dy0 = dy;
mag0 = mag;
}
dx = p2[0] - p0[0];
dy = nextPtr[x1] - prevPtr[x1];
mag = dx*dx + dy*dy;
#else
const uchar* p2 = imgPtr + xmap[x+1]*3; const uchar* p2 = imgPtr + xmap[x+1]*3;
const uchar* p0 = imgPtr + xmap[x-1]*3; const uchar* p0 = imgPtr + xmap[x-1]*3;
float dx0, dy0, dx, dy, mag0, mag;
dx0 = lut[p2[2]] - lut[p0[2]]; dx0 = lut[p2[2]] - lut[p0[2]];
dy0 = lut[nextPtr[x1+2]] - lut[prevPtr[x1+2]]; dy0 = lut[nextPtr[x1+2]] - lut[prevPtr[x1+2]];
...@@ -261,7 +318,7 @@ void HOGDescriptor::computeGradient(const Mat& img, Mat& grad, Mat& qangle, ...@@ -261,7 +318,7 @@ void HOGDescriptor::computeGradient(const Mat& img, Mat& grad, Mat& qangle,
dx = lut[p2[0]] - lut[p0[0]]; dx = lut[p2[0]] - lut[p0[0]];
dy = lut[nextPtr[x1]] - lut[prevPtr[x1]]; dy = lut[nextPtr[x1]] - lut[prevPtr[x1]];
mag = dx*dx + dy*dy; mag = dx*dx + dy*dy;
#endif
if( mag0 < mag ) if( mag0 < mag )
{ {
dx0 = dx; dx0 = dx;
...@@ -273,14 +330,35 @@ void HOGDescriptor::computeGradient(const Mat& img, Mat& grad, Mat& qangle, ...@@ -273,14 +330,35 @@ void HOGDescriptor::computeGradient(const Mat& img, Mat& grad, Mat& qangle,
dbuf[x+width] = dy0; dbuf[x+width] = dy0;
} }
} }
#ifdef HAVE_IPP
cartToPolar( Dx, Dy, Mag, Angle, false ); ippsCartToPolar_32f((const Ipp32f*)Dx.data, (const Ipp32f*)Dy.data, (Ipp32f*)Mag.data, pAngles, width);
for( x = 0; x < width; x++ )
{
if(pAngles[x] < 0.f) pAngles[x]+=(Ipp32f)(CV_PI*2.);
}
ippsNormalize_32f(pAngles, pAngles, width, 0.5f/angleScale, 1.f/angleScale);
ippsFloor_32f(pAngles,(Ipp32f*)hidxs.data,width);
ippsSub_32f_I((Ipp32f*)hidxs.data,pAngles,width);
ippsMul_32f_I((Ipp32f*)Mag.data,pAngles,width);
ippsSub_32f_I(pAngles,(Ipp32f*)Mag.data,width);
ippsRealToCplx_32f((Ipp32f*)Mag.data,pAngles,(Ipp32fc*)gradPtr,width);
#else
cartToPolar( Dx, Dy, Mag, Angle, false );
#endif
for( x = 0; x < width; x++ ) for( x = 0; x < width; x++ )
{ {
#ifdef HAVE_IPP
int hidx = (int)pHidxs[x];
#else
float mag = dbuf[x+width*2], angle = dbuf[x+width*3]*angleScale - 0.5f; float mag = dbuf[x+width*2], angle = dbuf[x+width*3]*angleScale - 0.5f;
int hidx = cvFloor(angle); int hidx = cvFloor(angle);
angle -= hidx; angle -= hidx;
gradPtr[x*2] = mag*(1.f - angle);
gradPtr[x*2+1] = mag*angle;
#endif
if( hidx < 0 ) if( hidx < 0 )
hidx += _nbins; hidx += _nbins;
else if( hidx >= _nbins ) else if( hidx >= _nbins )
...@@ -291,9 +369,7 @@ void HOGDescriptor::computeGradient(const Mat& img, Mat& grad, Mat& qangle, ...@@ -291,9 +369,7 @@ void HOGDescriptor::computeGradient(const Mat& img, Mat& grad, Mat& qangle,
hidx++; hidx++;
hidx &= hidx < _nbins ? -1 : 0; hidx &= hidx < _nbins ? -1 : 0;
qanglePtr[x*2+1] = (uchar)hidx; qanglePtr[x*2+1] = (uchar)hidx;
gradPtr[x*2] = mag*(1.f - angle); }
gradPtr[x*2+1] = mag*angle;
}
} }
} }
...@@ -576,9 +652,12 @@ const float* HOGCache::getBlock(Point pt, float* buf) ...@@ -576,9 +652,12 @@ const float* HOGCache::getBlock(Point pt, float* buf)
const uchar* qanglePtr = qangle.data + qangle.step*pt.y + pt.x*2; const uchar* qanglePtr = qangle.data + qangle.step*pt.y + pt.x*2;
CV_Assert( blockHist != 0 ); CV_Assert( blockHist != 0 );
#ifdef HAVE_IPP
ippsZero_32f(blockHist,blockHistogramSize);
#else
for( k = 0; k < blockHistogramSize; k++ ) for( k = 0; k < blockHistogramSize; k++ )
blockHist[k] = 0.f; blockHist[k] = 0.f;
#endif
const PixData* _pixData = &pixData[0]; const PixData* _pixData = &pixData[0];
...@@ -658,20 +737,40 @@ const float* HOGCache::getBlock(Point pt, float* buf) ...@@ -658,20 +737,40 @@ const float* HOGCache::getBlock(Point pt, float* buf)
void HOGCache::normalizeBlockHistogram(float* _hist) const void HOGCache::normalizeBlockHistogram(float* _hist) const
{ {
float* hist = &_hist[0]; float* hist = &_hist[0];
#ifdef HAVE_IPP
size_t sz = blockHistogramSize;
#else
size_t i, sz = blockHistogramSize; size_t i, sz = blockHistogramSize;
#endif
float sum = 0; float sum = 0;
#ifdef HAVE_IPP
ippsDotProd_32f(hist,hist,sz,&sum);
#else
for( i = 0; i < sz; i++ ) for( i = 0; i < sz; i++ )
sum += hist[i]*hist[i]; sum += hist[i]*hist[i];
#endif
float scale = 1.f/(std::sqrt(sum)+sz*0.1f), thresh = (float)descriptor->L2HysThreshold; float scale = 1.f/(std::sqrt(sum)+sz*0.1f), thresh = (float)descriptor->L2HysThreshold;
#ifdef HAVE_IPP
ippsMulC_32f_I(scale,hist,sz);
ippsThreshold_32f_I( hist, sz, thresh, ippCmpGreater );
ippsDotProd_32f(hist,hist,sz,&sum);
#else
for( i = 0, sum = 0; i < sz; i++ ) for( i = 0, sum = 0; i < sz; i++ )
{ {
hist[i] = std::min(hist[i]*scale, thresh); hist[i] = std::min(hist[i]*scale, thresh);
sum += hist[i]*hist[i]; sum += hist[i]*hist[i];
} }
#endif
scale = 1.f/(std::sqrt(sum)+1e-3f); scale = 1.f/(std::sqrt(sum)+1e-3f);
#ifdef HAVE_IPP
ippsMulC_32f_I(scale,hist,sz);
#else
for( i = 0; i < sz; i++ ) for( i = 0; i < sz; i++ )
hist[i] *= scale; hist[i] *= scale;
#endif
} }
...@@ -741,8 +840,12 @@ void HOGDescriptor::compute(const Mat& img, vector<float>& descriptors, ...@@ -741,8 +840,12 @@ void HOGDescriptor::compute(const Mat& img, vector<float>& descriptors,
float* dst = descriptor + bj.histOfs; float* dst = descriptor + bj.histOfs;
const float* src = cache.getBlock(pt, dst); const float* src = cache.getBlock(pt, dst);
if( src != dst ) if( src != dst )
#ifdef HAVE_IPP
ippsCopy_32f(src,dst,blockHistogramSize);
#else
for( int k = 0; k < blockHistogramSize; k++ ) for( int k = 0; k < blockHistogramSize; k++ )
dst[k] = src[k]; dst[k] = src[k];
#endif
} }
} }
} }
...@@ -796,18 +899,28 @@ void HOGDescriptor::detect(const Mat& img, ...@@ -796,18 +899,28 @@ void HOGDescriptor::detect(const Mat& img,
} }
double s = rho; double s = rho;
const float* svmVec = &svmDetector[0]; const float* svmVec = &svmDetector[0];
#ifdef HAVE_IPP
int j;
#else
int j, k; int j, k;
#endif
for( j = 0; j < nblocks; j++, svmVec += blockHistogramSize ) for( j = 0; j < nblocks; j++, svmVec += blockHistogramSize )
{ {
const HOGCache::BlockData& bj = blockData[j]; const HOGCache::BlockData& bj = blockData[j];
Point pt = pt0 + bj.imgOffset; Point pt = pt0 + bj.imgOffset;
const float* vec = cache.getBlock(pt, &blockHist[0]); const float* vec = cache.getBlock(pt, &blockHist[0]);
#ifdef HAVE_IPP
Ipp32f partSum;
ippsDotProd_32f(vec,svmVec,blockHistogramSize,&partSum);
s += (double)partSum;
#else
for( k = 0; k <= blockHistogramSize - 4; k += 4 ) for( k = 0; k <= blockHistogramSize - 4; k += 4 )
s += vec[k]*svmVec[k] + vec[k+1]*svmVec[k+1] + s += vec[k]*svmVec[k] + vec[k+1]*svmVec[k+1] +
vec[k+2]*svmVec[k+2] + vec[k+3]*svmVec[k+3]; vec[k+2]*svmVec[k+2] + vec[k+3]*svmVec[k+3];
for( ; k < blockHistogramSize; k++ ) for( ; k < blockHistogramSize; k++ )
s += vec[k]*svmVec[k]; s += vec[k]*svmVec[k];
#endif
} }
if( s >= hitThreshold ) if( s >= hitThreshold )
hits.push_back(pt0); hits.push_back(pt0);
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment