added OpenCVFindIPP.cmake script, which will look for IPP installation at CMake…

added OpenCVFindIPP.cmake script, which will look for IPP installation at CMake configuration time. First, IPPROOT environment variable will be tested, if not found script will look at default install places. The script should support IPP from 5.3 up to 7.x versions (although tested on Windows for IPP 6.1 and IPP 7.0 versions only) Preliminary optimization of HOG with IPP added too. Not yet quite efficient, code for cpu branch should be redesigned in order to have better performance.

added OpenCVFindIPP.cmake script, which will look for IPP installation at CMake…
added OpenCVFindIPP.cmake script, which will look for IPP installation at CMake configuration time. First, IPPROOT environment variable will be tested, if not found script will look at default install places. The script should support IPP from 5.3 up to 7.x versions (although tested on Windows for IPP 6.1 and IPP 7.0 versions only) Preliminary optimization of HOG with IPP added too. Not yet quite efficient, code for cpu branch should be redesigned in order to have better performance.
6309b2d0 · Vladimir Dudnik · 1a34fa30 · 6309b2d0 · 6309b2d0 · 6309b2d0
Commit 6309b2d0 authored Dec 31, 2010 by Vladimir Dudnik
6 changed files
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
--- a/modules/contrib/src/precomp.cpp
+++ b/modules/contrib/src/precomp.cpp
@@ -41,5 +41,4 @@
 #include "precomp.hpp"
 /* End of file. */
--- a/modules/core/src/dxt.cpp
+++ b/modules/core/src/dxt.cpp
@@ -2229,7 +2229,7 @@ void dct( const Mat& src0, Mat& dst, int flags )
    Mat src = src0;
    int type = src.type(), depth = src.depth();
    void /* *spec_dft = 0, */ *spec = 0;
    double scale = 1.;
    int prev_len = 0, nf = 0, stage, end_stage;
    uchar *src_dft_buf = 0, *dst_dft_buf = 0;

--- a/modules/core/src/precomp.hpp
+++ b/modules/core/src/precomp.hpp
@@ -278,7 +278,7 @@ binaryOpC1_( const Mat& srcmat1, const Mat& srcmat2, Mat& dstmat )
    DT* dst = (DT*)dstmat.data;
    size_t step1 = srcmat1.step/sizeof(src1[0]);
    size_t step2 = srcmat2.step/sizeof(src2[0]);
-    size_t step = dstmat.step/sizeof(dst[0]);
+    size_t step  = dstmat.step/sizeof(dst[0]);
    Size size = getContinuousSize( srcmat1, srcmat2, dstmat, dstmat.channels() );
    if( size.width == 1 )
@@ -290,7 +290,8 @@ binaryOpC1_( const Mat& srcmat1, const Mat& srcmat2, Mat& dstmat )
    for( ; size.height--; src1 += step1, src2 += step2, dst += step )
    {
-        int x = vecOp(src1, src2, dst, size.width);
+        int x;
+        x = vecOp(src1, src2, dst, size.width);
        for( ; x <= size.width - 4; x += 4 )
        {
            DT f0, f1;

--- a/modules/highgui/src/window_QT.h
+++ b/modules/highgui/src/window_QT.h
@@ -434,7 +434,6 @@ private:
    void mousePressEvent(QMouseEvent *event);
    void mouseReleaseEvent(QMouseEvent *event);
    void mouseDoubleClickEvent(QMouseEvent *event);
-    //void dragMoveEvent(QDragMoveEvent *event);
    void drawInstructions(QPainter *painter);
    void drawViewOverview(QPainter *painter);
    void drawImgRegion(QPainter *painter);

--- a/modules/objdetect/src/hog.cpp
+++ b/modules/objdetect/src/hog.cpp
@@ -42,7 +42,9 @@
 #include "precomp.hpp"
 #include <iterator>
+#ifdef HAVE_IPP
+#include "ipp.h"
+#endif
 /****************************************************************************************\
      The code below is implementation of HOG (Histogram-of-Oriented Gradients)
      descriptor and object detection, introduced by Navneet Dalal and Bill Triggs.
@@ -216,12 +218,39 @@ void HOGDescriptor::computeGradient(const Mat& img, Mat& grad, Mat& qangle,
    int _nbins = nbins;
    float angleScale = (float)(_nbins/CV_PI);
+#ifdef HAVE_IPP
+    Mat lutimg(img.rows,img.cols,CV_MAKETYPE(CV_32F,cn));
+    Mat hidxs(1, width, CV_32F);
+    Ipp32f *pHidxs = (Ipp32f*)hidxs.data;
+    Ipp32f *pAngles = (Ipp32f*)Angle.data;
+    IppiSize roiSize;
+    roiSize.width = img.cols;
+    roiSize.height = img.rows;
+    for( y = 0; y < roiSize.height; y++ )
+    {
+       const uchar* imgPtr = img.data + y*img.step;
+       float* imglutPtr = (float*)(lutimg.data + y*lutimg.step);
+       for( x = 0; x < roiSize.width*cn; x++ )
+       {
+          imglutPtr[x] = lut[imgPtr[x]];
+       }
+    }
+#endif
    for( y = 0; y < gradsize.height; y++ )
    {
+#ifdef HAVE_IPP
+        const float* imgPtr = (float*)(lutimg.data + lutimg.step*ymap[y]);
+        const float* prevPtr = (float*)(lutimg.data + lutimg.step*ymap[y-1]);
+        const float* nextPtr = (float*)(lutimg.data + lutimg.step*ymap[y+1]);
+#else
        const uchar* imgPtr = img.data + img.step*ymap[y];
        const uchar* prevPtr = img.data + img.step*ymap[y-1];
        const uchar* nextPtr = img.data + img.step*ymap[y+1];
+#endif
        float* gradPtr = (float*)grad.ptr(y);
        uchar* qanglePtr = (uchar*)qangle.ptr(y);
@@ -230,8 +259,13 @@ void HOGDescriptor::computeGradient(const Mat& img, Mat& grad, Mat& qangle,
            for( x = 0; x < width; x++ )
            {
                int x1 = xmap[x];
+#ifdef HAVE_IPP
+                dbuf[x] = (float)(imgPtr[xmap[x+1]] - imgPtr[xmap[x-1]]);
+                dbuf[width + x] = (float)(nextPtr[x1] - prevPtr[x1]);
+#else
                dbuf[x] = (float)(lut[imgPtr[xmap[x+1]]] - lut[imgPtr[xmap[x-1]]]);
                dbuf[width + x] = (float)(lut[nextPtr[x1]] - lut[prevPtr[x1]]);
+#endif
            }
        }
        else
@@ -239,9 +273,32 @@ void HOGDescriptor::computeGradient(const Mat& img, Mat& grad, Mat& qangle,
            for( x = 0; x < width; x++ )
            {
                int x1 = xmap[x]*3;
+                float dx0, dy0, dx, dy, mag0, mag;
+#ifdef HAVE_IPP
+                const float* p2 = imgPtr + xmap[x+1]*3;
+                const float* p0 = imgPtr + xmap[x-1]*3;
+                dx0 = p2[2] - p0[2];
+                dy0 = nextPtr[x1+2] - prevPtr[x1+2];
+                mag0 = dx0*dx0 + dy0*dy0;
+                dx = p2[1] - p0[1];
+                dy = nextPtr[x1+1] - prevPtr[x1+1];
+                mag = dx*dx + dy*dy;
+                if( mag0 < mag )
+                {
+                    dx0 = dx;
+                    dy0 = dy;
+                    mag0 = mag;
+                }
+                dx = p2[0] - p0[0];
+                dy = nextPtr[x1] - prevPtr[x1];
+                mag = dx*dx + dy*dy;
+#else
                const uchar* p2 = imgPtr + xmap[x+1]*3;
                const uchar* p0 = imgPtr + xmap[x-1]*3;
-                float dx0, dy0, dx, dy, mag0, mag;
                dx0 = lut[p2[2]] - lut[p0[2]];
                dy0 = lut[nextPtr[x1+2]] - lut[prevPtr[x1+2]];
@@ -261,7 +318,7 @@ void HOGDescriptor::computeGradient(const Mat& img, Mat& grad, Mat& qangle,
                dx = lut[p2[0]] - lut[p0[0]];
                dy = lut[nextPtr[x1]] - lut[prevPtr[x1]];
                mag = dx*dx + dy*dy;
+ #endif
                if( mag0 < mag )
                {
                    dx0 = dx;
@@ -273,14 +330,35 @@ void HOGDescriptor::computeGradient(const Mat& img, Mat& grad, Mat& qangle,
                dbuf[x+width] = dy0;
            }
        }
+#ifdef HAVE_IPP
-        cartToPolar( Dx, Dy, Mag, Angle, false );
+        ippsCartToPolar_32f((const Ipp32f*)Dx.data, (const Ipp32f*)Dy.data, (Ipp32f*)Mag.data, pAngles, width);
+        for( x = 0; x < width; x++ )
+        {
+           if(pAngles[x] < 0.f) pAngles[x]+=(Ipp32f)(CV_PI*2.);
+        }
+        ippsNormalize_32f(pAngles, pAngles, width, 0.5f/angleScale, 1.f/angleScale);
+        ippsFloor_32f(pAngles,(Ipp32f*)hidxs.data,width);
+        ippsSub_32f_I((Ipp32f*)hidxs.data,pAngles,width);
+        ippsMul_32f_I((Ipp32f*)Mag.data,pAngles,width);
+        ippsSub_32f_I(pAngles,(Ipp32f*)Mag.data,width);
+        ippsRealToCplx_32f((Ipp32f*)Mag.data,pAngles,(Ipp32fc*)gradPtr,width);
+#else
+        cartToPolar( Dx, Dy, Mag, Angle, false );
+#endif
        for( x = 0; x < width; x++ )
        {
+#ifdef HAVE_IPP
+            int hidx = (int)pHidxs[x];
+#else
            float mag = dbuf[x+width*2], angle = dbuf[x+width*3]*angleScale - 0.5f;
            int hidx = cvFloor(angle);
            angle -= hidx;
+            gradPtr[x*2] = mag*(1.f - angle);
+            gradPtr[x*2+1] = mag*angle;
+#endif
            if( hidx < 0 )
                hidx += _nbins;
            else if( hidx >= _nbins )
@@ -291,9 +369,7 @@ void HOGDescriptor::computeGradient(const Mat& img, Mat& grad, Mat& qangle,
            hidx++;
            hidx &= hidx < _nbins ? -1 : 0;
            qanglePtr[x*2+1] = (uchar)hidx;
-            gradPtr[x*2] = mag*(1.f - angle);
+                    }
-            gradPtr[x*2+1] = mag*angle;
-        }
    }
 }
@@ -576,9 +652,12 @@ const float* HOGCache::getBlock(Point pt, float* buf)
    const uchar* qanglePtr = qangle.data + qangle.step*pt.y + pt.x*2;
    CV_Assert( blockHist != 0 );
+#ifdef HAVE_IPP
+    ippsZero_32f(blockHist,blockHistogramSize);
+#else
    for( k = 0; k < blockHistogramSize; k++ )
        blockHist[k] = 0.f;
+#endif
    const PixData* _pixData = &pixData[0];
@@ -658,20 +737,40 @@ const float* HOGCache::getBlock(Point pt, float* buf)
 void HOGCache::normalizeBlockHistogram(float* _hist) const
 {
    float* hist = &_hist[0];
+#ifdef HAVE_IPP
+    size_t sz = blockHistogramSize;
+#else
    size_t i, sz = blockHistogramSize;
+#endif
    float sum = 0;
+#ifdef HAVE_IPP
+    ippsDotProd_32f(hist,hist,sz,&sum);
+#else
    for( i = 0; i < sz; i++ )
        sum += hist[i]*hist[i];
+#endif
    float scale = 1.f/(std::sqrt(sum)+sz*0.1f), thresh = (float)descriptor->L2HysThreshold;
+#ifdef HAVE_IPP
+    ippsMulC_32f_I(scale,hist,sz);
+    ippsThreshold_32f_I( hist, sz, thresh, ippCmpGreater );
+    ippsDotProd_32f(hist,hist,sz,&sum);
+#else
    for( i = 0, sum = 0; i < sz; i++ )
    {
        hist[i] = std::min(hist[i]*scale, thresh);
        sum += hist[i]*hist[i];
    }
+#endif
    scale = 1.f/(std::sqrt(sum)+1e-3f);
+#ifdef HAVE_IPP
+    ippsMulC_32f_I(scale,hist,sz);
+#else
    for( i = 0; i < sz; i++ )
        hist[i] *= scale;
+#endif
 }
@@ -741,8 +840,12 @@ void HOGDescriptor::compute(const Mat& img, vector<float>& descriptors,
            float* dst = descriptor + bj.histOfs;
            const float* src = cache.getBlock(pt, dst);
            if( src != dst )
+#ifdef HAVE_IPP
+               ippsCopy_32f(src,dst,blockHistogramSize);
+#else
                for( int k = 0; k < blockHistogramSize; k++ )
                    dst[k] = src[k];
+#endif
        }
    }
 }
@@ -796,18 +899,28 @@ void HOGDescriptor::detect(const Mat& img,
        }
        double s = rho;
        const float* svmVec = &svmDetector[0];
+#ifdef HAVE_IPP
+        int j;
+#else
        int j, k;
+#endif
        for( j = 0; j < nblocks; j++, svmVec += blockHistogramSize )
        {
            const HOGCache::BlockData& bj = blockData[j];
            Point pt = pt0 + bj.imgOffset;
            const float* vec = cache.getBlock(pt, &blockHist[0]);
+#ifdef HAVE_IPP
+            Ipp32f partSum;
+            ippsDotProd_32f(vec,svmVec,blockHistogramSize,&partSum);
+            s += (double)partSum;
+#else
            for( k = 0; k <= blockHistogramSize - 4; k += 4 )
                s += vec[k]*svmVec[k] + vec[k+1]*svmVec[k+1] +
                    vec[k+2]*svmVec[k+2] + vec[k+3]*svmVec[k+3];
            for( ; k < blockHistogramSize; k++ )
                s += vec[k]*svmVec[k];
+#endif
        }
        if( s >= hitThreshold )
            hits.push_back(pt0);