added IPP dot product functions. Only 32f data type supported for now (there are…

Added IPP dot product functions. Only the 32f data type is routed to IPP for now: the 8u/16s/16u/32s IPP functions have accuracy issues, which will be fixed in IPP 7.0.3.

added IPP dot product functions. Only 32f data type supported for now (there are…
added IPP dot product functions. Only 32f data type supported for now (there are accuracy issues in 8u/16s/16u/32s functions which will be fixed in IPP 7.0.3)
4f83a063 · Vladimir Dudnik · 17dc1e13 · 4f83a063
Commit 4f83a063 authored Feb 27, 2011 by Vladimir Dudnik
Hide whitespace changes
Inline Side-by-side

Showing with 215 additions and 9 deletions

matmul.cpp modules/core/src/matmul.cpp +215 -9

No files found.
--- a/modules/core/src/matmul.cpp
+++ b/modules/core/src/matmul.cpp
@@ -42,6 +42,10 @@
 #include "precomp.hpp"
+#ifdef HAVE_IPP
+#include "ippversion.h"
+#endif
 namespace cv
 {
@@ -2629,11 +2633,183 @@ void mulTransposed( const Mat& src, Mat& dst, bool ata,
 *                                      Dot Product                                       *
 \****************************************************************************************/
+#if defined (HAVE_IPP) && (IPP_VERSION_MAJOR >= 7)
+// Dot product of two 8-bit unsigned matrices via the IPP
+// ippiDotProd_8u64f_C{1,3,4}R primitives.
+//
+// srcmat1, srcmat2 - input matrices. The ROI is taken from srcmat1 (cols x rows)
+//                    and each matrix's own step is passed as its row stride.
+//                    This function does not itself compare the two matrices'
+//                    type or size.
+// Returns sum[0] after the remaining per-channel entries of sum[] have been
+// added to it.
+//
+// Only 1, 3 and 4 channels have a primitive here. Any other channel count is
+// rejected up front: the switch would match nothing, and for more than four
+// channels the reduction loop would index past the 4-element sum array.
+// NOTE(review): the status returned by the IPP primitives is not checked.
+static double ippDotProd8u(const Mat& srcmat1, const Mat& srcmat2)
+{
+    int      nchan  = srcmat1.channels();
+    CV_Assert( nchan == 1 || nchan == 3 || nchan == 4 );
+    Ipp64f   sum[4] = { 0.0 };
+    IppiSize roi    = { srcmat1.cols, srcmat1.rows };
+    switch(nchan)
+    {
+        case 1:
+            ippiDotProd_8u64f_C1R((const Ipp8u*)srcmat1.data, (int)srcmat1.step,
+                                  (const Ipp8u*)srcmat2.data, (int)srcmat2.step, roi, sum);
+            break;
+        case 3:
+            ippiDotProd_8u64f_C3R((const Ipp8u*)srcmat1.data, (int)srcmat1.step,
+                                  (const Ipp8u*)srcmat2.data, (int)srcmat2.step, roi, sum);
+            break;
+        case 4:
+            ippiDotProd_8u64f_C4R((const Ipp8u*)srcmat1.data, (int)srcmat1.step,
+                                  (const Ipp8u*)srcmat2.data, (int)srcmat2.step, roi, sum);
+            break;
+    }
+    // Fold the per-channel results into a single scalar.
+    for(int c = 1; c < nchan; c++)
+    {
+        sum[0] += sum[c];
+    }
+    return sum[0];
+} // ippDotProd8u()
+// Dot product of two 16-bit unsigned matrices via the IPP
+// ippiDotProd_16u64f_C{1,3,4}R primitives.
+//
+// srcmat1, srcmat2 - input matrices. The ROI is taken from srcmat1 (cols x rows)
+//                    and each matrix's own step is passed as its row stride.
+//                    This function does not itself compare the two matrices'
+//                    type or size.
+// Returns sum[0] after the remaining per-channel entries of sum[] have been
+// added to it.
+//
+// Only 1, 3 and 4 channels have a primitive here. Any other channel count is
+// rejected up front: the switch would match nothing, and for more than four
+// channels the reduction loop would index past the 4-element sum array.
+// NOTE(review): the status returned by the IPP primitives is not checked.
+static double ippDotProd16u(const Mat& srcmat1, const Mat& srcmat2)
+{
+    int      nchan  = srcmat1.channels();
+    CV_Assert( nchan == 1 || nchan == 3 || nchan == 4 );
+    Ipp64f   sum[4] = { 0.0 };
+    IppiSize roi    = { srcmat1.cols, srcmat1.rows };
+    switch(nchan)
+    {
+        case 1:
+            ippiDotProd_16u64f_C1R((const Ipp16u*)srcmat1.data, (int)srcmat1.step,
+                                   (const Ipp16u*)srcmat2.data, (int)srcmat2.step, roi, sum);
+            break;
+        case 3:
+            ippiDotProd_16u64f_C3R((const Ipp16u*)srcmat1.data, (int)srcmat1.step,
+                                   (const Ipp16u*)srcmat2.data, (int)srcmat2.step, roi, sum);
+            break;
+        case 4:
+            ippiDotProd_16u64f_C4R((const Ipp16u*)srcmat1.data, (int)srcmat1.step,
+                                   (const Ipp16u*)srcmat2.data, (int)srcmat2.step, roi, sum);
+            break;
+    }
+    // Fold the per-channel results into a single scalar.
+    for(int c = 1; c < nchan; c++)
+    {
+        sum[0] += sum[c];
+    }
+    return sum[0];
+} // ippDotProd16u()
+// Dot product of two 16-bit signed matrices via the IPP
+// ippiDotProd_16s64f_C{1,3,4}R primitives.
+//
+// srcmat1, srcmat2 - input matrices. The ROI is taken from srcmat1 (cols x rows)
+//                    and each matrix's own step is passed as its row stride.
+//                    This function does not itself compare the two matrices'
+//                    type or size.
+// Returns sum[0] after the remaining per-channel entries of sum[] have been
+// added to it.
+//
+// Only 1, 3 and 4 channels have a primitive here. Any other channel count is
+// rejected up front: the switch would match nothing, and for more than four
+// channels the reduction loop would index past the 4-element sum array.
+// NOTE(review): the status returned by the IPP primitives is not checked.
+static double ippDotProd16s(const Mat& srcmat1, const Mat& srcmat2)
+{
+    int      nchan  = srcmat1.channels();
+    CV_Assert( nchan == 1 || nchan == 3 || nchan == 4 );
+    Ipp64f   sum[4] = { 0.0 };
+    IppiSize roi    = { srcmat1.cols, srcmat1.rows };
+    switch(nchan)
+    {
+        case 1:
+            ippiDotProd_16s64f_C1R((const Ipp16s*)srcmat1.data, (int)srcmat1.step,
+                                   (const Ipp16s*)srcmat2.data, (int)srcmat2.step, roi, sum);
+            break;
+        case 3:
+            ippiDotProd_16s64f_C3R((const Ipp16s*)srcmat1.data, (int)srcmat1.step,
+                                   (const Ipp16s*)srcmat2.data, (int)srcmat2.step, roi, sum);
+            break;
+        case 4:
+            ippiDotProd_16s64f_C4R((const Ipp16s*)srcmat1.data, (int)srcmat1.step,
+                                   (const Ipp16s*)srcmat2.data, (int)srcmat2.step, roi, sum);
+            break;
+    }
+    // Fold the per-channel results into a single scalar.
+    for(int c = 1; c < nchan; c++)
+    {
+        sum[0] += sum[c];
+    }
+    return sum[0];
+} // ippDotProd16s()
+// Dot product of two 32-bit signed integer matrices via the IPP
+// ippiDotProd_32s64f_C{1,3,4}R primitives.
+//
+// srcmat1, srcmat2 - input matrices. The ROI is taken from srcmat1 (cols x rows)
+//                    and each matrix's own step is passed as its row stride.
+//                    This function does not itself compare the two matrices'
+//                    type or size.
+// Returns sum[0] after the remaining per-channel entries of sum[] have been
+// added to it.
+//
+// Only 1, 3 and 4 channels have a primitive here. Any other channel count is
+// rejected up front: the switch would match nothing, and for more than four
+// channels the reduction loop would index past the 4-element sum array.
+// NOTE(review): the status returned by the IPP primitives is not checked.
+static double ippDotProd32s(const Mat& srcmat1, const Mat& srcmat2)
+{
+    int      nchan  = srcmat1.channels();
+    CV_Assert( nchan == 1 || nchan == 3 || nchan == 4 );
+    Ipp64f   sum[4] = { 0.0 };
+    IppiSize roi    = { srcmat1.cols, srcmat1.rows };
+    switch(nchan)
+    {
+        case 1:
+            ippiDotProd_32s64f_C1R((const Ipp32s*)srcmat1.data, (int)srcmat1.step,
+                                   (const Ipp32s*)srcmat2.data, (int)srcmat2.step, roi, sum);
+            break;
+        case 3:
+            ippiDotProd_32s64f_C3R((const Ipp32s*)srcmat1.data, (int)srcmat1.step,
+                                   (const Ipp32s*)srcmat2.data, (int)srcmat2.step, roi, sum);
+            break;
+        case 4:
+            ippiDotProd_32s64f_C4R((const Ipp32s*)srcmat1.data, (int)srcmat1.step,
+                                   (const Ipp32s*)srcmat2.data, (int)srcmat2.step, roi, sum);
+            break;
+    }
+    // Fold the per-channel results into a single scalar.
+    for(int c = 1; c < nchan; c++)
+    {
+        sum[0] += sum[c];
+    }
+    return sum[0];
+} // ippDotProd32s()
+// Dot product of two 32-bit float matrices via the IPP
+// ippiDotProd_32f64f_C{1,3,4}R primitives, called with ippAlgHintAccurate.
+//
+// srcmat1, srcmat2 - input matrices. The ROI is taken from srcmat1 (cols x rows)
+//                    and each matrix's own step is passed as its row stride.
+//                    This function does not itself compare the two matrices'
+//                    type or size.
+// Returns sum[0] after the remaining per-channel entries of sum[] have been
+// added to it.
+//
+// Only 1, 3 and 4 channels have a primitive here. Any other channel count is
+// rejected up front: the switch would match nothing, and for more than four
+// channels the reduction loop would index past the 4-element sum array.
+// NOTE(review): the status returned by the IPP primitives is not checked.
+static double ippDotProd32f(const Mat& srcmat1, const Mat& srcmat2)
+{
+    int      nchan  = srcmat1.channels();
+    CV_Assert( nchan == 1 || nchan == 3 || nchan == 4 );
+    Ipp64f   sum[4] = { 0.0 };
+    IppiSize roi    = { srcmat1.cols, srcmat1.rows };
+    switch(nchan)
+    {
+        case 1:
+            ippiDotProd_32f64f_C1R((const Ipp32f*)srcmat1.data, (int)srcmat1.step,
+                                   (const Ipp32f*)srcmat2.data, (int)srcmat2.step, roi, sum, ippAlgHintAccurate);
+            break;
+        case 3:
+            ippiDotProd_32f64f_C3R((const Ipp32f*)srcmat1.data, (int)srcmat1.step,
+                                   (const Ipp32f*)srcmat2.data, (int)srcmat2.step, roi, sum, ippAlgHintAccurate);
+            break;
+        case 4:
+            ippiDotProd_32f64f_C4R((const Ipp32f*)srcmat1.data, (int)srcmat1.step,
+                                   (const Ipp32f*)srcmat2.data, (int)srcmat2.step, roi, sum, ippAlgHintAccurate);
+            break;
+    }
+    // Fold the per-channel results into a single scalar.
+    for(int c = 1; c < nchan; c++)
+    {
+        sum[0] += sum[c];
+    }
+    return sum[0];
+} // ippDotProd32f()
+#endif
 template<typename T, typename WT, typename ST> static double
 dotprod_( const Mat& srcmat1, const Mat& srcmat2 )
 {
-    const T *src1 = (const T*)srcmat1.data, *src2 = (const T*)srcmat2.data;
+    const T* src1 = (const T*)srcmat1.data;
-    size_t step1 = srcmat1.step/sizeof(src1[0]), step2 = srcmat2.step/sizeof(src2[0]);
+    const T* src2 = (const T*)srcmat2.data;
+    size_t step1 = srcmat1.step/sizeof(src1[0]);
+    size_t step2 = srcmat2.step/sizeof(src2[0]);
    ST sum = 0;
    Size size = getContinuousSize( srcmat1, srcmat2, srcmat1.channels() );
@@ -2642,6 +2818,7 @@ dotprod_( const Mat& srcmat1, const Mat& srcmat2 )
        WT t = 0;
        for( ; size.height--; src1 += step1, src2 += step2 )
            t += (WT)src1[0]*src2[0];
        sum += t;
    }
    else
@@ -2652,17 +2829,19 @@ dotprod_( const Mat& srcmat1, const Mat& srcmat2 )
            WT t = 0;
            for( i = 0; i <= size.width - 4; i += 4 )
            {
-                sum += (WT)src1[i]*src2[i] +
+                sum += (WT)src1[i  ]*src2[i  ] +
-                    (WT)src1[i+1]*src2[i+1] +
+                       (WT)src1[i+1]*src2[i+1] +
-                    (WT)src1[i+2]*src2[i+2] +
+                       (WT)src1[i+2]*src2[i+2] +
-                    (WT)src1[i+3]*src2[i+3];
+                       (WT)src1[i+3]*src2[i+3];
            }
            for( ; i < size.width; i++ )
                t += (WT)src1[i]*src2[i];
            sum += t;
        }
    }
    return (double)sum;
 }
@@ -2670,16 +2849,43 @@ typedef double (*DotProductFunc)(const Mat& src1, const Mat& src2);
 double Mat::dot(const Mat& mat) const
 {
+    // Computes the dot product of *this and mat by dispatching on depth() to a
+    // per-type kernel. A null table entry marks a depth with no kernel; it is
+    // rejected by the CV_Assert below, together with any type or size mismatch.
-    static DotProductFunc tab[] = {
+#if defined (HAVE_IPP) && (IPP_VERSION_MAJOR >= 7)
-        dotprod_<uchar, int, int64>, 0,
+    // Same layout as tab[], except that the float slot uses the IPP kernel.
+    static DotProductFunc ipptab[] =
+    {
+        dotprod_<uchar, int, int64>,
+        0,
+        dotprod_<ushort, double, double>,
+        dotprod_<short, double, double>,
+        dotprod_<int, double, double>,
+        ippDotProd32f,
+        dotprod_<double, double, double>,
+        0
+    };
+#endif
+    static DotProductFunc tab[] =
+    {
+        dotprod_<uchar, int, int64>,
+        0,
         dotprod_<ushort, double, double>,
         dotprod_<short, double, double>,
         dotprod_<int, double, double>,
         dotprod_<float, double, double>,
-        dotprod_<double, double, double>, 0 };
+        dotprod_<double, double, double>,
+        0
+    };
     DotProductFunc func = tab[depth()];
+#if defined (HAVE_IPP) && (IPP_VERSION_MAJOR >= 7)
+    // ippDotProd32f has IPP primitives only for 1, 3 and 4 channels, so every
+    // other channel count (2, or more than 4) stays on the generic kernel.
+    const int cn = channels();
+    if(cn == 1 || cn == 3 || cn == 4)
+    {
+        func = ipptab[depth()];
+    }
+#endif
     CV_Assert( mat.type() == type() && mat.size() == size() && func != 0 );
     return func( *this, mat );
 }