Replace naive normL2Sqr with hal::normL2Sqr_ for better performance

07c6b0a8 · woody.chow · 8e8315ab · 07c6b0a8 · 07c6b0a8
Commit 07c6b0a8 authored Sep 06, 2018 by woody.chow
Hide whitespace changes
Inline Side-by-side

Showing with 43 additions and 5 deletions

batch_distance.cpp modules/core/src/batch_distance.cpp +37 -0

kmeans.cpp modules/core/src/kmeans.cpp +6 -5

No files found.
--- a/modules/core/src/batch_distance.cpp
+++ b/modules/core/src/batch_distance.cpp
@@ -5,6 +5,7 @@
 #include "precomp.hpp"
 #include "stat.hpp"
+#include <opencv2/core/hal/hal.hpp>
 namespace cv
 {
@@ -45,6 +46,24 @@ void batchDistL2Sqr_(const _Tp* src1, const _Tp* src2, size_t step2,
    }
 }
+template<>
+void batchDistL2Sqr_(const float* src1, const float* src2, size_t step2,
+                     int nvecs, int len, float* dist, const uchar* mask)
+{
+    step2 /= sizeof(src2[0]);
+    if( !mask )
+    {
+        for( int i = 0; i < nvecs; i++ )
+            dist[i] = hal::normL2Sqr_(src1, src2 + step2*i, len);
+    }
+    else
+    {
+        float val0 = std::numeric_limits<float>::max();
+        for( int i = 0; i < nvecs; i++ )
+            dist[i] = mask[i] ? hal::normL2Sqr_(src1, src2 + step2*i, len) : val0;
+    }
+}
 template<typename _Tp, typename _Rt>
 void batchDistL2_(const _Tp* src1, const _Tp* src2, size_t step2,
                  int nvecs, int len, _Rt* dist, const uchar* mask)
@@ -63,6 +82,24 @@ void batchDistL2_(const _Tp* src1, const _Tp* src2, size_t step2,
    }
 }
+template<>
+void batchDistL2_(const float* src1, const float* src2, size_t step2,
+                  int nvecs, int len, float* dist, const uchar* mask)
+{
+    step2 /= sizeof(src2[0]);
+    if( !mask )
+    {
+        for( int i = 0; i < nvecs; i++ )
+            dist[i] = std::sqrt(hal::normL2Sqr_(src1, src2 + step2*i, len));
+    }
+    else
+    {
+        float val0 = std::numeric_limits<float>::max();
+        for( int i = 0; i < nvecs; i++ )
+            dist[i] = mask[i] ? std::sqrt(hal::normL2Sqr_(src1, src2 + step2*i, len)) : val0;
+    }
+}
 static void batchDistHamming(const uchar* src1, const uchar* src2, size_t step2,
                             int nvecs, int len, int* dist, const uchar* mask)
 {

--- a/modules/core/src/kmeans.cpp
+++ b/modules/core/src/kmeans.cpp
@@ -43,6 +43,7 @@
 #include "precomp.hpp"
 #include <opencv2/core/utils/configuration.private.hpp>
+#include <opencv2/core/hal/hal.hpp>
 ////////////////////////////////////////// kmeans ////////////////////////////////////////////
@@ -74,7 +75,7 @@ public:
        for (int i = begin; i<end; i++)
        {
-            tdist2[i] = std::min(normL2Sqr(data.ptr<float>(i), data.ptr<float>(ci), dims), dist[i]);
+            tdist2[i] = std::min(hal::normL2Sqr_(data.ptr<float>(i), data.ptr<float>(ci), dims), dist[i]);
        }
    }
@@ -106,7 +107,7 @@ static void generateCentersPP(const Mat& data, Mat& _out_centers,
    for (int i = 0; i < N; i++)
    {
-        dist[i] = normL2Sqr(data.ptr<float>(i), data.ptr<float>(centers[0]), dims);
+        dist[i] = hal::normL2Sqr_(data.ptr<float>(i), data.ptr<float>(centers[0]), dims);
        sum0 += dist[i];
    }
@@ -185,7 +186,7 @@ public:
            if (onlyDistance)
            {
                const float* center = centers.ptr<float>(labels[i]);
-                distances[i] = normL2Sqr(sample, center, dims);
+                distances[i] = hal::normL2Sqr_(sample, center, dims);
                continue;
            }
            else
@@ -196,7 +197,7 @@ public:
                for (int k = 0; k < K; k++)
                {
                    const float* center = centers.ptr<float>(k);
-                    const double dist = normL2Sqr(sample, center, dims);
+                    const double dist = hal::normL2Sqr_(sample, center, dims);
                    if (min_dist > dist)
                    {
@@ -379,7 +380,7 @@ double cv::kmeans( InputArray _data, int K,
                        if (labels[i] != max_k)
                            continue;
                        const float* sample = data.ptr<float>(i);
-                        double dist = normL2Sqr(sample, _base_center, dims);
+                        double dist = hal::normL2Sqr_(sample, _base_center, dims);
                        if (max_dist <= dist)
                        {