Commit 07c6b0a8 authored by woody.chow's avatar woody.chow

Replace naive normL2Sqr with hal::normL2Sqr_ for better performance

parent 8e8315ab
...@@ -5,6 +5,7 @@ ...@@ -5,6 +5,7 @@
#include "precomp.hpp" #include "precomp.hpp"
#include "stat.hpp" #include "stat.hpp"
#include <opencv2/core/hal/hal.hpp>
namespace cv namespace cv
{ {
...@@ -45,6 +46,24 @@ void batchDistL2Sqr_(const _Tp* src1, const _Tp* src2, size_t step2, ...@@ -45,6 +46,24 @@ void batchDistL2Sqr_(const _Tp* src1, const _Tp* src2, size_t step2,
} }
} }
template<>
void batchDistL2Sqr_(const float* src1, const float* src2, size_t step2,
int nvecs, int len, float* dist, const uchar* mask)
{
step2 /= sizeof(src2[0]);
if( !mask )
{
for( int i = 0; i < nvecs; i++ )
dist[i] = hal::normL2Sqr_(src1, src2 + step2*i, len);
}
else
{
float val0 = std::numeric_limits<float>::max();
for( int i = 0; i < nvecs; i++ )
dist[i] = mask[i] ? hal::normL2Sqr_(src1, src2 + step2*i, len) : val0;
}
}
template<typename _Tp, typename _Rt> template<typename _Tp, typename _Rt>
void batchDistL2_(const _Tp* src1, const _Tp* src2, size_t step2, void batchDistL2_(const _Tp* src1, const _Tp* src2, size_t step2,
int nvecs, int len, _Rt* dist, const uchar* mask) int nvecs, int len, _Rt* dist, const uchar* mask)
...@@ -63,6 +82,24 @@ void batchDistL2_(const _Tp* src1, const _Tp* src2, size_t step2, ...@@ -63,6 +82,24 @@ void batchDistL2_(const _Tp* src1, const _Tp* src2, size_t step2,
} }
} }
template<>
void batchDistL2_(const float* src1, const float* src2, size_t step2,
int nvecs, int len, float* dist, const uchar* mask)
{
step2 /= sizeof(src2[0]);
if( !mask )
{
for( int i = 0; i < nvecs; i++ )
dist[i] = std::sqrt(hal::normL2Sqr_(src1, src2 + step2*i, len));
}
else
{
float val0 = std::numeric_limits<float>::max();
for( int i = 0; i < nvecs; i++ )
dist[i] = mask[i] ? std::sqrt(hal::normL2Sqr_(src1, src2 + step2*i, len)) : val0;
}
}
static void batchDistHamming(const uchar* src1, const uchar* src2, size_t step2, static void batchDistHamming(const uchar* src1, const uchar* src2, size_t step2,
int nvecs, int len, int* dist, const uchar* mask) int nvecs, int len, int* dist, const uchar* mask)
{ {
......
...@@ -43,6 +43,7 @@ ...@@ -43,6 +43,7 @@
#include "precomp.hpp" #include "precomp.hpp"
#include <opencv2/core/utils/configuration.private.hpp> #include <opencv2/core/utils/configuration.private.hpp>
#include <opencv2/core/hal/hal.hpp>
////////////////////////////////////////// kmeans //////////////////////////////////////////// ////////////////////////////////////////// kmeans ////////////////////////////////////////////
...@@ -74,7 +75,7 @@ public: ...@@ -74,7 +75,7 @@ public:
for (int i = begin; i<end; i++) for (int i = begin; i<end; i++)
{ {
tdist2[i] = std::min(normL2Sqr(data.ptr<float>(i), data.ptr<float>(ci), dims), dist[i]); tdist2[i] = std::min(hal::normL2Sqr_(data.ptr<float>(i), data.ptr<float>(ci), dims), dist[i]);
} }
} }
...@@ -106,7 +107,7 @@ static void generateCentersPP(const Mat& data, Mat& _out_centers, ...@@ -106,7 +107,7 @@ static void generateCentersPP(const Mat& data, Mat& _out_centers,
for (int i = 0; i < N; i++) for (int i = 0; i < N; i++)
{ {
dist[i] = normL2Sqr(data.ptr<float>(i), data.ptr<float>(centers[0]), dims); dist[i] = hal::normL2Sqr_(data.ptr<float>(i), data.ptr<float>(centers[0]), dims);
sum0 += dist[i]; sum0 += dist[i];
} }
...@@ -185,7 +186,7 @@ public: ...@@ -185,7 +186,7 @@ public:
if (onlyDistance) if (onlyDistance)
{ {
const float* center = centers.ptr<float>(labels[i]); const float* center = centers.ptr<float>(labels[i]);
distances[i] = normL2Sqr(sample, center, dims); distances[i] = hal::normL2Sqr_(sample, center, dims);
continue; continue;
} }
else else
...@@ -196,7 +197,7 @@ public: ...@@ -196,7 +197,7 @@ public:
for (int k = 0; k < K; k++) for (int k = 0; k < K; k++)
{ {
const float* center = centers.ptr<float>(k); const float* center = centers.ptr<float>(k);
const double dist = normL2Sqr(sample, center, dims); const double dist = hal::normL2Sqr_(sample, center, dims);
if (min_dist > dist) if (min_dist > dist)
{ {
...@@ -379,7 +380,7 @@ double cv::kmeans( InputArray _data, int K, ...@@ -379,7 +380,7 @@ double cv::kmeans( InputArray _data, int K,
if (labels[i] != max_k) if (labels[i] != max_k)
continue; continue;
const float* sample = data.ptr<float>(i); const float* sample = data.ptr<float>(i);
double dist = normL2Sqr(sample, _base_center, dims); double dist = hal::normL2Sqr_(sample, _base_center, dims);
if (max_dist <= dist) if (max_dist <= dist)
{ {
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment