Commit c6a27b3d authored by Vadim Pisarevsky

probably, ultimately fixed the problem of empty clusters in kmeans; added test…

probably, ultimately fixed the problem of empty clusters in kmeans; added test for singular kmeans cases
parent d80651d9
...@@ -2489,7 +2489,7 @@ double cv::kmeans( InputArray _data, int K, ...@@ -2489,7 +2489,7 @@ double cv::kmeans( InputArray _data, int K,
} }
int* labels = _labels.ptr<int>(); int* labels = _labels.ptr<int>();
Mat centers(K, dims, type), old_centers(K, dims, type); Mat centers(K, dims, type), old_centers(K, dims, type), temp(1, dims, type);
vector<int> counters(K); vector<int> counters(K);
vector<Vec2f> _box(dims); vector<Vec2f> _box(dims);
Vec2f* box = &_box[0]; Vec2f* box = &_box[0];
...@@ -2533,7 +2533,7 @@ double cv::kmeans( InputArray _data, int K, ...@@ -2533,7 +2533,7 @@ double cv::kmeans( InputArray _data, int K,
for( a = 0; a < attempts; a++ ) for( a = 0; a < attempts; a++ )
{ {
double max_center_shift = DBL_MAX; double max_center_shift = DBL_MAX;
for( iter = 0; iter < criteria.maxCount && max_center_shift > criteria.epsilon; iter++ ) for( iter = 0;; )
{ {
swap(centers, old_centers); swap(centers, old_centers);
...@@ -2609,7 +2609,11 @@ double cv::kmeans( InputArray _data, int K, ...@@ -2609,7 +2609,11 @@ double cv::kmeans( InputArray _data, int K,
double max_dist = 0; double max_dist = 0;
int farthest_i = -1; int farthest_i = -1;
float* new_center = centers.ptr<float>(k); float* new_center = centers.ptr<float>(k);
float* old_center = centers.ptr<float>(max_k); float* _old_center = centers.ptr<float>(max_k);
float* old_center = temp.ptr<float>();
float scale = 1.f/counters[max_k];
for( j = 0; j < dims; j++ )
old_center[j] = _old_center[j]*scale;
for( i = 0; i < N; i++ ) for( i = 0; i < N; i++ )
{ {
...@@ -2627,6 +2631,7 @@ double cv::kmeans( InputArray _data, int K, ...@@ -2627,6 +2631,7 @@ double cv::kmeans( InputArray _data, int K,
counters[max_k]--; counters[max_k]--;
counters[k]++; counters[k]++;
labels[farthest_i] = k;
sample = data.ptr<float>(farthest_i); sample = data.ptr<float>(farthest_i);
for( j = 0; j < dims; j++ ) for( j = 0; j < dims; j++ )
...@@ -2659,6 +2664,9 @@ double cv::kmeans( InputArray _data, int K, ...@@ -2659,6 +2664,9 @@ double cv::kmeans( InputArray _data, int K,
} }
} }
if( ++iter == MAX(criteria.maxCount, 2) || max_center_shift <= criteria.epsilon )
break;
// assign labels // assign labels
compactness = 0; compactness = 0;
for( i = 0; i < N; i++ ) for( i = 0; i < N; i++ )
......
...@@ -2428,5 +2428,57 @@ TEST(Core_SolvePoly, accuracy) { Core_SolvePolyTest test; test.safe_run(); } ...@@ -2428,5 +2428,57 @@ TEST(Core_SolvePoly, accuracy) { Core_SolvePolyTest test; test.safe_run(); }
// TODO: eigenvv, invsqrt, cbrt, fastarctan, (round, floor, ceil(?)), // TODO: eigenvv, invsqrt, cbrt, fastarctan, (round, floor, ceil(?)),
// Randomized regression test for kmeans on degenerate ("singular") inputs.
//
// Each trial builds an N-row data set by copying rows picked at random from a
// much smaller pool of N0 random points, so the data contains many duplicates,
// and requests K clusters with K drawn independently of N0 (K may therefore be
// larger than the number of distinct points). The test then checks that every
// label lies in [0, K) and that no cluster ends up empty.
class CV_KMeansSingularTest : public cvtest::BaseTest
{
public:
    CV_KMeansSingularTest() {}
    ~CV_KMeansSingularTest() {}

protected:
    // Runs all trials; the integer argument is unused.
    // Any exception raised inside the try block marks the test as FAIL_MISMATCH.
    void run(int)
    {
        try
        {
            const int MAX_DIM = 5;
            const int MAX_POINTS = 100;
            const int TRIALS = 100;
            const int ATTEMPTS = 5;
            RNG& generator = theRNG();
            const TermCriteria stopRule(TermCriteria::MAX_ITER+TermCriteria::EPS, 30, 0);

            for( int trial = 0; trial < TRIALS; ++trial )
            {
                ts->update_context(this, trial, true);

                // The order of these four draws is kept fixed so the random
                // sequence consumed per trial stays the same.
                const int dimCount = generator.uniform(1, MAX_DIM+1);
                const int N = generator.uniform(1, MAX_POINTS+1);
                const int poolSize = generator.uniform(1, N/10+1);
                const int K = generator.uniform(1, N+1);

                // Small pool of random source points.
                Mat pool(poolSize, dimCount, CV_32F);
                generator.fill(pool, RNG::UNIFORM, -1, 1);

                // Every sample is a copy of a randomly chosen pool row.
                Mat samples(N, dimCount, CV_32F);
                for( int row = 0; row < N; ++row )
                {
                    const int srcRow = generator.uniform(0, poolSize);
                    pool.row(srcRow).copyTo(samples.row(row));
                }

                Mat labels;
                kmeans(samples, K, labels, stopRule, ATTEMPTS, KMEANS_PP_CENTERS);

                // Count how many samples landed in each cluster.
                Mat hist(K, 1, CV_32S, Scalar(0));
                for( int idx = 0; idx < N; ++idx )
                {
                    int l = labels.at<int>(idx);
                    CV_Assert( 0 <= l && l < K );
                    ++hist.at<int>(l);
                }

                // No cluster may be left without samples.
                for( int i = 0; i < K; i++ )
                {
                    CV_Assert( hist.at<int>(i) != 0 );
                }
            }
        }
        catch(...)
        {
            ts->set_failed_test_info(cvtest::TS::FAIL_MISMATCH);
        }
    }
};
// Test entry point: constructs a CV_KMeansSingularTest and runs it via safe_run().
TEST(Core_KMeans, singular) { CV_KMeansSingularTest test; test.safe_run(); }
/* End of file. */ /* End of file. */
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment