Merge pull request #8081 from mshabunin:fix-kmeans-compactness

pull/8107/head
Vadim Pisarevsky 8 years ago
commit dfb348ef0b
  1. 31
      modules/core/src/kmeans.cpp
  2. 57
      modules/core/test/test_math.cpp
  3. 8
      samples/cpp/kmeans.cpp

@ -165,11 +165,13 @@ public:
KMeansDistanceComputer( double *_distances, KMeansDistanceComputer( double *_distances,
int *_labels, int *_labels,
const Mat& _data, const Mat& _data,
const Mat& _centers ) const Mat& _centers,
bool _onlyDistance = false )
: distances(_distances), : distances(_distances),
labels(_labels), labels(_labels),
data(_data), data(_data),
centers(_centers) centers(_centers),
onlyDistance(_onlyDistance)
{ {
} }
@ -183,6 +185,12 @@ public:
for( int i = begin; i<end; ++i) for( int i = begin; i<end; ++i)
{ {
const float *sample = data.ptr<float>(i); const float *sample = data.ptr<float>(i);
if (onlyDistance)
{
const float* center = centers.ptr<float>(labels[i]);
distances[i] = normL2Sqr(sample, center, dims);
continue;
}
int k_best = 0; int k_best = 0;
double min_dist = DBL_MAX; double min_dist = DBL_MAX;
@ -210,6 +218,7 @@ private:
int *labels; int *labels;
const Mat& data; const Mat& data;
const Mat& centers; const Mat& centers;
bool onlyDistance;
}; };
} }
@ -259,6 +268,7 @@ double cv::kmeans( InputArray _data, int K,
Mat centers(K, dims, type), old_centers(K, dims, type), temp(1, dims, type); Mat centers(K, dims, type), old_centers(K, dims, type), temp(1, dims, type);
std::vector<int> counters(K); std::vector<int> counters(K);
std::vector<Vec2f> _box(dims); std::vector<Vec2f> _box(dims);
Mat dists(1, N, CV_64F);
Vec2f* box = &_box[0]; Vec2f* box = &_box[0];
double best_compactness = DBL_MAX, compactness = 0; double best_compactness = DBL_MAX, compactness = 0;
RNG& rng = theRNG(); RNG& rng = theRNG();
@ -430,19 +440,16 @@ double cv::kmeans( InputArray _data, int K,
} }
} }
if( ++iter == MAX(criteria.maxCount, 2) || max_center_shift <= criteria.epsilon ) bool isLastIter = (++iter == MAX(criteria.maxCount, 2) || max_center_shift <= criteria.epsilon);
break;
// assign labels // assign labels
Mat dists(1, N, CV_64F); dists = 0;
double* dist = dists.ptr<double>(0); double* dist = dists.ptr<double>(0);
parallel_for_(Range(0, N), parallel_for_(Range(0, N), KMeansDistanceComputer(dist, labels, data, centers, isLastIter));
KMeansDistanceComputer(dist, labels, data, centers)); compactness = sum(dists)[0];
compactness = 0;
for( i = 0; i < N; i++ ) if (isLastIter)
{ break;
compactness += dist[i];
}
} }
if( compactness < best_compactness ) if( compactness < best_compactness )

@ -2748,21 +2748,23 @@ public:
protected: protected:
void run(int inVariant) void run(int inVariant)
{ {
RNG& rng = ts->get_rng();
int i, iter = 0, N = 0, N0 = 0, K = 0, dims = 0; int i, iter = 0, N = 0, N0 = 0, K = 0, dims = 0;
Mat labels; Mat labels;
try
{ {
RNG& rng = theRNG();
const int MAX_DIM=5; const int MAX_DIM=5;
int MAX_POINTS = 100, maxIter = 100; int MAX_POINTS = 100, maxIter = 100;
for( iter = 0; iter < maxIter; iter++ ) for( iter = 0; iter < maxIter; iter++ )
{ {
ts->update_context(this, iter, true); ts->update_context(this, iter, true);
dims = rng.uniform(inVariant == MAT_1_N_CDIM ? 2 : 1, MAX_DIM+1); dims = rng.uniform(inVariant == MAT_1_N_CDIM ? 2 : 1, MAX_DIM+1);
N = rng.uniform(1, MAX_POINTS+1); N = rng.uniform(2, MAX_POINTS+1);
N0 = rng.uniform(1, MAX(N/10, 2)); N0 = rng.uniform(1, MAX(N/10, 2));
K = rng.uniform(1, N+1); K = rng.uniform(1, N+1);
Mat centers;
if (inVariant == VECTOR) if (inVariant == VECTOR)
{ {
dims = 2; dims = 2;
@ -2775,7 +2777,7 @@ protected:
data[i] = data0[rng.uniform(0, N0)]; data[i] = data0[rng.uniform(0, N0)];
kmeans(data, K, labels, TermCriteria(TermCriteria::MAX_ITER+TermCriteria::EPS, 30, 0), kmeans(data, K, labels, TermCriteria(TermCriteria::MAX_ITER+TermCriteria::EPS, 30, 0),
5, KMEANS_PP_CENTERS); 5, KMEANS_PP_CENTERS, centers);
} }
else else
{ {
@ -2820,27 +2822,23 @@ protected:
} }
kmeans(data, K, labels, TermCriteria(TermCriteria::MAX_ITER+TermCriteria::EPS, 30, 0), kmeans(data, K, labels, TermCriteria(TermCriteria::MAX_ITER+TermCriteria::EPS, 30, 0),
5, KMEANS_PP_CENTERS); 5, KMEANS_PP_CENTERS, centers);
} }
ASSERT_EQ(centers.rows, K);
ASSERT_EQ(labels.rows, N);
Mat hist(K, 1, CV_32S, Scalar(0)); Mat hist(K, 1, CV_32S, Scalar(0));
for( i = 0; i < N; i++ ) for( i = 0; i < N; i++ )
{ {
int l = labels.at<int>(i); int l = labels.at<int>(i);
CV_Assert(0 <= l && l < K); ASSERT_GE(l, 0);
ASSERT_LT(l, K);
hist.at<int>(l)++; hist.at<int>(l)++;
} }
for( i = 0; i < K; i++ ) for( i = 0; i < K; i++ )
CV_Assert( hist.at<int>(i) != 0 ); ASSERT_GT(hist.at<int>(i), 0);
}
} }
catch(...)
{
ts->printf(cvtest::TS::LOG,
"context: iteration=%d, N=%d, N0=%d, K=%d\n",
iter, N, N0, K);
std::cout << labels << std::endl;
ts->set_failed_test_info(cvtest::TS::FAIL_MISMATCH);
} }
} }
}; };
@ -2859,6 +2857,35 @@ TEST_P(Core_KMeans_InputVariants, singular)
INSTANTIATE_TEST_CASE_P(AllVariants, Core_KMeans_InputVariants, KMeansInputVariant::all()); INSTANTIATE_TEST_CASE_P(AllVariants, Core_KMeans_InputVariants, KMeansInputVariant::all());
// Verifies that the compactness value returned by kmeans() matches the sum of
// squared distances between each sample and the center selected by its label,
// recomputed here from the returned labels and centers. The check is repeated
// for cluster counts K = 1, 2, 4, ..., N.
TEST(Core_KMeans, compactness)
{
    const int N = 1024;
    const int attempts = 4;
    const TermCriteria crit = TermCriteria(TermCriteria::COUNT, 5, 0); // low number of iterations
    cvtest::TS& ts = *cvtest::TS::ptr();
    for (int K = 1; K <= N; K *= 2)
    {
        // Attach the current cluster count to any failure reported below.
        SCOPED_TRACE(::testing::Message() << "K=" << K);

        Mat data(N, 1, CV_32FC2);
        cvtest::randUni(ts.get_rng(), data, Scalar(-200, -200), Scalar(200, 200));
        Mat labels, centers;
        double compactness = kmeans(data, K, labels, crit, attempts, KMEANS_PP_CENTERS, centers);
        // Reinterpret centers as 2-channel so each row can be read as a Point2f.
        centers = centers.reshape(2);

        // These are fatal on purpose: the loop below indexes both matrices, so
        // continuing after a size mismatch would read out of bounds.
        ASSERT_EQ(labels.rows, N);
        ASSERT_EQ(centers.rows, K);
        EXPECT_GE(compactness, 0.0);

        double expected = 0.0;
        for (int i = 0; i < N; ++i)
        {
            const int l = labels.at<int>(i);
            // Guard the centers lookup against an out-of-range label.
            ASSERT_GE(l, 0);
            ASSERT_LT(l, K);
            const Point2f d = data.at<Point2f>(i) - centers.at<Point2f>(l);
            expected += d.x * d.x + d.y * d.y;
        }
        EXPECT_NEAR(expected, compactness, expected * 1e-8);
        // With as many clusters as samples the test expects zero compactness.
        if (K == N)
            EXPECT_DOUBLE_EQ(compactness, 0.0);
    }
}
TEST(CovariationMatrixVectorOfMat, accuracy) TEST(CovariationMatrixVectorOfMat, accuracy)
{ {
unsigned int col_problem_size = 8, row_problem_size = 8, vector_size = 16; unsigned int col_problem_size = 8, row_problem_size = 8, vector_size = 16;

@ -53,7 +53,7 @@ int main( int /*argc*/, char** /*argv*/ )
randShuffle(points, 1, &rng); randShuffle(points, 1, &rng);
kmeans(points, clusterCount, labels, double compactness = kmeans(points, clusterCount, labels,
TermCriteria( TermCriteria::EPS+TermCriteria::COUNT, 10, 1.0), TermCriteria( TermCriteria::EPS+TermCriteria::COUNT, 10, 1.0),
3, KMEANS_PP_CENTERS, centers); 3, KMEANS_PP_CENTERS, centers);
@ -65,6 +65,12 @@ int main( int /*argc*/, char** /*argv*/ )
Point ipt = points.at<Point2f>(i); Point ipt = points.at<Point2f>(i);
circle( img, ipt, 2, colorTab[clusterIdx], FILLED, LINE_AA ); circle( img, ipt, 2, colorTab[clusterIdx], FILLED, LINE_AA );
} }
for (i = 0; i < centers.rows; ++i)
{
Point2f c = centers.at<Point2f>(i);
circle( img, c, 40, colorTab[i], 1, LINE_AA );
}
cout << "Compactness: " << compactness << endl;
imshow("clusters", img); imshow("clusters", img);

Loading…
Cancel
Save