From 45e0e5f8e947a9c1b4a995477c43a006ec2df43f Mon Sep 17 00:00:00 2001
From: Pierre-Emmanuel Viel
Date: Tue, 17 Dec 2013 12:51:58 +0100
Subject: [PATCH 1/4] Pick centers in KMeans++ with a probability proportional
to their distance^2, instead of simple distance, to previous centers
---
.../opencv2/flann/hierarchical_clustering_index.h | 13 +++++++++++--
1 file changed, 11 insertions(+), 2 deletions(-)
diff --git a/modules/flann/include/opencv2/flann/hierarchical_clustering_index.h b/modules/flann/include/opencv2/flann/hierarchical_clustering_index.h
index ce2d622450..02fc278448 100644
--- a/modules/flann/include/opencv2/flann/hierarchical_clustering_index.h
+++ b/modules/flann/include/opencv2/flann/hierarchical_clustering_index.h
@@ -210,8 +210,11 @@ private:
assert(index >=0 && index < n);
centers[0] = dsindices[index];
+ // Weighting by distance^2 gives an even higher probability of picking new centers
+ // far from previous centers (and this complies to "k-means++: the advantages of careful seeding" article)
for (int i = 0; i < n; i++) {
closestDistSq[i] = distance(dataset[dsindices[i]], dataset[dsindices[index]], dataset.cols);
+ closestDistSq[i] *= closestDistSq[i];
currentPot += closestDistSq[i];
}
@@ -237,7 +240,10 @@ private:
// Compute the new potential
double newPot = 0;
- for (int i = 0; i < n; i++) newPot += std::min( distance(dataset[dsindices[i]], dataset[dsindices[index]], dataset.cols), closestDistSq[i] );
+ for (int i = 0; i < n; i++) {
+ DistanceType dist = distance(dataset[dsindices[i]], dataset[dsindices[index]], dataset.cols);
+ newPot += std::min( dist*dist, closestDistSq[i] );
+ }
// Store the best result
if ((bestNewPot < 0)||(newPot < bestNewPot)) {
@@ -249,7 +255,10 @@ private:
// Add the appropriate center
centers[centerCount] = dsindices[bestNewIndex];
currentPot = bestNewPot;
- for (int i = 0; i < n; i++) closestDistSq[i] = std::min( distance(dataset[dsindices[i]], dataset[dsindices[bestNewIndex]], dataset.cols), closestDistSq[i] );
+ for (int i = 0; i < n; i++) {
+ DistanceType dist = distance(dataset[dsindices[i]], dataset[dsindices[bestNewIndex]], dataset.cols);
+ closestDistSq[i] = std::min( dist*dist, closestDistSq[i] );
+ }
}
centers_length = centerCount;
From 5aeeaa6fce4016fd626f31f56025cf83ff07576a Mon Sep 17 00:00:00 2001
From: Pierre-Emmanuel Viel
Date: Tue, 17 Dec 2013 13:04:49 +0100
Subject: [PATCH 2/4] Apply to KMeansIndex KMeanspp the same modification as in
HierarchicalClusteringIndex
---
modules/flann/include/opencv2/flann/kmeans_index.h | 11 +++++++++--
1 file changed, 9 insertions(+), 2 deletions(-)
diff --git a/modules/flann/include/opencv2/flann/kmeans_index.h b/modules/flann/include/opencv2/flann/kmeans_index.h
index 3fea956a74..3bf12047cd 100644
--- a/modules/flann/include/opencv2/flann/kmeans_index.h
+++ b/modules/flann/include/opencv2/flann/kmeans_index.h
@@ -211,6 +211,7 @@ public:
for (int i = 0; i < n; i++) {
closestDistSq[i] = distance_(dataset_[indices[i]], dataset_[indices[index]], dataset_.cols);
+ closestDistSq[i] *= closestDistSq[i];
currentPot += closestDistSq[i];
}
@@ -236,7 +237,10 @@ public:
// Compute the new potential
double newPot = 0;
- for (int i = 0; i < n; i++) newPot += std::min( distance_(dataset_[indices[i]], dataset_[indices[index]], dataset_.cols), closestDistSq[i] );
+ for (int i = 0; i < n; i++) {
+ DistanceType dist = distance_(dataset_[indices[i]], dataset_[indices[index]], dataset_.cols);
+ newPot += std::min( dist*dist, closestDistSq[i] );
+ }
// Store the best result
if ((bestNewPot < 0)||(newPot < bestNewPot)) {
@@ -248,7 +252,10 @@ public:
// Add the appropriate center
centers[centerCount] = indices[bestNewIndex];
currentPot = bestNewPot;
- for (int i = 0; i < n; i++) closestDistSq[i] = std::min( distance_(dataset_[indices[i]], dataset_[indices[bestNewIndex]], dataset_.cols), closestDistSq[i] );
+ for (int i = 0; i < n; i++) {
+ DistanceType dist = distance_(dataset_[indices[i]], dataset_[indices[bestNewIndex]], dataset_.cols);
+ closestDistSq[i] = std::min( dist*dist, closestDistSq[i] );
+ }
}
centers_length = centerCount;
From fa749de0dcb27d3b666eda341b43e8c13f66be8e Mon Sep 17 00:00:00 2001
From: Pierre-Emmanuel Viel
Date: Tue, 17 Dec 2013 13:26:55 +0100
Subject: [PATCH 3/4] As some distance functors already return squared values,
use a template to select whether KMeanspp has to square them
---
.../include/opencv2/flann/kmeans_index.h | 62 ++++++++++++++++++-
1 file changed, 59 insertions(+), 3 deletions(-)
diff --git a/modules/flann/include/opencv2/flann/kmeans_index.h b/modules/flann/include/opencv2/flann/kmeans_index.h
index 3bf12047cd..460dc64be9 100644
--- a/modules/flann/include/opencv2/flann/kmeans_index.h
+++ b/modules/flann/include/opencv2/flann/kmeans_index.h
@@ -53,6 +53,62 @@
namespace cvflann
{
+template
+struct squareDistance
+{
+ typedef typename Distance::ResultType ResultType;
+ ResultType operator()( ResultType dist ) { return dist*dist; }
+};
+
+
+template
+struct squareDistance, ElementType>
+{
+ typedef typename L2_Simple::ResultType ResultType;
+ ResultType operator()( ResultType dist ) { return dist; }
+};
+
+template
+struct squareDistance, ElementType>
+{
+ typedef typename L2::ResultType ResultType;
+ ResultType operator()( ResultType dist ) { return dist; }
+};
+
+
+template
+struct squareDistance, ElementType>
+{
+ typedef typename MinkowskiDistance::ResultType ResultType;
+ ResultType operator()( ResultType dist ) { return dist; }
+};
+
+template
+struct squareDistance, ElementType>
+{
+ typedef typename HellingerDistance::ResultType ResultType;
+ ResultType operator()( ResultType dist ) { return dist; }
+};
+
+template
+struct squareDistance, ElementType>
+{
+ typedef typename ChiSquareDistance::ResultType ResultType;
+ ResultType operator()( ResultType dist ) { return dist; }
+};
+
+
+template
+typename Distance::ResultType ensureSquareDistance( typename Distance::ResultType dist )
+{
+ typedef typename Distance::ElementType ElementType;
+
+ squareDistance dummy;
+ return dummy( dist );
+}
+
+
+
struct KMeansIndexParams : public IndexParams
{
KMeansIndexParams(int branching = 32, int iterations = 11,
@@ -211,7 +267,7 @@ public:
for (int i = 0; i < n; i++) {
closestDistSq[i] = distance_(dataset_[indices[i]], dataset_[indices[index]], dataset_.cols);
- closestDistSq[i] *= closestDistSq[i];
+ closestDistSq[i] = ensureSquareDistance( closestDistSq[i] );
currentPot += closestDistSq[i];
}
@@ -239,7 +295,7 @@ public:
double newPot = 0;
for (int i = 0; i < n; i++) {
DistanceType dist = distance_(dataset_[indices[i]], dataset_[indices[index]], dataset_.cols);
- newPot += std::min( dist*dist, closestDistSq[i] );
+ newPot += std::min( ensureSquareDistance(dist), closestDistSq[i] );
}
// Store the best result
@@ -254,7 +310,7 @@ public:
currentPot = bestNewPot;
for (int i = 0; i < n; i++) {
DistanceType dist = distance_(dataset_[indices[i]], dataset_[indices[bestNewIndex]], dataset_.cols);
- closestDistSq[i] = std::min( dist*dist, closestDistSq[i] );
+ closestDistSq[i] = std::min( ensureSquareDistance(dist), closestDistSq[i] );
}
}
From 0d19685f9544ddf2668fa899ce74580fd9d1039f Mon Sep 17 00:00:00 2001
From: Pierre-Emmanuel Viel
Date: Tue, 17 Dec 2013 13:34:20 +0100
Subject: [PATCH 4/4] Move templates to dist.h in order to share them between
KMeansIndex and HierarchicalClusteringIndex classes.
---
modules/flann/include/opencv2/flann/dist.h | 60 +++++++++++++++++++
.../flann/hierarchical_clustering_index.h | 6 +-
.../include/opencv2/flann/kmeans_index.h | 56 -----------------
3 files changed, 63 insertions(+), 59 deletions(-)
diff --git a/modules/flann/include/opencv2/flann/dist.h b/modules/flann/include/opencv2/flann/dist.h
index 80ae2dc916..2afceb8893 100644
--- a/modules/flann/include/opencv2/flann/dist.h
+++ b/modules/flann/include/opencv2/flann/dist.h
@@ -812,6 +812,66 @@ struct ZeroIterator
};
+
+/*
+ * Some distance functors already return squared values (e.g. L2) and some do
+ * not (e.g. Hamming). KMeans++ seeding must weight points by squared distance,
+ * so these templates square a result only when the functor has not done so.
+ */
+template <typename Distance, typename ElementType>
+struct squareDistance
+{
+    typedef typename Distance::ResultType ResultType;
+    ResultType operator()( ResultType dist ) { return dist*dist; }
+};
+
+// Specializations: results of the functors below are passed through unchanged.
+template <typename ElementType>
+struct squareDistance<L2_Simple<ElementType>, ElementType>
+{
+    typedef typename L2_Simple<ElementType>::ResultType ResultType;
+    ResultType operator()( ResultType dist ) { return dist; }
+};
+
+template <typename ElementType>
+struct squareDistance<L2<ElementType>, ElementType>
+{
+    typedef typename L2<ElementType>::ResultType ResultType;
+    ResultType operator()( ResultType dist ) { return dist; }
+};
+
+
+template <typename ElementType>
+struct squareDistance<MinkowskiDistance<ElementType>, ElementType>
+{
+    typedef typename MinkowskiDistance<ElementType>::ResultType ResultType;
+    ResultType operator()( ResultType dist ) { return dist; }
+};
+
+template <typename ElementType>
+struct squareDistance<HellingerDistance<ElementType>, ElementType>
+{
+    typedef typename HellingerDistance<ElementType>::ResultType ResultType;
+    ResultType operator()( ResultType dist ) { return dist; }
+};
+
+template <typename ElementType>
+struct squareDistance<ChiSquareDistance<ElementType>, ElementType>
+{
+    typedef typename ChiSquareDistance<ElementType>::ResultType ResultType;
+    ResultType operator()( ResultType dist ) { return dist; }
+};
+
+// Distance is not deducible from the argument: call as ensureSquareDistance<Distance>(d).
+template <typename Distance>
+typename Distance::ResultType ensureSquareDistance( typename Distance::ResultType dist )
+{
+    typedef typename Distance::ElementType ElementType;
+
+    squareDistance<Distance, ElementType> dummy;
+    return dummy( dist );
+}
+
}
#endif //OPENCV_FLANN_DIST_H_
diff --git a/modules/flann/include/opencv2/flann/hierarchical_clustering_index.h b/modules/flann/include/opencv2/flann/hierarchical_clustering_index.h
index 02fc278448..3ccfa5534b 100644
--- a/modules/flann/include/opencv2/flann/hierarchical_clustering_index.h
+++ b/modules/flann/include/opencv2/flann/hierarchical_clustering_index.h
@@ -214,7 +214,7 @@ private:
// far from previous centers (and this complies to "k-means++: the advantages of careful seeding" article)
for (int i = 0; i < n; i++) {
closestDistSq[i] = distance(dataset[dsindices[i]], dataset[dsindices[index]], dataset.cols);
- closestDistSq[i] *= closestDistSq[i];
+ closestDistSq[i] = ensureSquareDistance( closestDistSq[i] );
currentPot += closestDistSq[i];
}
@@ -242,7 +242,7 @@ private:
double newPot = 0;
for (int i = 0; i < n; i++) {
DistanceType dist = distance(dataset[dsindices[i]], dataset[dsindices[index]], dataset.cols);
- newPot += std::min( dist*dist, closestDistSq[i] );
+ newPot += std::min( ensureSquareDistance(dist), closestDistSq[i] );
}
// Store the best result
@@ -257,7 +257,7 @@ private:
currentPot = bestNewPot;
for (int i = 0; i < n; i++) {
DistanceType dist = distance(dataset[dsindices[i]], dataset[dsindices[bestNewIndex]], dataset.cols);
- closestDistSq[i] = std::min( dist*dist, closestDistSq[i] );
+ closestDistSq[i] = std::min( ensureSquareDistance(dist), closestDistSq[i] );
}
}
diff --git a/modules/flann/include/opencv2/flann/kmeans_index.h b/modules/flann/include/opencv2/flann/kmeans_index.h
index 460dc64be9..3cbee24404 100644
--- a/modules/flann/include/opencv2/flann/kmeans_index.h
+++ b/modules/flann/include/opencv2/flann/kmeans_index.h
@@ -53,62 +53,6 @@
namespace cvflann
{
-template
-struct squareDistance
-{
- typedef typename Distance::ResultType ResultType;
- ResultType operator()( ResultType dist ) { return dist*dist; }
-};
-
-
-template
-struct squareDistance, ElementType>
-{
- typedef typename L2_Simple::ResultType ResultType;
- ResultType operator()( ResultType dist ) { return dist; }
-};
-
-template
-struct squareDistance, ElementType>
-{
- typedef typename L2::ResultType ResultType;
- ResultType operator()( ResultType dist ) { return dist; }
-};
-
-
-template
-struct squareDistance, ElementType>
-{
- typedef typename MinkowskiDistance::ResultType ResultType;
- ResultType operator()( ResultType dist ) { return dist; }
-};
-
-template
-struct squareDistance, ElementType>
-{
- typedef typename HellingerDistance::ResultType ResultType;
- ResultType operator()( ResultType dist ) { return dist; }
-};
-
-template
-struct squareDistance, ElementType>
-{
- typedef typename ChiSquareDistance::ResultType ResultType;
- ResultType operator()( ResultType dist ) { return dist; }
-};
-
-
-template
-typename Distance::ResultType ensureSquareDistance( typename Distance::ResultType dist )
-{
- typedef typename Distance::ElementType ElementType;
-
- squareDistance dummy;
- return dummy( dist );
-}
-
-
-
struct KMeansIndexParams : public IndexParams
{
KMeansIndexParams(int branching = 32, int iterations = 11,