diff --git a/modules/flann/include/opencv2/flann.hpp b/modules/flann/include/opencv2/flann.hpp index 674e6583c5..e8ee91a3ec 100644 --- a/modules/flann/include/opencv2/flann.hpp +++ b/modules/flann/include/opencv2/flann.hpp @@ -191,8 +191,28 @@ public: KDTreeIndexParams( int trees = 4 ); }; @endcode + - **HierarchicalClusteringIndexParams** When passing an object of this type the index constructed + will be a hierarchical tree of clusters, dividing each set of points into n clusters whose centers + are picked among the points without further refinement of their position. + This algorithm fits floating-point, integer and binary vectors. : + @code + struct HierarchicalClusteringIndexParams : public IndexParams + { + HierarchicalClusteringIndexParams( + int branching = 32, + flann_centers_init_t centers_init = CENTERS_RANDOM, + int trees = 4, + int leaf_size = 100); + + }; + @endcode - **KMeansIndexParams** When passing an object of this type the index constructed will be a - hierarchical k-means tree. : + hierarchical k-means tree (one tree by default), dividing each set of points into n clusters + whose barycenters are refined iteratively. + Note that this algorithm has been extended to support binary vectors as an alternative + to LSH when knn search speed is the criterion. It will also outperform LSH when processing + directly (i.e. without the use of MCA/PCA) datasets whose points share mostly the same values + for most of the dimensions. It is recommended to set more than one tree with binary data. 
: @code struct KMeansIndexParams : public IndexParams { @@ -201,6 +221,13 @@ public: int iterations = 11, flann_centers_init_t centers_init = CENTERS_RANDOM, float cb_index = 0.2 ); + + KMeansIndexParams( + int branching, + int iterations, + flann_centers_init_t centers_init, + float cb_index, + int trees ); }; @endcode - **CompositeIndexParams** When using a parameters object of this type the index created @@ -219,7 +246,8 @@ public: - **LshIndexParams** When using a parameters object of this type the index created uses multi-probe LSH (by Multi-Probe LSH: Efficient Indexing for High-Dimensional Similarity Search by Qin Lv, William Josephson, Zhe Wang, Moses Charikar, Kai Li., Proceedings of the 33rd - International Conference on Very Large Data Bases (VLDB). Vienna, Austria. September 2007) : + International Conference on Very Large Data Bases (VLDB). Vienna, Austria. September 2007). + This algorithm is designed for binary vectors. : @code struct LshIndexParams : public IndexParams { diff --git a/modules/flann/include/opencv2/flann/kmeans_index.h b/modules/flann/include/opencv2/flann/kmeans_index.h index a50e0cdf8d..a823986e09 100644 --- a/modules/flann/include/opencv2/flann/kmeans_index.h +++ b/modules/flann/include/opencv2/flann/kmeans_index.h @@ -57,8 +57,8 @@ namespace cvflann struct KMeansIndexParams : public IndexParams { - KMeansIndexParams(int branching = 32, int iterations = 11, - flann_centers_init_t centers_init = FLANN_CENTERS_RANDOM, float cb_index = 0.2 ) + void indexParams(int branching, int iterations, + flann_centers_init_t centers_init, float cb_index, int trees) { (*this)["algorithm"] = FLANN_INDEX_KMEANS; // branching factor @@ -69,6 +69,20 @@ struct KMeansIndexParams : public IndexParams (*this)["centers_init"] = centers_init; // cluster boundary index. 
Used when searching the kmeans tree (*this)["cb_index"] = cb_index; + // number of kmeans trees to search in + (*this)["trees"] = trees; + } + + KMeansIndexParams(int branching = 32, int iterations = 11, + flann_centers_init_t centers_init = FLANN_CENTERS_RANDOM, float cb_index = 0.2 ) + { + indexParams(branching, iterations, centers_init, cb_index, 1); + } + + KMeansIndexParams(int branching, int iterations, + flann_centers_init_t centers_init, float cb_index, int trees) + { + indexParams(branching, iterations, centers_init, cb_index, trees); } }; @@ -347,6 +361,7 @@ public: veclen_ = dataset_.cols; branching_ = get_param(params,"branching",32); + trees_ = get_param(params,"trees",1); iterations_ = get_param(params,"iterations",11); if (iterations_<0) { iterations_ = (std::numeric_limits::max)(); @@ -367,6 +382,13 @@ public: } cb_index_ = 0.4f; + root_ = new KMeansNodePtr[trees_]; + indices_ = new int*[trees_]; + + for (int i=0; i(); - std::memset(root_, 0, sizeof(KMeansNode)); + for (int i=0; i(); + std::memset(root_[i], 0, sizeof(KMeansNode)); - if(is_kdtree_distance::val || is_vector_space_distance::val) - { - computeNodeStatistics(root_, indices_, (unsigned int)size_); - computeClustering(root_, indices_, (int)size_, branching_,0); - } - else - { - computeBitfieldNodeStatistics(root_, indices_, (unsigned int)size_); - computeBitfieldClustering(root_, indices_, (int)size_, branching_,0); + if(is_kdtree_distance::val || is_vector_space_distance::val) { + computeNodeStatistics(root_[i], indices_[i], (unsigned int)size_); + computeClustering(root_[i], indices_[i], (int)size_, branching_,0); + } + else { + computeBitfieldNodeStatistics(root_[i], indices_[i], (unsigned int)size_); + computeBitfieldClustering(root_[i], indices_[i], (int)size_, branching_,0); + } } } @@ -456,35 +481,43 @@ public: save_value(stream, iterations_); save_value(stream, memoryCounter_); save_value(stream, cb_index_); - save_value(stream, *indices_, (int)size_); - - save_tree(stream, root_); 
+ save_value(stream, trees_); + for (int i=0; i& result, const ElementType* vec, const SearchParams& searchParams) CV_OVERRIDE { - int maxChecks = get_param(searchParams,"checks",32); + const int maxChecks = get_param(searchParams,"checks",32); if (maxChecks==FLANN_CHECKS_UNLIMITED) { - findExactNN(root_, result, vec); + findExactNN(root_[0], result, vec); } else { // Priority queue storing intermediate branches in the best-bin-first search Heap* heap = new Heap((int)size_); int checks = 0; - findNN(root_, result, vec, checks, maxChecks, heap); + for (int i=0; i= maxChecks) && result.full()) + break; + } BranchSt branch; while (heap->popMin(branch) && (checkspivot), (int)veclen_); if (node->childs==NULL) { - int indices_offset = (int)(node->indices - indices_); + int indices_offset = (int)(node->indices - indices_[num]); save_value(stream, indices_offset); } else { for(int i=0; ichilds[i]); + save_tree(stream, node->childs[i], num); } } } - void load_tree(FILE* stream, KMeansNodePtr& node) + void load_tree(FILE* stream, KMeansNodePtr& node, int num) { node = pool_.allocate(); load_value(stream, *node); @@ -636,12 +672,12 @@ private: if (node->childs==NULL) { int indices_offset; load_value(stream, indices_offset); - node->indices = indices_ + indices_offset; + node->indices = indices_[num] + indices_offset; } else { node->childs = pool_.allocate(branching_); for(int i=0; ichilds[i]); + load_tree(stream, node->childs[i], num); } } } @@ -660,6 +696,32 @@ private: } } + void free_centers() + { + if (root_ != NULL) { + for(int i=0; ichilds==NULL) { - if (checks>=maxChecks) { - if (result.full()) return; + if ((checks>=maxChecks) && result.full()) { + return; } checks += node->size; for (int i=0; isize; ++i) { @@ -1397,6 +1497,9 @@ private: /** The branching factor used in the hierarchical k-means clustering */ int branching_; + /** Number of kmeans trees (default is one) */ + int trees_; + /** Maximum number of iterations to use when performing k-means clustering */ int 
iterations_; @@ -1432,12 +1535,12 @@ private: /** * The root node in the tree. */ - KMeansNodePtr root_; + KMeansNodePtr* root_; /** * Array of indices to vectors in the dataset. */ - int* indices_; + int** indices_; /** * The distance