diff --git a/modules/flann/include/opencv2/flann/kmeans_index.h b/modules/flann/include/opencv2/flann/kmeans_index.h index f96b0f81fc..6f63629d05 100644 --- a/modules/flann/include/opencv2/flann/kmeans_index.h +++ b/modules/flann/include/opencv2/flann/kmeans_index.h @@ -50,6 +50,9 @@ #include "logger.h" #define BITS_PER_CHAR 8 +#define BITS_PER_BASE 2 // for DNA/RNA sequences +#define BASE_PER_CHAR (BITS_PER_CHAR/BITS_PER_BASE) +#define HISTOS_PER_BASE (1<( + veclen_*sizeof(CentersType)*(HISTOS_PER_BASE*BASE_PER_CHAR)); + + unsigned long long variance = 0ull; + unsigned int* histograms = new unsigned int[histos_veclen]; + memset(histograms, 0, sizeof(unsigned int)*histos_veclen); + + for (unsigned int i=0; i( ensureSquareDistance( + distance_(dataset_[indices[i]], ZeroIterator(), veclen_))); + + unsigned char* vec = (unsigned char*)dataset_[indices[i]]; + for (size_t k=0, l=0; k>2) & 0x03)]++; + histograms[k + 8 + ((vec[l]>>4) & 0x03)]++; + histograms[k +12 + ((vec[l]>>6) & 0x03)]++; + } + } + + CentersType* mean = new CentersType[veclen_]; + memoryCounter_ += int(veclen_*sizeof(CentersType)); + unsigned char* char_mean = (unsigned char*)mean; + unsigned int* h = histograms; + for (size_t k=0, l=0; k h[k+1] ? h[k+2] > h[k+3] ? h[k] > h[k+2] ? 0x00 : 0x10 + : h[k] > h[k+3] ? 0x00 : 0x11 + : h[k+2] > h[k+3] ? h[k+1] > h[k+2] ? 0x01 : 0x10 + : h[k+1] > h[k+3] ? 0x01 : 0x11) + | (h[k+4]>h[k+5] ? h[k+6] > h[k+7] ? h[k+4] > h[k+6] ? 0x00 : 0x1000 + : h[k+4] > h[k+7] ? 0x00 : 0x1100 + : h[k+6] > h[k+7] ? h[k+5] > h[k+6] ? 0x0100 : 0x1000 + : h[k+5] > h[k+7] ? 0x0100 : 0x1100) + | (h[k+8]>h[k+9] ? h[k+10]>h[k+11] ? h[k+8] >h[k+10] ? 0x00 : 0x100000 + : h[k+8] >h[k+11] ? 0x00 : 0x110000 + : h[k+10]>h[k+11] ? h[k+9] >h[k+10] ? 0x010000 : 0x100000 + : h[k+9] >h[k+11] ? 0x010000 : 0x110000) + | (h[k+12]>h[k+13] ? h[k+14]>h[k+15] ? h[k+12] >h[k+14] ? 0x00 : 0x10000000 + : h[k+12] >h[k+15] ? 0x00 : 0x11000000 + : h[k+14]>h[k+15] ? h[k+13] >h[k+14] ? 0x01000000 : 0x10000000 + : h[k+13] >h[k+15] ? 0x01000000 : 0x11000000); + } + variance = static_cast( + 0.5 + static_cast(variance) / static_cast(indices_length)); + variance -= static_cast( + ensureSquareDistance( + distance_(mean, ZeroIterator(), veclen_))); + + DistanceType radius = 0; + for (unsigned int i=0; iradius) { + radius = tmp; + } + } + + node->variance = static_cast(variance); + node->radius = radius; + node->pivot = mean; + + delete[] histograms; + } + + template void computeNodeStatistics(KMeansNodePtr node, int* indices, unsigned int indices_length, @@ -858,6 +928,22 @@ private: computeBitfieldNodeStatistics(node, indices, indices_length); } + void computeNodeStatistics(KMeansNodePtr node, int* indices, + unsigned int indices_length, + const cvflann::DNAmmingLUT* identifier) + { + (void)identifier; + computeDnaNodeStatistics(node, indices, indices_length); + } + + void computeNodeStatistics(KMeansNodePtr node, int* indices, + unsigned int indices_length, + const cvflann::DNAmming2* identifier) + { + (void)identifier; + computeDnaNodeStatistics(node, indices, indices_length); + } + void refineClustering(int* indices, int indices_length, int branching, CentersType** centers, std::vector& radiuses, int* belongs_to, int* count) @@ -1051,6 +1137,112 @@ private: } + void refineDnaClustering(int* indices, int indices_length, int branching, CentersType** centers, + std::vector& radiuses, int* belongs_to, int* count) + { + for (int i=0; i( + veclen_*sizeof(CentersType)*(HISTOS_PER_BASE*BASE_PER_CHAR)); + cv::AutoBuffer histos_buf(branching*histos_veclen); + Matrix histos(histos_buf.data(), branching, histos_veclen); + + bool converged = false; + int iteration = 0; + while (!converged && iteration>2) & 0x03)]++; + h[k + 8 + ((vec[l]>>4) & 0x03)]++; + h[k +12 + ((vec[l]>>6) & 0x03)]++; + } + } + for (int i=0; i h[k+1] ? h[k+2] > h[k+3] ? h[k] > h[k+2] ? 0x00 : 0x10 + : h[k] > h[k+3] ? 0x00 : 0x11 + : h[k+2] > h[k+3] ? h[k+1] > h[k+2] ? 0x01 : 0x10 + : h[k+1] > h[k+3] ? 0x01 : 0x11) + | (h[k+4]>h[k+5] ? h[k+6] > h[k+7] ? h[k+4] > h[k+6] ? 0x00 : 0x1000 + : h[k+4] > h[k+7] ? 0x00 : 0x1100 + : h[k+6] > h[k+7] ? h[k+5] > h[k+6] ? 0x0100 : 0x1000 + : h[k+5] > h[k+7] ? 0x0100 : 0x1100) + | (h[k+8]>h[k+9] ? h[k+10]>h[k+11] ? h[k+8] >h[k+10] ? 0x00 : 0x100000 + : h[k+8] >h[k+11] ? 0x00 : 0x110000 + : h[k+10]>h[k+11] ? h[k+9] >h[k+10] ? 0x010000 : 0x100000 + : h[k+9] >h[k+11] ? 0x010000 : 0x110000) + | (h[k+12]>h[k+13] ? h[k+14]>h[k+15] ? h[k+12] >h[k+14] ? 0x00 : 0x10000000 + : h[k+12] >h[k+15] ? 0x00 : 0x11000000 + : h[k+14]>h[k+15] ? h[k+13] >h[k+14] ? 0x01000000 : 0x10000000 + : h[k+13] >h[k+15] ? 0x01000000 : 0x11000000); + } + } + + std::vector new_centroids(indices_length); + std::vector dists(indices_length); + + // reassign points to clusters + KMeansDistanceComputer invoker( + distance_, dataset_, branching, indices, centers, veclen_, new_centroids, dists); + parallel_for_(cv::Range(0, (int)indices_length), invoker); + + for (int i=0; i < indices_length; ++i) { + DistanceType dist(dists[i]); + int new_centroid(new_centroids[i]); + if (dist > radiuses[new_centroid]) { + radiuses[new_centroid] = dist; + } + if (new_centroid != belongs_to[i]) { + count[belongs_to[i]]--; + count[new_centroid]++; + belongs_to[i] = new_centroid; + converged = false; + } + } + + for (int i=0; i& radiuses, int* belongs_to, int* count) @@ -1150,7 +1342,7 @@ private: /** * The methods responsible with doing the recursive hierarchical clustering on * binary vectors. - * As some might have heared that KMeans on binary data doesn't make sense, + * As some might have heard that KMeans on binary data doesn't make sense, * it's worth a little explanation why it actually fairly works. As * with the Hierarchical Clustering algortihm, we seed several centers for the * current node by picking some of its points. Then in a first pass each point @@ -1233,6 +1425,34 @@ private: } + void refineAndSplitClustering( + KMeansNodePtr node, int* indices, int indices_length, int branching, + int level, CentersType** centers, std::vector& radiuses, + int* belongs_to, int* count, const cvflann::DNAmmingLUT* identifier) + { + (void)identifier; + refineDnaClustering( + indices, indices_length, branching, centers, radiuses, belongs_to, count); + + computeAnyBitfieldSubClustering(node, indices, indices_length, branching, + level, centers, radiuses, belongs_to, count); + } + + + void refineAndSplitClustering( + KMeansNodePtr node, int* indices, int indices_length, int branching, + int level, CentersType** centers, std::vector& radiuses, + int* belongs_to, int* count, const cvflann::DNAmming2* identifier) + { + (void)identifier; + refineDnaClustering( + indices, indices_length, branching, centers, radiuses, belongs_to, count); + + computeAnyBitfieldSubClustering(node, indices, indices_length, branching, + level, centers, radiuses, belongs_to, count); + } + + /** * The method responsible with actually doing the recursive hierarchical * clustering diff --git a/modules/flann/src/miniflann.cpp b/modules/flann/src/miniflann.cpp index 1f698e7871..a1146ec2e7 100644 --- a/modules/flann/src/miniflann.cpp +++ b/modules/flann/src/miniflann.cpp @@ -345,6 +345,7 @@ typedef ::cvflann::Hamming HammingDistance; #else typedef ::cvflann::HammingLUT HammingDistance; #endif +typedef ::cvflann::DNAmming2 DNAmmingDistance; Index::Index() { @@ -397,6 +398,9 @@ void Index::build(InputArray _data, const IndexParams& params, flann_distance_t buildIndex< ::cvflann::L1 >(index, data, params); break; #if MINIFLANN_SUPPORT_EXOTIC_DISTANCE_TYPES + case FLANN_DIST_DNAMMING: + buildIndex< DNAmmingDistance >(index, data, params); + break; case FLANN_DIST_MAX: buildIndex< ::cvflann::MaxDistance >(index, data, params); break; @@ -452,6 +456,9 @@ void Index::release() deleteIndex< ::cvflann::L1 >(index); break; #if MINIFLANN_SUPPORT_EXOTIC_DISTANCE_TYPES + case FLANN_DIST_DNAMMING: + deleteIndex< DNAmmingDistance >(index); + break; case FLANN_DIST_MAX: deleteIndex< ::cvflann::MaxDistance >(index); break; @@ -573,7 +580,8 @@ void Index::knnSearch(InputArray _query, OutputArray _indices, CV_INSTRUMENT_REGION(); Mat query = _query.getMat(), indices, dists; - int dtype = distType == FLANN_DIST_HAMMING ? CV_32S : CV_32F; + int dtype = (distType == FLANN_DIST_HAMMING) + || (distType == FLANN_DIST_DNAMMING) ? CV_32S : CV_32F; createIndicesDists( _indices, _dists, indices, dists, query.rows, knn, knn, dtype ); @@ -589,6 +597,9 @@ void Index::knnSearch(InputArray _query, OutputArray _indices, runKnnSearch< ::cvflann::L1 >(index, query, indices, dists, knn, params); break; #if MINIFLANN_SUPPORT_EXOTIC_DISTANCE_TYPES + case FLANN_DIST_DNAMMING: + runKnnSearch(index, query, indices, dists, knn, params); + break; case FLANN_DIST_MAX: runKnnSearch< ::cvflann::MaxDistance >(index, query, indices, dists, knn, params); break; @@ -617,7 +628,8 @@ int Index::radiusSearch(InputArray _query, OutputArray _indices, CV_INSTRUMENT_REGION(); Mat query = _query.getMat(), indices, dists; - int dtype = distType == FLANN_DIST_HAMMING ? CV_32S : CV_32F; + int dtype = (distType == FLANN_DIST_HAMMING) + || (distType == FLANN_DIST_DNAMMING) ? CV_32S : CV_32F; CV_Assert( maxResults > 0 ); createIndicesDists( _indices, _dists, indices, dists, query.rows, maxResults, INT_MAX, dtype ); @@ -634,6 +646,8 @@ int Index::radiusSearch(InputArray _query, OutputArray _indices, case FLANN_DIST_L1: return runRadiusSearch< ::cvflann::L1 >(index, query, indices, dists, radius, params); #if MINIFLANN_SUPPORT_EXOTIC_DISTANCE_TYPES + case FLANN_DIST_DNAMMING: + return runRadiusSearch< DNAmmingDistance >(index, query, indices, dists, radius, params); case FLANN_DIST_MAX: return runRadiusSearch< ::cvflann::MaxDistance >(index, query, indices, dists, radius, params); case FLANN_DIST_HIST_INTERSECT: @@ -697,6 +711,9 @@ void Index::save(const String& filename) const saveIndex< ::cvflann::L1 >(this, index, fout); break; #if MINIFLANN_SUPPORT_EXOTIC_DISTANCE_TYPES + case FLANN_DIST_DNAMMING: + saveIndex< DNAmmingDistance >(this, index, fout); + break; case FLANN_DIST_MAX: saveIndex< ::cvflann::MaxDistance >(this, index, fout); break; @@ -778,6 +795,7 @@ bool Index::load(InputArray _data, const String& filename) distType = (flann_distance_t)idistType; if( !((distType == FLANN_DIST_HAMMING && featureType == CV_8U) || + (distType == FLANN_DIST_DNAMMING && featureType == CV_8U) || (distType != FLANN_DIST_HAMMING && featureType == CV_32F)) ) { fprintf(stderr, "Reading FLANN index error: unsupported feature type %d for the index type %d\n", featureType, algo); @@ -797,6 +815,9 @@ bool Index::load(InputArray _data, const String& filename) loadIndex< ::cvflann::L1 >(this, index, data, fin); break; #if MINIFLANN_SUPPORT_EXOTIC_DISTANCE_TYPES + case FLANN_DIST_DNAMMING: + loadIndex< DNAmmingDistance >(this, index, data, fin); + break; case FLANN_DIST_MAX: loadIndex< ::cvflann::MaxDistance >(this, index, data, fin); break;