|
|
|
@ -69,6 +69,7 @@ struct KMeansIndexParams : public IndexParams |
|
|
|
|
} |
|
|
|
|
}; |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
/**
|
|
|
|
|
* Hierarchical kmeans index |
|
|
|
|
* |
|
|
|
@ -275,7 +276,7 @@ public: |
|
|
|
|
public: |
|
|
|
|
KMeansDistanceComputer(Distance _distance, const Matrix<ElementType>& _dataset, |
|
|
|
|
const int _branching, const int* _indices, const Matrix<double>& _dcenters, const int _veclen, |
|
|
|
|
int* _count, int* _belongs_to, std::vector<DistanceType>& _radiuses, bool* _updated) |
|
|
|
|
int* _count, int* _belongs_to, std::vector<DistanceType>& _radiuses, bool& _converged, cv::Mutex& _mtx) |
|
|
|
|
: distance(_distance) |
|
|
|
|
, dataset(_dataset) |
|
|
|
|
, branching(_branching) |
|
|
|
@ -285,7 +286,8 @@ public: |
|
|
|
|
, count(_count) |
|
|
|
|
, belongs_to(_belongs_to) |
|
|
|
|
, radiuses(_radiuses) |
|
|
|
|
, updated(_updated) |
|
|
|
|
, converged(_converged) |
|
|
|
|
, mtx(_mtx) |
|
|
|
|
{ |
|
|
|
|
} |
|
|
|
|
|
|
|
|
@ -312,9 +314,9 @@ public: |
|
|
|
|
count[belongs_to[i]]--; |
|
|
|
|
count[new_centroid]++; |
|
|
|
|
belongs_to[i] = new_centroid; |
|
|
|
|
updated[i] = true; |
|
|
|
|
} else { |
|
|
|
|
updated[i] = false; |
|
|
|
|
mtx.lock(); |
|
|
|
|
converged = false; |
|
|
|
|
mtx.unlock(); |
|
|
|
|
} |
|
|
|
|
} |
|
|
|
|
} |
|
|
|
@ -322,14 +324,15 @@ public: |
|
|
|
|
private: |
|
|
|
|
Distance distance; |
|
|
|
|
const Matrix<ElementType>& dataset; |
|
|
|
|
const int branching; |
|
|
|
|
int branching; |
|
|
|
|
const int* indices; |
|
|
|
|
const Matrix<double>& dcenters; |
|
|
|
|
int veclen; |
|
|
|
|
int* count; |
|
|
|
|
int* belongs_to; |
|
|
|
|
std::vector<DistanceType>& radiuses; |
|
|
|
|
bool* updated; |
|
|
|
|
bool& converged; |
|
|
|
|
cv::Mutex& mtx; |
|
|
|
|
}; |
|
|
|
|
|
|
|
|
|
/**
|
|
|
|
@ -719,7 +722,8 @@ private: |
|
|
|
|
return; |
|
|
|
|
} |
|
|
|
|
|
|
|
|
|
int* centers_idx = new int[branching]; |
|
|
|
|
cv::AutoBuffer<int> centers_idx_buf(branching); |
|
|
|
|
int* centers_idx = (int*)centers_idx_buf; |
|
|
|
|
int centers_length; |
|
|
|
|
(this->*chooseCenters)(branching, indices, indices_length, centers_idx, centers_length); |
|
|
|
|
|
|
|
|
@ -727,29 +731,30 @@ private: |
|
|
|
|
node->indices = indices; |
|
|
|
|
std::sort(node->indices,node->indices+indices_length); |
|
|
|
|
node->childs = NULL; |
|
|
|
|
delete [] centers_idx; |
|
|
|
|
return; |
|
|
|
|
} |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
Matrix<double> dcenters(new double[branching*veclen_],branching,veclen_); |
|
|
|
|
cv::AutoBuffer<double> dcenters_buf(branching*veclen_); |
|
|
|
|
Matrix<double> dcenters((double*)dcenters_buf,branching,veclen_); |
|
|
|
|
for (int i=0; i<centers_length; ++i) { |
|
|
|
|
ElementType* vec = dataset_[centers_idx[i]]; |
|
|
|
|
for (size_t k=0; k<veclen_; ++k) { |
|
|
|
|
dcenters[i][k] = double(vec[k]); |
|
|
|
|
} |
|
|
|
|
} |
|
|
|
|
delete[] centers_idx; |
|
|
|
|
|
|
|
|
|
std::vector<DistanceType> radiuses(branching); |
|
|
|
|
int* count = new int[branching]; |
|
|
|
|
cv::AutoBuffer<int> count_buf(branching); |
|
|
|
|
int* count = (int*)count_buf; |
|
|
|
|
for (int i=0; i<branching; ++i) { |
|
|
|
|
radiuses[i] = 0; |
|
|
|
|
count[i] = 0; |
|
|
|
|
} |
|
|
|
|
|
|
|
|
|
// assign points to clusters
|
|
|
|
|
int* belongs_to = new int[indices_length]; |
|
|
|
|
cv::AutoBuffer<int> belongs_to_buf(indices_length); |
|
|
|
|
int* belongs_to = (int*)belongs_to_buf; |
|
|
|
|
for (int i=0; i<indices_length; ++i) { |
|
|
|
|
|
|
|
|
|
DistanceType sq_dist = distance_(dataset_[indices[i]], dcenters[0], veclen_); |
|
|
|
@ -769,7 +774,6 @@ private: |
|
|
|
|
|
|
|
|
|
bool converged = false; |
|
|
|
|
int iteration = 0; |
|
|
|
|
bool* updated = new bool[indices_length]; |
|
|
|
|
while (!converged && iteration<iterations_) { |
|
|
|
|
converged = true; |
|
|
|
|
iteration++; |
|
|
|
@ -794,13 +798,9 @@ private: |
|
|
|
|
} |
|
|
|
|
|
|
|
|
|
// reassign points to clusters
|
|
|
|
|
parallel_for_(cv::Range(0, indices_length), KMeansDistanceComputer(distance_, dataset_, branching, indices, dcenters, veclen_, count, belongs_to, radiuses, updated)); |
|
|
|
|
for (int i=0; i<indices_length; ++i) { |
|
|
|
|
if (updated[i]) { |
|
|
|
|
converged = false; |
|
|
|
|
break; |
|
|
|
|
} |
|
|
|
|
} |
|
|
|
|
cv::Mutex mtx; |
|
|
|
|
KMeansDistanceComputer invoker(distance_, dataset_, branching, indices, dcenters, veclen_, count, belongs_to, radiuses, converged, mtx); |
|
|
|
|
parallel_for_(cv::Range(0, (int)indices_length), invoker); |
|
|
|
|
|
|
|
|
|
for (int i=0; i<branching; ++i) { |
|
|
|
|
// if one cluster converges to an empty cluster,
|
|
|
|
@ -871,12 +871,6 @@ private: |
|
|
|
|
computeClustering(node->childs[c],indices+start, end-start, branching, level+1); |
|
|
|
|
start=end; |
|
|
|
|
} |
|
|
|
|
|
|
|
|
|
delete[] dcenters.data; |
|
|
|
|
delete[] centers; |
|
|
|
|
delete[] count; |
|
|
|
|
delete[] belongs_to; |
|
|
|
|
delete[] updated; |
|
|
|
|
} |
|
|
|
|
|
|
|
|
|
|
|
|
|
|