Merge pull request #1759 from ilya-lavrenov:ocl_distanceToCenters

pull/1771/merge
Roman Donchenko 11 years ago committed by OpenCV Buildbot
commit 800d53f76b
  1. 12
      modules/ocl/doc/ml_machine_learning.rst
  2. 2
      modules/ocl/include/opencv2/ocl/ocl.hpp
  3. 41
      modules/ocl/perf/perf_imgproc.cpp
  4. 94
      modules/ocl/src/kmeans.cpp
  5. 101
      modules/ocl/src/opencl/kmeans_kernel.cl
  6. 67
      modules/ocl/test/test_kmeans.cpp

@ -91,11 +91,7 @@ ocl::distanceToCenters
---------------------- ----------------------
For each samples in ``source``, find its closest neighour in ``centers``. For each samples in ``source``, find its closest neighour in ``centers``.
.. ocv:function:: void ocl::distanceToCenters(oclMat &dists, oclMat &labels, const oclMat &src, const oclMat &centers, int distType = NORM_L2SQR, const oclMat &indices = oclMat()) .. ocv:function:: void ocl::distanceToCenters(const oclMat &src, const oclMat &centers, Mat &dists, Mat &labels, int distType = NORM_L2SQR)
:param dists: The output distances calculated from each sample to the best matched center.
:param labels: The output index of best matched center for each row of sample.
:param src: Floating-point matrix of input samples. One row per sample. :param src: Floating-point matrix of input samples. One row per sample.
@ -103,10 +99,8 @@ For each samples in ``source``, find its closest neighour in ``centers``.
:param distType: Distance metric to calculate distances. Supports ``NORM_L1`` and ``NORM_L2SQR``. :param distType: Distance metric to calculate distances. Supports ``NORM_L1`` and ``NORM_L2SQR``.
:param indices: Optional source indices. If not empty: :param dists: The output distances calculated from each sample to the best matched center.
* only the indexed source samples will be processed :param labels: The output index of best matched center for each row of sample.
* outputs, i.e., ``dists`` and ``labels``, have the same size of indices
* outputs are in the same order of indices instead of the order of src
The method is a utility function which maybe used for multiple clustering algorithms such as K-means. The method is a utility function which maybe used for multiple clustering algorithms such as K-means.

@ -879,7 +879,7 @@ namespace cv
// supports NORM_L1 and NORM_L2 distType // supports NORM_L1 and NORM_L2 distType
// if indices is provided, only the indexed rows will be calculated and their results are in the same // if indices is provided, only the indexed rows will be calculated and their results are in the same
// order of indices // order of indices
CV_EXPORTS void distanceToCenters(oclMat &dists, oclMat &labels, const oclMat &src, const oclMat &centers, int distType = NORM_L2SQR, const oclMat &indices = oclMat()); CV_EXPORTS void distanceToCenters(const oclMat &src, const oclMat &centers, Mat &dists, Mat &labels, int distType = NORM_L2SQR);
//!Does k-means procedure on GPU //!Does k-means procedure on GPU
// supports CV_32FC1/CV_32FC2/CV_32FC4 data type // supports CV_32FC1/CV_32FC2/CV_32FC4 data type

@ -872,58 +872,57 @@ PERF_TEST_P(columnSumFixture, columnSum, OCL_TYPICAL_MAT_SIZES)
//////////////////////////////distanceToCenters//////////////////////////////////////////////// //////////////////////////////distanceToCenters////////////////////////////////////////////////
CV_ENUM(DistType, NORM_L1, NORM_L2SQR); CV_ENUM(DistType, NORM_L1, NORM_L2SQR)
typedef tuple<Size, DistType> distanceToCentersParameters; typedef tuple<Size, DistType> distanceToCentersParameters;
typedef TestBaseWithParam<distanceToCentersParameters> distanceToCentersFixture; typedef TestBaseWithParam<distanceToCentersParameters> distanceToCentersFixture;
static void distanceToCentersPerfTest(Mat& src, Mat& centers, Mat& dists, Mat& labels, int distType) static void distanceToCentersPerfTest(Mat& src, Mat& centers, Mat& dists, Mat& labels, int distType)
{ {
Mat batch_dists; Mat batch_dists;
cv::batchDistance(src,centers,batch_dists, CV_32FC1, noArray(), distType); cv::batchDistance(src, centers, batch_dists, CV_32FC1, noArray(), distType);
std::vector<float> dists_v; std::vector<float> dists_v;
std::vector<int> labels_v; std::vector<int> labels_v;
for(int i = 0; i<batch_dists.rows; i++)
for (int i = 0; i < batch_dists.rows; i++)
{ {
Mat r = batch_dists.row(i); Mat r = batch_dists.row(i);
double mVal; double mVal;
Point mLoc; Point mLoc;
minMaxLoc(r, &mVal, NULL, &mLoc, NULL); minMaxLoc(r, &mVal, NULL, &mLoc, NULL);
dists_v.push_back((float)mVal); dists_v.push_back(static_cast<float>(mVal));
labels_v.push_back(mLoc.x); labels_v.push_back(mLoc.x);
} }
Mat temp_dists(dists_v);
Mat temp_labels(labels_v); Mat(dists_v).copyTo(dists);
temp_dists.reshape(1,1).copyTo(dists); Mat(labels_v).copyTo(labels);
temp_labels.reshape(1,1).copyTo(labels);
} }
PERF_TEST_P(distanceToCentersFixture, distanceToCenters, ::testing::Combine(::testing::Values(cv::Size(256,256), cv::Size(512,512)), DistType::all()) ) PERF_TEST_P(distanceToCentersFixture, distanceToCenters, ::testing::Combine(::testing::Values(cv::Size(256,256), cv::Size(512,512)), DistType::all()) )
{ {
Size size = get<0>(GetParam()); Size size = get<0>(GetParam());
int distType = get<1>(GetParam()); int distType = get<1>(GetParam());
Mat src(size, CV_32FC1);
Mat centers(size, CV_32FC1); Mat src(size, CV_32FC1), centers(size, CV_32FC1);
Mat dists(cv::Size(src.rows,1), CV_32FC1); Mat dists(src.rows, 1, CV_32FC1), labels(src.rows, 1, CV_32SC1);
Mat labels(cv::Size(src.rows,1), CV_32SC1);
declare.in(src, centers, WARMUP_RNG).out(dists, labels); declare.in(src, centers, WARMUP_RNG).out(dists, labels);
if (RUN_OCL_IMPL) if (RUN_OCL_IMPL)
{ {
ocl::oclMat ocl_src(src); ocl::oclMat ocl_src(src), ocl_centers(centers);
ocl::oclMat ocl_centers(centers);
ocl::oclMat ocl_dists(dists);
ocl::oclMat ocl_labels(labels);
OCL_TEST_CYCLE() ocl::distanceToCenters(ocl_dists,ocl_labels,ocl_src, ocl_centers, distType); OCL_TEST_CYCLE() ocl::distanceToCenters(ocl_src, ocl_centers, dists, labels, distType);
ocl_dists.download(dists);
ocl_labels.download(labels);
SANITY_CHECK(dists, 1e-6, ERROR_RELATIVE); SANITY_CHECK(dists, 1e-6, ERROR_RELATIVE);
SANITY_CHECK(labels); SANITY_CHECK(labels);
} }
else if (RUN_PLAIN_IMPL) else if (RUN_PLAIN_IMPL)
{ {
TEST_CYCLE() distanceToCentersPerfTest(src,centers,dists,labels,distType); TEST_CYCLE() distanceToCentersPerfTest(src, centers, dists, labels, distType);
SANITY_CHECK(dists, 1e-6, ERROR_RELATIVE); SANITY_CHECK(dists, 1e-6, ERROR_RELATIVE);
SANITY_CHECK(labels); SANITY_CHECK(labels);
} }

@ -160,63 +160,66 @@ static void generateCentersPP(const Mat& _data, Mat& _out_centers,
} }
} }
void cv::ocl::distanceToCenters(oclMat &dists, oclMat &labels, const oclMat &src, const oclMat &centers, int distType, const oclMat &indices) void cv::ocl::distanceToCenters(const oclMat &src, const oclMat &centers, Mat &dists, Mat &labels, int distType)
{ {
CV_Assert(src.cols*src.oclchannels() == centers.cols*centers.oclchannels()); CV_Assert(src.cols * src.channels() == centers.cols * centers.channels());
CV_Assert(src.depth() == CV_32F && centers.depth() == CV_32F); CV_Assert(src.depth() == CV_32F && centers.depth() == CV_32F);
bool is_label_row_major = false;
ensureSizeIsEnough(1, src.rows, CV_32FC1, dists);
if(labels.empty() || (!labels.empty() && labels.rows == src.rows && labels.cols == 1))
{
ensureSizeIsEnough(src.rows, 1, CV_32SC1, labels);
is_label_row_major = true;
}
CV_Assert(distType == NORM_L1 || distType == NORM_L2SQR); CV_Assert(distType == NORM_L1 || distType == NORM_L2SQR);
std::stringstream build_opt_ss; dists.create(src.rows, 1, CV_32FC1);
build_opt_ss labels.create(src.rows, 1, CV_32SC1);
<< (distType == NORM_L1 ? "-D L1_DIST" : "-D L2SQR_DIST")
<< (indices.empty() ? "" : " -D USE_INDEX");
String build_opt = build_opt_ss.str();
const int src_step = (int)(src.oclchannels() * src.step / src.elemSize()); std::stringstream build_opt_ss;
const int centers_step = (int)(centers.oclchannels() * centers.step / centers.elemSize()); build_opt_ss << (distType == NORM_L1 ? "-D L1_DIST" : "-D L2SQR_DIST");
const int colsNumb = centers.cols*centers.oclchannels();
const int label_step = is_label_row_major ? (int)(labels.step / labels.elemSize()) : 1;
String kernelname = "distanceToCenters";
const int number_of_input = indices.empty() ? src.rows : indices.size().area();
const int src_offset = (int)src.offset/src.elemSize(); int src_step = src.step / src.elemSize1();
const int centers_offset = (int)centers.offset/centers.elemSize(); int centers_step = centers.step / centers.elemSize1();
int feature_width = centers.cols * centers.oclchannels();
int src_offset = src.offset / src.elemSize1();
int centers_offset = centers.offset / centers.elemSize1();
size_t globalThreads[3] = {number_of_input, 1, 1}; int all_dist_count = src.rows * centers.rows;
oclMat all_dist(1, all_dist_count, CV_32FC1);
vector<pair<size_t, const void *> > args; vector<pair<size_t, const void *> > args;
args.push_back(make_pair(sizeof(cl_mem), (void *)&src.data)); args.push_back(make_pair(sizeof(cl_mem), (void *)&src.data));
args.push_back(make_pair(sizeof(cl_mem), (void *)&centers.data)); args.push_back(make_pair(sizeof(cl_mem), (void *)&centers.data));
if(!indices.empty()) args.push_back(make_pair(sizeof(cl_mem), (void *)&all_dist.data));
{
args.push_back(make_pair(sizeof(cl_mem), (void *)&indices.data)); args.push_back(make_pair(sizeof(cl_int), (void *)&feature_width));
}
args.push_back(make_pair(sizeof(cl_mem), (void *)&labels.data));
args.push_back(make_pair(sizeof(cl_mem), (void *)&dists.data));
args.push_back(make_pair(sizeof(cl_int), (void *)&colsNumb));
args.push_back(make_pair(sizeof(cl_int), (void *)&src_step)); args.push_back(make_pair(sizeof(cl_int), (void *)&src_step));
args.push_back(make_pair(sizeof(cl_int), (void *)&centers_step)); args.push_back(make_pair(sizeof(cl_int), (void *)&centers_step));
args.push_back(make_pair(sizeof(cl_int), (void *)&label_step)); args.push_back(make_pair(sizeof(cl_int), (void *)&src.rows));
args.push_back(make_pair(sizeof(cl_int), (void *)&number_of_input));
args.push_back(make_pair(sizeof(cl_int), (void *)&centers.rows)); args.push_back(make_pair(sizeof(cl_int), (void *)&centers.rows));
args.push_back(make_pair(sizeof(cl_int), (void *)&src_offset)); args.push_back(make_pair(sizeof(cl_int), (void *)&src_offset));
args.push_back(make_pair(sizeof(cl_int), (void *)&centers_offset)); args.push_back(make_pair(sizeof(cl_int), (void *)&centers_offset));
size_t globalThreads[3] = { all_dist_count, 1, 1 };
openCLExecuteKernel(Context::getContext(), &kmeans_kernel, openCLExecuteKernel(Context::getContext(), &kmeans_kernel,
kernelname, globalThreads, NULL, args, -1, -1, build_opt.c_str()); "distanceToCenters", globalThreads, NULL, args, -1, -1, build_opt_ss.str().c_str());
Mat all_dist_cpu;
all_dist.download(all_dist_cpu);
for (int i = 0; i < src.rows; ++i)
{
Point p;
double minVal;
Rect roi(i * centers.rows, 0, centers.rows, 1);
Mat hdr(all_dist_cpu, roi);
cv::minMaxLoc(hdr, &minVal, NULL, &p);
dists.at<float>(i, 0) = static_cast<float>(minVal);
labels.at<int>(i, 0) = p.x;
}
} }
///////////////////////////////////k - means ///////////////////////////////////////////////////////// ///////////////////////////////////k - means /////////////////////////////////////////////////////////
double cv::ocl::kmeans(const oclMat &_src, int K, oclMat &_bestLabels, double cv::ocl::kmeans(const oclMat &_src, int K, oclMat &_bestLabels,
TermCriteria criteria, int attempts, int flags, oclMat &_centers) TermCriteria criteria, int attempts, int flags, oclMat &_centers)
{ {
@ -429,28 +432,19 @@ double cv::ocl::kmeans(const oclMat &_src, int K, oclMat &_bestLabels,
break; break;
// assign labels // assign labels
oclMat _dists(1, N, CV_64F); Mat dists(1, N, CV_64F);
_bestLabels.upload(_labels);
_centers.upload(centers); _centers.upload(centers);
distanceToCenters(_src, _centers, dists, _labels);
_bestLabels.upload(_labels);
distanceToCenters(_dists, _bestLabels, _src, _centers);
Mat dists;
_dists.download(dists);
_bestLabels.download(_labels);
float* dist = dists.ptr<float>(0); float* dist = dists.ptr<float>(0);
compactness = 0; compactness = 0;
for( i = 0; i < N; i++ ) for( i = 0; i < N; i++ )
{ compactness += (double)dist[i];
compactness += (double)dist[i];
}
} }
if( compactness < best_compactness ) if( compactness < best_compactness )
{
best_compactness = compactness; best_compactness = compactness;
}
} }
return best_compactness; return best_compactness;

@ -44,81 +44,64 @@
// //
//M*/ //M*/
static float distance_(__global const float * center, __global const float * src, int feature_length)
{
float res = 0;
float4 v0, v1, v2;
int i = 0;
#ifdef L1_DIST #ifdef L1_DIST
# define DISTANCE(A, B) fabs((A) - (B)) float4 sum = (float4)(0.0f);
#elif defined L2SQR_DIST
# define DISTANCE(A, B) ((A) - (B)) * ((A) - (B))
#else
# define DISTANCE(A, B) ((A) - (B)) * ((A) - (B))
#endif #endif
inline float dist(__global const float * center, __global const float * src, int feature_cols) for ( ; i <= feature_length - 4; i += 4)
{
float res = 0;
float4 tmp4;
int i;
for(i = 0; i < feature_cols / 4; i += 4, center += 4, src += 4)
{ {
tmp4 = vload4(0, center) - vload4(0, src); v0 = vload4(0, center + i);
v1 = vload4(0, src + i);
v2 = v1 - v0;
#ifdef L1_DIST #ifdef L1_DIST
tmp4 = fabs(tmp4); v0 = fabs(v2);
sum += v0;
#else #else
tmp4 *= tmp4; res += dot(v2, v2);
#endif #endif
res += tmp4.x + tmp4.y + tmp4.z + tmp4.w;
} }
for(; i < feature_cols; ++i, ++center, ++src) #ifdef L1_DIST
res = sum.x + sum.y + sum.z + sum.w;
#endif
for ( ; i < feature_length; ++i)
{ {
res += DISTANCE(*src, *center); float t0 = src[i];
float t1 = center[i];
#ifdef L1_DIST
res += fabs(t0 - t1);
#else
float t2 = t0 - t1;
res += t2 * t2;
#endif
} }
return res; return res;
} }
// to be distinguished with distanceToCenters in kmeans_kernel.cl __kernel void distanceToCenters(__global const float * src, __global const float * centers,
__kernel void distanceToCenters( __global float * dists, int feature_length,
__global const float *src, int src_step, int centers_step,
__global const float *centers, int features_count, int centers_count,
#ifdef USE_INDEX int src_offset, int centers_offset)
__global const int *indices,
#endif
__global int *labels,
__global float *dists,
int feature_cols,
int src_step,
int centers_step,
int label_step,
int input_size,
int K,
int offset_src,
int offset_centers
)
{ {
int gid = get_global_id(0); int gid = get_global_id(0);
float euDist, minval;
int minCentroid; if (gid < (features_count * centers_count))
if(gid >= input_size)
{
return;
}
src += offset_src;
centers += offset_centers;
#ifdef USE_INDEX
src += indices[gid] * src_step;
#else
src += gid * src_step;
#endif
minval = dist(centers, src, feature_cols);
minCentroid = 0;
for(int i = 1 ; i < K; i++)
{ {
euDist = dist(centers + i * centers_step, src, feature_cols); int feature_index = gid / centers_count;
if(euDist < minval) int center_index = gid % centers_count;
{
minval = euDist; int center_idx = mad24(center_index, centers_step, centers_offset);
minCentroid = i; int src_idx = mad24(feature_index, src_step, src_offset);
}
dists[gid] = distance_(centers + center_idx, src + src_idx, feature_length);
} }
labels[gid * label_step] = minCentroid;
dists[gid] = minval;
} }

@ -61,7 +61,7 @@ PARAM_TEST_CASE(Kmeans, int, int, int)
int type; int type;
int K; int K;
int flags; int flags;
cv::Mat src ; Mat src ;
ocl::oclMat d_src, d_dists; ocl::oclMat d_src, d_dists;
Mat labels, centers; Mat labels, centers;
@ -73,7 +73,7 @@ PARAM_TEST_CASE(Kmeans, int, int, int)
flags = GET_PARAM(2); flags = GET_PARAM(2);
// MWIDTH=256, MHEIGHT=256. defined in utility.hpp // MWIDTH=256, MHEIGHT=256. defined in utility.hpp
cv::Size size = cv::Size(MWIDTH, MHEIGHT); Size size = Size(MWIDTH, MHEIGHT);
src.create(size, type); src.create(size, type);
int row_idx = 0; int row_idx = 0;
const int max_neighbour = MHEIGHT / K - 1; const int max_neighbour = MHEIGHT / K - 1;
@ -159,15 +159,15 @@ INSTANTIATE_TEST_CASE_P(OCL_ML, Kmeans, Combine(
/////////////////////////////// DistanceToCenters ////////////////////////////////////////// /////////////////////////////// DistanceToCenters //////////////////////////////////////////
CV_ENUM(DistType, NORM_L1, NORM_L2SQR); CV_ENUM(DistType, NORM_L1, NORM_L2SQR)
PARAM_TEST_CASE(distanceToCenters, DistType, bool) PARAM_TEST_CASE(distanceToCenters, DistType, bool)
{ {
cv::Size size;
int distType; int distType;
bool useRoi; bool useRoi;
cv::Mat src, centers, src_roi, centers_roi;
cv::ocl::oclMat ocl_src, ocl_centers, ocl_src_roi, ocl_centers_roi; Mat src, centers, src_roi, centers_roi;
ocl::oclMat ocl_src, ocl_centers, ocl_src_roi, ocl_centers_roi;
virtual void SetUp() virtual void SetUp()
{ {
@ -177,70 +177,59 @@ PARAM_TEST_CASE(distanceToCenters, DistType, bool)
void random_roi() void random_roi()
{ {
Size roiSize_src = randomSize(10,1000); Size roiSizeSrc = randomSize(1, MAX_VALUE);
Size roiSize_centers = randomSize(10, 1000); Size roiSizeCenters = randomSize(1, MAX_VALUE);
roiSize_src.width = roiSize_centers.width; roiSizeSrc.width = roiSizeCenters.width;
Border srcBorder = randomBorder(0, useRoi ? 500 : 0); Border srcBorder = randomBorder(0, useRoi ? MAX_VALUE : 0);
randomSubMat(src, src_roi, roiSize_src, srcBorder, CV_32FC1, -SHRT_MAX, SHRT_MAX); randomSubMat(src, src_roi, roiSizeSrc, srcBorder, CV_32FC1, -MAX_VALUE, MAX_VALUE);
Border centersBorder = randomBorder(0, useRoi ? 500 : 0); Border centersBorder = randomBorder(0, useRoi ? 500 : 0);
randomSubMat(centers, centers_roi, roiSize_centers, centersBorder, CV_32FC1, -SHRT_MAX, SHRT_MAX); randomSubMat(centers, centers_roi, roiSizeCenters, centersBorder, CV_32FC1, -MAX_VALUE, MAX_VALUE);
for(int i = 0; i<centers.rows; i++)
centers.at<float>(i, randomInt(0,centers.cols-1)) = (float)randomDouble(SHRT_MAX, INT_MAX);
generateOclMat(ocl_src, ocl_src_roi, src, roiSize_src, srcBorder); for (int i = 0; i < centers.rows; i++)
generateOclMat(ocl_centers, ocl_centers_roi, centers, roiSize_centers, centersBorder); centers.at<float>(i, randomInt(0, centers.cols)) = (float)randomDouble(SHRT_MAX, INT_MAX);
generateOclMat(ocl_src, ocl_src_roi, src, roiSizeSrc, srcBorder);
generateOclMat(ocl_centers, ocl_centers_roi, centers, roiSizeCenters, centersBorder);
} }
}; };
OCL_TEST_P(distanceToCenters, Accuracy) OCL_TEST_P(distanceToCenters, Accuracy)
{ {
for(int j = 0; j< LOOP_TIMES; j++) for (int j = 0; j < LOOP_TIMES; j++)
{ {
random_roi(); random_roi();
cv::ocl::oclMat ocl_dists;
cv::ocl::oclMat ocl_labels;
cv::ocl::distanceToCenters(ocl_dists,ocl_labels,ocl_src_roi, ocl_centers_roi, distType);
Mat labels, dists; Mat labels, dists;
ocl_labels.download(labels); ocl::distanceToCenters(ocl_src_roi, ocl_centers_roi, dists, labels, distType);
ocl_dists.download(dists);
ASSERT_EQ(ocl_dists.cols, ocl_labels.rows); EXPECT_EQ(dists.size(), labels.size());
Mat batch_dists; Mat batch_dists;
cv::batchDistance(src_roi, centers_roi, batch_dists, CV_32FC1, noArray(), distType); cv::batchDistance(src_roi, centers_roi, batch_dists, CV_32FC1, noArray(), distType);
std::vector<double> gold_dists_v; std::vector<float> gold_dists_v;
gold_dists_v.reserve(batch_dists.rows);
for(int i = 0; i<batch_dists.rows; i++) for (int i = 0; i < batch_dists.rows; i++)
{ {
Mat r = batch_dists.row(i); Mat r = batch_dists.row(i);
double mVal; double mVal;
Point mLoc; Point mLoc;
minMaxLoc(r, &mVal, NULL, &mLoc, NULL); minMaxLoc(r, &mVal, NULL, &mLoc, NULL);
int ocl_label = *(int*)labels.row(i).col(0).data; int ocl_label = labels.at<int>(i, 0);
ASSERT_EQ(mLoc.x, ocl_label); EXPECT_EQ(mLoc.x, ocl_label);
gold_dists_v.push_back(mVal); gold_dists_v.push_back(static_cast<float>(mVal));
} }
Mat gold_dists(gold_dists_v);
dists.convertTo(dists, CV_64FC1); double relative_error = cv::norm(Mat(gold_dists_v), dists, NORM_INF | NORM_RELATIVE);
double relative_error = cv::norm(gold_dists.t(), dists, NORM_INF|NORM_RELATIVE);
ASSERT_LE(relative_error, 1e-5); ASSERT_LE(relative_error, 1e-5);
} }
} }
INSTANTIATE_TEST_CASE_P (OCL_ML, distanceToCenters, Combine(DistType::all(), Bool()));
INSTANTIATE_TEST_CASE_P (OCL_ML, distanceToCenters, Combine(DistType::all(), Bool()) );
#endif #endif

Loading…
Cancel
Save