Additional notes with this commit: 1. Add cornerHarris_dxdy and cornerMinEigenVal_dxdy to get the interim dx and dy output of Sobel operator; 2. Add minMax_buf to allow user to reuse buffers in minMax; 3. Fix an error when either min or max pointer fed into minMax is NULL; 4. Corner sorter temporarily uses C++ STL's quick sort. A parallel selection sort in OpneCL is contained in the implementation but disabled due to poor performance at the moment. 5. Accuracy test for ocl gfft.pull/910/head
parent
d4255b7f75
commit
b4a4a05bdc
8 changed files with 833 additions and 24 deletions
@ -0,0 +1,351 @@ |
||||
/*M///////////////////////////////////////////////////////////////////////////////////////
|
||||
//
|
||||
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
|
||||
//
|
||||
// By downloading, copying, installing or using the software you agree to this license.
|
||||
// If you do not agree to this license, do not download, install,
|
||||
// copy or use the software.
|
||||
//
|
||||
//
|
||||
// License Agreement
|
||||
// For Open Source Computer Vision Library
|
||||
//
|
||||
// Copyright (C) 2010-2012, Multicoreware, Inc., all rights reserved.
|
||||
// Copyright (C) 2010-2012, Advanced Micro Devices, Inc., all rights reserved.
|
||||
// Third party copyrights are property of their respective owners.
|
||||
//
|
||||
// @Authors
|
||||
// Peng Xiao, pengxiao@outlook.com
|
||||
//
|
||||
// Redistribution and use in source and binary forms, with or without modification,
|
||||
// are permitted provided that the following conditions are met:
|
||||
//
|
||||
// * Redistribution's of source code must retain the above copyright notice,
|
||||
// this list of conditions and the following disclaimer.
|
||||
//
|
||||
// * Redistribution's in binary form must reproduce the above copyright notice,
|
||||
// this list of conditions and the following disclaimer in the documentation
|
||||
// and/or other oclMaterials provided with the distribution.
|
||||
//
|
||||
// * The name of the copyright holders may not be used to endorse or promote products
|
||||
// derived from this software without specific prior written permission.
|
||||
//
|
||||
// This software is provided by the copyright holders and contributors as is and
|
||||
// any express or implied warranties, including, but not limited to, the implied
|
||||
// warranties of merchantability and fitness for a particular purpose are disclaimed.
|
||||
// In no event shall the Intel Corporation or contributors be liable for any direct,
|
||||
// indirect, incidental, special, exemplary, or consequential damages
|
||||
// (including, but not limited to, procurement of substitute goods or services;
|
||||
// loss of use, data, or profits; or business interruption) however caused
|
||||
// and on any theory of liability, whether in contract, strict liability,
|
||||
// or tort (including negligence or otherwise) arising in any way out of
|
||||
// the use of this software, even if advised of the possibility of such damage.
|
||||
//
|
||||
//M*/
|
||||
#include <iomanip> |
||||
#include "precomp.hpp" |
||||
|
||||
using namespace cv; |
||||
using namespace cv::ocl; |
||||
|
||||
static bool use_cpu_sorter = true; |
||||
|
||||
namespace cv |
||||
{ |
||||
namespace ocl |
||||
{ |
||||
///////////////////////////OpenCL kernel strings///////////////////////////
|
||||
extern const char *imgproc_gfft; |
||||
} |
||||
} |
||||
|
||||
namespace |
||||
{ |
||||
enum SortMethod |
||||
{ |
||||
CPU_STL, |
||||
BITONIC, |
||||
SELECTION |
||||
}; |
||||
|
||||
const int GROUP_SIZE = 256; |
||||
|
||||
template<SortMethod method> |
||||
struct Sorter |
||||
{ |
||||
//typedef EigType;
|
||||
}; |
||||
|
||||
//TODO(pengx): optimize GPU sorter's performance thus CPU sorter is removed.
|
||||
template<> |
||||
struct Sorter<CPU_STL> |
||||
{ |
||||
typedef oclMat EigType; |
||||
static cv::Mutex cs; |
||||
static Mat mat_eig; |
||||
|
||||
//prototype
|
||||
static int clfloat2Gt(cl_float2 pt1, cl_float2 pt2) |
||||
{ |
||||
float v1 = mat_eig.at<float>(cvRound(pt1.s[1]), cvRound(pt1.s[0])); |
||||
float v2 = mat_eig.at<float>(cvRound(pt2.s[1]), cvRound(pt2.s[0])); |
||||
return v1 > v2; |
||||
} |
||||
static void sortCorners_caller(const EigType& eig_tex, oclMat& corners, const int count) |
||||
{ |
||||
cv::AutoLock lock(cs); |
||||
//temporarily use STL's sort function
|
||||
Mat mat_corners = corners; |
||||
mat_eig = eig_tex; |
||||
std::sort(mat_corners.begin<cl_float2>(), mat_corners.begin<cl_float2>() + count, clfloat2Gt); |
||||
corners = mat_corners; |
||||
} |
||||
}; |
||||
cv::Mutex Sorter<CPU_STL>::cs; |
||||
cv::Mat Sorter<CPU_STL>::mat_eig; |
||||
|
||||
template<> |
||||
struct Sorter<BITONIC> |
||||
{ |
||||
typedef TextureCL EigType; |
||||
|
||||
static void sortCorners_caller(const EigType& eig_tex, oclMat& corners, const int count) |
||||
{ |
||||
Context * cxt = Context::getContext(); |
||||
size_t globalThreads[3] = {count / 2, 1, 1}; |
||||
size_t localThreads[3] = {GROUP_SIZE, 1, 1}; |
||||
|
||||
// 2^numStages should be equal to count or the output is invalid
|
||||
int numStages = 0; |
||||
for(int i = count; i > 1; i >>= 1) |
||||
{ |
||||
++numStages; |
||||
} |
||||
const int argc = 5; |
||||
std::vector< std::pair<size_t, const void *> > args(argc); |
||||
std::string kernelname = "sortCorners_bitonicSort"; |
||||
args[0] = std::make_pair(sizeof(cl_mem), (void *)&eig_tex); |
||||
args[1] = std::make_pair(sizeof(cl_mem), (void *)&corners.data); |
||||
args[2] = std::make_pair(sizeof(cl_int), (void *)&count); |
||||
for(int stage = 0; stage < numStages; ++stage) |
||||
{ |
||||
args[3] = std::make_pair(sizeof(cl_int), (void *)&stage); |
||||
for(int passOfStage = 0; passOfStage < stage + 1; ++passOfStage) |
||||
{ |
||||
args[4] = std::make_pair(sizeof(cl_int), (void *)&passOfStage); |
||||
openCLExecuteKernel(cxt, &imgproc_gfft, kernelname, globalThreads, localThreads, args, -1, -1); |
||||
} |
||||
} |
||||
} |
||||
}; |
||||
|
||||
template<> |
||||
struct Sorter<SELECTION> |
||||
{ |
||||
typedef TextureCL EigType; |
||||
|
||||
static void sortCorners_caller(const EigType& eig_tex, oclMat& corners, const int count) |
||||
{ |
||||
Context * cxt = Context::getContext(); |
||||
|
||||
size_t globalThreads[3] = {count, 1, 1}; |
||||
size_t localThreads[3] = {GROUP_SIZE, 1, 1}; |
||||
|
||||
std::vector< std::pair<size_t, const void *> > args; |
||||
//local
|
||||
std::string kernelname = "sortCorners_selectionSortLocal"; |
||||
int lds_size = GROUP_SIZE * sizeof(cl_float2); |
||||
args.push_back( std::make_pair( sizeof(cl_mem), (void*)&eig_tex) ); |
||||
args.push_back( std::make_pair( sizeof(cl_mem), (void*)&corners.data) ); |
||||
args.push_back( std::make_pair( sizeof(cl_int), (void*)&count) ); |
||||
args.push_back( std::make_pair( lds_size, (void*)NULL) ); |
||||
|
||||
openCLExecuteKernel(cxt, &imgproc_gfft, kernelname, globalThreads, localThreads, args, -1, -1); |
||||
|
||||
//final
|
||||
kernelname = "sortCorners_selectionSortFinal"; |
||||
args.pop_back(); |
||||
openCLExecuteKernel(cxt, &imgproc_gfft, kernelname, globalThreads, localThreads, args, -1, -1); |
||||
} |
||||
}; |
||||
|
||||
int findCorners_caller( |
||||
const TextureCL& eig,
|
||||
const float threshold, |
||||
const oclMat& mask, |
||||
oclMat& corners, |
||||
const int max_count) |
||||
{ |
||||
std::vector<int> k; |
||||
Context * cxt = Context::getContext(); |
||||
|
||||
std::vector< std::pair<size_t, const void*> > args; |
||||
std::string kernelname = "findCorners"; |
||||
|
||||
const int mask_strip = mask.step / mask.elemSize1(); |
||||
|
||||
oclMat g_counter(1, 1, CV_32SC1); |
||||
g_counter.setTo(0); |
||||
|
||||
args.push_back(make_pair( sizeof(cl_mem), (void*)&eig )); |
||||
args.push_back(make_pair( sizeof(cl_mem), (void*)&mask.data )); |
||||
args.push_back(make_pair( sizeof(cl_mem), (void*)&corners.data )); |
||||
args.push_back(make_pair( sizeof(cl_int), (void*)&mask_strip)); |
||||
args.push_back(make_pair( sizeof(cl_float), (void*)&threshold )); |
||||
args.push_back(make_pair( sizeof(cl_int), (void*)&eig.rows )); |
||||
args.push_back(make_pair( sizeof(cl_int), (void*)&eig.cols )); |
||||
args.push_back(make_pair( sizeof(cl_int), (void*)&max_count )); |
||||
args.push_back(make_pair( sizeof(cl_mem), (void*)&g_counter.data )); |
||||
|
||||
size_t globalThreads[3] = {eig.cols, eig.rows, 1}; |
||||
size_t localThreads[3] = {16, 16, 1}; |
||||
|
||||
const char * opt = mask.empty() ? "" : "-D WITH_MASK"; |
||||
openCLExecuteKernel(cxt, &imgproc_gfft, kernelname, globalThreads, localThreads, args, -1, -1, opt); |
||||
return std::min(Mat(g_counter).at<int>(0), max_count); |
||||
} |
||||
}//unnamed namespace
|
||||
|
||||
void cv::ocl::GoodFeaturesToTrackDetector_OCL::operator ()(const oclMat& image, oclMat& corners, const oclMat& mask) |
||||
{ |
||||
CV_Assert(qualityLevel > 0 && minDistance >= 0 && maxCorners >= 0); |
||||
CV_Assert(mask.empty() || (mask.type() == CV_8UC1 && mask.size() == image.size())); |
||||
|
||||
CV_DbgAssert(support_image2d()); |
||||
|
||||
ensureSizeIsEnough(image.size(), CV_32F, eig_); |
||||
|
||||
if (useHarrisDetector) |
||||
cornerMinEigenVal_dxdy(image, eig_, Dx_, Dy_, blockSize, 3, harrisK); |
||||
else |
||||
cornerMinEigenVal_dxdy(image, eig_, Dx_, Dy_, blockSize, 3); |
||||
|
||||
double maxVal = 0; |
||||
minMax_buf(eig_, 0, &maxVal, oclMat(), minMaxbuf_); |
||||
|
||||
ensureSizeIsEnough(1, std::max(1000, static_cast<int>(image.size().area() * 0.05)), CV_32FC2, tmpCorners_); |
||||
|
||||
Ptr<TextureCL> eig_tex = bindTexturePtr(eig_); |
||||
int total = findCorners_caller( |
||||
*eig_tex, |
||||
static_cast<float>(maxVal * qualityLevel), |
||||
mask, |
||||
tmpCorners_, |
||||
tmpCorners_.cols); |
||||
|
||||
if (total == 0) |
||||
{ |
||||
corners.release(); |
||||
return; |
||||
} |
||||
if(use_cpu_sorter) |
||||
{ |
||||
Sorter<CPU_STL>::sortCorners_caller(eig_, tmpCorners_, total); |
||||
} |
||||
else |
||||
{ |
||||
//if total is power of 2
|
||||
if(((total - 1) & (total)) == 0) |
||||
{ |
||||
Sorter<BITONIC>::sortCorners_caller(*eig_tex, tmpCorners_, total); |
||||
} |
||||
else |
||||
{ |
||||
Sorter<SELECTION>::sortCorners_caller(*eig_tex, tmpCorners_, total); |
||||
} |
||||
} |
||||
|
||||
if (minDistance < 1) |
||||
{ |
||||
corners = tmpCorners_(Rect(0, 0, maxCorners > 0 ? std::min(maxCorners, total) : total, 1)); |
||||
} |
||||
else |
||||
{ |
||||
vector<Point2f> tmp(total); |
||||
downloadPoints(tmpCorners_, tmp); |
||||
|
||||
vector<Point2f> tmp2; |
||||
tmp2.reserve(total); |
||||
|
||||
const int cell_size = cvRound(minDistance); |
||||
const int grid_width = (image.cols + cell_size - 1) / cell_size; |
||||
const int grid_height = (image.rows + cell_size - 1) / cell_size; |
||||
|
||||
std::vector< std::vector<Point2f> > grid(grid_width * grid_height); |
||||
|
||||
for (int i = 0; i < total; ++i) |
||||
{ |
||||
Point2f p = tmp[i]; |
||||
|
||||
bool good = true; |
||||
|
||||
int x_cell = static_cast<int>(p.x / cell_size); |
||||
int y_cell = static_cast<int>(p.y / cell_size); |
||||
|
||||
int x1 = x_cell - 1; |
||||
int y1 = y_cell - 1; |
||||
int x2 = x_cell + 1; |
||||
int y2 = y_cell + 1; |
||||
|
||||
// boundary check
|
||||
x1 = std::max(0, x1); |
||||
y1 = std::max(0, y1); |
||||
x2 = std::min(grid_width - 1, x2); |
||||
y2 = std::min(grid_height - 1, y2); |
||||
|
||||
for (int yy = y1; yy <= y2; yy++) |
||||
{ |
||||
for (int xx = x1; xx <= x2; xx++) |
||||
{ |
||||
vector<Point2f>& m = grid[yy * grid_width + xx]; |
||||
|
||||
if (!m.empty()) |
||||
{ |
||||
for(size_t j = 0; j < m.size(); j++) |
||||
{ |
||||
float dx = p.x - m[j].x; |
||||
float dy = p.y - m[j].y; |
||||
|
||||
if (dx * dx + dy * dy < minDistance * minDistance) |
||||
{ |
||||
good = false; |
||||
goto break_out; |
||||
} |
||||
} |
||||
} |
||||
} |
||||
} |
||||
|
||||
break_out: |
||||
|
||||
if(good) |
||||
{ |
||||
grid[y_cell * grid_width + x_cell].push_back(p); |
||||
|
||||
tmp2.push_back(p); |
||||
|
||||
if (maxCorners > 0 && tmp2.size() == static_cast<size_t>(maxCorners)) |
||||
break; |
||||
} |
||||
} |
||||
|
||||
corners.upload(Mat(1, static_cast<int>(tmp2.size()), CV_32FC2, &tmp2[0])); |
||||
} |
||||
} |
||||
void cv::ocl::GoodFeaturesToTrackDetector_OCL::downloadPoints(const oclMat &points, vector<Point2f> &points_v) |
||||
{ |
||||
CV_DbgAssert(points.type() == CV_32FC2); |
||||
points_v.resize(points.cols); |
||||
openCLSafeCall(clEnqueueReadBuffer( |
||||
*reinterpret_cast<cl_command_queue*>(getoclCommandQueue()),
|
||||
reinterpret_cast<cl_mem>(points.data),
|
||||
CL_TRUE,
|
||||
0,
|
||||
points.cols * sizeof(Point2f),
|
||||
&points_v[0],
|
||||
0,
|
||||
NULL,
|
||||
NULL)); |
||||
} |
||||
|
||||
|
@ -0,0 +1,276 @@ |
||||
/*M/////////////////////////////////////////////////////////////////////////////////////// |
||||
// |
||||
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING. |
||||
// |
||||
// By downloading, copying, installing or using the software you agree to this license. |
||||
// If you do not agree to this license, do not download, install, |
||||
// copy or use the software. |
||||
// |
||||
// |
||||
// License Agreement |
||||
// For Open Source Computer Vision Library |
||||
// |
||||
// Copyright (C) 2010-2012, Multicoreware, Inc., all rights reserved. |
||||
// Copyright (C) 2010-2012, Advanced Micro Devices, Inc., all rights reserved. |
||||
// Third party copyrights are property of their respective owners. |
||||
// |
||||
// @Authors |
||||
// Peng Xiao, pengxiao@outlook.com |
||||
// |
||||
// Redistribution and use in source and binary forms, with or without modification, |
||||
// are permitted provided that the following conditions are met: |
||||
// |
||||
// * Redistribution's of source code must retain the above copyright notice, |
||||
// this list of conditions and the following disclaimer. |
||||
// |
||||
// * Redistribution's in binary form must reproduce the above copyright notice, |
||||
// this list of conditions and the following disclaimer in the documentation |
||||
// and/or other oclMaterials provided with the distribution. |
||||
// |
||||
// * The name of the copyright holders may not be used to endorse or promote products |
||||
// derived from this software without specific prior written permission. |
||||
// |
||||
// This software is provided by the copyright holders and contributors as is and |
||||
// any express or implied warranties, including, but not limited to, the implied |
||||
// warranties of merchantability and fitness for a particular purpose are disclaimed. |
||||
// In no event shall the Intel Corporation or contributors be liable for any direct, |
||||
// indirect, incidental, special, exemplary, or consequential damages |
||||
// (including, but not limited to, procurement of substitute goods or services; |
||||
// loss of use, data, or profits; or business interruption) however caused |
||||
// and on any theory of liability, whether in contract, strict liability, |
||||
// or tort (including negligence or otherwise) arising in any way out of |
||||
// the use of this software, even if advised of the possibility of such damage. |
||||
// |
||||
//M*/ |
||||
|
||||
#ifndef WITH_MASK |
||||
#define WITH_MASK 0 |
||||
#endif |
||||
|
||||
__constant sampler_t sampler = CLK_NORMALIZED_COORDS_FALSE | CLK_ADDRESS_CLAMP_TO_EDGE | CLK_FILTER_NEAREST; |
||||
|
||||
inline float ELEM_INT2(image2d_t _eig, int _x, int _y) |
||||
{ |
||||
return read_imagef(_eig, sampler, (int2)(_x, _y)).x; |
||||
} |
||||
|
||||
inline float ELEM_FLT2(image2d_t _eig, float2 pt) |
||||
{ |
||||
return read_imagef(_eig, sampler, pt).x; |
||||
} |
||||
|
||||
__kernel |
||||
void findCorners |
||||
( |
||||
image2d_t eig, |
||||
__global const char * mask, |
||||
__global float2 * corners, |
||||
const int mask_strip,// in pixels |
||||
const float threshold, |
||||
const int rows, |
||||
const int cols, |
||||
const int max_count, |
||||
__global int * g_counter |
||||
) |
||||
{ |
||||
const int j = get_global_id(0); |
||||
const int i = get_global_id(1); |
||||
|
||||
if (i > 0 && i < rows - 1 && j > 0 && j < cols - 1 |
||||
#if WITH_MASK |
||||
&& mask[i * mask_strip + j] != 0 |
||||
#endif |
||||
) |
||||
{ |
||||
const float val = ELEM_INT2(eig, j, i); |
||||
|
||||
if (val > threshold) |
||||
{ |
||||
float maxVal = val; |
||||
|
||||
maxVal = fmax(ELEM_INT2(eig, j - 1, i - 1), maxVal); |
||||
maxVal = fmax(ELEM_INT2(eig, j , i - 1), maxVal); |
||||
maxVal = fmax(ELEM_INT2(eig, j + 1, i - 1), maxVal); |
||||
|
||||
maxVal = fmax(ELEM_INT2(eig, j - 1, i), maxVal); |
||||
maxVal = fmax(ELEM_INT2(eig, j + 1, i), maxVal); |
||||
|
||||
maxVal = fmax(ELEM_INT2(eig, j - 1, i + 1), maxVal); |
||||
maxVal = fmax(ELEM_INT2(eig, j , i + 1), maxVal); |
||||
maxVal = fmax(ELEM_INT2(eig, j + 1, i + 1), maxVal); |
||||
|
||||
if (val == maxVal) |
||||
{ |
||||
const int ind = atomic_inc(g_counter); |
||||
|
||||
if (ind < max_count) |
||||
corners[ind] = (float2)(j, i); |
||||
} |
||||
} |
||||
} |
||||
} |
||||
|
||||
//bitonic sort |
||||
__kernel |
||||
void sortCorners_bitonicSort |
||||
( |
||||
image2d_t eig, |
||||
__global float2 * corners, |
||||
const int count, |
||||
const int stage, |
||||
const int passOfStage |
||||
) |
||||
{ |
||||
const int threadId = get_global_id(0); |
||||
if(threadId >= count / 2) |
||||
{ |
||||
return; |
||||
} |
||||
|
||||
const int sortOrder = (((threadId/(1 << stage)) % 2)) == 1 ? 1 : 0; // 0 is descent |
||||
|
||||
const int pairDistance = 1 << (stage - passOfStage); |
||||
const int blockWidth = 2 * pairDistance; |
||||
|
||||
const int leftId = min( (threadId % pairDistance) |
||||
+ (threadId / pairDistance) * blockWidth, count ); |
||||
|
||||
const int rightId = min( leftId + pairDistance, count ); |
||||
|
||||
const float2 leftPt = corners[leftId]; |
||||
const float2 rightPt = corners[rightId]; |
||||
|
||||
const float leftVal = ELEM_FLT2(eig, leftPt); |
||||
const float rightVal = ELEM_FLT2(eig, rightPt); |
||||
|
||||
const bool compareResult = leftVal > rightVal; |
||||
|
||||
float2 greater = compareResult ? leftPt:rightPt; |
||||
float2 lesser = compareResult ? rightPt:leftPt; |
||||
|
||||
corners[leftId] = sortOrder ? lesser : greater; |
||||
corners[rightId] = sortOrder ? greater : lesser; |
||||
} |
||||
|
||||
//selection sort for gfft |
||||
//kernel is ported from Bolt library: |
||||
//https://github.com/HSA-Libraries/Bolt/blob/master/include/bolt/cl/sort_kernels.cl |
||||
// Local sort will firstly sort elements of each workgroup using selection sort |
||||
// its performance is O(n) |
||||
__kernel |
||||
void sortCorners_selectionSortLocal |
||||
( |
||||
image2d_t eig, |
||||
__global float2 * corners, |
||||
const int count, |
||||
__local float2 * scratch |
||||
) |
||||
{ |
||||
int i = get_local_id(0); // index in workgroup |
||||
int numOfGroups = get_num_groups(0); // index in workgroup |
||||
int groupID = get_group_id(0); |
||||
int wg = get_local_size(0); // workgroup size = block size |
||||
int n; // number of elements to be processed for this work group |
||||
|
||||
int offset = groupID * wg; |
||||
int same = 0; |
||||
corners += offset; |
||||
n = (groupID == (numOfGroups-1))? (count - wg*(numOfGroups-1)) : wg; |
||||
float2 pt1, pt2; |
||||
|
||||
pt1 = corners[min(i, n)]; |
||||
scratch[i] = pt1; |
||||
barrier(CLK_LOCAL_MEM_FENCE); |
||||
|
||||
if(i >= n) |
||||
{ |
||||
return; |
||||
} |
||||
|
||||
float val1 = ELEM_FLT2(eig, pt1); |
||||
float val2; |
||||
|
||||
int pos = 0; |
||||
for (int j=0;j<n;++j) |
||||
{ |
||||
pt2 = scratch[j]; |
||||
val2 = ELEM_FLT2(eig, pt2); |
||||
if(val2 > val1) |
||||
pos++;//calculate the rank of this element in this work group |
||||
else |
||||
{ |
||||
if(val1 > val2) |
||||
continue; |
||||
else |
||||
{ |
||||
// val1 and val2 are same |
||||
same++; |
||||
} |
||||
} |
||||
} |
||||
for (int j=0; j< same; j++) |
||||
corners[pos + j] = pt1; |
||||
} |
||||
__kernel |
||||
void sortCorners_selectionSortFinal |
||||
( |
||||
image2d_t eig, |
||||
__global float2 * corners, |
||||
const int count |
||||
) |
||||
{ |
||||
const int i = get_local_id(0); // index in workgroup |
||||
const int numOfGroups = get_num_groups(0); // index in workgroup |
||||
const int groupID = get_group_id(0); |
||||
const int wg = get_local_size(0); // workgroup size = block size |
||||
int pos = 0, same = 0; |
||||
const int offset = get_group_id(0) * wg; |
||||
const int remainder = count - wg*(numOfGroups-1); |
||||
|
||||
if((offset + i ) >= count) |
||||
return; |
||||
float2 pt1, pt2; |
||||
pt1 = corners[groupID*wg + i]; |
||||
|
||||
float val1 = ELEM_FLT2(eig, pt1); |
||||
float val2; |
||||
|
||||
for(int j=0; j<numOfGroups-1; j++ ) |
||||
{ |
||||
for(int k=0; k<wg; k++) |
||||
{ |
||||
pt2 = corners[j*wg + k]; |
||||
val2 = ELEM_FLT2(eig, pt2); |
||||
if(val1 > val2) |
||||
break; |
||||
else |
||||
{ |
||||
//Increment only if the value is not the same. |
||||
if( val2 > val1 ) |
||||
pos++; |
||||
else |
||||
same++; |
||||
} |
||||
} |
||||
} |
||||
|
||||
for(int k=0; k<remainder; k++) |
||||
{ |
||||
pt2 = corners[(numOfGroups-1)*wg + k]; |
||||
val2 = ELEM_FLT2(eig, pt2); |
||||
if(val1 > val2) |
||||
break; |
||||
else |
||||
{ |
||||
//Don't increment if the value is the same. |
||||
//Two elements are same if (*userComp)(jData, iData) and (*userComp)(iData, jData) are both false |
||||
if(val2 > val1) |
||||
pos++; |
||||
else |
||||
same++; |
||||
} |
||||
} |
||||
for (int j=0; j< same; j++) |
||||
corners[pos + j] = pt1; |
||||
} |
||||
|
Loading…
Reference in new issue