|
|
|
@ -98,80 +98,66 @@ namespace cv |
|
|
|
|
/////////////////////////////////////////////////////////////////////////////////////
|
|
|
|
|
// threshold
|
|
|
|
|
|
|
|
|
|
typedef void (*gpuThresh_t)(const oclMat &src, oclMat &dst, double thresh, double maxVal, int type); |
|
|
|
|
|
|
|
|
|
static void threshold_8u(const oclMat &src, oclMat &dst, double thresh, double maxVal, int type) |
|
|
|
|
static std::vector<uchar> scalarToVector(const cv::Scalar & sc, int depth, int ocn, int cn) |
|
|
|
|
{ |
|
|
|
|
uchar thresh_uchar = cvFloor(thresh); |
|
|
|
|
uchar max_val = cvRound(maxVal); |
|
|
|
|
CV_Assert(ocn == cn || (ocn == 4 && cn == 3)); |
|
|
|
|
|
|
|
|
|
size_t cols = (dst.cols + (dst.offset % 16) + 15) / 16; |
|
|
|
|
size_t bSizeX = 16, bSizeY = 16; |
|
|
|
|
size_t gSizeX = cols % bSizeX == 0 ? cols : (cols + bSizeX - 1) / bSizeX * bSizeX; |
|
|
|
|
size_t gSizeY = dst.rows; |
|
|
|
|
size_t globalThreads[3] = {gSizeX, gSizeY, 1}; |
|
|
|
|
size_t localThreads[3] = {bSizeX, bSizeY, 1}; |
|
|
|
|
static const int sizeMap[] = { sizeof(uchar), sizeof(char), sizeof(ushort), |
|
|
|
|
sizeof(short), sizeof(int), sizeof(float), sizeof(double) }; |
|
|
|
|
|
|
|
|
|
vector< pair<size_t, const void *> > args; |
|
|
|
|
args.push_back( make_pair(sizeof(cl_mem), &src.data)); |
|
|
|
|
args.push_back( make_pair(sizeof(cl_mem), &dst.data)); |
|
|
|
|
args.push_back( make_pair(sizeof(cl_int), (void *)&src.offset)); |
|
|
|
|
args.push_back( make_pair(sizeof(cl_int), (void *)&src.step)); |
|
|
|
|
args.push_back( make_pair(sizeof(cl_int), (void *)&dst.offset)); |
|
|
|
|
args.push_back( make_pair(sizeof(cl_int), (void *)&dst.rows)); |
|
|
|
|
args.push_back( make_pair(sizeof(cl_int), (void *)&dst.cols)); |
|
|
|
|
args.push_back( make_pair(sizeof(cl_int), (void *)&dst.step)); |
|
|
|
|
args.push_back( make_pair(sizeof(cl_uchar), (void *)&thresh_uchar)); |
|
|
|
|
args.push_back( make_pair(sizeof(cl_uchar), (void *)&max_val)); |
|
|
|
|
args.push_back( make_pair(sizeof(cl_int), (void *)&type)); |
|
|
|
|
openCLExecuteKernel(src.clCxt, &imgproc_threshold, "threshold", globalThreads, localThreads, args, src.oclchannels(), src.depth()); |
|
|
|
|
int elemSize1 = sizeMap[depth]; |
|
|
|
|
int bufSize = elemSize1 * ocn; |
|
|
|
|
std::vector<uchar> _buf(bufSize); |
|
|
|
|
uchar * buf = &_buf[0]; |
|
|
|
|
scalarToRawData(sc, buf, CV_MAKE_TYPE(depth, cn)); |
|
|
|
|
memset(buf + elemSize1 * cn, 0, (ocn - cn) * elemSize1); |
|
|
|
|
|
|
|
|
|
return _buf; |
|
|
|
|
} |
|
|
|
|
|
|
|
|
|
static void threshold_32f(const oclMat &src, oclMat &dst, double thresh, double maxVal, int type) |
|
|
|
|
static void threshold_runner(const oclMat &src, oclMat &dst, double thresh, double maxVal, int thresholdType) |
|
|
|
|
{ |
|
|
|
|
float thresh_f = thresh; |
|
|
|
|
float max_val = maxVal; |
|
|
|
|
int dst_offset = (dst.offset >> 2); |
|
|
|
|
int dst_step = (dst.step >> 2); |
|
|
|
|
int src_offset = (src.offset >> 2); |
|
|
|
|
int src_step = (src.step >> 2); |
|
|
|
|
|
|
|
|
|
size_t cols = (dst.cols + (dst_offset & 3) + 3) / 4; |
|
|
|
|
size_t bSizeX = 16, bSizeY = 16; |
|
|
|
|
size_t gSizeX = cols % bSizeX == 0 ? cols : (cols + bSizeX - 1) / bSizeX * bSizeX; |
|
|
|
|
size_t gSizeY = dst.rows; |
|
|
|
|
size_t globalThreads[3] = {gSizeX, gSizeY, 1}; |
|
|
|
|
size_t localThreads[3] = {bSizeX, bSizeY, 1}; |
|
|
|
|
bool ival = src.depth() < CV_32F; |
|
|
|
|
std::vector<uchar> thresholdValue = scalarToVector(cv::Scalar::all(ival ? cvFloor(thresh) : thresh), dst.depth(), |
|
|
|
|
dst.oclchannels(), dst.channels()); |
|
|
|
|
std::vector<uchar> maxValue = scalarToVector(cv::Scalar::all(maxVal), dst.depth(), dst.oclchannels(), dst.channels()); |
|
|
|
|
|
|
|
|
|
size_t localThreads[3] = { 16, 16, 1 }; |
|
|
|
|
size_t globalThreads[3] = { dst.cols, dst.rows, 1 }; |
|
|
|
|
|
|
|
|
|
const char * const thresholdMap[] = { "THRESH_BINARY", "THRESH_BINARY_INV", "THRESH_TRUNC", |
|
|
|
|
"THRESH_TOZERO", "THRESH_TOZERO_INV" }; |
|
|
|
|
const char * const channelMap[] = { "", "", "2", "4", "4" }; |
|
|
|
|
const char * const typeMap[] = { "uchar", "char", "ushort", "short", "int", "float", "double" }; |
|
|
|
|
std::string buildOptions = format("-D T=%s%s -D %s", typeMap[src.depth()], channelMap[src.channels()], |
|
|
|
|
thresholdMap[thresholdType]); |
|
|
|
|
|
|
|
|
|
int src_step = src.step / src.elemSize(), src_offset = src.offset / src.elemSize(); |
|
|
|
|
int dst_step = dst.step / dst.elemSize(), dst_offset = dst.offset / dst.elemSize(); |
|
|
|
|
|
|
|
|
|
vector< pair<size_t, const void *> > args; |
|
|
|
|
args.push_back( make_pair(sizeof(cl_mem), &src.data)); |
|
|
|
|
args.push_back( make_pair(sizeof(cl_mem), &dst.data)); |
|
|
|
|
args.push_back( make_pair(sizeof(cl_mem), (void *)&src.data)); |
|
|
|
|
args.push_back( make_pair(sizeof(cl_int), (void *)&src_offset)); |
|
|
|
|
args.push_back( make_pair(sizeof(cl_int), (void *)&src_step)); |
|
|
|
|
args.push_back( make_pair(sizeof(cl_mem), (void *)&dst.data)); |
|
|
|
|
args.push_back( make_pair(sizeof(cl_int), (void *)&dst_offset)); |
|
|
|
|
args.push_back( make_pair(sizeof(cl_int), (void *)&dst_step)); |
|
|
|
|
args.push_back( make_pair(sizeof(cl_int), (void *)&dst.rows)); |
|
|
|
|
args.push_back( make_pair(sizeof(cl_int), (void *)&dst.cols)); |
|
|
|
|
args.push_back( make_pair(sizeof(cl_int), (void *)&dst_step)); |
|
|
|
|
args.push_back( make_pair(sizeof(cl_float), (void *)&thresh_f)); |
|
|
|
|
args.push_back( make_pair(sizeof(cl_float), (void *)&max_val)); |
|
|
|
|
args.push_back( make_pair(sizeof(cl_int), (void *)&type)); |
|
|
|
|
|
|
|
|
|
openCLExecuteKernel(src.clCxt, &imgproc_threshold, "threshold", globalThreads, localThreads, args, src.oclchannels(), src.depth()); |
|
|
|
|
args.push_back( make_pair(thresholdValue.size(), (void *)&thresholdValue[0])); |
|
|
|
|
args.push_back( make_pair(maxValue.size(), (void *)&maxValue[0])); |
|
|
|
|
|
|
|
|
|
openCLExecuteKernel(src.clCxt, &imgproc_threshold, "threshold", globalThreads, localThreads, args, |
|
|
|
|
-1, -1, buildOptions.c_str()); |
|
|
|
|
} |
|
|
|
|
|
|
|
|
|
// threshold: support 8UC1 and 32FC1 data type and five threshold type
|
|
|
|
|
double threshold(const oclMat &src, oclMat &dst, double thresh, double maxVal, int type) |
|
|
|
|
double threshold(const oclMat &src, oclMat &dst, double thresh, double maxVal, int thresholdType) |
|
|
|
|
{ |
|
|
|
|
//TODO: These limitations shall be removed later.
|
|
|
|
|
CV_Assert(src.type() == CV_8UC1 || src.type() == CV_32FC1); |
|
|
|
|
CV_Assert(type == THRESH_BINARY || type == THRESH_BINARY_INV || type == THRESH_TRUNC |
|
|
|
|
|| type == THRESH_TOZERO || type == THRESH_TOZERO_INV ); |
|
|
|
|
CV_Assert(thresholdType == THRESH_BINARY || thresholdType == THRESH_BINARY_INV || thresholdType == THRESH_TRUNC |
|
|
|
|
|| thresholdType == THRESH_TOZERO || thresholdType == THRESH_TOZERO_INV); |
|
|
|
|
|
|
|
|
|
static const gpuThresh_t gpuThresh_callers[2] = {threshold_8u, threshold_32f}; |
|
|
|
|
|
|
|
|
|
dst.create( src.size(), src.type() ); |
|
|
|
|
gpuThresh_callers[(src.type() == CV_32FC1)](src, dst, thresh, maxVal, type); |
|
|
|
|
dst.create(src.size(), src.type()); |
|
|
|
|
threshold_runner(src, dst, thresh, maxVal, thresholdType); |
|
|
|
|
|
|
|
|
|
return thresh; |
|
|
|
|
} |
|
|
|
|