mirror of https://github.com/opencv/opencv.git
parent
6161a3335c
commit
fa5113f303
4 changed files with 1357 additions and 0 deletions
@ -0,0 +1,419 @@ |
||||
/*M///////////////////////////////////////////////////////////////////////////////////////
|
||||
//
|
||||
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
|
||||
//
|
||||
// By downloading, copying, installing or using the software you agree to this license.
|
||||
// If you do not agree to this license, do not download, install,
|
||||
// copy or use the software.
|
||||
//
|
||||
//
|
||||
// License Agreement
|
||||
// For Open Source Computer Vision Library
|
||||
//
|
||||
// Copyright (C) 2010-2012, Multicoreware, Inc., all rights reserved.
|
||||
// Copyright (C) 2010-2012, Advanced Micro Devices, Inc., all rights reserved.
|
||||
// Third party copyrights are property of their respective owners.
|
||||
//
|
||||
// @Authors
|
||||
// Peng Xiao, pengxiao@multicorewareinc.com
|
||||
//
|
||||
// Redistribution and use in source and binary forms, with or without modification,
|
||||
// are permitted provided that the following conditions are met:
|
||||
//
|
||||
// * Redistribution's of source code must retain the above copyright notice,
|
||||
// this list of conditions and the following disclaimer.
|
||||
//
|
||||
// * Redistribution's in binary form must reproduce the above copyright notice,
|
||||
// this list of conditions and the following disclaimer in the documentation
|
||||
// and/or other materials provided with the distribution.
|
||||
//
|
||||
// * The name of the copyright holders may not be used to endorse or promote products
|
||||
// derived from this software without specific prior written permission.
|
||||
//
|
||||
// This software is provided by the copyright holders and contributors as is and
|
||||
// any express or implied warranties, including, but not limited to, the implied
|
||||
// warranties of merchantability and fitness for a particular purpose are disclaimed.
|
||||
// In no event shall the Intel Corporation or contributors be liable for any direct,
|
||||
// indirect, incidental, special, exemplary, or consequential damages
|
||||
// (including, but not limited to, procurement of substitute goods or services;
|
||||
// loss of use, data, or profits; or business interruption) however caused
|
||||
// and on any theory of liability, whether in contract, strict liability,
|
||||
// or tort (including negligence or otherwise) arising in any way out of
|
||||
// the use of this software, even if advised of the possibility of such damage.
|
||||
//
|
||||
//M*/
|
||||
|
||||
#include <iomanip> |
||||
#include "precomp.hpp" |
||||
|
||||
using namespace cv; |
||||
using namespace cv::ocl; |
||||
using namespace std; |
||||
|
||||
#if !defined (HAVE_OPENCL) |
||||
void cv::ocl::Canny(const oclMat& image, oclMat& edges, double low_thresh, double high_thresh, int apperture_size = 3, bool L2gradient = false) { throw_nogpu(); } |
||||
void cv::ocl::Canny(const oclMat& image, CannyBuf& buf, oclMat& edges, double low_thresh, double high_thresh, int apperture_size = 3, bool L2gradient = false){ throw_nogpu(); } |
||||
void cv::ocl::Canny(const oclMat& dx, const oclMat& dy, oclMat& edges, double low_thresh, double high_thresh, bool L2gradient = false){ throw_nogpu(); } |
||||
void cv::ocl::Canny(const oclMat& dx, const oclMat& dy, CannyBuf& buf, oclMat& edges, double low_thresh, double high_thresh, bool L2gradient = false){ throw_nogpu(); } |
||||
#else |
||||
|
||||
namespace cv { namespace ocl {

// Source string of the OpenCL program that holds the Canny kernels.
// Only declared here; the definition is provided by another translation unit.
extern const char *imgproc_canny;

}} // namespace cv::ocl
||||
|
||||
// Builds a buffer set around caller-supplied derivative images.
//
// dx_, dy_  x and y derivatives; both must be CV_32SC1 and of equal size.
//
// The derivative members are initialised from the arguments, then create() is
// called with aperture -1 so that only the remaining work buffers are set up
// (no Sobel scratch buffers, no derivative filters).
cv::ocl::CannyBuf::CannyBuf(const oclMat& dx_, const oclMat& dy_)
    : dx(dx_),
      dy(dy_)
{
    CV_Assert(dx_.type() == CV_32SC1 && dy_.type() == CV_32SC1 && dx_.size() == dy_.size());

    const Size derivSize = dx_.size();
    create(derivSize, -1);
}
||||
|
||||
void cv::ocl::CannyBuf::create(const Size& image_size, int apperture_size) |
||||
{ |
||||
dx.create(image_size, CV_32SC1); |
||||
dy.create(image_size, CV_32SC1); |
||||
|
||||
if(apperture_size == 3) |
||||
{ |
||||
dx_buf.create(image_size, CV_32SC1); |
||||
dy_buf.create(image_size, CV_32SC1); |
||||
} |
||||
else if(apperture_size > 0) |
||||
{ |
||||
Mat kx, ky; |
||||
if (!filterDX) |
||||
{ |
||||
filterDX = createDerivFilter_GPU(CV_32F, CV_32F, 1, 0, apperture_size, BORDER_REPLICATE); |
||||
} |
||||
if (!filterDY) |
||||
{ |
||||
filterDY = createDerivFilter_GPU(CV_32F, CV_32F, 0, 1, apperture_size, BORDER_REPLICATE); |
||||
} |
||||
} |
||||
edgeBuf.create(image_size.height + 2, image_size.width + 2, CV_32FC1); |
||||
|
||||
trackBuf1.create(1, image_size.width * image_size.height, CV_16UC2); |
||||
trackBuf2.create(1, image_size.width * image_size.height, CV_16UC2); |
||||
|
||||
counter.create(1,1, CV_32SC1); |
||||
} |
||||
|
||||
void cv::ocl::CannyBuf::release() |
||||
{ |
||||
dx.release(); |
||||
dy.release(); |
||||
dx_buf.release(); |
||||
dy_buf.release(); |
||||
edgeBuf.release(); |
||||
trackBuf1.release(); |
||||
trackBuf2.release(); |
||||
counter.release(); |
||||
} |
||||
|
||||
namespace cv { namespace ocl { |
||||
namespace canny |
||||
{ |
||||
void calcSobelRowPass_gpu(const oclMat& src, oclMat& dx_buf, oclMat& dy_buf, int rows, int cols); |
||||
|
||||
void calcMagnitude_gpu(const oclMat& dx_buf, const oclMat& dy_buf, oclMat& dx, oclMat& dy, oclMat& mag, int rows, int cols, bool L2Grad); |
||||
void calcMagnitude_gpu(const oclMat& dx, const oclMat& dy, oclMat& mag, int rows, int cols, bool L2Grad); |
||||
|
||||
void calcMap_gpu(oclMat& dx, oclMat& dy, oclMat& mag, oclMat& map, int rows, int cols, float low_thresh, float high_thresh); |
||||
|
||||
void edgesHysteresisLocal_gpu(oclMat& map, oclMat& st1, oclMat& counter, int rows, int cols); |
||||
|
||||
void edgesHysteresisGlobal_gpu(oclMat& map, oclMat& st1, oclMat& st2, oclMat& counter, int rows, int cols); |
||||
|
||||
void getEdges_gpu(oclMat& map, oclMat& dst, int rows, int cols); |
||||
} |
||||
}}// cv::ocl
|
||||
|
||||
namespace |
||||
{ |
||||
void CannyCaller(CannyBuf& buf, oclMat& dst, float low_thresh, float high_thresh) |
||||
{ |
||||
using namespace ::cv::ocl::canny; |
||||
calcMap_gpu(buf.dx, buf.dy, buf.edgeBuf, buf.edgeBuf, dst.rows, dst.cols, low_thresh, high_thresh); |
||||
|
||||
edgesHysteresisLocal_gpu(buf.edgeBuf, buf.trackBuf1, buf.counter, dst.rows, dst.cols); |
||||
|
||||
edgesHysteresisGlobal_gpu(buf.edgeBuf, buf.trackBuf1, buf.trackBuf2, buf.counter, dst.rows, dst.cols); |
||||
|
||||
getEdges_gpu(buf.edgeBuf, dst, dst.rows, dst.cols); |
||||
} |
||||
} |
||||
|
||||
void cv::ocl::Canny(const oclMat& src, oclMat& dst, double low_thresh, double high_thresh, int apperture_size, bool L2gradient) |
||||
{ |
||||
CannyBuf buf(src.size(), apperture_size); |
||||
Canny(src, buf, dst, low_thresh, high_thresh, apperture_size, L2gradient); |
||||
} |
||||
|
||||
void cv::ocl::Canny(const oclMat& src, CannyBuf& buf, oclMat& dst, double low_thresh, double high_thresh, int apperture_size, bool L2gradient) |
||||
{ |
||||
using namespace ::cv::ocl::canny; |
||||
|
||||
CV_Assert(src.type() == CV_8UC1); |
||||
|
||||
if( low_thresh > high_thresh ) |
||||
std::swap( low_thresh, high_thresh ); |
||||
|
||||
dst.create(src.size(), CV_8U); |
||||
dst.setTo(Scalar::all(0)); |
||||
|
||||
buf.create(src.size(), apperture_size); |
||||
buf.edgeBuf.setTo(Scalar::all(0)); |
||||
buf.counter.setTo(Scalar::all(0)); |
||||
|
||||
if (apperture_size == 3) |
||||
{ |
||||
calcSobelRowPass_gpu(src, buf.dx_buf, buf.dy_buf, src.rows, src.cols); |
||||
|
||||
calcMagnitude_gpu(buf.dx_buf, buf.dy_buf, buf.dx, buf.dy, buf.edgeBuf, src.rows, src.cols, L2gradient); |
||||
} |
||||
else |
||||
{ |
||||
// FIXME:
|
||||
// current ocl implementation requires the src and dst having same type
|
||||
// convertTo is time consuming so this may be optimized later.
|
||||
oclMat src_omat32f = src; |
||||
src.convertTo(src_omat32f, CV_32F); // FIXME
|
||||
|
||||
buf.filterDX->apply(src_omat32f, buf.dx); |
||||
buf.filterDY->apply(src_omat32f, buf.dy); |
||||
|
||||
buf.dx.convertTo(buf.dx, CV_32S); // FIXME
|
||||
buf.dy.convertTo(buf.dy, CV_32S); // FIXME
|
||||
|
||||
calcMagnitude_gpu(buf.dx, buf.dy, buf.edgeBuf, src.rows, src.cols, L2gradient); |
||||
} |
||||
CannyCaller(buf, dst, static_cast<float>(low_thresh), static_cast<float>(high_thresh)); |
||||
} |
||||
void cv::ocl::Canny(const oclMat& dx, const oclMat& dy, oclMat& dst, double low_thresh, double high_thresh, bool L2gradient) |
||||
{ |
||||
CannyBuf buf(dx, dy); |
||||
Canny(dx, dy, buf, dst, low_thresh, high_thresh, L2gradient); |
||||
} |
||||
|
||||
void cv::ocl::Canny(const oclMat& dx, const oclMat& dy, CannyBuf& buf, oclMat& dst, double low_thresh, double high_thresh, bool L2gradient) |
||||
{ |
||||
using namespace ::cv::ocl::canny; |
||||
|
||||
CV_Assert(dx.type() == CV_32SC1 && dy.type() == CV_32SC1 && dx.size() == dy.size()); |
||||
|
||||
if( low_thresh > high_thresh ) |
||||
std::swap( low_thresh, high_thresh); |
||||
|
||||
dst.create(dx.size(), CV_8U); |
||||
dst.setTo(Scalar::all(0)); |
||||
|
||||
buf.dx = dx; buf.dy = dy; |
||||
buf.create(dx.size(), -1); |
||||
buf.edgeBuf.setTo(Scalar::all(0)); |
||||
buf.counter.setTo(Scalar::all(0)); |
||||
calcMagnitude_gpu(buf.dx, buf.dy, buf.edgeBuf, dx.rows, dx.cols, L2gradient); |
||||
|
||||
CannyCaller(buf, dst, static_cast<float>(low_thresh), static_cast<float>(high_thresh)); |
||||
} |
||||
|
||||
void canny::calcSobelRowPass_gpu(const oclMat& src, oclMat& dx_buf, oclMat& dy_buf, int rows, int cols) |
||||
{ |
||||
Context *clCxt = src.clCxt; |
||||
string kernelName = "calcSobelRowPass"; |
||||
vector< pair<size_t, const void *> > args; |
||||
|
||||
args.push_back( make_pair( sizeof(cl_mem), (void *)&src.data)); |
||||
args.push_back( make_pair( sizeof(cl_mem), (void *)&dx_buf.data)); |
||||
args.push_back( make_pair( sizeof(cl_mem), (void *)&dy_buf.data)); |
||||
args.push_back( make_pair( sizeof(cl_int), (void *)&rows)); |
||||
args.push_back( make_pair( sizeof(cl_int), (void *)&cols)); |
||||
args.push_back( make_pair( sizeof(cl_int), (void *)&src.step)); |
||||
args.push_back( make_pair( sizeof(cl_int), (void *)&src.offset)); |
||||
args.push_back( make_pair( sizeof(cl_int), (void *)&dx_buf.step)); |
||||
args.push_back( make_pair( sizeof(cl_int), (void *)&dx_buf.offset)); |
||||
args.push_back( make_pair( sizeof(cl_int), (void *)&dy_buf.step)); |
||||
args.push_back( make_pair( sizeof(cl_int), (void *)&dy_buf.offset)); |
||||
|
||||
size_t globalThreads[3] = {cols, rows, 1}; |
||||
size_t localThreads[3] = {16, 16, 1}; |
||||
openCLExecuteKernel(clCxt, &imgproc_canny, kernelName, globalThreads, localThreads, args, -1, -1); |
||||
} |
||||
|
||||
void canny::calcMagnitude_gpu(const oclMat& dx_buf, const oclMat& dy_buf, oclMat& dx, oclMat& dy, oclMat& mag, int rows, int cols, bool L2Grad) |
||||
{ |
||||
Context *clCxt = dx_buf.clCxt; |
||||
string kernelName = "calcMagnitude_buf"; |
||||
vector< pair<size_t, const void *> > args; |
||||
|
||||
args.push_back( make_pair( sizeof(cl_mem), (void *)&dx_buf.data)); |
||||
args.push_back( make_pair( sizeof(cl_mem), (void *)&dy_buf.data)); |
||||
args.push_back( make_pair( sizeof(cl_mem), (void *)&dx.data)); |
||||
args.push_back( make_pair( sizeof(cl_mem), (void *)&dy.data)); |
||||
args.push_back( make_pair( sizeof(cl_mem), (void *)&mag.data)); |
||||
args.push_back( make_pair( sizeof(cl_int), (void *)&rows)); |
||||
args.push_back( make_pair( sizeof(cl_int), (void *)&cols)); |
||||
args.push_back( make_pair( sizeof(cl_int), (void *)&dx_buf.step)); |
||||
args.push_back( make_pair( sizeof(cl_int), (void *)&dx_buf.offset)); |
||||
args.push_back( make_pair( sizeof(cl_int), (void *)&dy_buf.step)); |
||||
args.push_back( make_pair( sizeof(cl_int), (void *)&dy_buf.offset)); |
||||
args.push_back( make_pair( sizeof(cl_int), (void *)&dx.step)); |
||||
args.push_back( make_pair( sizeof(cl_int), (void *)&dx.offset)); |
||||
args.push_back( make_pair( sizeof(cl_int), (void *)&dy.step)); |
||||
args.push_back( make_pair( sizeof(cl_int), (void *)&dy.offset)); |
||||
args.push_back( make_pair( sizeof(cl_int), (void *)&mag.step)); |
||||
args.push_back( make_pair( sizeof(cl_int), (void *)&mag.offset)); |
||||
|
||||
size_t globalThreads[3] = {cols, rows, 1}; |
||||
size_t localThreads[3] = {16, 16, 1}; |
||||
|
||||
char build_options [15] = ""; |
||||
if(L2Grad) |
||||
{ |
||||
strcat(build_options, "-D L2GRAD"); |
||||
} |
||||
openCLExecuteKernel(clCxt, &imgproc_canny, kernelName, globalThreads, localThreads, args, -1, -1, build_options); |
||||
} |
||||
void canny::calcMagnitude_gpu(const oclMat& dx, const oclMat& dy, oclMat& mag, int rows, int cols, bool L2Grad) |
||||
{ |
||||
Context *clCxt = dx.clCxt; |
||||
string kernelName = "calcMagnitude"; |
||||
vector< pair<size_t, const void *> > args; |
||||
|
||||
args.push_back( make_pair( sizeof(cl_mem), (void *)&dx.data)); |
||||
args.push_back( make_pair( sizeof(cl_mem), (void *)&dy.data)); |
||||
args.push_back( make_pair( sizeof(cl_mem), (void *)&mag.data)); |
||||
args.push_back( make_pair( sizeof(cl_int), (void *)&rows)); |
||||
args.push_back( make_pair( sizeof(cl_int), (void *)&cols)); |
||||
args.push_back( make_pair( sizeof(cl_int), (void *)&dx.step)); |
||||
args.push_back( make_pair( sizeof(cl_int), (void *)&dx.offset)); |
||||
args.push_back( make_pair( sizeof(cl_int), (void *)&dy.step)); |
||||
args.push_back( make_pair( sizeof(cl_int), (void *)&dy.offset)); |
||||
args.push_back( make_pair( sizeof(cl_int), (void *)&mag.step)); |
||||
args.push_back( make_pair( sizeof(cl_int), (void *)&mag.offset)); |
||||
|
||||
size_t globalThreads[3] = {cols, rows, 1}; |
||||
size_t localThreads[3] = {16, 16, 1}; |
||||
|
||||
char build_options [15] = ""; |
||||
if(L2Grad) |
||||
{ |
||||
strcat(build_options, "-D L2GRAD"); |
||||
} |
||||
openCLExecuteKernel(clCxt, &imgproc_canny, kernelName, globalThreads, localThreads, args, -1, -1, build_options); |
||||
} |
||||
|
||||
// Launches the edge-type map kernel: dx, dy, mag -> map.
//
// dx, dy       derivative inputs.
// mag          magnitude input.
// map          edge-type output.
// rows, cols   size of the processed area; one work-item per pixel.
// low_thresh,
// high_thresh  hysteresis thresholds passed to the kernel as floats.
//
// "calcMap" (16x16 work-groups) is used only when CALCMAP_FIXED is defined to
// a non-zero value; otherwise "calcMap_2" (256x1 work-groups) is launched.
// The argument order below must match the kernel's parameter list.
void canny::calcMap_gpu(oclMat& dx, oclMat& dy, oclMat& mag, oclMat& map, int rows, int cols, float low_thresh, float high_thresh)
{
    Context *clCxt = dx.clCxt;

    vector< pair<size_t, const void *> > args;

    // Buffers.
    args.push_back( make_pair( sizeof(cl_mem), (void *)&dx.data));
    args.push_back( make_pair( sizeof(cl_mem), (void *)&dy.data));
    args.push_back( make_pair( sizeof(cl_mem), (void *)&mag.data));
    args.push_back( make_pair( sizeof(cl_mem), (void *)&map.data));
    // Image size and thresholds.
    args.push_back( make_pair( sizeof(cl_int), (void *)&rows));
    args.push_back( make_pair( sizeof(cl_int), (void *)&cols));
    args.push_back( make_pair( sizeof(cl_float), (void *)&low_thresh));
    args.push_back( make_pair( sizeof(cl_float), (void *)&high_thresh));
    // Step / offset pairs, in buffer order.
    args.push_back( make_pair( sizeof(cl_int), (void *)&dx.step));
    args.push_back( make_pair( sizeof(cl_int), (void *)&dx.offset));
    args.push_back( make_pair( sizeof(cl_int), (void *)&dy.step));
    args.push_back( make_pair( sizeof(cl_int), (void *)&dy.offset));
    args.push_back( make_pair( sizeof(cl_int), (void *)&mag.step));
    args.push_back( make_pair( sizeof(cl_int), (void *)&mag.offset));
    args.push_back( make_pair( sizeof(cl_int), (void *)&map.step));
    args.push_back( make_pair( sizeof(cl_int), (void *)&map.offset));

    // Explicit casts: int -> size_t inside braces is a narrowing conversion,
    // which is ill-formed from C++11 on. The global size is the same for both kernels.
    size_t globalThreads[3] = {static_cast<size_t>(cols), static_cast<size_t>(rows), 1};
#if CALCMAP_FIXED
    string kernelName = "calcMap";
    size_t localThreads[3]  = {16, 16, 1};
#else
    string kernelName = "calcMap_2";
    size_t localThreads[3]  = {256, 1, 1};
#endif
    openCLExecuteKernel(clCxt, &imgproc_canny, kernelName, globalThreads, localThreads, args, -1, -1);
}
||||
|
||||
// Launches the "edgesHysteresisLocal" kernel on the edge-type map.
//
// map         edge-type map.
// st1         tracking buffer handed to the kernel.
// counter     single-element counter buffer handed to the kernel.
// rows, cols  size of the processed area; one work-item per pixel.
//
// The argument order below is presumably the kernel's parameter order - the
// kernel source is not part of this file; verify against it.
void canny::edgesHysteresisLocal_gpu(oclMat& map, oclMat& st1, oclMat& counter, int rows, int cols)
{
    Context *clCxt = map.clCxt;
    string kernelName = "edgesHysteresisLocal";
    vector< pair<size_t, const void *> > args;

    args.push_back( make_pair( sizeof(cl_mem), (void *)&map.data));
    args.push_back( make_pair( sizeof(cl_mem), (void *)&st1.data));
    args.push_back( make_pair( sizeof(cl_mem), (void *)&counter.data));
    args.push_back( make_pair( sizeof(cl_int), (void *)&rows));
    args.push_back( make_pair( sizeof(cl_int), (void *)&cols));
    args.push_back( make_pair( sizeof(cl_int), (void *)&map.step));
    args.push_back( make_pair( sizeof(cl_int), (void *)&map.offset));

    // Explicit casts: int -> size_t inside braces is a narrowing conversion,
    // which is ill-formed from C++11 on.
    size_t globalThreads[3] = {static_cast<size_t>(cols), static_cast<size_t>(rows), 1};
    size_t localThreads[3]  = {16, 16, 1};

    openCLExecuteKernel(clCxt, &imgproc_canny, kernelName, globalThreads, localThreads, args, -1, -1);
}
||||
|
||||
// Repeatedly launches the "edgesHysteresisGlobal" kernel until the counter
// buffer reads back as zero.
//
// map         edge-type map.
// st1, st2    tracking buffers; swapped after every launch.
// counter     single-element counter: read on the host before the first launch
//             and after each launch, and reset to zero before each launch.
// rows, cols  size of the processed area.
void canny::edgesHysteresisGlobal_gpu(oclMat& map, oclMat& st1, oclMat& st2, oclMat& counter, int rows, int cols)
{
    // Upper bound applied to the first dimension of the launch grid.
    const unsigned int maxGroupsX = 65535u;

    unsigned int count = Mat(counter).at<unsigned int>(0);

    Context *clCxt = map.clCxt;
    string kernelName = "edgesHysteresisGlobal";
    vector< pair<size_t, const void *> > args;
    size_t localThreads[3] = {128, 1, 1};

    while(count != 0)
    {
        counter.setTo(0);

        // min(count, 65535) groups of 128 work-items along x,
        // ceil(count / 65535) along y.
        const unsigned int groupsX = std::min(count, maxGroupsX);
        const unsigned int groupsY = (count + maxGroupsX - 1) / maxGroupsX;
        size_t globalThreads[3] = {groupsX * 128, groupsY, 1};

        args.clear();
        args.push_back( make_pair( sizeof(cl_mem), (void *)&map.data));
        args.push_back( make_pair( sizeof(cl_mem), (void *)&st1.data));
        args.push_back( make_pair( sizeof(cl_mem), (void *)&st2.data));
        args.push_back( make_pair( sizeof(cl_mem), (void *)&counter.data));
        args.push_back( make_pair( sizeof(cl_int), (void *)&rows));
        args.push_back( make_pair( sizeof(cl_int), (void *)&cols));
        args.push_back( make_pair( sizeof(cl_int), (void *)&count));
        args.push_back( make_pair( sizeof(cl_int), (void *)&map.step));
        args.push_back( make_pair( sizeof(cl_int), (void *)&map.offset));

        openCLExecuteKernel(clCxt, &imgproc_canny, kernelName, globalThreads, localThreads, args, -1, -1);

        // Read the new counter value for the next round, then exchange the
        // two tracking buffers.
        count = Mat(counter).at<unsigned int>(0);
        std::swap(st1, st2);
    }
}
||||
|
||||
// Launches the "getEdges" kernel: map -> dst.
//
// map         edge-type map after hysteresis.
// dst         output edge image.
// rows, cols  size of the processed area; one work-item per pixel.
//
// The argument order below is presumably the kernel's parameter order - the
// kernel source is not part of this file; verify against it.
void canny::getEdges_gpu(oclMat& map, oclMat& dst, int rows, int cols)
{
    Context *clCxt = map.clCxt;
    string kernelName = "getEdges";
    vector< pair<size_t, const void *> > args;

    args.push_back( make_pair( sizeof(cl_mem), (void *)&map.data));
    args.push_back( make_pair( sizeof(cl_mem), (void *)&dst.data));
    args.push_back( make_pair( sizeof(cl_int), (void *)&rows));
    args.push_back( make_pair( sizeof(cl_int), (void *)&cols));
    args.push_back( make_pair( sizeof(cl_int), (void *)&map.step));
    args.push_back( make_pair( sizeof(cl_int), (void *)&map.offset));
    args.push_back( make_pair( sizeof(cl_int), (void *)&dst.step));
    args.push_back( make_pair( sizeof(cl_int), (void *)&dst.offset));

    // Explicit casts: int -> size_t inside braces is a narrowing conversion,
    // which is ill-formed from C++11 on.
    size_t globalThreads[3] = {static_cast<size_t>(cols), static_cast<size_t>(rows), 1};
    size_t localThreads[3]  = {16, 16, 1};

    openCLExecuteKernel(clCxt, &imgproc_canny, kernelName, globalThreads, localThreads, args, -1, -1);
}
||||
|
||||
#endif // HAVE_OPENCL
|
@ -0,0 +1,798 @@ |
||||
/*M/////////////////////////////////////////////////////////////////////////////////////// |
||||
// |
||||
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING. |
||||
// |
||||
// By downloading, copying, installing or using the software you agree to this license. |
||||
// If you do not agree to this license, do not download, install, |
||||
// copy or use the software. |
||||
// |
||||
// |
||||
// License Agreement |
||||
// For Open Source Computer Vision Library |
||||
// |
||||
// Copyright (C) 2010-2012, Multicoreware, Inc., all rights reserved. |
||||
// Copyright (C) 2010-2012, Advanced Micro Devices, Inc., all rights reserved. |
||||
// Third party copyrights are property of their respective owners. |
||||
// |
||||
// @Authors |
||||
// Peng Xiao, pengxiao@multicorewareinc.com |
||||
// |
||||
// Redistribution and use in source and binary forms, with or without modification, |
||||
// are permitted provided that the following conditions are met: |
||||
// |
||||
// * Redistribution's of source code must retain the above copyright notice, |
||||
// this list of conditions and the following disclaimer. |
||||
// |
||||
// * Redistribution's in binary form must reproduce the above copyright notice, |
||||
// this list of conditions and the following disclaimer in the documentation |
||||
// and/or other materials provided with the distribution. |
||||
// |
||||
// * The name of the copyright holders may not be used to endorse or promote products |
||||
// derived from this software without specific prior written permission. |
||||
// |
||||
// This software is provided by the copyright holders and contributors as is and |
||||
// any express or implied warranties, including, but not limited to, the implied |
||||
// warranties of merchantability and fitness for a particular purpose are disclaimed. |
||||
// In no event shall the Intel Corporation or contributors be liable for any direct, |
||||
// indirect, incidental, special, exemplary, or consequential damages |
||||
// (including, but not limited to, procurement of substitute goods or services; |
||||
// loss of use, data, or profits; or business interruption) however caused |
||||
// and on any theory of liability, whether in contract, strict liability, |
||||
// or tort (including negligence or otherwise) arising in any way out of |
||||
// the use of this software, even if advised of the possibility of such damage. |
||||
// |
||||
//M*/ |
||||
|
||||
#pragma OPENCL EXTENSION cl_amd_printf : enable |
||||
#pragma OPENCL EXTENSION cl_khr_global_int32_base_atomics : enable |
||||
#pragma OPENCL EXTENSION cl_khr_local_int32_base_atomics : enable |
||||
|
||||
// Gradient magnitude of the derivative pair (x, y).
// With L2GRAD defined at build time: sqrt(x^2 + y^2); otherwise |x| + |y|.
#ifdef L2GRAD
inline float calc(int x, int y)
{
    // Square in floating point: x * x + y * y evaluated in int overflows
    // once the derivatives get large.
    const float fx = (float)x;
    const float fy = (float)y;
    return sqrt(fx * fx + fy * fy);
}
#else
inline float calc(int x, int y)
{
    return (float)abs(x) + (float)abs(y);
}
#endif // L2GRAD
||||
|
||||
// Row (horizontal) pass of the separable 3x3 Sobel operator.
//
// The 2D operator is the product of two 1D filters:
//   smoothing   h (-1) =  1, h (0) = 2, h (1) = 1
//   derivative  h'(-1) = -1, h'(0) = 0, h'(1) = 1
// e.g. for the x direction: h'(x, y) = h'(x) h(y).
// This kernel applies the horizontal 1D filters only:
//   dx_buf = -left + right             (derivative along x)
//   dy_buf =  left + 2 * centre + right (smoothing along x)
//
// src      input 8-bit single-channel image data
// dx_buf   output dx buffer
// dy_buf   output dy buffer
// rows, cols          image size
// *_step, *_offset    row pitch and start offset of each buffer, in bytes
//
// Expects 16x16 work-groups (the local tile is sized for them).
__kernel
void calcSobelRowPass
(
    __global const uchar * src,
    __global int * dx_buf,
    __global int * dy_buf,
    int rows,
    int cols,
    int src_step,
    int src_offset,
    int dx_buf_step,
    int dx_buf_offset,
    int dy_buf_step,
    int dy_buf_offset
)
{
    // src elements are one byte wide, so its step / offset are used as they are.
    dx_buf_step   /= sizeof(*dx_buf);
    dx_buf_offset /= sizeof(*dx_buf);
    dy_buf_step   /= sizeof(*dy_buf);
    dy_buf_offset /= sizeof(*dy_buf);

    int gidx = get_global_id(0);
    int gidy = get_global_id(1);

    int lidx = get_local_id(0);
    int lidy = get_local_id(1);

    // One image row segment per local row, plus one halo column on each side.
    __local int smem[16][18];

    // Work-items outside the image still take part in the load and in the
    // barrier (a barrier must be reached by every work-item of the group);
    // their coordinates are clamped so that every read stays inside the image
    // and the right edge is replicated.
    const int row_base = min(gidy, rows - 1) * src_step + src_offset;

    smem[lidy][lidx + 1] = src[min(gidx, cols - 1) + row_base];
    if(lidx == 0)
    {
        smem[lidy][0]  = src[min(max(gidx - 1, 0), cols - 1) + row_base];
        smem[lidy][17] = src[min(gidx + 16, cols - 1) + row_base];
    }
    barrier(CLK_LOCAL_MEM_FENCE);

    if(gidx < cols && gidy < rows)
    {
        dx_buf[gidx + gidy * dx_buf_step + dx_buf_offset] =
            -smem[lidy][lidx] + smem[lidy][lidx + 2];
        dy_buf[gidx + gidy * dy_buf_step + dy_buf_offset] =
            smem[lidy][lidx] + 2 * smem[lidy][lidx + 1] + smem[lidy][lidx + 2];
    }
}
||||
|
||||
// Column (vertical) pass of the separable 3x3 Sobel operator followed by the
// gradient magnitude. This is the buffered version (3x3 Sobel).
//
// dx_buf   dx buffer, calculated by calcSobelRowPass
// dy_buf   dy buffer, calculated by calcSobelRowPass
// dx       output: derivative in x direction
// dy       output: derivative in y direction
// mag      output: gradient magnitude; pixel (x, y) is stored at (x + 1, y + 1)
// rows, cols          image size
// *_step, *_offset    row pitch and start offset of each buffer, in bytes
//
// Expects 16x16 work-groups (the local tiles are sized for them).
__kernel
void calcMagnitude_buf
(
    __global const int * dx_buf,
    __global const int * dy_buf,
    __global int * dx,
    __global int * dy,
    __global float * mag,
    int rows,
    int cols,
    int dx_buf_step,
    int dx_buf_offset,
    int dy_buf_step,
    int dy_buf_offset,
    int dx_step,
    int dx_offset,
    int dy_step,
    int dy_offset,
    int mag_step,
    int mag_offset
)
{
    // Convert byte pitches / offsets into element units.
    dx_buf_step   /= sizeof(*dx_buf);
    dx_buf_offset /= sizeof(*dx_buf);
    dy_buf_step   /= sizeof(*dy_buf);
    dy_buf_offset /= sizeof(*dy_buf);
    dx_step       /= sizeof(*dx);
    dx_offset     /= sizeof(*dx);
    dy_step       /= sizeof(*dy);
    dy_offset     /= sizeof(*dy);
    mag_step      /= sizeof(*mag);
    mag_offset    /= sizeof(*mag);

    int gidx = get_global_id(0);
    int gidy = get_global_id(1);

    int lidx = get_local_id(0);
    int lidy = get_local_id(1);

    // One image column segment per local column, plus one halo row above and below.
    __local int sdx[18][16];
    __local int sdy[18][16];

    // Work-items outside the image still take part in the load and in the
    // barrier (a barrier must be reached by every work-item of the group);
    // their coordinates are clamped so that every read stays inside the
    // buffers and the bottom edge is replicated.
    const int col = min(gidx, cols - 1);
    const int row = min(gidy, rows - 1);

    sdx[lidy + 1][lidx] = dx_buf[col + row * dx_buf_step + dx_buf_offset];
    sdy[lidy + 1][lidx] = dy_buf[col + row * dy_buf_step + dy_buf_offset];
    if(lidy == 0)
    {
        const int row_above = min(max(gidy - 1, 0), rows - 1);
        const int row_below = min(gidy + 16, rows - 1);

        sdx[0][lidx]  = dx_buf[col + row_above * dx_buf_step + dx_buf_offset];
        sdx[17][lidx] = dx_buf[col + row_below * dx_buf_step + dx_buf_offset];

        sdy[0][lidx]  = dy_buf[col + row_above * dy_buf_step + dy_buf_offset];
        sdy[17][lidx] = dy_buf[col + row_below * dy_buf_step + dy_buf_offset];
    }
    barrier(CLK_LOCAL_MEM_FENCE);

    if(gidx < cols && gidy < rows)
    {
        // x: smooth the horizontal derivative vertically; y: differentiate the
        // horizontally smoothed values vertically.
        int x =  sdx[lidy][lidx] + 2 * sdx[lidy + 1][lidx] + sdx[lidy + 2][lidx];
        int y = -sdy[lidy][lidx] + sdy[lidy + 2][lidx];

        dx[gidx + gidy * dx_step + dx_offset] = x;
        dy[gidx + gidy * dy_step + dy_offset] = y;

        mag[(gidx + 1) + (gidy + 1) * mag_step + mag_offset] = calc(x, y);
    }
}
||||
|
||||
// Gradient magnitude from ready-made derivatives.
// This is the non-buffered version (non-3x3 Sobel).
//
// dx       input: derivative in x direction
// dy       input: derivative in y direction
// mag      output: gradient magnitude; pixel (x, y) is stored at (x + 1, y + 1)
// rows, cols          image size
// *_step, *_offset    row pitch and start offset of each buffer, in bytes
__kernel
void calcMagnitude
(
    __global const int * dx,
    __global const int * dy,
    __global float * mag,
    int rows,
    int cols,
    int dx_step,
    int dx_offset,
    int dy_step,
    int dy_offset,
    int mag_step,
    int mag_offset
)
{
    // Convert byte pitches / offsets into element units.
    dx_step    /= sizeof(*dx);
    dx_offset  /= sizeof(*dx);
    dy_step    /= sizeof(*dy);
    dy_offset  /= sizeof(*dy);
    mag_step   /= sizeof(*mag);
    mag_offset /= sizeof(*mag);

    const int px = get_global_id(0);
    const int py = get_global_id(1);

    // Work-items outside the image have nothing to do.
    if(py >= rows || px >= cols)
    {
        return;
    }

    const int gx = dx[px + py * dx_step + dx_offset];
    const int gy = dy[px + py * dy_step + dy_offset];

    mag[(px + 1) + (py + 1) * mag_step + mag_offset] = calc(gx, gy);
}
||||
|
||||
//////////////////////////////////////////////////////////////////////////////////////////
// Fixed-point constants for the gradient direction test.
// TG22 is tan(22.5 degrees) = 0.4142135623730950488016887242097,
// scaled by 2^CANNY_SHIFT and rounded to the nearest integer.
#define CANNY_SHIFT 15
#define TG22 ((int)(0.4142135623730950488016887242097*(1<<CANNY_SHIFT) + 0.5))
||||
|
||||
// First pass of edge detection: thresholding and non-maximum suppression.
//
// For each pixel an edge type is written to map:
//   0 - the pixel can not belong to an edge (magnitude not above low_thresh,
//       or not a local maximum along the gradient direction)
//   1 - the pixel might belong to an edge (local maximum, magnitude not above high_thresh)
//   2 - the pixel does belong to an edge (local maximum, magnitude above high_thresh)
//
// The local-maximum test compares the magnitude with two neighbours chosen
// from the gradient direction, using |dy| against tan(22.5) * |dx| and
// tan(67.5) * |dx| in fixed point:
//   |dy| < tan(22.5) |dx|  (gradient roughly horizontal): west and east neighbours
//   |dy| > tan(67.5) |dx|  (gradient roughly vertical):   north and south neighbours
//   otherwise              (gradient roughly diagonal):   the two diagonal neighbours
//                          selected by whether dx and dy have the same sign
//
// dx, dy   derivatives in x and y direction
// mag      magnitudes calculated by calcMagnitude / calcMagnitude_buf;
//          pixel (x, y) is stored at (x + 1, y + 1). The buffer is expected to be
//          (rows + 2) x (cols + 2) so that every pixel has all eight neighbours.
// map      output containing raw edge types, same layout as mag
// rows, cols          image size
// *_step, *_offset    row pitch and start offset of each buffer, in bytes
//
// Expects 16x16 work-groups (the local tile is sized for them).
// mag and map must be distinct buffers: each work-group reads mag elements
// whose map counterparts are written by neighbouring work-groups.
__kernel
void calcMap
(
    __global const int * dx,
    __global const int * dy,
    __global const float * mag,
    __global int * map,
    int rows,
    int cols,
    float low_thresh,
    float high_thresh,
    int dx_step,
    int dx_offset,
    int dy_step,
    int dy_offset,
    int mag_step,
    int mag_offset,
    int map_step,
    int map_offset
)
{
    // Convert byte pitches / offsets into element units.
    dx_step    /= sizeof(*dx);
    dx_offset  /= sizeof(*dx);
    dy_step    /= sizeof(*dy);
    dy_offset  /= sizeof(*dy);
    mag_step   /= sizeof(*mag);
    mag_offset /= sizeof(*mag);
    map_step   /= sizeof(*map);
    map_offset /= sizeof(*map);

    // 16x16 magnitudes of this work-group plus a one-element halo on every side.
    __local float smem[18][18];

    int gidx = get_global_id(0);
    int gidy = get_global_id(1);

    int lidx = get_local_id(0);
    int lidy = get_local_id(1);

    // Top-left corner of the tile in mag coordinates (halo included).
    int grp_idx = gidx - lidx;
    int grp_idy = gidy - lidy;

    // Cooperative load of all 18 * 18 tile elements by the 256 work-items.
    // Elements outside the (rows + 2) x (cols + 2) magnitude buffer read as 0.
    for(int i = lidx + lidy * 16; i < 18 * 18; i += 16 * 16)
    {
        const int lx = i % 18;
        const int ly = i / 18;
        const int mx = grp_idx + lx;
        const int my = grp_idy + ly;

        smem[ly][lx] = (mx < cols + 2 && my < rows + 2)
                       ? mag[mx + my * mag_step + mag_offset]
                       : 0.0f;
    }

    barrier(CLK_LOCAL_MEM_FENCE);

    if(gidy < rows && gidx < cols)
    {
        int x = dx[gidx + gidy * dx_step + dx_offset];
        int y = dy[gidx + gidy * dy_step + dy_offset];
        // s is -1 when dx and dy have opposite signs, +1 otherwise.
        const int s = (x ^ y) < 0 ? -1 : 1;
        const float m = smem[lidy + 1][lidx + 1];
        x = abs(x);
        y = abs(y);

        // 0 - the pixel can not belong to an edge
        // 1 - the pixel might belong to an edge
        // 2 - the pixel does belong to an edge
        int edge_type = 0;
        if(m > low_thresh)
        {
            // tan(67.5) = tan(22.5) + 2, hence the extra 2 * x in fixed point.
            const int tg22x = x * TG22;
            const int tg67x = tg22x + (x << (1 + CANNY_SHIFT));
            y <<= CANNY_SHIFT;
            if(y < tg22x)
            {
                if(m > smem[lidy + 1][lidx] && m >= smem[lidy + 1][lidx + 2])
                {
                    edge_type = 1 + (int)(m > high_thresh);
                }
            }
            else if (y > tg67x)
            {
                if(m > smem[lidy][lidx + 1] && m >= smem[lidy + 2][lidx + 1])
                {
                    edge_type = 1 + (int)(m > high_thresh);
                }
            }
            else
            {
                if(m > smem[lidy][lidx + 1 - s] && m > smem[lidy + 2][lidx + 1 + s])
                {
                    edge_type = 1 + (int)(m > high_thresh);
                }
            }
        }
        map[gidx + 1 + (gidy + 1) * map_step + map_offset] = edge_type;
    }
}
||||
|
||||
// Non-maximum suppression and double thresholding, variant without local memory.
//
// Performs the same classification as calcMap, but every magnitude is read
// directly from global memory. The magnitude of pixel (gidx, gidy) is taken
// from mag(gidx + 1, gidy + 1) and the result is stored at
// map(gidx + 1, gidy + 1).
//
// All *_step / *_offset arguments are divided by the element size below, i.e.
// they are expected in bytes.
__kernel
void calcMap_2
(
    __global const int * dx,
    __global const int * dy,
    __global const float * mag,
    __global int * map,
    int rows,
    int cols,
    float low_thresh,
    float high_thresh,
    int dx_step,
    int dx_offset,
    int dy_step,
    int dy_offset,
    int mag_step,
    int mag_offset,
    int map_step,
    int map_offset
)
{
    dx_step    /= sizeof(*dx);
    dx_offset  /= sizeof(*dx);
    dy_step    /= sizeof(*dy);
    dy_offset  /= sizeof(*dy);
    mag_step   /= sizeof(*mag);
    mag_offset /= sizeof(*mag);
    map_step   /= sizeof(*map);
    map_offset /= sizeof(*map);

    // Rebase every buffer on its first element so that the offsets are
    // honoured by all accesses below (they used to be computed but ignored).
    dx  += dx_offset;
    dy  += dy_offset;
    mag += mag_offset;
    map += map_offset;

    const int gidx = get_global_id(0);
    const int gidy = get_global_id(1);

    if (gidy < rows && gidx < cols)
    {
        int x = dx[gidx + gidy * dx_step];
        int y = dy[gidx + gidy * dy_step];
        // -1 when the two derivatives have opposite signs, +1 otherwise;
        // selects which diagonal is inspected in the last branch below.
        const int s = (x ^ y) < 0 ? -1 : 1;
        const float m = mag[gidx + 1 + (gidy + 1) * mag_step];
        x = abs(x);
        y = abs(y);

        // 0 - the pixel can not belong to an edge
        // 1 - the pixel might belong to an edge
        // 2 - the pixel does belong to an edge
        int edge_type = 0;
        if (m > low_thresh)
        {
            const int tg22x = x * TG22;
            const int tg67x = tg22x + (x << (1 + CANNY_SHIFT));
            y <<= CANNY_SHIFT;

            int is_local_max;
            if (y < tg22x)
            {
                // compare with the left / right neighbours
                is_local_max = m > mag[gidx + (gidy + 1) * mag_step]
                            && m >= mag[gidx + 2 + (gidy + 1) * mag_step];
            }
            else if (y > tg67x)
            {
                // compare with the upper / lower neighbours
                is_local_max = m > mag[gidx + 1 + gidy * mag_step]
                            && m >= mag[gidx + 1 + (gidy + 2) * mag_step];
            }
            else
            {
                // compare with the two diagonal neighbours chosen by s
                is_local_max = m > mag[gidx + 1 - s + gidy * mag_step]
                            && m > mag[gidx + 1 + s + (gidy + 2) * mag_step];
            }

            if (is_local_max)
            {
                edge_type = 1 + (int)(m > high_thresh);
            }
        }
        map[gidx + 1 + (gidy + 1) * map_step] = edge_type;
    }
}
||||
|
||||
// [256, 1, 1] threaded, local memory version
//
// Performs the same classification as calcMap, but the 16x16 pixel block is
// addressed through a one-dimensional local id (0..255) in dimension 0, while
// global id 1 selects which 16x16 block of the image is processed.
//
// All *_step / *_offset arguments are divided by the element size below, i.e.
// they are expected in bytes.
//
// NOTE(review): the block position is derived from cols / 16 (integer
// division), so cols < 16 divides by zero and a trailing partial block column
// is not addressed - confirm the host only launches this kernel with suitable
// sizes.
__kernel
void calcMap_3
(
    __global const int * dx,
    __global const int * dy,
    __global const float * mag,
    __global int * map,
    int rows,
    int cols,
    float low_thresh,
    float high_thresh,
    int dx_step,
    int dx_offset,
    int dy_step,
    int dy_offset,
    int mag_step,
    int mag_offset,
    int map_step,
    int map_offset
)
{
    dx_step    /= sizeof(*dx);
    dx_offset  /= sizeof(*dx);
    dy_step    /= sizeof(*dy);
    dy_offset  /= sizeof(*dy);
    mag_step   /= sizeof(*mag);
    mag_offset /= sizeof(*mag);
    map_step   /= sizeof(*map);
    map_offset /= sizeof(*map);

    // Rebase every buffer on its first element so that the offsets are
    // honoured by all accesses below (they used to be computed but ignored).
    dx  += dx_offset;
    dy  += dy_offset;
    mag += mag_offset;
    map += map_offset;

    // 16x16 tile of magnitudes plus a one-element halo on every side.
    __local float smem[18][18];

    // Position of this work-item inside its 16x16 block.
    const int lidx = get_local_id(0) % 16;
    const int lidy = get_local_id(0) / 16;

    // identifies which block of pixels is currently processing
    const int grp_ind = get_global_id(1);

    // Top-left corner of that block in image coordinates.
    const int grp_idx = (grp_ind % (cols / 16)) * 16;
    const int grp_idy = (grp_ind / (cols / 16)) * 16;

    const int gidx = grp_idx + lidx;
    const int gidy = grp_idy + lidy;

    // The 256 work-items are re-indexed over 18-wide rows to fill the tile:
    // the first pass covers rows 0..13, the second pass rows 14..17.
    const int tid = get_global_id(0) % 256;
    const int lx = tid % 18;
    const int ly = tid / 18;
    if (ly < 14)
    {
        smem[ly][lx] = mag[grp_idx + lx + (grp_idy + ly) * mag_step];
    }
    if (ly < 4 && grp_idy + ly + 14 <= rows && grp_idx + lx <= cols)
    {
        smem[ly + 14][lx] = mag[grp_idx + lx + (grp_idy + ly + 14) * mag_step];
    }

    barrier(CLK_LOCAL_MEM_FENCE);

    if (gidy < rows && gidx < cols)
    {
        int x = dx[gidx + gidy * dx_step];
        int y = dy[gidx + gidy * dy_step];
        // -1 when the two derivatives have opposite signs, +1 otherwise;
        // selects which diagonal is inspected in the last branch below.
        const int s = (x ^ y) < 0 ? -1 : 1;
        const float m = smem[lidy + 1][lidx + 1];
        x = abs(x);
        y = abs(y);

        // 0 - the pixel can not belong to an edge
        // 1 - the pixel might belong to an edge
        // 2 - the pixel does belong to an edge
        int edge_type = 0;
        if (m > low_thresh)
        {
            const int tg22x = x * TG22;
            const int tg67x = tg22x + (x << (1 + CANNY_SHIFT));
            y <<= CANNY_SHIFT;

            int is_local_max;
            if (y < tg22x)
            {
                // compare with the left / right neighbours
                is_local_max = m > smem[lidy + 1][lidx] && m >= smem[lidy + 1][lidx + 2];
            }
            else if (y > tg67x)
            {
                // compare with the upper / lower neighbours
                is_local_max = m > smem[lidy][lidx + 1] && m >= smem[lidy + 2][lidx + 1];
            }
            else
            {
                // compare with the two diagonal neighbours chosen by s
                is_local_max = m > smem[lidy][lidx + 1 - s] && m > smem[lidy + 2][lidx + 1 + s];
            }

            if (is_local_max)
            {
                edge_type = 1 + (int)(m > high_thresh);
            }
        }
        map[gidx + 1 + (gidy + 1) * map_step] = edge_type;
    }
}
||||
|
||||
#undef CANNY_SHIFT |
||||
#undef TG22 |
||||
|
||||
//////////////////////////////////////////////////////////////////////////////////////////
// Do hysteresis for pixels whose edge type is 1.
//
// If a candidate pixel (edge type 1) has a neighbour pixel (in its 3x3 area) with type 2,
// it is believed to be part of an edge and is marked as an edge (type 2). Each work-item
// repeats this check 16 times to connect edges inside the work-group's tile.
// A pixel that ends up as an edge is then tested for nearby potential edge points
// (type 1). If there are any, counter is incremented by 1 and the pixel location is
// stored in st. These locations are processed further in the next kernel.
//
// map        raw edge type results calculated by calcMap; updated in place
// st         the potential edge points found in this kernel call
// counter    the number of potential edge points
//
// map_step / map_offset are divided by the element size below, i.e. they are
// expected in bytes. The tile indexing hard-codes a 16x16 work-group.
__kernel
void edgesHysteresisLocal
(
    __global int * map,
    __global ushort2 * st,
    volatile __global unsigned int * counter,
    int rows,
    int cols,
    int map_step,
    int map_offset
)
{
    map_step   /= sizeof(*map);
    map_offset /= sizeof(*map);

    // Rebase map once so that the tile loads AND the write-back use the same
    // origin (the write-back used to ignore map_offset).
    map += map_offset;

    // 16x16 tile of edge types plus a one-element halo on every side.
    __local int smem[18][18];

    const int gidx = get_global_id(0);
    const int gidy = get_global_id(1);

    const int lidx = get_local_id(0);
    const int lidy = get_local_id(1);

    // Origin of this work-group's tile: global ids rounded down to a multiple of 16.
    const int grp_idx = get_global_id(0) & 0xFFFFF0;
    const int grp_idy = get_global_id(1) & 0xFFFFF0;

    // The 256 work-items are re-indexed over 18-wide rows to fill the tile:
    // the first pass covers rows 0..13, the second pass rows 14..17.
    const int tid = lidx + lidy * 16;
    const int lx = tid % 18;
    const int ly = tid / 18;
    if (ly < 14)
    {
        smem[ly][lx] = map[grp_idx + lx + (grp_idy + ly) * map_step];
    }
    if (ly < 4 && grp_idy + ly + 14 <= rows && grp_idx + lx <= cols)
    {
        smem[ly + 14][lx] = map[grp_idx + lx + (grp_idy + ly + 14) * map_step];
    }

    barrier(CLK_LOCAL_MEM_FENCE);

    if (gidy < rows && gidx < cols)
    {
        int n;

        // Promote this pixel from "maybe" to "edge" as soon as any of its
        // eight neighbours in the tile is an edge.
        #pragma unroll
        for (int k = 0; k < 16; ++k)
        {
            n = 0;

            if (smem[lidy + 1][lidx + 1] == 1)
            {
                n += smem[lidy    ][lidx    ] == 2;
                n += smem[lidy    ][lidx + 1] == 2;
                n += smem[lidy    ][lidx + 2] == 2;

                n += smem[lidy + 1][lidx    ] == 2;
                n += smem[lidy + 1][lidx + 2] == 2;

                n += smem[lidy + 2][lidx    ] == 2;
                n += smem[lidy + 2][lidx + 1] == 2;
                n += smem[lidy + 2][lidx + 2] == 2;
            }

            if (n > 0)
                smem[lidy + 1][lidx + 1] = 2;
        }

        const int e = smem[lidy + 1][lidx + 1];
        map[gidx + 1 + (gidy + 1) * map_step] = e;

        // An edge pixel that still has "maybe" neighbours is queued so the
        // following kernel can continue from it.
        n = 0;
        if (e == 2)
        {
            n += smem[lidy    ][lidx    ] == 1;
            n += smem[lidy    ][lidx + 1] == 1;
            n += smem[lidy    ][lidx + 2] == 1;

            n += smem[lidy + 1][lidx    ] == 1;
            n += smem[lidy + 1][lidx + 2] == 1;

            n += smem[lidy + 2][lidx    ] == 1;
            n += smem[lidy + 2][lidx + 1] == 1;
            n += smem[lidy + 2][lidx + 2] == 1;
        }

        if (n > 0)
        {
            // atomic_inc returns the previous value, i.e. the slot reserved here.
            const unsigned int ind = atomic_inc(counter);
            st[ind] = (ushort2)(gidx + 1, gidy + 1);
        }
    }
}
||||
|
||||
// x / y displacements of the eight neighbours of a pixel, in row-major order
// (the centre pixel itself is omitted). c_dy previously lacked its element type.
__constant int c_dx[8] = {-1,  0,  1, -1, 1, -1, 0, 1};
__constant int c_dy[8] = {-1, -1, -1,  0, 0,  1, 1, 1};
||||
|
||||
#define stack_size 512
// Continue hysteresis from the seed points collected by a previous kernel.
//
// Each work-group takes one seed position from st1 (selected by its linear
// group index) and follows connected "maybe" pixels (type 1), marking them as
// edges (type 2). Work-items are organised in groups of eight, one per
// neighbour direction (c_dx / c_dy). Positions still pending when the local
// stack gets too full to continue safely are appended to st2, and *counter is
// advanced by their number.
//
// map        edge type map, updated in place
// st1        seed positions to process in this call
// st2        positions left over for a subsequent call
// counter    number of positions written to st2
// count      number of valid entries in st1
//
// map_step / map_offset are divided by the element size below, i.e. they are
// expected in bytes.
__kernel
void edgesHysteresisGlobal
(
    __global int * map,
    __global ushort2 * st1,
    __global ushort2 * st2,
    volatile __global int * counter,
    int rows,
    int cols,
    int count,
    int map_step,
    int map_offset
)
{
    map_step   /= sizeof(*map);
    map_offset /= sizeof(*map);

    // Rebase map once so every access below honours map_offset (the first
    // neighbour pass used to ignore it while the loop did not).
    map += map_offset;

    const int lidx = get_local_id(0);

    // Number of work-items in this work-group. The stack bookkeeping below
    // depends on the work-group size, not on the number of work-groups.
    const unsigned int lsize = (unsigned int)get_local_size(0);

    const int grp_idx = get_group_id(0);
    const int grp_idy = get_group_id(1);

    volatile __local unsigned int s_counter;   // number of entries on s_st
    __local unsigned int s_ind;                // start of this group's range in st2

    __local ushort2 s_st[stack_size];          // per-work-group stack of positions

    if (lidx == 0)
    {
        s_counter = 0;
    }
    barrier(CLK_LOCAL_MEM_FENCE);

    int ind = grp_idy * get_num_groups(0) + grp_idx;

    if (ind < count)
    {
        ushort2 pos = st1[ind];
        if (pos.x > 0 && pos.x <= cols && pos.y > 0 && pos.y <= rows)
        {
            // First pass: the first eight work-items inspect the eight
            // neighbours of the seed.
            if (lidx < 8)
            {
                pos.x += c_dx[lidx];
                pos.y += c_dy[lidx];

                if (map[pos.x + pos.y * map_step] == 1)
                {
                    map[pos.x + pos.y * map_step] = 2;

                    ind = atomic_inc(&s_counter);

                    s_st[ind] = pos;
                }
            }
            barrier(CLK_LOCAL_MEM_FENCE);

            // Keep going while there is work and a full round of pushes
            // (at most one per work-item) still fits on the stack.
            while (s_counter > 0 && s_counter + lsize <= stack_size)
            {
                const int subTaskIdx = lidx >> 3;
                const int portion = min((unsigned int)s_counter, lsize >> 3);

                pos.x = pos.y = 0;

                // Each group of eight work-items pops one position.
                if (subTaskIdx < portion)
                    pos = s_st[s_counter - 1 - subTaskIdx];
                barrier(CLK_LOCAL_MEM_FENCE);

                if (lidx == 0)
                    s_counter -= portion;
                barrier(CLK_LOCAL_MEM_FENCE);

                // pos == (0, 0) marks work-items that popped nothing.
                if (pos.x > 0 && pos.x <= cols && pos.y > 0 && pos.y <= rows)
                {
                    pos.x += c_dx[lidx & 7];
                    pos.y += c_dy[lidx & 7];

                    if (map[pos.x + pos.y * map_step] == 1)
                    {
                        map[pos.x + pos.y * map_step] = 2;

                        ind = atomic_inc(&s_counter);

                        s_st[ind] = pos;
                    }
                }
                barrier(CLK_LOCAL_MEM_FENCE);
            }

            // Whatever is left is handed over through st2.
            if (s_counter > 0)
            {
                if (lidx == 0)
                {
                    // atomic_add returns the value *before* the addition,
                    // which is exactly the first slot reserved for this group.
                    s_ind = atomic_add(counter, s_counter);
                }
                barrier(CLK_LOCAL_MEM_FENCE);

                ind = s_ind;

                for (int i = lidx; i < (int)s_counter; i += (int)lsize)
                {
                    st2[ind + i] = s_st[i];
                }
            }
        }
    }
}
#undef stack_size
||||
|
||||
// Get the edge result. A pixel whose edge type is 2 is marked as an edge point
// and set to 255; every other pixel is set to 0.
//
// map         edge type mappings; pixel (gidx, gidy) is read from map(gidx + 1, gidy + 1)
// dst         edge output
//
// map_step / map_offset are divided by the element size below, i.e. they are
// expected in bytes; dst has one-byte elements, so its step and offset are
// used as passed.
__kernel
void getEdges
(
    __global const int * map,
    __global uchar * dst,
    int rows,
    int cols,
    int map_step,
    int map_offset,
    int dst_step,
    int dst_offset
)
{
    map_step   /= sizeof(*map);
    map_offset /= sizeof(*map);

    // Rebase both buffers so the offsets are honoured (they used to be ignored).
    map += map_offset;
    dst += dst_offset;

    const int gidx = get_global_id(0);
    const int gidy = get_global_id(1);

    if (gidy < rows && gidx < cols)
    {
        // type / 2 is 1 only for type 2; negating and narrowing to uchar
        // turns that 1 into 255 and leaves 0 as 0.
        dst[gidx + gidy * dst_step] = (uchar)(-(map[gidx + 1 + (gidy + 1) * map_step] / 2));
    }
}
@ -0,0 +1,112 @@ |
||||
/*M///////////////////////////////////////////////////////////////////////////////////////
|
||||
//
|
||||
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
|
||||
//
|
||||
// By downloading, copying, installing or using the software you agree to this license.
|
||||
// If you do not agree to this license, do not download, install,
|
||||
// copy or use the software.
|
||||
//
|
||||
//
|
||||
// License Agreement
|
||||
// For Open Source Computer Vision Library
|
||||
//
|
||||
// Copyright (C) 2010-2012, Multicoreware, Inc., all rights reserved.
|
||||
// Copyright (C) 2010-2012, Advanced Micro Devices, Inc., all rights reserved.
|
||||
// Third party copyrights are property of their respective owners.
|
||||
//
|
||||
// @Authors
|
||||
// Peng Xiao, pengxiao@multicorewareinc.com
|
||||
//
|
||||
// Redistribution and use in source and binary forms, with or without modification,
|
||||
// are permitted provided that the following conditions are met:
|
||||
//
|
||||
// * Redistribution's of source code must retain the above copyright notice,
|
||||
// this list of conditions and the following disclaimer.
|
||||
//
|
||||
// * Redistribution's in binary form must reproduce the above copyright notice,
|
||||
// this list of conditions and the following disclaimer in the documentation
|
||||
// and/or other oclMaterials provided with the distribution.
|
||||
//
|
||||
// * The name of the copyright holders may not be used to endorse or promote products
|
||||
// derived from this software without specific prior written permission.
|
||||
//
|
||||
// This software is provided by the copyright holders and contributors as is and
|
||||
// any express or implied warranties, including, but not limited to, the implied
|
||||
// warranties of merchantability and fitness for a particular purpose are disclaimed.
|
||||
// In no event shall the Intel Corporation or contributors be liable for any direct,
|
||||
// indirect, incidental, special, exemplary, or consequential damages
|
||||
// (including, but not limited to, procurement of substitute goods or services;
|
||||
// loss of use, data, or profits; or business interruption) however caused
|
||||
// and on any theory of liability, whether in contract, strict liability,
|
||||
// or tort (including negligence or otherwise) arising in any way out of
|
||||
// the use of this software, even if advised of the possibility of such damage.
|
||||
//
|
||||
//M*/
|
||||
|
||||
#include "precomp.hpp" |
||||
|
||||
#ifdef WIN32 |
||||
#define FILTER_IMAGE "C:/Users/Public/Pictures/Sample Pictures/Penguins.jpg" |
||||
#else |
||||
#define FILTER_IMAGE "/Users/Test/Valve_original.PNG" // user need to specify a valid image path
|
||||
#endif |
||||
#define SHOW_RESULT 0 |
||||
|
||||
////////////////////////////////////////////////////////
|
||||
// Canny
|
||||
|
||||
IMPLEMENT_PARAM_CLASS(AppertureSize, int); |
||||
IMPLEMENT_PARAM_CLASS(L2gradient, bool); |
||||
|
||||
// Fixture for the cv::ocl::Canny tests. It is parameterised on the aperture
// size and on the L2-gradient flag that are forwarded to Canny.
PARAM_TEST_CASE(Canny, AppertureSize, L2gradient)
{
    int apperture_size;
    bool useL2gradient;

    cv::Mat edges_gold;
    std::vector<cv::ocl::Info> oclinfo;

    // Reads the test parameters and requires at least one OpenCL device.
    virtual void SetUp()
    {
        useL2gradient  = GET_PARAM(1);
        apperture_size = GET_PARAM(0);

        const int devnums = getDevice(oclinfo);
        CV_Assert(devnums > 0);
    }
};
||||
|
||||
// Runs cv::ocl::Canny and the CPU cv::Canny on the same 512x384 grayscale
// image and expects the two edge maps to be similar (tolerance 1e-2).
TEST_P(Canny, Accuracy)
{
    cv::Mat img = readImage(FILTER_IMAGE, cv::IMREAD_GRAYSCALE);
    ASSERT_FALSE(img.empty());

    double low_thresh = 50.0;
    double high_thresh = 100.0;

    cv::resize(img, img, cv::Size(512, 384));
    cv::ocl::oclMat ocl_img = cv::ocl::oclMat(img);

    cv::ocl::oclMat edges;
    cv::ocl::Canny(ocl_img, edges, low_thresh, high_thresh, apperture_size, useL2gradient);

    // Reference result computed on the CPU.
    cv::Mat edges_gold;
    cv::Canny(img, edges_gold, low_thresh, high_thresh, apperture_size, useL2gradient);

#if SHOW_RESULT
    // Show both results side by side: CPU on the left, OpenCL on the right.
    cv::Mat edges_x2, ocl_edges(edges);
    edges_x2.create(edges.rows, edges.cols * 2, edges.type());
    edges_x2.setTo(0);
    cv::add(edges_gold, cv::Mat(edges_x2, cv::Rect(0, 0, edges_gold.cols, edges_gold.rows)), cv::Mat(edges_x2, cv::Rect(0, 0, edges_gold.cols, edges_gold.rows)));
    cv::add(ocl_edges, cv::Mat(edges_x2, cv::Rect(edges_gold.cols, 0, edges_gold.cols, edges_gold.rows)), cv::Mat(edges_x2, cv::Rect(edges_gold.cols, 0, edges_gold.cols, edges_gold.rows)));
    cv::namedWindow("Canny result (left: cpu, right: ocl)");
    cv::imshow("Canny result (left: cpu, right: ocl)", edges_x2);
    cv::waitKey();
#endif // SHOW_RESULT

    EXPECT_MAT_SIMILAR(edges_gold, edges, 1e-2);
}
||||
|
||||
// Instantiate the Canny tests for every combination of
// aperture size {3, 5} and L2 gradient {false, true}.
INSTANTIATE_TEST_CASE_P(
    ocl_ImgProc,
    Canny,
    testing::Combine(
        testing::Values(AppertureSize(3), AppertureSize(5)),
        testing::Values(L2gradient(false), L2gradient(true))
    )
);
Loading…
Reference in new issue