Remove compiler warnings from some functions in the ocl module

pull/32/head
yao 12 years ago
parent 82b30963d2
commit 2e36338636
  1. 26
      modules/ocl/src/canny.cpp
  2. 8
      modules/ocl/src/kernels/imgproc_canny.cl
  3. 15
      modules/ocl/src/match_template.cpp
  4. 459
      modules/ocl/src/surf.cpp
  5. 1
      modules/ocl/test/test_match_template.cpp

@ -75,13 +75,13 @@ cv::ocl::CannyBuf::CannyBuf(const oclMat& dx_, const oclMat& dy_) : dx(dx_), dy(
void cv::ocl::CannyBuf::create(const Size& image_size, int apperture_size)
{
dx.create(image_size, CV_32SC1);
dy.create(image_size, CV_32SC1);
ensureSizeIsEnough(image_size, CV_32SC1, dx);
ensureSizeIsEnough(image_size, CV_32SC1, dy);
if(apperture_size == 3)
{
dx_buf.create(image_size, CV_32SC1);
dy_buf.create(image_size, CV_32SC1);
ensureSizeIsEnough(image_size, CV_32SC1, dx_buf);
ensureSizeIsEnough(image_size, CV_32SC1, dy_buf);
}
else if(apperture_size > 0)
{
@ -95,18 +95,18 @@ void cv::ocl::CannyBuf::create(const Size& image_size, int apperture_size)
filterDY = createDerivFilter_GPU(CV_8U, CV_32S, 0, 1, apperture_size, BORDER_REPLICATE);
}
}
edgeBuf.create(image_size.height + 2, image_size.width + 2, CV_32FC1);
ensureSizeIsEnough(image_size.height + 2, image_size.width + 2, CV_32FC1, edgeBuf);
trackBuf1.create(1, image_size.width * image_size.height, CV_16UC2);
trackBuf2.create(1, image_size.width * image_size.height, CV_16UC2);
ensureSizeIsEnough(1, image_size.width * image_size.height, CV_16UC2, trackBuf1);
ensureSizeIsEnough(1, image_size.width * image_size.height, CV_16UC2, trackBuf2);
float counter_f [1] = { 0 };
int counter_i [1] = { 0 };
int err = 0;
if(counter)
{
openCLFree(counter);
}
counter = clCreateBuffer( Context::getContext()->impl->clContext, CL_MEM_COPY_HOST_PTR, sizeof(float), counter_f, &err );
counter = clCreateBuffer( Context::getContext()->impl->clContext, CL_MEM_COPY_HOST_PTR, sizeof(int), counter_i, &err );
openCLSafeCall(err);
}
@ -357,16 +357,18 @@ void canny::edgesHysteresisLocal_gpu(oclMat& map, oclMat& st1, void * counter, i
void canny::edgesHysteresisGlobal_gpu(oclMat& map, oclMat& st1, oclMat& st2, void * counter, int rows, int cols)
{
unsigned int count;
openCLSafeCall(clEnqueueReadBuffer(Context::getContext()->impl->clCmdQueue, (cl_mem)counter, 1, 0, sizeof(float), &count, NULL, NULL, NULL));
openCLSafeCall(clEnqueueReadBuffer(Context::getContext()->impl->clCmdQueue, (cl_mem)counter, 1, 0, sizeof(float), &count, 0, NULL, NULL));
Context *clCxt = map.clCxt;
string kernelName = "edgesHysteresisGlobal";
vector< pair<size_t, const void *> > args;
size_t localThreads[3] = {128, 1, 1};
#define DIVUP(a, b) ((a)+(b)-1)/(b)
int count_i[1] = {0};
while(count > 0)
{
openCLSafeCall(clEnqueueWriteBuffer(Context::getContext()->impl->clCmdQueue, (cl_mem)counter, 1, 0, sizeof(int), &count_i, 0, NULL, NULL));
args.clear();
size_t globalThreads[3] = {std::min(count, 65535u) * 128, DIVUP(count, 65535), 1};
args.push_back( make_pair( sizeof(cl_mem), (void *)&map.data));
@ -380,7 +382,7 @@ void canny::edgesHysteresisGlobal_gpu(oclMat& map, oclMat& st1, oclMat& st2, voi
args.push_back( make_pair( sizeof(cl_int), (void *)&map.offset));
openCLExecuteKernel(clCxt, &imgproc_canny, kernelName, globalThreads, localThreads, args, -1, -1);
openCLSafeCall(clEnqueueReadBuffer(Context::getContext()->impl->clCmdQueue, (cl_mem)counter, 1, 0, sizeof(float), &count, NULL, NULL, NULL));
openCLSafeCall(clEnqueueReadBuffer(Context::getContext()->impl->clCmdQueue, (cl_mem)counter, 1, 0, sizeof(int), &count, 0, NULL, NULL));
std::swap(st1, st2);
}
#undef DIVUP

@ -601,7 +601,7 @@ __kernel
{
int n;
#pragma unroll
#pragma unroll
for (int k = 0; k < 16; ++k)
{
n = 0;
@ -686,12 +686,6 @@ __kernel
__local ushort2 s_st[stack_size];
if(gidx + gidy == 0)
{
*counter = 0;
}
barrier(CLK_GLOBAL_MEM_FENCE);
if(lidx == 0)
{
s_counter = 0;

@ -114,7 +114,7 @@ namespace cv { namespace ocl
//////////////////////////////////////////////////////////////////////
// SQDIFF
void matchTemplate_SQDIFF(
const oclMat& image, const oclMat& templ, oclMat& result, MatchTemplateBuf &buf)
const oclMat& image, const oclMat& templ, oclMat& result, MatchTemplateBuf &)
{
result.create(image.rows - templ.rows + 1, image.cols - templ.cols + 1, CV_32F);
if (templ.size().area() < getTemplateThreshold(CV_TM_SQDIFF, image.depth()))
@ -167,10 +167,11 @@ namespace cv { namespace ocl
}
void matchTemplateNaive_SQDIFF(
const oclMat& image, const oclMat& templ, oclMat& result, int cn)
const oclMat& image, const oclMat& templ, oclMat& result, int)
{
CV_Assert((image.depth() == CV_8U && templ.depth() == CV_8U )
|| (image.depth() == CV_32F && templ.depth() == CV_32F) && result.depth() == CV_32F);
|| ((image.depth() == CV_32F && templ.depth() == CV_32F) && result.depth() == CV_32F)
);
CV_Assert(image.channels() == templ.channels() && (image.channels() == 1 || image.channels() == 4) && result.channels() == 1);
CV_Assert(result.rows == image.rows - templ.rows + 1 && result.cols == image.cols - templ.cols + 1);
@ -263,10 +264,11 @@ namespace cv { namespace ocl
}
void matchTemplateNaive_CCORR(
const oclMat& image, const oclMat& templ, oclMat& result, int cn)
const oclMat& image, const oclMat& templ, oclMat& result, int)
{
CV_Assert((image.depth() == CV_8U && templ.depth() == CV_8U )
|| (image.depth() == CV_32F && templ.depth() == CV_32F) && result.depth() == CV_32F);
|| ((image.depth() == CV_32F && templ.depth() == CV_32F) && result.depth() == CV_32F)
);
CV_Assert(image.channels() == templ.channels() && (image.channels() == 1 || image.channels() == 4) && result.channels() == 1);
CV_Assert(result.rows == image.rows - templ.rows + 1 && result.cols == image.cols - templ.cols + 1);
@ -341,6 +343,7 @@ namespace cv { namespace ocl
templ_sum = sum(templ) / templ.size().area();
buf.image_sums.resize(buf.images.size());
for(int i = 0; i < image.channels(); i ++)
{
integral(buf.images[i], buf.image_sums[i]);
@ -408,7 +411,7 @@ namespace cv { namespace ocl
#else
oclMat templ_sqr = templ;
multiply(templ,templ, templ_sqr);
templ_sqsum = sum(templ_sqr)[0];
templ_sqsum = saturate_cast<float>(sum(templ_sqr)[0]);
#endif //SQRSUM_FIXED
templ_sqsum -= scale * templ_sum * templ_sum;
templ_sum *= scale;

@ -44,7 +44,7 @@
//M*/
#include <iomanip>
#include "precomp.hpp"
#include "opencv2/highgui/highgui.hpp"
using namespace cv;
using namespace cv::ocl;
@ -72,195 +72,197 @@ namespace cv { namespace ocl
extern const char * nonfree_surf;
}}
namespace
static inline int divUp(int total, int grain)
{
static inline int divUp(int total, int grain)
{
return (total + grain - 1) / grain;
}
static inline int calcSize(int octave, int layer)
{
/* Wavelet size at first layer of first octave. */
const int HAAR_SIZE0 = 9;
return (total + grain - 1) / grain;
}
static inline int calcSize(int octave, int layer)
{
/* Wavelet size at first layer of first octave. */
const int HAAR_SIZE0 = 9;
/* Wavelet size increment between layers. This should be an even number,
such that the wavelet sizes in an octave are either all even or all odd.
This ensures that when looking for the neighbours of a sample, the layers
/* Wavelet size increment between layers. This should be an even number,
such that the wavelet sizes in an octave are either all even or all odd.
This ensures that when looking for the neighbours of a sample, the layers
above and below are aligned correctly. */
const int HAAR_SIZE_INC = 6;
above and below are aligned correctly. */
const int HAAR_SIZE_INC = 6;
return (HAAR_SIZE0 + HAAR_SIZE_INC * layer) << octave;
}
return (HAAR_SIZE0 + HAAR_SIZE_INC * layer) << octave;
}
class SURF_OCL_Invoker
{
public:
// facilities
void bindImgTex(const oclMat& img);
void bindSumTex(const oclMat& sum);
void bindMaskSumTex(const oclMat& maskSum);
class SURF_OCL_Invoker
{
public:
// facilities
void bindImgTex(const oclMat& img, cl_mem & texture);
//void loadGlobalConstants(int maxCandidates, int maxFeatures, int img_rows, int img_cols, int nOctaveLayers, float hessianThreshold);
//void loadOctaveConstants(int octave, int layer_rows, int layer_cols);
//void loadGlobalConstants(int maxCandidates, int maxFeatures, int img_rows, int img_cols, int nOctaveLayers, float hessianThreshold);
//void loadOctaveConstants(int octave, int layer_rows, int layer_cols);
// kernel caller declarations
void icvCalcLayerDetAndTrace_gpu(oclMat& det, oclMat& trace, int octave, int nOctaveLayers, int layer_rows);
// kernel caller declarations
void icvCalcLayerDetAndTrace_gpu(oclMat& det, oclMat& trace, int octave, int nOctaveLayers, int layer_rows);
void icvFindMaximaInLayer_gpu(const oclMat& det, const oclMat& trace, oclMat& maxPosBuffer, oclMat& maxCounter, int counterOffset,
int octave, bool use_mask, int nLayers, int layer_rows, int layer_cols);
void icvFindMaximaInLayer_gpu(const oclMat& det, const oclMat& trace, oclMat& maxPosBuffer, oclMat& maxCounter, int counterOffset,
int octave, bool use_mask, int nLayers, int layer_rows, int layer_cols);
void icvInterpolateKeypoint_gpu(const oclMat& det, const oclMat& maxPosBuffer, unsigned int maxCounter,
oclMat& keypoints, oclMat& counters, int octave, int layer_rows, int maxFeatures);
void icvInterpolateKeypoint_gpu(const oclMat& det, const oclMat& maxPosBuffer, unsigned int maxCounter,
oclMat& keypoints, oclMat& counters, int octave, int layer_rows, int maxFeatures);
void icvCalcOrientation_gpu(const oclMat& keypoints, int nFeatures);
void icvCalcOrientation_gpu(const oclMat& keypoints, int nFeatures);
void compute_descriptors_gpu(const oclMat& descriptors, const oclMat& keypoints, int nFeatures);
// end of kernel caller declarations
void compute_descriptors_gpu(const oclMat& descriptors, const oclMat& keypoints, int nFeatures);
// end of kernel caller declarations
SURF_OCL_Invoker(SURF_OCL& surf, const oclMat& img, const oclMat& mask) :
surf_(surf),
img_cols(img.cols), img_rows(img.rows),
use_mask(!mask.empty()),
imgTex(NULL), sumTex(NULL), maskSumTex(NULL)
{
CV_Assert(!img.empty() && img.type() == CV_8UC1);
CV_Assert(mask.empty() || (mask.size() == img.size() && mask.type() == CV_8UC1));
CV_Assert(surf_.nOctaves > 0 && surf_.nOctaveLayers > 0);
SURF_OCL_Invoker(SURF_OCL& surf, const oclMat& img, const oclMat& mask) :
surf_(surf),
img_cols(img.cols), img_rows(img.rows),
use_mask(!mask.empty()),
imgTex(NULL), sumTex(NULL), maskSumTex(NULL)
{
CV_Assert(!img.empty() && img.type() == CV_8UC1);
CV_Assert(mask.empty() || (mask.size() == img.size() && mask.type() == CV_8UC1));
CV_Assert(surf_.nOctaves > 0 && surf_.nOctaveLayers > 0);
const int min_size = calcSize(surf_.nOctaves - 1, 0);
CV_Assert(img_rows - min_size >= 0);
CV_Assert(img_cols - min_size >= 0);
const int min_size = calcSize(surf_.nOctaves - 1, 0);
CV_Assert(img_rows - min_size >= 0);
CV_Assert(img_cols - min_size >= 0);
const int layer_rows = img_rows >> (surf_.nOctaves - 1);
const int layer_cols = img_cols >> (surf_.nOctaves - 1);
const int min_margin = ((calcSize((surf_.nOctaves - 1), 2) >> 1) >> (surf_.nOctaves - 1)) + 1;
CV_Assert(layer_rows - 2 * min_margin > 0);
CV_Assert(layer_cols - 2 * min_margin > 0);
const int layer_rows = img_rows >> (surf_.nOctaves - 1);
const int layer_cols = img_cols >> (surf_.nOctaves - 1);
const int min_margin = ((calcSize((surf_.nOctaves - 1), 2) >> 1) >> (surf_.nOctaves - 1)) + 1;
CV_Assert(layer_rows - 2 * min_margin > 0);
CV_Assert(layer_cols - 2 * min_margin > 0);
maxFeatures = std::min(static_cast<int>(img.size().area() * surf.keypointsRatio), 65535);
maxCandidates = std::min(static_cast<int>(1.5 * maxFeatures), 65535);
maxFeatures = std::min(static_cast<int>(img.size().area() * surf.keypointsRatio), 65535);
maxCandidates = std::min(static_cast<int>(1.5 * maxFeatures), 65535);
CV_Assert(maxFeatures > 0);
CV_Assert(maxFeatures > 0);
counters.create(1, surf_.nOctaves + 1, CV_32SC1);
counters.setTo(Scalar::all(0));
counters.create(1, surf_.nOctaves + 1, CV_32SC1);
counters.setTo(Scalar::all(0));
//loadGlobalConstants(maxCandidates, maxFeatures, img_rows, img_cols, surf_.nOctaveLayers, static_cast<float>(surf_.hessianThreshold));
//loadGlobalConstants(maxCandidates, maxFeatures, img_rows, img_cols, surf_.nOctaveLayers, static_cast<float>(surf_.hessianThreshold));
bindImgTex(img);
integral(img, surf_.sum); // the two argumented integral version is incorrect
bindImgTex(img, imgTex);
integral(img, surf_.sum); // the two argumented integral version is incorrect
bindSumTex(surf_.sum);
maskSumTex = 0;
bindImgTex(surf_.sum, sumTex);
maskSumTex = 0;
if (use_mask)
{
throw std::exception();
//!FIXME
// temp fix for missing min overload
oclMat temp(mask.size(), mask.type());
temp.setTo(Scalar::all(1.0));
//cv::ocl::min(mask, temp, surf_.mask1); ///////// disable this
integral(surf_.mask1, surf_.maskSum);
bindMaskSumTex(surf_.maskSum);
}
if (use_mask)
{
throw std::exception();
//!FIXME
// temp fix for missing min overload
oclMat temp(mask.size(), mask.type());
temp.setTo(Scalar::all(1.0));
//cv::ocl::min(mask, temp, surf_.mask1); ///////// disable this
integral(surf_.mask1, surf_.maskSum);
bindImgTex(surf_.maskSum, maskSumTex);
}
}
void detectKeypoints(oclMat& keypoints)
{
// create image pyramid buffers
// different layers have same sized buffers, but they are sampled from gaussin kernel.
surf_.det.create(img_rows * (surf_.nOctaveLayers + 2), img_cols, CV_32FC1);
surf_.trace.create(img_rows * (surf_.nOctaveLayers + 2), img_cols, CV_32FC1);
void detectKeypoints(oclMat& keypoints)
{
// create image pyramid buffers
// different layers have same sized buffers, but they are sampled from gaussin kernel.
ensureSizeIsEnough(img_rows * (surf_.nOctaveLayers + 2), img_cols, CV_32FC1, surf_.det);
ensureSizeIsEnough(img_rows * (surf_.nOctaveLayers + 2), img_cols, CV_32FC1, surf_.trace);
surf_.maxPosBuffer.create(1, maxCandidates, CV_32SC4);
keypoints.create(SURF_OCL::ROWS_COUNT, maxFeatures, CV_32FC1);
keypoints.setTo(Scalar::all(0));
ensureSizeIsEnough(1, maxCandidates, CV_32SC4, surf_.maxPosBuffer);
ensureSizeIsEnough(SURF_OCL::ROWS_COUNT, maxFeatures, CV_32FC1, keypoints);
keypoints.setTo(Scalar::all(0));
for (int octave = 0; octave < surf_.nOctaves; ++octave)
{
const int layer_rows = img_rows >> octave;
const int layer_cols = img_cols >> octave;
for (int octave = 0; octave < surf_.nOctaves; ++octave)
{
const int layer_rows = img_rows >> octave;
const int layer_cols = img_cols >> octave;
//loadOctaveConstants(octave, layer_rows, layer_cols);
//loadOctaveConstants(octave, layer_rows, layer_cols);
icvCalcLayerDetAndTrace_gpu(surf_.det, surf_.trace, octave, surf_.nOctaveLayers, layer_rows);
icvCalcLayerDetAndTrace_gpu(surf_.det, surf_.trace, octave, surf_.nOctaveLayers, layer_rows);
icvFindMaximaInLayer_gpu(surf_.det, surf_.trace, surf_.maxPosBuffer, counters, 1 + octave,
octave, use_mask, surf_.nOctaveLayers, layer_rows, layer_cols);
icvFindMaximaInLayer_gpu(surf_.det, surf_.trace, surf_.maxPosBuffer, counters, 1 + octave,
octave, use_mask, surf_.nOctaveLayers, layer_rows, layer_cols);
unsigned int maxCounter = Mat(counters).at<unsigned int>(1 + octave);
maxCounter = std::min(maxCounter, static_cast<unsigned int>(maxCandidates));
unsigned int maxCounter = Mat(counters).at<unsigned int>(1 + octave);
maxCounter = std::min(maxCounter, static_cast<unsigned int>(maxCandidates));
if (maxCounter > 0)
{
icvInterpolateKeypoint_gpu(surf_.det, surf_.maxPosBuffer, maxCounter,
keypoints, counters, octave, layer_rows, maxFeatures);
}
if (maxCounter > 0)
{
icvInterpolateKeypoint_gpu(surf_.det, surf_.maxPosBuffer, maxCounter,
keypoints, counters, octave, layer_rows, maxFeatures);
}
unsigned int featureCounter = Mat(counters).at<unsigned int>(0);
featureCounter = std::min(featureCounter, static_cast<unsigned int>(maxFeatures));
}
unsigned int featureCounter = Mat(counters).at<unsigned int>(0);
featureCounter = std::min(featureCounter, static_cast<unsigned int>(maxFeatures));
keypoints.cols = featureCounter;
keypoints.cols = featureCounter;
if (surf_.upright)
keypoints.row(SURF_OCL::ANGLE_ROW).setTo(Scalar::all(90.0));
else
findOrientation(keypoints);
}
if (surf_.upright)
keypoints.row(SURF_OCL::ANGLE_ROW).setTo(Scalar::all(90.0));
else
findOrientation(keypoints);
}
void findOrientation(oclMat& keypoints)
void findOrientation(oclMat& keypoints)
{
const int nFeatures = keypoints.cols;
if (nFeatures > 0)
{
const int nFeatures = keypoints.cols;
if (nFeatures > 0)
{
icvCalcOrientation_gpu(keypoints, nFeatures);
}
icvCalcOrientation_gpu(keypoints, nFeatures);
}
}
void computeDescriptors(const oclMat& keypoints, oclMat& descriptors, int descriptorSize)
void computeDescriptors(const oclMat& keypoints, oclMat& descriptors, int descriptorSize)
{
const int nFeatures = keypoints.cols;
if (nFeatures > 0)
{
const int nFeatures = keypoints.cols;
if (nFeatures > 0)
{
descriptors.create(nFeatures, descriptorSize, CV_32F);
compute_descriptors_gpu(descriptors, keypoints, nFeatures);
}
ensureSizeIsEnough(nFeatures, descriptorSize, CV_32F, descriptors);
compute_descriptors_gpu(descriptors, keypoints, nFeatures);
}
}
~SURF_OCL_Invoker()
{
if(imgTex)
openCLFree(imgTex);
if(sumTex)
openCLFree(sumTex);
if(maskSumTex)
openCLFree(maskSumTex);
additioalParamBuffer.release();
}
~SURF_OCL_Invoker()
{
if(imgTex)
openCLFree(imgTex);
if(sumTex)
openCLFree(sumTex);
if(maskSumTex)
openCLFree(maskSumTex);
additioalParamBuffer.release();
}
private:
SURF_OCL& surf_;
private:
SURF_OCL& surf_;
int img_cols, img_rows;
int img_cols, img_rows;
bool use_mask;
bool use_mask;
int maxCandidates;
int maxFeatures;
int maxCandidates;
int maxFeatures;
oclMat counters;
oclMat counters;
// texture buffers
cl_mem imgTex;
cl_mem sumTex;
cl_mem maskSumTex;
// texture buffers
cl_mem imgTex;
cl_mem sumTex;
cl_mem maskSumTex;
oclMat additioalParamBuffer;
};
}
oclMat additioalParamBuffer;
// Copy assignment is deliberately declared here (after the `private:` label)
// and left undefined, which makes the class non-assignable.
//  - The class holds a reference member (surf_) that cannot be re-seated, and
//    cl_mem handles that the destructor passes to openCLFree; a member-wise
//    copy would lead to the same handles being freed twice.
//  - The previous body, `(*this) = right;`, invoked this very operator and
//    therefore recursed without end if it was ever called.
// A user-declared operator is still sufficient to silence MSVC warning C4512
// ("assignment operator could not be generated").
SURF_OCL_Invoker& operator= (const SURF_OCL_Invoker& right);
};
cv::ocl::SURF_OCL::SURF_OCL()
{
@ -274,7 +276,7 @@ cv::ocl::SURF_OCL::SURF_OCL()
cv::ocl::SURF_OCL::SURF_OCL(double _threshold, int _nOctaves, int _nOctaveLayers, bool _extended, float _keypointsRatio, bool _upright)
{
hessianThreshold = _threshold;
hessianThreshold = saturate_cast<float>(_threshold);
extended = _extended;
nOctaves = _nOctaves;
nOctaveLayers = _nOctaveLayers;
@ -440,150 +442,77 @@ void cv::ocl::SURF_OCL::releaseMemory()
maxPosBuffer.release();
}
// Facilities
//// load SURF constants into device memory
//void SURF_OCL_Invoker::loadGlobalConstants(int maxCandidates, int maxFeatures, int img_rows, int img_cols, int nOctaveLayers, float hessianThreshold)
//{
// Mat tmp(1, 9, CV_32FC1);
// float * tmp_data = tmp.ptr<float>();
// *tmp_data = maxCandidates;
// *(++tmp_data) = maxFeatures;
// *(++tmp_data) = img_rows;
// *(++tmp_data) = img_cols;
// *(++tmp_data) = nOctaveLayers;
// *(++tmp_data) = hessianThreshold;
// additioalParamBuffer = tmp;
//}
//void SURF_OCL_Invoker::loadOctaveConstants(int octave, int layer_rows, int layer_cols)
//{
// Mat tmp = additioalParamBuffer;
// float * tmp_data = tmp.ptr<float>();
// tmp_data += 6;
// *tmp_data = octave;
// *(++tmp_data) = layer_rows;
// *(++tmp_data) = layer_cols;
// additioalParamBuffer = tmp;
//}
// create and bind source buffer to image object.
void SURF_OCL_Invoker::bindImgTex(const oclMat& img)
// bind source buffer to image object.
void SURF_OCL_Invoker::bindImgTex(const oclMat& img, cl_mem& texture)
{
Mat cpu_img(img); // time consuming
cl_image_format format;
int err;
int depth = img.depth();
int channels = img.channels();
format.image_channel_data_type = CL_UNSIGNED_INT8;
format.image_channel_order = CL_R;
if(imgTex)
switch(depth)
{
openCLFree(imgTex);
case CV_8U:
format.image_channel_data_type = CL_UNSIGNED_INT8;
break;
case CV_32S:
format.image_channel_data_type = CL_UNSIGNED_INT32;
break;
case CV_32F:
format.image_channel_data_type = CL_FLOAT;
break;
default:
throw std::exception();
break;
}
#if CL_VERSION_1_2
cl_image_desc desc;
desc.image_type = CL_MEM_OBJECT_IMAGE2D;
desc.image_width = cpu_img.cols;
desc.image_height = cpu_img.rows;
desc.image_depth = NULL;
desc.image_array_size = 1;
desc.image_row_pitch = cpu_img.step;
desc.image_slice_pitch= 0;
desc.buffer = NULL;
desc.num_mip_levels = 0;
desc.num_samples = 0;
imgTex = clCreateImage(img.clCxt->impl->clContext, CL_MEM_READ_WRITE | CL_MEM_COPY_HOST_PTR, &format, &desc, cpu_img.data, &err);
#else
imgTex = clCreateImage2D(
img.clCxt->impl->clContext,
CL_MEM_READ_WRITE | CL_MEM_COPY_HOST_PTR,
&format,
cpu_img.cols,
cpu_img.rows,
cpu_img.step,
cpu_img.data,
&err);
#endif
openCLSafeCall(err);
}
void SURF_OCL_Invoker::bindSumTex(const oclMat& sum)
{
Mat cpu_img(sum); // time consuming
cl_image_format format;
int err;
format.image_channel_data_type = CL_UNSIGNED_INT32;
format.image_channel_order = CL_R;
if(sumTex)
switch(channels)
{
openCLFree(sumTex);
case 1:
format.image_channel_order = CL_R;
break;
case 3:
format.image_channel_order = CL_RGB;
break;
case 4:
format.image_channel_order = CL_RGBA;
break;
default:
throw std::exception();
break;
}
#if CL_VERSION_1_2
cl_image_desc desc;
desc.image_type = CL_MEM_OBJECT_IMAGE2D;
desc.image_width = cpu_img.cols;
desc.image_height = cpu_img.rows;
desc.image_depth = NULL;
desc.image_array_size = 1;
desc.image_row_pitch = cpu_img.step;
desc.image_slice_pitch= 0;
desc.buffer = NULL;
desc.num_mip_levels = 0;
desc.num_samples = 0;
sumTex = clCreateImage(sum.clCxt->impl->clContext, CL_MEM_READ_WRITE | CL_MEM_COPY_HOST_PTR, &format, &desc, cpu_img.data, &err);
#else
sumTex = clCreateImage2D(
sum.clCxt->impl->clContext,
CL_MEM_READ_WRITE | CL_MEM_COPY_HOST_PTR,
&format,
cpu_img.cols,
cpu_img.rows,
cpu_img.step,
cpu_img.data,
&err);
#endif
openCLSafeCall(err);
}
void SURF_OCL_Invoker::bindMaskSumTex(const oclMat& maskSum)
{
Mat cpu_img(maskSum); // time consuming
cl_image_format format;
int err;
format.image_channel_data_type = CL_UNSIGNED_INT32;
format.image_channel_order = CL_R;
if(maskSumTex)
if(texture)
{
openCLFree(maskSumTex);
openCLFree(texture);
}
#if CL_VERSION_1_2
cl_image_desc desc;
desc.image_type = CL_MEM_OBJECT_IMAGE2D;
desc.image_width = cpu_img.cols;
desc.image_height = cpu_img.rows;
desc.image_depth = NULL;
desc.image_width = img.step / img.elemSize();
desc.image_height = img.rows;
desc.image_depth = 0;
desc.image_array_size = 1;
desc.image_row_pitch = cpu_img.step;
desc.image_row_pitch = 0;
desc.image_slice_pitch= 0;
desc.buffer = NULL;
desc.num_mip_levels = 0;
desc.num_samples = 0;
maskSumTex = clCreateImage(maskSum.clCxt->impl->clContext, CL_MEM_READ_WRITE | CL_MEM_COPY_HOST_PTR, &format, &desc, cpu_img.data, &err);
texture = clCreateImage(Context::getContext()->impl->clContext, CL_MEM_READ_WRITE, &format, &desc, NULL, &err);
#else
maskSumTex = clCreateImage2D(
maskSum.clCxt->impl->clContext,
CL_MEM_READ_WRITE | CL_MEM_COPY_HOST_PTR,
texture = clCreateImage2D(
Context::getContext()->impl->clContext,
CL_MEM_READ_WRITE,
&format,
cpu_img.cols,
cpu_img.rows,
cpu_img.step,
cpu_img.data,
img.step / img.elemSize(),
img.rows,
0,
NULL,
&err);
#endif
size_t origin[] = { 0, 0, 0 };
size_t region[] = { img.step/img.elemSize(), img.rows, 1 };
clEnqueueCopyBufferToImage(img.clCxt->impl->clCmdQueue, (cl_mem)img.data, texture, 0, origin, region, 0, NULL, 0);
openCLSafeCall(err);
}
@ -676,7 +605,7 @@ void SURF_OCL_Invoker::icvInterpolateKeypoint_gpu(const oclMat& det, const oclMa
args.push_back( make_pair( sizeof(cl_int), (void *)&maxFeatures));
size_t localThreads[3] = {3, 3, 3};
size_t globalThreads[3] = {maxCounter * localThreads[0], 1, 1};
size_t globalThreads[3] = {maxCounter * localThreads[0], localThreads[1], 1};
openCLExecuteKernel(clCxt, &nonfree_surf, kernelName, globalThreads, localThreads, args, -1, -1);
}

@ -44,7 +44,6 @@
#include "precomp.hpp"
#define PERF_TEST 0
////////////////////////////////////////////////////////////////////////////////
// MatchTemplate

Loading…
Cancel
Save