From 2e36338636d3eeb437548519b7a75649fb8287f6 Mon Sep 17 00:00:00 2001 From: yao Date: Thu, 20 Sep 2012 09:23:11 +0800 Subject: [PATCH] remove warnings of some functions in ocl module --- modules/ocl/src/canny.cpp | 26 +- modules/ocl/src/kernels/imgproc_canny.cl | 8 +- modules/ocl/src/match_template.cpp | 15 +- modules/ocl/src/surf.cpp | 459 ++++++++++------------- modules/ocl/test/test_match_template.cpp | 1 - 5 files changed, 218 insertions(+), 291 deletions(-) diff --git a/modules/ocl/src/canny.cpp b/modules/ocl/src/canny.cpp index 2ff395498b..6a40fdcdc5 100644 --- a/modules/ocl/src/canny.cpp +++ b/modules/ocl/src/canny.cpp @@ -75,13 +75,13 @@ cv::ocl::CannyBuf::CannyBuf(const oclMat& dx_, const oclMat& dy_) : dx(dx_), dy( void cv::ocl::CannyBuf::create(const Size& image_size, int apperture_size) { - dx.create(image_size, CV_32SC1); - dy.create(image_size, CV_32SC1); + ensureSizeIsEnough(image_size, CV_32SC1, dx); + ensureSizeIsEnough(image_size, CV_32SC1, dy); if(apperture_size == 3) { - dx_buf.create(image_size, CV_32SC1); - dy_buf.create(image_size, CV_32SC1); + ensureSizeIsEnough(image_size, CV_32SC1, dx_buf); + ensureSizeIsEnough(image_size, CV_32SC1, dy_buf); } else if(apperture_size > 0) { @@ -95,18 +95,18 @@ void cv::ocl::CannyBuf::create(const Size& image_size, int apperture_size) filterDY = createDerivFilter_GPU(CV_8U, CV_32S, 0, 1, apperture_size, BORDER_REPLICATE); } } - edgeBuf.create(image_size.height + 2, image_size.width + 2, CV_32FC1); + ensureSizeIsEnough(image_size.height + 2, image_size.width + 2, CV_32FC1, edgeBuf); - trackBuf1.create(1, image_size.width * image_size.height, CV_16UC2); - trackBuf2.create(1, image_size.width * image_size.height, CV_16UC2); + ensureSizeIsEnough(1, image_size.width * image_size.height, CV_16UC2, trackBuf1); + ensureSizeIsEnough(1, image_size.width * image_size.height, CV_16UC2, trackBuf2); - float counter_f [1] = { 0 }; + int counter_i [1] = { 0 }; int err = 0; if(counter) { openCLFree(counter); } - counter = clCreateBuffer( Context::getContext()->impl->clContext, CL_MEM_COPY_HOST_PTR, sizeof(float), counter_f, &err ); + counter = clCreateBuffer( Context::getContext()->impl->clContext, CL_MEM_COPY_HOST_PTR, sizeof(int), counter_i, &err ); openCLSafeCall(err); } @@ -357,16 +357,18 @@ void canny::edgesHysteresisLocal_gpu(oclMat& map, oclMat& st1, void * counter, i void canny::edgesHysteresisGlobal_gpu(oclMat& map, oclMat& st1, oclMat& st2, void * counter, int rows, int cols) { unsigned int count; - openCLSafeCall(clEnqueueReadBuffer(Context::getContext()->impl->clCmdQueue, (cl_mem)counter, 1, 0, sizeof(float), &count, NULL, NULL, NULL)); + openCLSafeCall(clEnqueueReadBuffer(Context::getContext()->impl->clCmdQueue, (cl_mem)counter, 1, 0, sizeof(float), &count, 0, NULL, NULL)); Context *clCxt = map.clCxt; string kernelName = "edgesHysteresisGlobal"; vector< pair > args; size_t localThreads[3] = {128, 1, 1}; #define DIVUP(a, b) ((a)+(b)-1)/(b) - + int count_i[1] = {0}; while(count > 0) { + openCLSafeCall(clEnqueueWriteBuffer(Context::getContext()->impl->clCmdQueue, (cl_mem)counter, 1, 0, sizeof(int), &count_i, 0, NULL, NULL)); + args.clear(); size_t globalThreads[3] = {std::min(count, 65535u) * 128, DIVUP(count, 65535), 1}; args.push_back( make_pair( sizeof(cl_mem), (void *)&map.data)); @@ -380,7 +382,7 @@ void canny::edgesHysteresisGlobal_gpu(oclMat& map, oclMat& st1, oclMat& st2, voi args.push_back( make_pair( sizeof(cl_int), (void *)&map.offset)); openCLExecuteKernel(clCxt, &imgproc_canny, kernelName, globalThreads, localThreads, args, -1, -1); - openCLSafeCall(clEnqueueReadBuffer(Context::getContext()->impl->clCmdQueue, (cl_mem)counter, 1, 0, sizeof(float), &count, NULL, NULL, NULL)); + openCLSafeCall(clEnqueueReadBuffer(Context::getContext()->impl->clCmdQueue, (cl_mem)counter, 1, 0, sizeof(int), &count, 0, NULL, NULL)); std::swap(st1, st2); } #undef DIVUP diff --git a/modules/ocl/src/kernels/imgproc_canny.cl b/modules/ocl/src/kernels/imgproc_canny.cl index 65df508589..59835c3c3b 100644 --- a/modules/ocl/src/kernels/imgproc_canny.cl +++ b/modules/ocl/src/kernels/imgproc_canny.cl @@ -601,7 +601,7 @@ __kernel { int n; -#pragma unroll + #pragma unroll for (int k = 0; k < 16; ++k) { n = 0; @@ -686,12 +686,6 @@ __kernel __local ushort2 s_st[stack_size]; - if(gidx + gidy == 0) - { - *counter = 0; - } - barrier(CLK_GLOBAL_MEM_FENCE); - if(lidx == 0) { s_counter = 0; diff --git a/modules/ocl/src/match_template.cpp b/modules/ocl/src/match_template.cpp index 289ba1259e..bf209fdf1a 100644 --- a/modules/ocl/src/match_template.cpp +++ b/modules/ocl/src/match_template.cpp @@ -114,7 +114,7 @@ namespace cv { namespace ocl ////////////////////////////////////////////////////////////////////// // SQDIFF void matchTemplate_SQDIFF( - const oclMat& image, const oclMat& templ, oclMat& result, MatchTemplateBuf &buf) + const oclMat& image, const oclMat& templ, oclMat& result, MatchTemplateBuf &) { result.create(image.rows - templ.rows + 1, image.cols - templ.cols + 1, CV_32F); if (templ.size().area() < getTemplateThreshold(CV_TM_SQDIFF, image.depth())) @@ -167,10 +167,11 @@ namespace cv { namespace ocl } void matchTemplateNaive_SQDIFF( - const oclMat& image, const oclMat& templ, oclMat& result, int cn) + const oclMat& image, const oclMat& templ, oclMat& result, int) { CV_Assert((image.depth() == CV_8U && templ.depth() == CV_8U ) - || (image.depth() == CV_32F && templ.depth() == CV_32F) && result.depth() == CV_32F); + || ((image.depth() == CV_32F && templ.depth() == CV_32F) && result.depth() == CV_32F) + ); CV_Assert(image.channels() == templ.channels() && (image.channels() == 1 || image.channels() == 4) && result.channels() == 1); CV_Assert(result.rows == image.rows - templ.rows + 1 && result.cols == image.cols - templ.cols + 1); @@ -263,10 +264,11 @@ namespace cv { namespace ocl } void matchTemplateNaive_CCORR( - const oclMat& image, const oclMat& templ, oclMat& result, int cn) + const oclMat& image, const oclMat& templ, oclMat& result, int) { CV_Assert((image.depth() == CV_8U && templ.depth() == CV_8U ) - || (image.depth() == CV_32F && templ.depth() == CV_32F) && result.depth() == CV_32F); + || ((image.depth() == CV_32F && templ.depth() == CV_32F) && result.depth() == CV_32F) + ); CV_Assert(image.channels() == templ.channels() && (image.channels() == 1 || image.channels() == 4) && result.channels() == 1); CV_Assert(result.rows == image.rows - templ.rows + 1 && result.cols == image.cols - templ.cols + 1); @@ -341,6 +343,7 @@ namespace cv { namespace ocl templ_sum = sum(templ) / templ.size().area(); buf.image_sums.resize(buf.images.size()); + for(int i = 0; i < image.channels(); i ++) { integral(buf.images[i], buf.image_sums[i]); @@ -408,7 +411,7 @@ namespace cv { namespace ocl #else oclMat templ_sqr = templ; multiply(templ,templ, templ_sqr); - templ_sqsum = sum(templ_sqr)[0]; + templ_sqsum = saturate_cast(sum(templ_sqr)[0]); #endif //SQRSUM_FIXED templ_sqsum -= scale * templ_sum * templ_sum; templ_sum *= scale; diff --git a/modules/ocl/src/surf.cpp b/modules/ocl/src/surf.cpp index 3f015e297e..a59ae7ca78 100644 --- a/modules/ocl/src/surf.cpp +++ b/modules/ocl/src/surf.cpp @@ -44,7 +44,7 @@ //M*/ #include #include "precomp.hpp" - +#include "opencv2/highgui/highgui.hpp" using namespace cv; using namespace cv::ocl; @@ -72,195 +72,197 @@ namespace cv { namespace ocl extern const char * nonfree_surf; }} -namespace + +static inline int divUp(int total, int grain) { - static inline int divUp(int total, int grain) - { - return (total + grain - 1) / grain; - } - static inline int calcSize(int octave, int layer) - { - /* Wavelet size at first layer of first octave. */ - const int HAAR_SIZE0 = 9; + return (total + grain - 1) / grain; +} +static inline int calcSize(int octave, int layer) +{ + /* Wavelet size at first layer of first octave. */ + const int HAAR_SIZE0 = 9; - /* Wavelet size increment between layers. This should be an even number, - such that the wavelet sizes in an octave are either all even or all odd. - This ensures that when looking for the neighbours of a sample, the layers + /* Wavelet size increment between layers. This should be an even number, + such that the wavelet sizes in an octave are either all even or all odd. + This ensures that when looking for the neighbours of a sample, the layers - above and below are aligned correctly. */ - const int HAAR_SIZE_INC = 6; + above and below are aligned correctly. */ + const int HAAR_SIZE_INC = 6; - return (HAAR_SIZE0 + HAAR_SIZE_INC * layer) << octave; - } + return (HAAR_SIZE0 + HAAR_SIZE_INC * layer) << octave; +} - class SURF_OCL_Invoker - { - public: - // facilities - void bindImgTex(const oclMat& img); - void bindSumTex(const oclMat& sum); - void bindMaskSumTex(const oclMat& maskSum); +class SURF_OCL_Invoker +{ +public: + // facilities + void bindImgTex(const oclMat& img, cl_mem & texture); - //void loadGlobalConstants(int maxCandidates, int maxFeatures, int img_rows, int img_cols, int nOctaveLayers, float hessianThreshold); - //void loadOctaveConstants(int octave, int layer_rows, int layer_cols); + //void loadGlobalConstants(int maxCandidates, int maxFeatures, int img_rows, int img_cols, int nOctaveLayers, float hessianThreshold); + //void loadOctaveConstants(int octave, int layer_rows, int layer_cols); - // kernel callers declearations - void icvCalcLayerDetAndTrace_gpu(oclMat& det, oclMat& trace, int octave, int nOctaveLayers, int layer_rows); + // kernel callers declearations + void icvCalcLayerDetAndTrace_gpu(oclMat& det, oclMat& trace, int octave, int nOctaveLayers, int layer_rows); - void icvFindMaximaInLayer_gpu(const oclMat& det, const oclMat& trace, oclMat& maxPosBuffer, oclMat& maxCounter, int counterOffset, - int octave, bool use_mask, int nLayers, int layer_rows, int layer_cols); + void icvFindMaximaInLayer_gpu(const oclMat& det, const oclMat& trace, oclMat& maxPosBuffer, oclMat& maxCounter, int counterOffset, + int octave, bool use_mask, int nLayers, int layer_rows, int layer_cols); - void icvInterpolateKeypoint_gpu(const oclMat& det, const oclMat& maxPosBuffer, unsigned int maxCounter, - oclMat& keypoints, oclMat& counters, int octave, int layer_rows, int maxFeatures); + void icvInterpolateKeypoint_gpu(const oclMat& det, const oclMat& maxPosBuffer, unsigned int maxCounter, + oclMat& keypoints, oclMat& counters, int octave, int layer_rows, int maxFeatures); - void icvCalcOrientation_gpu(const oclMat& keypoints, int nFeatures); + void icvCalcOrientation_gpu(const oclMat& keypoints, int nFeatures); - void compute_descriptors_gpu(const oclMat& descriptors, const oclMat& keypoints, int nFeatures); - // end of kernel callers declearations + void compute_descriptors_gpu(const oclMat& descriptors, const oclMat& keypoints, int nFeatures); + // end of kernel callers declearations - SURF_OCL_Invoker(SURF_OCL& surf, const oclMat& img, const oclMat& mask) : - surf_(surf), - img_cols(img.cols), img_rows(img.rows), - use_mask(!mask.empty()), - imgTex(NULL), sumTex(NULL), maskSumTex(NULL) - { - CV_Assert(!img.empty() && img.type() == CV_8UC1); - CV_Assert(mask.empty() || (mask.size() == img.size() && mask.type() == CV_8UC1)); - CV_Assert(surf_.nOctaves > 0 && surf_.nOctaveLayers > 0); + SURF_OCL_Invoker(SURF_OCL& surf, const oclMat& img, const oclMat& mask) : + surf_(surf), + img_cols(img.cols), img_rows(img.rows), + use_mask(!mask.empty()), + imgTex(NULL), sumTex(NULL), maskSumTex(NULL) + { + CV_Assert(!img.empty() && img.type() == CV_8UC1); + CV_Assert(mask.empty() || (mask.size() == img.size() && mask.type() == CV_8UC1)); + CV_Assert(surf_.nOctaves > 0 && surf_.nOctaveLayers > 0); - const int min_size = calcSize(surf_.nOctaves - 1, 0); - CV_Assert(img_rows - min_size >= 0); - CV_Assert(img_cols - min_size >= 0); + const int min_size = calcSize(surf_.nOctaves - 1, 0); + CV_Assert(img_rows - min_size >= 0); + CV_Assert(img_cols - min_size >= 0); - const int layer_rows = img_rows >> (surf_.nOctaves - 1); - const int layer_cols = img_cols >> (surf_.nOctaves - 1); - const int min_margin = ((calcSize((surf_.nOctaves - 1), 2) >> 1) >> (surf_.nOctaves - 1)) + 1; - CV_Assert(layer_rows - 2 * min_margin > 0); - CV_Assert(layer_cols - 2 * min_margin > 0); + const int layer_rows = img_rows >> (surf_.nOctaves - 1); + const int layer_cols = img_cols >> (surf_.nOctaves - 1); + const int min_margin = ((calcSize((surf_.nOctaves - 1), 2) >> 1) >> (surf_.nOctaves - 1)) + 1; + CV_Assert(layer_rows - 2 * min_margin > 0); + CV_Assert(layer_cols - 2 * min_margin > 0); - maxFeatures = std::min(static_cast(img.size().area() * surf.keypointsRatio), 65535); - maxCandidates = std::min(static_cast(1.5 * maxFeatures), 65535); + maxFeatures = std::min(static_cast(img.size().area() * surf.keypointsRatio), 65535); + maxCandidates = std::min(static_cast(1.5 * maxFeatures), 65535); - CV_Assert(maxFeatures > 0); + CV_Assert(maxFeatures > 0); - counters.create(1, surf_.nOctaves + 1, CV_32SC1); - counters.setTo(Scalar::all(0)); + counters.create(1, surf_.nOctaves + 1, CV_32SC1); + counters.setTo(Scalar::all(0)); - //loadGlobalConstants(maxCandidates, maxFeatures, img_rows, img_cols, surf_.nOctaveLayers, static_cast(surf_.hessianThreshold)); + //loadGlobalConstants(maxCandidates, maxFeatures, img_rows, img_cols, surf_.nOctaveLayers, static_cast(surf_.hessianThreshold)); - bindImgTex(img); - integral(img, surf_.sum); // the two argumented integral version is incorrect + bindImgTex(img, imgTex); + integral(img, surf_.sum); // the two argumented integral version is incorrect - bindSumTex(surf_.sum); - maskSumTex = 0; + bindImgTex(surf_.sum, sumTex); + maskSumTex = 0; - if (use_mask) - { - throw std::exception(); - //!FIXME - // temp fix for missing min overload - oclMat temp(mask.size(), mask.type()); - temp.setTo(Scalar::all(1.0)); - //cv::ocl::min(mask, temp, surf_.mask1); ///////// disable this - integral(surf_.mask1, surf_.maskSum); - bindMaskSumTex(surf_.maskSum); - } + if (use_mask) + { + throw std::exception(); + //!FIXME + // temp fix for missing min overload + oclMat temp(mask.size(), mask.type()); + temp.setTo(Scalar::all(1.0)); + //cv::ocl::min(mask, temp, surf_.mask1); ///////// disable this + integral(surf_.mask1, surf_.maskSum); + bindImgTex(surf_.maskSum, maskSumTex); } + } - void detectKeypoints(oclMat& keypoints) - { - // create image pyramid buffers - // different layers have same sized buffers, but they are sampled from gaussin kernel. - surf_.det.create(img_rows * (surf_.nOctaveLayers + 2), img_cols, CV_32FC1); - surf_.trace.create(img_rows * (surf_.nOctaveLayers + 2), img_cols, CV_32FC1); + void detectKeypoints(oclMat& keypoints) + { + // create image pyramid buffers + // different layers have same sized buffers, but they are sampled from gaussin kernel. + ensureSizeIsEnough(img_rows * (surf_.nOctaveLayers + 2), img_cols, CV_32FC1, surf_.det); + ensureSizeIsEnough(img_rows * (surf_.nOctaveLayers + 2), img_cols, CV_32FC1, surf_.trace); - surf_.maxPosBuffer.create(1, maxCandidates, CV_32SC4); - keypoints.create(SURF_OCL::ROWS_COUNT, maxFeatures, CV_32FC1); - keypoints.setTo(Scalar::all(0)); + ensureSizeIsEnough(1, maxCandidates, CV_32SC4, surf_.maxPosBuffer); + ensureSizeIsEnough(SURF_OCL::ROWS_COUNT, maxFeatures, CV_32FC1, keypoints); + keypoints.setTo(Scalar::all(0)); - for (int octave = 0; octave < surf_.nOctaves; ++octave) - { - const int layer_rows = img_rows >> octave; - const int layer_cols = img_cols >> octave; + for (int octave = 0; octave < surf_.nOctaves; ++octave) + { + const int layer_rows = img_rows >> octave; + const int layer_cols = img_cols >> octave; - //loadOctaveConstants(octave, layer_rows, layer_cols); + //loadOctaveConstants(octave, layer_rows, layer_cols); - icvCalcLayerDetAndTrace_gpu(surf_.det, surf_.trace, octave, surf_.nOctaveLayers, layer_rows); + icvCalcLayerDetAndTrace_gpu(surf_.det, surf_.trace, octave, surf_.nOctaveLayers, layer_rows); - icvFindMaximaInLayer_gpu(surf_.det, surf_.trace, surf_.maxPosBuffer, counters, 1 + octave, - octave, use_mask, surf_.nOctaveLayers, layer_rows, layer_cols); + icvFindMaximaInLayer_gpu(surf_.det, surf_.trace, surf_.maxPosBuffer, counters, 1 + octave, + octave, use_mask, surf_.nOctaveLayers, layer_rows, layer_cols); - unsigned int maxCounter = Mat(counters).at(1 + octave); - maxCounter = std::min(maxCounter, static_cast(maxCandidates)); + unsigned int maxCounter = Mat(counters).at(1 + octave); + maxCounter = std::min(maxCounter, static_cast(maxCandidates)); - if (maxCounter > 0) - { - icvInterpolateKeypoint_gpu(surf_.det, surf_.maxPosBuffer, maxCounter, - keypoints, counters, octave, layer_rows, maxFeatures); - } + if (maxCounter > 0) + { + icvInterpolateKeypoint_gpu(surf_.det, surf_.maxPosBuffer, maxCounter, + keypoints, counters, octave, layer_rows, maxFeatures); } - unsigned int featureCounter = Mat(counters).at(0); - featureCounter = std::min(featureCounter, static_cast(maxFeatures)); + } + unsigned int featureCounter = Mat(counters).at(0); + featureCounter = std::min(featureCounter, static_cast(maxFeatures)); - keypoints.cols = featureCounter; + keypoints.cols = featureCounter; - if (surf_.upright) - keypoints.row(SURF_OCL::ANGLE_ROW).setTo(Scalar::all(90.0)); - else - findOrientation(keypoints); - } + if (surf_.upright) + keypoints.row(SURF_OCL::ANGLE_ROW).setTo(Scalar::all(90.0)); + else + findOrientation(keypoints); + } - void findOrientation(oclMat& keypoints) + void findOrientation(oclMat& keypoints) + { + const int nFeatures = keypoints.cols; + if (nFeatures > 0) { - const int nFeatures = keypoints.cols; - if (nFeatures > 0) - { - icvCalcOrientation_gpu(keypoints, nFeatures); - } + icvCalcOrientation_gpu(keypoints, nFeatures); } + } - void computeDescriptors(const oclMat& keypoints, oclMat& descriptors, int descriptorSize) + void computeDescriptors(const oclMat& keypoints, oclMat& descriptors, int descriptorSize) + { + const int nFeatures = keypoints.cols; + if (nFeatures > 0) { - const int nFeatures = keypoints.cols; - if (nFeatures > 0) - { - descriptors.create(nFeatures, descriptorSize, CV_32F); - compute_descriptors_gpu(descriptors, keypoints, nFeatures); - } + ensureSizeIsEnough(nFeatures, descriptorSize, CV_32F, descriptors); + compute_descriptors_gpu(descriptors, keypoints, nFeatures); } + } - ~SURF_OCL_Invoker() - { - if(imgTex) - openCLFree(imgTex); - if(sumTex) - openCLFree(sumTex); - if(maskSumTex) - openCLFree(maskSumTex); - additioalParamBuffer.release(); - } + ~SURF_OCL_Invoker() + { + if(imgTex) + openCLFree(imgTex); + if(sumTex) + openCLFree(sumTex); + if(maskSumTex) + openCLFree(maskSumTex); + additioalParamBuffer.release(); + } - private: - SURF_OCL& surf_; +private: + SURF_OCL& surf_; - int img_cols, img_rows; + int img_cols, img_rows; - bool use_mask; + bool use_mask; - int maxCandidates; - int maxFeatures; + int maxCandidates; + int maxFeatures; - oclMat counters; + oclMat counters; - // texture buffers - cl_mem imgTex; - cl_mem sumTex; - cl_mem maskSumTex; + // texture buffers + cl_mem imgTex; + cl_mem sumTex; + cl_mem maskSumTex; - oclMat additioalParamBuffer; - }; -} + oclMat additioalParamBuffer; + + SURF_OCL_Invoker& operator= (const SURF_OCL_Invoker& right) + { + (*this) = right; + return *this; + } // remove warning C4512 +}; cv::ocl::SURF_OCL::SURF_OCL() { @@ -274,7 +276,7 @@ cv::ocl::SURF_OCL::SURF_OCL() cv::ocl::SURF_OCL::SURF_OCL(double _threshold, int _nOctaves, int _nOctaveLayers, bool _extended, float _keypointsRatio, bool _upright) { - hessianThreshold = _threshold; + hessianThreshold = saturate_cast(_threshold); extended = _extended; nOctaves = _nOctaves; nOctaveLayers = _nOctaveLayers; @@ -440,150 +442,77 @@ void cv::ocl::SURF_OCL::releaseMemory() maxPosBuffer.release(); } -// Facilities - -//// load SURF constants into device memory -//void SURF_OCL_Invoker::loadGlobalConstants(int maxCandidates, int maxFeatures, int img_rows, int img_cols, int nOctaveLayers, float hessianThreshold) -//{ -// Mat tmp(1, 9, CV_32FC1); -// float * tmp_data = tmp.ptr(); -// *tmp_data = maxCandidates; -// *(++tmp_data) = maxFeatures; -// *(++tmp_data) = img_rows; -// *(++tmp_data) = img_cols; -// *(++tmp_data) = nOctaveLayers; -// *(++tmp_data) = hessianThreshold; -// additioalParamBuffer = tmp; -//} -//void SURF_OCL_Invoker::loadOctaveConstants(int octave, int layer_rows, int layer_cols) -//{ -// Mat tmp = additioalParamBuffer; -// float * tmp_data = tmp.ptr(); -// tmp_data += 6; -// *tmp_data = octave; -// *(++tmp_data) = layer_rows; -// *(++tmp_data) = layer_cols; -// additioalParamBuffer = tmp; -//} - -// create and bind source buffer to image oject. -void SURF_OCL_Invoker::bindImgTex(const oclMat& img) + +// bind source buffer to image oject. +void SURF_OCL_Invoker::bindImgTex(const oclMat& img, cl_mem& texture) { - Mat cpu_img(img); // time consuming cl_image_format format; int err; + int depth = img.depth(); + int channels = img.channels(); - format.image_channel_data_type = CL_UNSIGNED_INT8; - format.image_channel_order = CL_R; - - if(imgTex) + switch(depth) { - openCLFree(imgTex); + case CV_8U: + format.image_channel_data_type = CL_UNSIGNED_INT8; + break; + case CV_32S: + format.image_channel_data_type = CL_UNSIGNED_INT32; + break; + case CV_32F: + format.image_channel_data_type = CL_FLOAT; + break; + default: + throw std::exception(); + break; } - -#if CL_VERSION_1_2 - cl_image_desc desc; - desc.image_type = CL_MEM_OBJECT_IMAGE2D; - desc.image_width = cpu_img.cols; - desc.image_height = cpu_img.rows; - desc.image_depth = NULL; - desc.image_array_size = 1; - desc.image_row_pitch = cpu_img.step; - desc.image_slice_pitch= 0; - desc.buffer = NULL; - desc.num_mip_levels = 0; - desc.num_samples = 0; - imgTex = clCreateImage(img.clCxt->impl->clContext, CL_MEM_READ_WRITE | CL_MEM_COPY_HOST_PTR, &format, &desc, cpu_img.data, &err); -#else - imgTex = clCreateImage2D( - img.clCxt->impl->clContext, - CL_MEM_READ_WRITE | CL_MEM_COPY_HOST_PTR, - &format, - cpu_img.cols, - cpu_img.rows, - cpu_img.step, - cpu_img.data, - &err); -#endif - openCLSafeCall(err); -} - -void SURF_OCL_Invoker::bindSumTex(const oclMat& sum) -{ - Mat cpu_img(sum); // time consuming - cl_image_format format; - int err; - format.image_channel_data_type = CL_UNSIGNED_INT32; - format.image_channel_order = CL_R; - - if(sumTex) + switch(channels) { - openCLFree(sumTex); + case 1: + format.image_channel_order = CL_R; + break; + case 3: + format.image_channel_order = CL_RGB; + break; + case 4: + format.image_channel_order = CL_RGBA; + break; + default: + throw std::exception(); + break; } - -#if CL_VERSION_1_2 - cl_image_desc desc; - desc.image_type = CL_MEM_OBJECT_IMAGE2D; - desc.image_width = cpu_img.cols; - desc.image_height = cpu_img.rows; - desc.image_depth = NULL; - desc.image_array_size = 1; - desc.image_row_pitch = cpu_img.step; - desc.image_slice_pitch= 0; - desc.buffer = NULL; - desc.num_mip_levels = 0; - desc.num_samples = 0; - sumTex = clCreateImage(sum.clCxt->impl->clContext, CL_MEM_READ_WRITE | CL_MEM_COPY_HOST_PTR, &format, &desc, cpu_img.data, &err); -#else - sumTex = clCreateImage2D( - sum.clCxt->impl->clContext, - CL_MEM_READ_WRITE | CL_MEM_COPY_HOST_PTR, - &format, - cpu_img.cols, - cpu_img.rows, - cpu_img.step, - cpu_img.data, - &err); -#endif - openCLSafeCall(err); -} -void SURF_OCL_Invoker::bindMaskSumTex(const oclMat& maskSum) -{ - Mat cpu_img(maskSum); // time consuming - cl_image_format format; - int err; - format.image_channel_data_type = CL_UNSIGNED_INT32; - format.image_channel_order = CL_R; - - if(maskSumTex) + if(texture) { - openCLFree(maskSumTex); + openCLFree(texture); } #if CL_VERSION_1_2 cl_image_desc desc; desc.image_type = CL_MEM_OBJECT_IMAGE2D; - desc.image_width = cpu_img.cols; - desc.image_height = cpu_img.rows; - desc.image_depth = NULL; + desc.image_width = img.step / img.elemSize(); + desc.image_height = img.rows; + desc.image_depth = 0; desc.image_array_size = 1; - desc.image_row_pitch = cpu_img.step; + desc.image_row_pitch = 0; desc.image_slice_pitch= 0; desc.buffer = NULL; desc.num_mip_levels = 0; desc.num_samples = 0; - maskSumTex = clCreateImage(maskSum.clCxt->impl->clContext, CL_MEM_READ_WRITE | CL_MEM_COPY_HOST_PTR, &format, &desc, cpu_img.data, &err); + texture = clCreateImage(Context::getContext()->impl->clContext, CL_MEM_READ_WRITE, &format, &desc, NULL, &err); #else - maskSumTex = clCreateImage2D( - maskSum.clCxt->impl->clContext, - CL_MEM_READ_WRITE | CL_MEM_COPY_HOST_PTR, + texture = clCreateImage2D( + Context::getContext()->impl->clContext, + CL_MEM_READ_WRITE, &format, - cpu_img.cols, - cpu_img.rows, - cpu_img.step, - cpu_img.data, + img.step / img.elemSize(), + img.rows, + 0, + NULL, &err); #endif + size_t origin[] = { 0, 0, 0 }; + size_t region[] = { img.step/img.elemSize(), img.rows, 1 }; + clEnqueueCopyBufferToImage(img.clCxt->impl->clCmdQueue, (cl_mem)img.data, texture, 0, origin, region, 0, NULL, 0); openCLSafeCall(err); } @@ -676,7 +605,7 @@ void SURF_OCL_Invoker::icvInterpolateKeypoint_gpu(const oclMat& det, const oclMa args.push_back( make_pair( sizeof(cl_int), (void *)&maxFeatures)); size_t localThreads[3] = {3, 3, 3}; - size_t globalThreads[3] = {maxCounter * localThreads[0], 1, 1}; + size_t globalThreads[3] = {maxCounter * localThreads[0], localThreads[1], 1}; openCLExecuteKernel(clCxt, &nonfree_surf, kernelName, globalThreads, localThreads, args, -1, -1); } diff --git a/modules/ocl/test/test_match_template.cpp b/modules/ocl/test/test_match_template.cpp index e3b8778d82..63708ea347 100644 --- a/modules/ocl/test/test_match_template.cpp +++ b/modules/ocl/test/test_match_template.cpp @@ -44,7 +44,6 @@ #include "precomp.hpp" -#define PERF_TEST 0 //////////////////////////////////////////////////////////////////////////////// // MatchTemplate