some host side optimizations to ocl::GaussianBlur

pull/315/merge
yao authored 12 years ago, committed by Andrey Kamaev
parent 9060365f5e
commit e05112a364
1 changed file: modules/ocl/src/filtering.cpp (341 changed lines)

@@ -47,6 +47,7 @@
//M*/
#include "precomp.hpp"
+#include "mcwutil.hpp"
#include <iostream>
using namespace std;
using namespace cv;
@@ -109,7 +110,7 @@ Ptr<FilterEngine_GPU> cv::ocl::createLinearFilter_GPU(int, int, const Mat &, con
return Ptr<FilterEngine_GPU>(0);
}
-Ptr<FilterEngine_GPU> cv::ocl::createDerivFilter_GPU( int srcType, int dstType, int dx, int dy, int ksize, int borderType )
+Ptr<FilterEngine_GPU> cv::ocl::createDerivFilter_GPU(int srcType, int dstType, int dx, int dy, int ksize, int borderType)
{
throw_nogpu();
return Ptr<FilterEngine_GPU>(0);
@@ -150,17 +151,17 @@ void cv::ocl::Laplacian(const oclMat &, oclMat &, int, int, double)
throw_nogpu();
}
-void cv::ocl::erode( const oclMat &, oclMat &, const Mat &, Point, int)
+void cv::ocl::erode(const oclMat &, oclMat &, const Mat &, Point, int)
{
throw_nogpu();
}
-void cv::ocl::dilate( const oclMat &, oclMat &, const Mat &, Point, int)
+void cv::ocl::dilate(const oclMat &, oclMat &, const Mat &, Point, int)
{
throw_nogpu();
}
-void cv::ocl::morphologyEx( const oclMat &, oclMat &, int, const Mat &, Point, int)
+void cv::ocl::morphologyEx(const oclMat &, oclMat &, int, const Mat &, Point, int)
{
throw_nogpu();
}
@@ -194,7 +195,9 @@ namespace
inline void normalizeAnchor(int &anchor, int ksize)
{
if (anchor < 0)
+{
anchor = ksize >> 1;
+}
CV_Assert(0 <= anchor && anchor < ksize);
}
@@ -208,7 +211,10 @@ inline void normalizeAnchor(Point &anchor, const Size &ksize)
inline void normalizeROI(Rect &roi, const Size &ksize, const Point &anchor, const Size &src_size)
{
if (roi == Rect(0, 0, -1, -1))
+{
roi = Rect(0, 0, src_size.width, src_size.height);
+}
CV_Assert(ksize.height > 0 && ksize.width > 0 && ((ksize.height & 1) == 1) && ((ksize.width & 1) == 1));
CV_Assert((anchor.x == -1 && anchor.y == -1) || (anchor.x == ksize.width >> 1 && anchor.y == ksize.height >> 1));
CV_Assert(roi.x >= 0 && roi.y >= 0 && roi.width <= src_size.width && roi.height <= src_size.height);
@@ -218,7 +224,11 @@ inline void normalizeROI(Rect &roi, const Size &ksize, const Point &anchor, cons
inline void normalizeKernel(const Mat &kernel, oclMat &gpu_krnl, int type = CV_8U, int *nDivisor = 0, bool reverse = false)
{
int scale = nDivisor && (kernel.depth() == CV_32F || kernel.depth() == CV_64F) ? 256 : 1;
-if (nDivisor) *nDivisor = scale;
+if (nDivisor)
+{
+*nDivisor = scale;
+}
Mat temp(kernel.size(), type);
kernel.convertTo(temp, type, scale);
@@ -227,6 +237,7 @@ inline void normalizeKernel(const Mat &kernel, oclMat &gpu_krnl, int type = CV_8
if (reverse)
{
int count = cont_krnl.cols >> 1;
for (int i = 0; i < count; ++i)
{
std::swap(cont_krnl.at<int>(0, i), cont_krnl.at<int>(0, cont_krnl.cols - 1 - i));
@@ -326,9 +337,9 @@ void GPUErode(const oclMat &src, oclMat &dst, oclMat &mat_kernel, Size &ksize, c
//Normalize the result by default
//float alpha = ksize.height * ksize.width;
CV_Assert(src.clCxt == dst.clCxt);
-CV_Assert( (src.cols == dst.cols) &&
-(src.rows == dst.rows) );
-CV_Assert( (src.oclchannels() == dst.oclchannels()) );
+CV_Assert((src.cols == dst.cols) &&
+(src.rows == dst.rows));
+CV_Assert((src.oclchannels() == dst.oclchannels()));
int srcStep = src.step1() / src.oclchannels();
int dstStep = dst.step1() / dst.oclchannels();
@@ -342,19 +353,21 @@ void GPUErode(const oclMat &src, oclMat &dst, oclMat &mat_kernel, Size &ksize, c
size_t localThreads[3] = {16, 16, 1};
size_t globalThreads[3] = {(src.cols + localThreads[0] - 1) / localThreads[0] *localThreads[0], (src.rows + localThreads[1] - 1) / localThreads[1] *localThreads[1], 1};
-if(src.type() == CV_8UC1)
+if (src.type() == CV_8UC1)
{
kernelName = "morph_C1_D0";
globalThreads[0] = ((src.cols + 3) / 4 + localThreads[0] - 1) / localThreads[0] * localThreads[0];
-CV_Assert( localThreads[0]*localThreads[1] * 8 >= (localThreads[0] * 4 + ksize.width - 1) * (localThreads[1] + ksize.height - 1) );
+CV_Assert(localThreads[0]*localThreads[1] * 8 >= (localThreads[0] * 4 + ksize.width - 1) * (localThreads[1] + ksize.height - 1));
}
else
{
kernelName = "morph";
-CV_Assert( localThreads[0]*localThreads[1] * 2 >= (localThreads[0] + ksize.width - 1) * (localThreads[1] + ksize.height - 1) );
+CV_Assert(localThreads[0]*localThreads[1] * 2 >= (localThreads[0] + ksize.width - 1) * (localThreads[1] + ksize.height - 1));
}
char s[64];
-switch(src.type())
+switch (src.type())
{
case CV_8UC1:
sprintf(s, "-D VAL=255");
@@ -373,21 +386,22 @@ void GPUErode(const oclMat &src, oclMat &dst, oclMat &mat_kernel, Size &ksize, c
default:
CV_Error(CV_StsUnsupportedFormat, "unsupported type");
}
char compile_option[128];
sprintf(compile_option, "-D RADIUSX=%d -D RADIUSY=%d -D LSIZE0=%d -D LSIZE1=%d -D ERODE %s", anchor.x, anchor.y, localThreads[0], localThreads[1], s);
vector< pair<size_t, const void *> > args;
-args.push_back( make_pair( sizeof(cl_mem), (void *)&src.data));
-args.push_back( make_pair( sizeof(cl_mem), (void *)&dst.data));
-args.push_back( make_pair( sizeof(cl_int), (void *)&srcOffset_x));
-args.push_back( make_pair( sizeof(cl_int), (void *)&srcOffset_y));
-args.push_back( make_pair( sizeof(cl_int), (void *)&src.cols));
-args.push_back( make_pair( sizeof(cl_int), (void *)&src.rows));
-args.push_back( make_pair( sizeof(cl_int), (void *)&srcStep));
-args.push_back( make_pair( sizeof(cl_int), (void *)&dstStep));
-args.push_back( make_pair( sizeof(cl_mem), (void *)&mat_kernel.data));
-args.push_back( make_pair( sizeof(cl_int), (void *)&src.wholecols));
-args.push_back( make_pair( sizeof(cl_int), (void *)&src.wholerows));
-args.push_back( make_pair( sizeof(cl_int), (void *)&dstOffset));
+args.push_back(make_pair(sizeof(cl_mem), (void *)&src.data));
+args.push_back(make_pair(sizeof(cl_mem), (void *)&dst.data));
+args.push_back(make_pair(sizeof(cl_int), (void *)&srcOffset_x));
+args.push_back(make_pair(sizeof(cl_int), (void *)&srcOffset_y));
+args.push_back(make_pair(sizeof(cl_int), (void *)&src.cols));
+args.push_back(make_pair(sizeof(cl_int), (void *)&src.rows));
+args.push_back(make_pair(sizeof(cl_int), (void *)&srcStep));
+args.push_back(make_pair(sizeof(cl_int), (void *)&dstStep));
+args.push_back(make_pair(sizeof(cl_mem), (void *)&mat_kernel.data));
+args.push_back(make_pair(sizeof(cl_int), (void *)&src.wholecols));
+args.push_back(make_pair(sizeof(cl_int), (void *)&src.wholerows));
+args.push_back(make_pair(sizeof(cl_int), (void *)&dstOffset));
openCLExecuteKernel(clCxt, &filtering_morph, kernelName, globalThreads, localThreads, args, -1, -1, compile_option);
}
@@ -398,9 +412,9 @@ void GPUDilate(const oclMat &src, oclMat &dst, oclMat &mat_kernel, Size &ksize,
//Normalize the result by default
//float alpha = ksize.height * ksize.width;
CV_Assert(src.clCxt == dst.clCxt);
-CV_Assert( (src.cols == dst.cols) &&
-(src.rows == dst.rows) );
-CV_Assert( (src.oclchannels() == dst.oclchannels()) );
+CV_Assert((src.cols == dst.cols) &&
+(src.rows == dst.rows));
+CV_Assert((src.oclchannels() == dst.oclchannels()));
int srcStep = src.step1() / src.oclchannels();
int dstStep = dst.step1() / dst.oclchannels();
@@ -414,19 +428,21 @@ void GPUDilate(const oclMat &src, oclMat &dst, oclMat &mat_kernel, Size &ksize,
size_t localThreads[3] = {16, 16, 1};
size_t globalThreads[3] = {(src.cols + localThreads[0]) / localThreads[0] *localThreads[0], (src.rows + localThreads[1]) / localThreads[1] *localThreads[1], 1};
-if(src.type() == CV_8UC1)
+if (src.type() == CV_8UC1)
{
kernelName = "morph_C1_D0";
globalThreads[0] = ((src.cols + 3) / 4 + localThreads[0]) / localThreads[0] * localThreads[0];
-CV_Assert( localThreads[0]*localThreads[1] * 8 >= (localThreads[0] * 4 + ksize.width - 1) * (localThreads[1] + ksize.height - 1) );
+CV_Assert(localThreads[0]*localThreads[1] * 8 >= (localThreads[0] * 4 + ksize.width - 1) * (localThreads[1] + ksize.height - 1));
}
else
{
kernelName = "morph";
-CV_Assert( localThreads[0]*localThreads[1] * 2 >= (localThreads[0] + ksize.width - 1) * (localThreads[1] + ksize.height - 1) );
+CV_Assert(localThreads[0]*localThreads[1] * 2 >= (localThreads[0] + ksize.width - 1) * (localThreads[1] + ksize.height - 1));
}
char s[64];
-switch(src.type())
+switch (src.type())
{
case CV_8UC1:
sprintf(s, "-D VAL=0");
@@ -445,21 +461,22 @@ void GPUDilate(const oclMat &src, oclMat &dst, oclMat &mat_kernel, Size &ksize,
default:
CV_Error(CV_StsUnsupportedFormat, "unsupported type");
}
char compile_option[128];
sprintf(compile_option, "-D RADIUSX=%d -D RADIUSY=%d -D LSIZE0=%d -D LSIZE1=%d -D DILATE %s", anchor.x, anchor.y, localThreads[0], localThreads[1], s);
vector< pair<size_t, const void *> > args;
-args.push_back( make_pair( sizeof(cl_mem), (void *)&src.data));
-args.push_back( make_pair( sizeof(cl_mem), (void *)&dst.data));
-args.push_back( make_pair( sizeof(cl_int), (void *)&srcOffset_x));
-args.push_back( make_pair( sizeof(cl_int), (void *)&srcOffset_y));
-args.push_back( make_pair( sizeof(cl_int), (void *)&src.cols));
-args.push_back( make_pair( sizeof(cl_int), (void *)&src.rows));
-args.push_back( make_pair( sizeof(cl_int), (void *)&srcStep));
-args.push_back( make_pair( sizeof(cl_int), (void *)&dstStep));
-args.push_back( make_pair( sizeof(cl_mem), (void *)&mat_kernel.data));
-args.push_back( make_pair( sizeof(cl_int), (void *)&src.wholecols));
-args.push_back( make_pair( sizeof(cl_int), (void *)&src.wholerows));
-args.push_back( make_pair( sizeof(cl_int), (void *)&dstOffset));
+args.push_back(make_pair(sizeof(cl_mem), (void *)&src.data));
+args.push_back(make_pair(sizeof(cl_mem), (void *)&dst.data));
+args.push_back(make_pair(sizeof(cl_int), (void *)&srcOffset_x));
+args.push_back(make_pair(sizeof(cl_int), (void *)&srcOffset_y));
+args.push_back(make_pair(sizeof(cl_int), (void *)&src.cols));
+args.push_back(make_pair(sizeof(cl_int), (void *)&src.rows));
+args.push_back(make_pair(sizeof(cl_int), (void *)&srcStep));
+args.push_back(make_pair(sizeof(cl_int), (void *)&dstStep));
+args.push_back(make_pair(sizeof(cl_mem), (void *)&mat_kernel.data));
+args.push_back(make_pair(sizeof(cl_int), (void *)&src.wholecols));
+args.push_back(make_pair(sizeof(cl_int), (void *)&src.wholerows));
+args.push_back(make_pair(sizeof(cl_int), (void *)&dstOffset));
openCLExecuteKernel(clCxt, &filtering_morph, kernelName, globalThreads, localThreads, args, -1, -1, compile_option);
}
@@ -492,6 +509,7 @@ public:
virtual void apply(const oclMat &src, oclMat &dst)
{
Filter2DEngine_GPU::apply(src, dst);
//if (iters > 1)
//{
// Size wholesize;
@@ -506,7 +524,7 @@ public:
// //Filter2DEngine_GPU::apply(dst, morfBuf);
// //morfBuf.copyTo(dst);
//}
-for(int i = 1; i < iters; ++i)
+for (int i = 1; i < iters; ++i)
{
//dst.swap(morfBuf);
Size wholesize;
@@ -541,16 +559,17 @@ namespace
{
void morphOp(int op, const oclMat &src, oclMat &dst, const Mat &_kernel, Point anchor, int iterations, int borderType, const Scalar &borderValue)
{
-if((borderType != cv::BORDER_CONSTANT) || (borderValue != morphologyDefaultBorderValue()))
+if ((borderType != cv::BORDER_CONSTANT) || (borderValue != morphologyDefaultBorderValue()))
{
CV_Error(CV_StsBadArg, "unsupported border type");
}
Mat kernel;
Size ksize = _kernel.data ? _kernel.size() : Size(3, 3);
normalizeAnchor(anchor, ksize);
-if (iterations == 0 || _kernel.rows * _kernel.cols == 1)
+if (iterations == 0 || _kernel.rows *_kernel.cols == 1)
{
src.copyTo(dst);
return;
@@ -572,7 +591,9 @@ void morphOp(int op, const oclMat &src, oclMat &dst, const Mat &_kernel, Point a
iterations = 1;
}
else
+{
kernel = _kernel;
+}
Ptr<FilterEngine_GPU> f = createMorphologyFilter_GPU(op, src.type(), kernel, anchor, iterations);
@@ -580,63 +601,69 @@ void morphOp(int op, const oclMat &src, oclMat &dst, const Mat &_kernel, Point a
}
}
-void cv::ocl::erode( const oclMat &src, oclMat &dst, const Mat &kernel, Point anchor, int iterations,
+void cv::ocl::erode(const oclMat &src, oclMat &dst, const Mat &kernel, Point anchor, int iterations,
int borderType, const Scalar &borderValue)
{
bool allZero = true;
-for(int i = 0; i < kernel.rows * kernel.cols; ++i)
-if(kernel.data[i] != 0)
+for (int i = 0; i < kernel.rows * kernel.cols; ++i)
+if (kernel.data[i] != 0)
+{
allZero = false;
-if(allZero)
+}
+if (allZero)
{
kernel.data[0] = 1;
}
morphOp(MORPH_ERODE, src, dst, kernel, anchor, iterations, borderType, borderValue);
}
-void cv::ocl::dilate( const oclMat &src, oclMat &dst, const Mat &kernel, Point anchor, int iterations,
+void cv::ocl::dilate(const oclMat &src, oclMat &dst, const Mat &kernel, Point anchor, int iterations,
int borderType, const Scalar &borderValue)
{
morphOp(MORPH_DILATE, src, dst, kernel, anchor, iterations, borderType, borderValue);
}
-void cv::ocl::morphologyEx( const oclMat &src, oclMat &dst, int op, const Mat &kernel, Point anchor, int iterations,
+void cv::ocl::morphologyEx(const oclMat &src, oclMat &dst, int op, const Mat &kernel, Point anchor, int iterations,
int borderType, const Scalar &borderValue)
{
oclMat temp;
-switch( op )
+switch (op)
{
case MORPH_ERODE:
-erode( src, dst, kernel, anchor, iterations, borderType, borderValue);
+erode(src, dst, kernel, anchor, iterations, borderType, borderValue);
break;
case MORPH_DILATE:
-dilate( src, dst, kernel, anchor, iterations, borderType, borderValue);
+dilate(src, dst, kernel, anchor, iterations, borderType, borderValue);
break;
case MORPH_OPEN:
-erode( src, temp, kernel, anchor, iterations, borderType, borderValue);
-dilate( temp, dst, kernel, anchor, iterations, borderType, borderValue);
+erode(src, temp, kernel, anchor, iterations, borderType, borderValue);
+dilate(temp, dst, kernel, anchor, iterations, borderType, borderValue);
break;
case CV_MOP_CLOSE:
-dilate( src, temp, kernel, anchor, iterations, borderType, borderValue);
-erode( temp, dst, kernel, anchor, iterations, borderType, borderValue);
+dilate(src, temp, kernel, anchor, iterations, borderType, borderValue);
+erode(temp, dst, kernel, anchor, iterations, borderType, borderValue);
break;
case CV_MOP_GRADIENT:
-erode( src, temp, kernel, anchor, iterations, borderType, borderValue);
-dilate( src, dst, kernel, anchor, iterations, borderType, borderValue);
+erode(src, temp, kernel, anchor, iterations, borderType, borderValue);
+dilate(src, dst, kernel, anchor, iterations, borderType, borderValue);
subtract(dst, temp, dst);
break;
case CV_MOP_TOPHAT:
-erode( src, dst, kernel, anchor, iterations, borderType, borderValue);
-dilate( dst, temp, kernel, anchor, iterations, borderType, borderValue);
+erode(src, dst, kernel, anchor, iterations, borderType, borderValue);
+dilate(dst, temp, kernel, anchor, iterations, borderType, borderValue);
subtract(src, temp, dst);
break;
case CV_MOP_BLACKHAT:
-dilate( src, dst, kernel, anchor, iterations, borderType, borderValue);
-erode( dst, temp, kernel, anchor, iterations, borderType, borderValue);
+dilate(src, dst, kernel, anchor, iterations, borderType, borderValue);
+erode(dst, temp, kernel, anchor, iterations, borderType, borderValue);
subtract(temp, src, dst);
break;
default:
-CV_Error( CV_StsBadArg, "unknown morphological operation" );
+CV_Error(CV_StsBadArg, "unknown morphological operation");
}
}
@@ -668,10 +695,10 @@ void GPUFilter2D(const oclMat &src, oclMat &dst, oclMat &mat_kernel,
Size &ksize, const Point anchor, const int borderType)
{
CV_Assert(src.clCxt == dst.clCxt);
-CV_Assert( (src.cols == dst.cols) &&
-(src.rows == dst.rows) );
-CV_Assert( (src.oclchannels() == dst.oclchannels()) );
-CV_Assert( (borderType != 0) );
+CV_Assert((src.cols == dst.cols) &&
+(src.rows == dst.rows));
+CV_Assert((src.oclchannels() == dst.oclchannels()));
+CV_Assert((borderType != 0));
CV_Assert(ksize.height > 0 && ksize.width > 0 && ((ksize.height & 1) == 1) && ((ksize.width & 1) == 1));
CV_Assert((anchor.x == -1 && anchor.y == -1) || (anchor.x == ksize.width >> 1 && anchor.y == ksize.height >> 1));
Context *clCxt = src.clCxt;
@@ -703,20 +730,20 @@ void GPUFilter2D(const oclMat &src, oclMat &dst, oclMat &mat_kernel,
};
vector< pair<size_t, const void *> > args;
-args.push_back( make_pair( sizeof(cl_mem), (void *)&src.data));
-args.push_back( make_pair( sizeof(cl_int), (void *)&src.step));
-args.push_back( make_pair( sizeof(cl_int), (void *)&src_offset_x));
-args.push_back( make_pair( sizeof(cl_int), (void *)&src_offset_y));
-args.push_back( make_pair( sizeof(cl_mem), (void *)&dst.data));
-args.push_back( make_pair( sizeof(cl_int), (void *)&dst.step));
-args.push_back( make_pair( sizeof(cl_int), (void *)&dst_offset_x));
-args.push_back( make_pair( sizeof(cl_int), (void *)&dst_offset_y));
-args.push_back( make_pair( sizeof(cl_mem), (void *)&mat_kernel.data));
-args.push_back( make_pair( sizeof(cl_int), (void *)&src.cols));
-args.push_back( make_pair( sizeof(cl_int), (void *)&src.rows));
-args.push_back( make_pair( sizeof(cl_int), (void *)&cols));
-args.push_back( make_pair( sizeof(cl_int), (void *)&src.wholecols));
-args.push_back( make_pair( sizeof(cl_int), (void *)&src.wholerows));
+args.push_back(make_pair(sizeof(cl_mem), (void *)&src.data));
+args.push_back(make_pair(sizeof(cl_int), (void *)&src.step));
+args.push_back(make_pair(sizeof(cl_int), (void *)&src_offset_x));
+args.push_back(make_pair(sizeof(cl_int), (void *)&src_offset_y));
+args.push_back(make_pair(sizeof(cl_mem), (void *)&dst.data));
+args.push_back(make_pair(sizeof(cl_int), (void *)&dst.step));
+args.push_back(make_pair(sizeof(cl_int), (void *)&dst_offset_x));
+args.push_back(make_pair(sizeof(cl_int), (void *)&dst_offset_y));
+args.push_back(make_pair(sizeof(cl_mem), (void *)&mat_kernel.data));
+args.push_back(make_pair(sizeof(cl_int), (void *)&src.cols));
+args.push_back(make_pair(sizeof(cl_int), (void *)&src.rows));
+args.push_back(make_pair(sizeof(cl_int), (void *)&cols));
+args.push_back(make_pair(sizeof(cl_int), (void *)&src.wholecols));
+args.push_back(make_pair(sizeof(cl_int), (void *)&src.wholerows));
openCLExecuteKernel(clCxt, &filtering_laplacian, kernelName, globalThreads, localThreads, args, cn, depth);
}
@@ -750,8 +777,10 @@ Ptr<FilterEngine_GPU> cv::ocl::createLinearFilter_GPU(int srcType, int dstType,
void cv::ocl::filter2D(const oclMat &src, oclMat &dst, int ddepth, const Mat &kernel, Point anchor, int borderType)
{
-if( ddepth < 0 )
+if (ddepth < 0)
+{
ddepth = src.depth();
+}
dst.create(src.size(), CV_MAKETYPE(ddepth, src.channels()));
@@ -782,10 +811,10 @@ public:
int cn = src.oclchannels();
//dst.create(src_size, src_type);
-dst = Scalar(0.0);
+//dst = Scalar(0.0);
//dstBuf.create(src_size, src_type);
dstBuf.create(src_size.height + ksize.height - 1, src_size.width, CV_MAKETYPE(CV_32F, cn));
-dstBuf = Scalar(0.0);
+//dstBuf = Scalar(0.0);
normalizeROI(roi, ksize, anchor, src_size);
@@ -829,13 +858,14 @@ void GPUFilterBox_8u_C1R(const oclMat &src, oclMat &dst,
CV_Assert(src.clCxt == dst.clCxt);
CV_Assert((src.cols == dst.cols) &&
-(src.rows == dst.rows) );
+(src.rows == dst.rows));
Context *clCxt = src.clCxt;
string kernelName = "boxFilter_C1_D0";
char btype[30];
-switch(borderType)
+switch (borderType)
{
case 0:
sprintf(btype, "BORDER_CONSTANT");
@@ -890,13 +920,14 @@ void GPUFilterBox_8u_C4R(const oclMat &src, oclMat &dst,
CV_Assert(src.clCxt == dst.clCxt);
CV_Assert((src.cols == dst.cols) &&
-(src.rows == dst.rows) );
+(src.rows == dst.rows));
Context *clCxt = src.clCxt;
string kernelName = "boxFilter_C4_D0";
char btype[30];
-switch(borderType)
+switch (borderType)
{
case 0:
sprintf(btype, "BORDER_CONSTANT");
@@ -951,13 +982,14 @@ void GPUFilterBox_32F_C1R(const oclMat &src, oclMat &dst,
CV_Assert(src.clCxt == dst.clCxt);
CV_Assert((src.cols == dst.cols) &&
-(src.rows == dst.rows) );
+(src.rows == dst.rows));
Context *clCxt = src.clCxt;
string kernelName = "boxFilter_C1_D5";
char btype[30];
-switch(borderType)
+switch (borderType)
{
case 0:
sprintf(btype, "BORDER_CONSTANT");
@@ -1013,13 +1045,14 @@ void GPUFilterBox_32F_C4R(const oclMat &src, oclMat &dst,
CV_Assert(src.clCxt == dst.clCxt);
CV_Assert((src.cols == dst.cols) &&
-(src.rows == dst.rows) );
+(src.rows == dst.rows));
Context *clCxt = src.clCxt;
string kernelName = "boxFilter_C4_D5";
char btype[30];
-switch(borderType)
+switch (borderType)
{
case 0:
sprintf(btype, "BORDER_CONSTANT");
@@ -1095,8 +1128,11 @@ void cv::ocl::boxFilter(const oclMat &src, oclMat &dst, int ddepth, Size ksize,
Point anchor, int borderType)
{
int sdepth = src.depth(), cn = src.channels();
-if( ddepth < 0 )
+if (ddepth < 0)
+{
ddepth = sdepth;
+}
dst.create(src.size(), CV_MAKETYPE(ddepth, cn));
@@ -1161,7 +1197,8 @@ void linearRowFilter_gpu(const oclMat &src, const oclMat &dst, oclMat mat_kernel
string kernelName = "row_filter";
char btype[30];
-switch(bordertype)
+switch (bordertype)
{
case 0:
sprintf(btype, "BORDER_CONSTANT");
@@ -1179,15 +1216,17 @@ void linearRowFilter_gpu(const oclMat &src, const oclMat &dst, oclMat mat_kernel
sprintf(btype, "BORDER_REFLECT_101");
break;
}
char compile_option[128];
sprintf(compile_option, "-D RADIUSX=%d -D LSIZE0=%d -D LSIZE1=%d -D CN=%d -D %s", anchor, localThreads[0], localThreads[1], channels, btype);
size_t globalThreads[3];
globalThreads[1] = (dst.rows + localThreads[1] - 1) / localThreads[1] * localThreads[1];
globalThreads[2] = (1 + localThreads[2] - 1) / localThreads[2] * localThreads[2];
-if(src.depth() == CV_8U)
+if (src.depth() == CV_8U)
{
-switch(channels)
+switch (channels)
{
case 1:
case 3:
@@ -1205,6 +1244,7 @@ void linearRowFilter_gpu(const oclMat &src, const oclMat &dst, oclMat mat_kernel
{
globalThreads[0] = (dst.cols + localThreads[0] - 1) / localThreads[0] * localThreads[0];
}
//sanity checks
CV_Assert(clCxt == dst.clCxt);
CV_Assert(src.cols == dst.cols);
@@ -1232,7 +1272,7 @@ void linearRowFilter_gpu(const oclMat &src, const oclMat &dst, oclMat mat_kernel
args.push_back(make_pair(sizeof(cl_int), (void *)&ridusy));
args.push_back(make_pair(sizeof(cl_mem), (void *)&mat_kernel.data));
-openCLExecuteKernel(clCxt, &filter_sep_row, kernelName, globalThreads, localThreads, args, channels, src.depth(), compile_option);
+openCLExecuteKernel2(clCxt, &filter_sep_row, kernelName, globalThreads, localThreads, args, channels, src.depth(), compile_option, CLFLUSH);
}
Ptr<BaseRowFilter_GPU> cv::ocl::getLinearRowFilter_GPU(int srcType, int /*bufType*/, const Mat &rowKernel, int anchor, int bordertype)
@@ -1289,7 +1329,8 @@ void linearColumnFilter_gpu(const oclMat &src, const oclMat &dst, oclMat mat_ker
string kernelName = "col_filter";
char btype[30];
-switch(bordertype)
+switch (bordertype)
{
case 0:
sprintf(btype, "BORDER_CONSTANT");
@@ -1307,15 +1348,17 @@ void linearColumnFilter_gpu(const oclMat &src, const oclMat &dst, oclMat mat_ker
sprintf(btype, "BORDER_REFLECT_101");
break;
}
char compile_option[256];
size_t globalThreads[3];
globalThreads[1] = (dst.rows + localThreads[1] - 1) / localThreads[1] * localThreads[1];
globalThreads[2] = (1 + localThreads[2] - 1) / localThreads[2] * localThreads[2];
-if(dst.depth() == CV_8U)
+if (dst.depth() == CV_8U)
{
-switch(channels)
+switch (channels)
{
case 1:
globalThreads[0] = (dst.cols + localThreads[0] - 1) / localThreads[0] * localThreads[0];
@@ -1338,7 +1381,8 @@ void linearColumnFilter_gpu(const oclMat &src, const oclMat &dst, oclMat mat_ker
else
{
globalThreads[0] = (dst.cols + localThreads[0] - 1) / localThreads[0] * localThreads[0];
-switch(dst.type())
+switch (dst.type())
{
case CV_32SC1:
sprintf(compile_option, "-D RADIUSY=%d -D LSIZE0=%d -D LSIZE1=%d -D CN=%d -D %s -D GENTYPE_SRC=%s -D GENTYPE_DST=%s -D convert_to_DST=%s",
@@ -1441,20 +1485,25 @@ Ptr<FilterEngine_GPU> cv::ocl::createSeparableLinearFilter_GPU(int srcType, int
void cv::ocl::sepFilter2D(const oclMat &src, oclMat &dst, int ddepth, const Mat &kernelX, const Mat &kernelY, Point anchor, double delta, int bordertype)
{
-if((dst.cols != dst.wholecols) || (dst.rows != dst.wholerows)) //has roi
+if ((dst.cols != dst.wholecols) || (dst.rows != dst.wholerows)) //has roi
{
-if((bordertype & cv::BORDER_ISOLATED) != 0)
+if ((bordertype & cv::BORDER_ISOLATED) != 0)
{
bordertype &= ~cv::BORDER_ISOLATED;
-if((bordertype != cv::BORDER_CONSTANT) &&
+if ((bordertype != cv::BORDER_CONSTANT) &&
(bordertype != cv::BORDER_REPLICATE))
{
CV_Error(CV_StsBadArg, "unsupported border type");
}
}
}
-if( ddepth < 0 )
+if (ddepth < 0)
+{
ddepth = src.depth();
+}
//CV_Assert(ddepth == src.depth());
dst.create(src.size(), CV_MAKETYPE(ddepth, src.channels()));
@@ -1462,12 +1511,12 @@ void cv::ocl::sepFilter2D(const oclMat &src, oclMat &dst, int ddepth, const Mat
f->apply(src, dst);
}
-Ptr<FilterEngine_GPU> cv::ocl::createDerivFilter_GPU( int srcType, int dstType, int dx, int dy, int ksize, int borderType )
+Ptr<FilterEngine_GPU> cv::ocl::createDerivFilter_GPU(int srcType, int dstType, int dx, int dy, int ksize, int borderType)
{
Mat kx, ky;
-getDerivKernels( kx, ky, dx, dy, ksize, false, CV_32F );
+getDerivKernels(kx, ky, dx, dy, ksize, false, CV_32F);
return createSeparableLinearFilter_GPU(srcType, dstType,
-kx, ky, Point(-1, -1), 0, borderType );
+kx, ky, Point(-1, -1), 0, borderType);
}
////////////////////////////////////////////////////////////////////////////////////////////////////
@@ -1482,15 +1531,20 @@ void cv::ocl::Sobel(const oclMat &src, oclMat &dst, int ddepth, int dx, int dy,
// usually the smoothing part is the slowest to compute,
// so try to scale it instead of the faster differenciating part
if (dx == 0)
+{
kx *= scale;
+}
else
+{
ky *= scale;
+}
}
// Mat kx_, ky_;
//ky.convertTo(ky_,CV_32S,1<<8);
//kx.convertTo(kx_,CV_32S,1<<8);
-sepFilter2D(src, dst, ddepth, kx, ky, Point(-1, -1), delta, borderType );
+sepFilter2D(src, dst, ddepth, kx, ky, Point(-1, -1), delta, borderType);
}
void cv::ocl::Scharr(const oclMat &src, oclMat &dst, int ddepth, int dx, int dy, double scale, double delta , int bordertype)
@@ -1498,14 +1552,18 @@ void cv::ocl::Scharr(const oclMat &src, oclMat &dst, int ddepth, int dx, int dy,
Mat kx, ky;
getDerivKernels(kx, ky, dx, dy, -1, false, CV_32F);
-if( scale != 1 )
+if (scale != 1)
{
// usually the smoothing part is the slowest to compute,
// so try to scale it instead of the faster differenciating part
-if( dx == 0 )
+if (dx == 0)
+{
kx *= scale;
+}
else
+{
ky *= scale;
+}
}
// Mat kx_, ky_;
@@ -1517,7 +1575,7 @@ void cv::ocl::Scharr(const oclMat &src, oclMat &dst, int ddepth, int dx, int dy,
void cv::ocl::Laplacian(const oclMat &src, oclMat &dst, int ddepth, int ksize, double scale)
{
-if(src.clCxt -> impl -> double_support == 0 && src.type() == CV_64F)
+if (src.clCxt -> impl -> double_support == 0 && src.type() == CV_64F)
{
CV_Error(CV_GpuNotSupported, "Selected device don't support double\r\n");
return;
@@ -1531,8 +1589,12 @@ void cv::ocl::Laplacian(const oclMat &src, oclMat &dst, int ddepth, int ksize, d
{2, 0, 2, 0, -8, 0, 2, 0, 2}
};
Mat kernel(3, 3, CV_32S, (void *)K[ksize == 3]);
if (scale != 1)
+{
kernel *= scale;
+}
filter2D(src, dst, ddepth, kernel, Point(-1, -1));
}
@@ -1544,25 +1606,38 @@ Ptr<FilterEngine_GPU> cv::ocl::createGaussianFilter_GPU(int type, Size ksize, do
int depth = CV_MAT_DEPTH(type);
if (sigma2 <= 0)
+{
sigma2 = sigma1;
+}
// automatic detection of kernel size from sigma
if (ksize.width <= 0 && sigma1 > 0)
+{
ksize.width = cvRound(sigma1 * (depth == CV_8U ? 3 : 4) * 2 + 1) | 1;
+}
if (ksize.height <= 0 && sigma2 > 0)
+{
ksize.height = cvRound(sigma2 * (depth == CV_8U ? 3 : 4) * 2 + 1) | 1;
+}
-CV_Assert( ksize.width > 0 && ksize.width % 2 == 1 && ksize.height > 0 && ksize.height % 2 == 1 );
+CV_Assert(ksize.width > 0 && ksize.width % 2 == 1 && ksize.height > 0 && ksize.height % 2 == 1);
sigma1 = std::max(sigma1, 0.0);
sigma2 = std::max(sigma2, 0.0);
-Mat kx = getGaussianKernel( ksize.width, sigma1, std::max(depth, CV_32F) );
+Mat kx = getGaussianKernel(ksize.width, sigma1, std::max(depth, CV_32F));
Mat ky;
-if( ksize.height == ksize.width && std::abs(sigma1 - sigma2) < DBL_EPSILON )
+if (ksize.height == ksize.width && std::abs(sigma1 - sigma2) < DBL_EPSILON)
+{
ky = kx;
+}
else
-ky = getGaussianKernel( ksize.height, sigma2, std::max(depth, CV_32F) );
+{
+ky = getGaussianKernel(ksize.height, sigma2, std::max(depth, CV_32F));
+}
//Mat kx_, ky_;
//kx.convertTo(kx_,CV_32S,1<<8);
//ky.convertTo(ky_,CV_32S,1<<8);
@@ -1576,26 +1651,36 @@ void cv::ocl::GaussianBlur(const oclMat &src, oclMat &dst, Size ksize, double si
src.copyTo(dst);
return;
}
-if((dst.cols != dst.wholecols) || (dst.rows != dst.wholerows)) //has roi
+if ((dst.cols != dst.wholecols) || (dst.rows != dst.wholerows)) //has roi
{
-if((bordertype & cv::BORDER_ISOLATED) != 0)
+if ((bordertype & cv::BORDER_ISOLATED) != 0)
{
bordertype &= ~cv::BORDER_ISOLATED;
-if((bordertype != cv::BORDER_CONSTANT) &&
+if ((bordertype != cv::BORDER_CONSTANT) &&
(bordertype != cv::BORDER_REPLICATE))
{
CV_Error(CV_StsBadArg, "unsupported border type");
}
}
}
dst.create(src.size(), src.type());
-if( bordertype != BORDER_CONSTANT )
+if (bordertype != BORDER_CONSTANT)
{
-if( src.rows == 1 )
+if (src.rows == 1)
+{
ksize.height = 1;
-if( src.cols == 1 )
+}
+if (src.cols == 1)
+{
ksize.width = 1;
+}
}
Ptr<FilterEngine_GPU> f = createGaussianFilter_GPU(src.type(), ksize, sigma1, sigma2, bordertype);
f->apply(src, dst);
}
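
For reference, a minimal host-side usage sketch of the function this commit touches, cv::ocl::GaussianBlur. This is an illustrative example only, not part of the commit; it assumes the OpenCV 2.4-era ocl module (opencv2/ocl/ocl.hpp) and a hypothetical input file name.

// usage_sketch.cpp -- illustrative only; assumes the OpenCV 2.4-era ocl module API
#include <opencv2/core/core.hpp>
#include <opencv2/highgui/highgui.hpp>
#include <opencv2/ocl/ocl.hpp>
#include <vector>

int main()
{
    // Older ocl-module versions require explicit device selection before any ocl call.
    std::vector<cv::ocl::Info> oclinfo;
    if (cv::ocl::getDevice(oclinfo) == 0)
        return -1; // no OpenCL device available

    cv::Mat src = cv::imread("input.png", 0); // hypothetical 8-bit grayscale input
    if (src.empty())
        return -1;

    // Upload to the device, run the OpenCL Gaussian blur, download the result.
    cv::ocl::oclMat d_src(src), d_dst;
    cv::ocl::GaussianBlur(d_src, d_dst, cv::Size(5, 5), 1.5, 1.5, cv::BORDER_CONSTANT);

    cv::Mat dst;
    d_dst.download(dst);
    return 0;
}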
