imgproc: dispatch box_filter

pull/14013/head
Alexander Alekhin 6 years ago
parent ce3c92eb1f
commit 5a01227aa1
  1. 1
      modules/imgproc/CMakeLists.txt
  2. 1303
      modules/imgproc/src/box_filter.dispatch.cpp
  3. 546
      modules/imgproc/src/box_filter.simd.hpp

@ -1,6 +1,7 @@
set(the_description "Image Processing")
ocv_add_dispatched_file(accum SSE4_1 AVX AVX2)
ocv_add_dispatched_file(bilateral_filter SSE2 AVX2)
ocv_add_dispatched_file(box_filter SSE2 SSE4_1 AVX2)
ocv_add_dispatched_file(filter SSE2 SSE4_1 AVX2)
ocv_add_dispatched_file(color_hsv SSE2 SSE4_1 AVX2)
ocv_add_dispatched_file(color_rgb SSE2 SSE4_1 AVX2)

File diff suppressed because it is too large Load Diff

@ -42,21 +42,25 @@
//M*/
#include "precomp.hpp"
#include <vector>
#include "opencv2/core/hal/intrin.hpp"
#include "opencl_kernels_imgproc.hpp"
#include "opencv2/core/openvx/ovx_defs.hpp"
namespace cv {
CV_CPU_OPTIMIZATION_NAMESPACE_BEGIN
// forward declarations
Ptr<BaseRowFilter> getRowSumFilter(int srcType, int sumType, int ksize, int anchor);
Ptr<BaseColumnFilter> getColumnSumFilter(int sumType, int dstType, int ksize, int anchor, double scale);
Ptr<FilterEngine> createBoxFilter(int srcType, int dstType, Size ksize,
Point anchor, bool normalize, int borderType);
namespace cv
{
Ptr<BaseRowFilter> getSqrRowSumFilter(int srcType, int sumType, int ksize, int anchor);
#ifndef CV_CPU_OPTIMIZATION_DECLARATIONS_ONLY
/****************************************************************************************\
Box Filter
\****************************************************************************************/
namespace {
template<typename T, typename ST>
struct RowSum :
public BaseRowFilter
@ -70,6 +74,8 @@ struct RowSum :
virtual void operator()(const uchar* src, uchar* dst, int width, int cn) CV_OVERRIDE
{
CV_INSTRUMENT_REGION();
const T* S = (const T*)src;
ST* D = (ST*)dst;
int i = 0, k, ksz_cn = ksize*cn;
@ -183,6 +189,8 @@ struct ColumnSum :
virtual void operator()(const uchar** src, uchar* dst, int dststep, int count, int width) CV_OVERRIDE
{
CV_INSTRUMENT_REGION();
int i;
ST* SUM;
bool haveScale = scale != 1;
@ -281,6 +289,8 @@ struct ColumnSum<int, uchar> :
virtual void operator()(const uchar** src, uchar* dst, int dststep, int count, int width) CV_OVERRIDE
{
CV_INSTRUMENT_REGION();
int* SUM;
bool haveScale = scale != 1;
double _scale = scale;
@ -408,9 +418,6 @@ struct ColumnSum<int, uchar> :
}
dst += dststep;
}
#if CV_SIMD
vx_cleanup();
#endif
}
double scale;
@ -452,6 +459,8 @@ public BaseColumnFilter
virtual void operator()(const uchar** src, uchar* dst, int dststep, int count, int width) CV_OVERRIDE
{
CV_INSTRUMENT_REGION();
const int ds = divScale;
const int dd = divDelta;
ushort* SUM;
@ -586,9 +595,6 @@ public BaseColumnFilter
}
dst += dststep;
}
#if CV_SIMD
vx_cleanup();
#endif
}
double scale;
@ -616,6 +622,8 @@ struct ColumnSum<int, short> :
virtual void operator()(const uchar** src, uchar* dst, int dststep, int count, int width) CV_OVERRIDE
{
CV_INSTRUMENT_REGION();
int i;
int* SUM;
bool haveScale = scale != 1;
@ -739,9 +747,6 @@ struct ColumnSum<int, short> :
}
dst += dststep;
}
#if CV_SIMD
vx_cleanup();
#endif
}
double scale;
@ -767,6 +772,8 @@ struct ColumnSum<int, ushort> :
virtual void operator()(const uchar** src, uchar* dst, int dststep, int count, int width) CV_OVERRIDE
{
CV_INSTRUMENT_REGION();
int* SUM;
bool haveScale = scale != 1;
double _scale = scale;
@ -888,9 +895,6 @@ struct ColumnSum<int, ushort> :
}
dst += dststep;
}
#if CV_SIMD
vx_cleanup();
#endif
}
double scale;
@ -915,6 +919,8 @@ struct ColumnSum<int, int> :
virtual void operator()(const uchar** src, uchar* dst, int dststep, int count, int width) CV_OVERRIDE
{
CV_INSTRUMENT_REGION();
int* SUM;
bool haveScale = scale != 1;
double _scale = scale;
@ -1022,9 +1028,6 @@ struct ColumnSum<int, int> :
}
dst += dststep;
}
#if CV_SIMD
vx_cleanup();
#endif
}
double scale;
@ -1050,6 +1053,8 @@ struct ColumnSum<int, float> :
virtual void operator()(const uchar** src, uchar* dst, int dststep, int count, int width) CV_OVERRIDE
{
CV_INSTRUMENT_REGION();
int* SUM;
bool haveScale = scale != 1;
double _scale = scale;
@ -1154,9 +1159,6 @@ struct ColumnSum<int, float> :
}
dst += dststep;
}
#if CV_SIMD
vx_cleanup();
#endif
}
double scale;
@ -1164,243 +1166,13 @@ struct ColumnSum<int, float> :
std::vector<int> sum;
};
#ifdef HAVE_OPENCL
} // namespace anon
// OpenCL fast path for a 3x3 box filter over a single-channel 8-bit image,
// implemented by the "boxFilter3x3_8UC1_cols16_rows2" kernel.
//
// The path is taken only when all of the following hold: the default device
// is an Intel one, the source is CV_8UC1 with zero offset and a step that is
// a multiple of 4, its width is a multiple of 16 and its height a multiple
// of 2, the kernel is 3x3 and the (resolved) anchor is (1, 1).
//
// _src       - source image.
// _dst       - destination; created as size x CV_MAKETYPE(ddepth, cn).
// ddepth     - destination depth; a negative value selects the source depth.
// ksize      - kernel size.
// anchor     - kernel anchor; a negative component selects ksize / 2.
// borderType - used directly as an index into the border-name table below.
// normalize  - when true, "-D NORMALIZE" is defined and 1/(w*h) is passed
//              to the kernel as the last argument.
//
// Returns false when a precondition fails, the kernel cannot be built, or the
// created destination has a non-zero offset / unsuitable step; otherwise
// returns the result of running the kernel.
static bool ocl_boxFilter3x3_8UC1( InputArray _src, OutputArray _dst, int ddepth,
                                   Size ksize, Point anchor, int borderType, bool normalize )
{
    const ocl::Device & device = ocl::Device::getDefault();
    const int srcType = _src.type();
    const int srcDepth = CV_MAT_DEPTH(srcType);
    const int channels = CV_MAT_CN(srcType);

    if (ddepth < 0)
        ddepth = srcDepth;
    if (anchor.x < 0)
        anchor.x = ksize.width / 2;
    if (anchor.y < 0)
        anchor.y = ksize.height / 2;

    // Preconditions, checked in the same order as one combined && expression.
    if (!device.isIntel() || srcType != CV_8UC1)
        return false;
    if (_src.offset() != 0 || _src.step() % 4 != 0)
        return false;
    if (_src.cols() % 16 != 0 || _src.rows() % 2 != 0)
        return false;
    if (anchor.x != 1 || anchor.y != 1)
        return false;
    if (ksize.width != 3 || ksize.height != 3)
        return false;

    const float alpha = 1.0f / (ksize.height * ksize.width);
    const Size size = _src.size();

    // One work item per 16 columns x 2 rows, as the kernel name indicates.
    size_t globalsize[2] = { (size_t)(size.width / 16), (size_t)(size.height / 2) };
    // No explicit local size is ever requested for this kernel.
    size_t * const localsize = NULL;

    // Indexed by borderType; slot 3 has no name.
    const char * const borderMap[] = { "BORDER_CONSTANT", "BORDER_REPLICATE", "BORDER_REFLECT", 0, "BORDER_REFLECT_101" };

    char build_opts[1024];
    sprintf(build_opts, "-D %s %s", borderMap[borderType], normalize ? "-D NORMALIZE" : "");

    ocl::Kernel kernel("boxFilter3x3_8UC1_cols16_rows2", cv::ocl::imgproc::boxFilter3x3_oclsrc, build_opts);
    if (kernel.empty())
        return false;

    UMat src = _src.getUMat();
    _dst.create(size, CV_MAKETYPE(ddepth, channels));
    if (_dst.offset() != 0 || _dst.step() % 4 != 0)
        return false;
    UMat dst = _dst.getUMat();

    // Kernel arguments, in positional order.
    int argIndex = kernel.set(0, ocl::KernelArg::PtrReadOnly(src));
    argIndex = kernel.set(argIndex, (int)src.step);
    argIndex = kernel.set(argIndex, ocl::KernelArg::PtrWriteOnly(dst));
    argIndex = kernel.set(argIndex, (int)dst.step);
    argIndex = kernel.set(argIndex, (int)dst.rows);
    argIndex = kernel.set(argIndex, (int)dst.cols);
    if (normalize)
        argIndex = kernel.set(argIndex, (float)alpha);

    return kernel.run(2, globalsize, localsize, false);
}
// General OpenCL box filter, also used for the squared box filter.
//
// Builds one of two kernels and runs it over the source:
//  - "filterSmall" when the default device is an Intel non-CPU device and the
//    kernel is smaller than 5x5 with element size <= 4 bytes, or exactly 5x5
//    with a single channel;
//  - "boxFilter" otherwise, retrying with a smaller work-group size when the
//    built kernel cannot run with the first choice.
//
// _src       - source image.
// _dst       - destination; created as size x CV_MAKETYPE(ddepth, cn).
// ddepth     - destination depth; a negative value selects the source depth.
// ksize      - kernel size.
// anchor     - kernel anchor; a negative component selects ksize / 2.
// borderType - border mode; BORDER_ISOLATED is stripped and handled separately.
// normalize  - when true, " -D NORMALIZE" is defined and 1/(w*h) is passed to
//              the kernel as the last argument.
// sqr        - when true, " -D SQR" is added to the build options.
//
// Returns false when the input is unsupported (more than 4 channels, 64F
// without double support, misaligned offset/step, image smaller than the
// kernel) or the kernel cannot be created; otherwise returns the result of
// running the kernel.
static bool ocl_boxFilter( InputArray _src, OutputArray _dst, int ddepth,
Size ksize, Point anchor, int borderType, bool normalize, bool sqr = false )
// NOTE(review): the next line is a second function signature (getRowSumFilter)
// that the diff view interleaved here; it does not look like part of
// ocl_boxFilter - confirm against the real file.
Ptr<BaseRowFilter> getRowSumFilter(int srcType, int sumType, int ksize, int anchor)
{
const ocl::Device & dev = ocl::Device::getDefault();
int type = _src.type(), sdepth = CV_MAT_DEPTH(type), cn = CV_MAT_CN(type), esz = CV_ELEM_SIZE(type);
bool doubleSupport = dev.doubleFPConfig() > 0;
if (ddepth < 0)
ddepth = sdepth;
// Reject inputs the kernels are not built for.
if (cn > 4 || (!doubleSupport && (sdepth == CV_64F || ddepth == CV_64F)) ||
_src.offset() % esz != 0 || _src.step() % esz != 0)
return false;
if (anchor.x < 0)
anchor.x = ksize.width / 2;
if (anchor.y < 0)
anchor.y = ksize.height / 2;
int computeUnits = ocl::Device::getDefault().maxComputeUnits();
float alpha = 1.0f / (ksize.height * ksize.width);
Size size = _src.size(), wholeSize;
bool isolated = (borderType & BORDER_ISOLATED) != 0;
borderType &= ~BORDER_ISOLATED;
// Working depth is at least CV_32F and never narrower than source or destination.
int wdepth = std::max(CV_32F, std::max(ddepth, sdepth)),
wtype = CV_MAKE_TYPE(wdepth, cn), dtype = CV_MAKE_TYPE(ddepth, cn);
// Indexed by borderType below; slot 3 has no name.
// NOTE(review): nothing in this function checks that borderType is 0, 1, 2 or 4
// before indexing - confirm that callers restrict it.
const char * const borderMap[] = { "BORDER_CONSTANT", "BORDER_REPLICATE", "BORDER_REFLECT", 0, "BORDER_REFLECT_101" };
size_t globalsize[2] = { (size_t)size.width, (size_t)size.height };
size_t localsize_general[2] = { 0, 1 }, * localsize = NULL;
UMat src = _src.getUMat();
// wholeSize is only filled in for the non-isolated case.
if (!isolated)
{
Point ofs;
src.locateROI(wholeSize, ofs);
}
int h = isolated ? size.height : wholeSize.height;
int w = isolated ? size.width : wholeSize.width;
size_t maxWorkItemSizes[32];
ocl::Device::getDefault().maxWorkItemSizes(maxWorkItemSizes);
int tryWorkItems = (int)maxWorkItemSizes[0];
ocl::Kernel kernel;
// Small-kernel path ("filterSmall"); localsize stays NULL here.
if (dev.isIntel() && !(dev.type() & ocl::Device::TYPE_CPU) &&
((ksize.width < 5 && ksize.height < 5 && esz <= 4) ||
(ksize.width == 5 && ksize.height == 5 && cn == 1)))
{
if (w < ksize.width || h < ksize.height)
return false;
// Figure out what vector size to use for loading the pixels.
int pxLoadNumPixels = cn != 1 || size.width % 4 ? 1 : 4;
int pxLoadVecSize = cn * pxLoadNumPixels;
// Figure out how many pixels per work item to compute in X and Y
// directions. Too many and we run out of registers.
int pxPerWorkItemX = 1, pxPerWorkItemY = 1;
if (cn <= 2 && ksize.width <= 4 && ksize.height <= 4)
{
// Largest of 8, 4, 2, 1 that divides the width.
pxPerWorkItemX = size.width % 8 ? size.width % 4 ? size.width % 2 ? 1 : 2 : 4 : 8;
pxPerWorkItemY = size.height % 2 ? 1 : 2;
}
else if (cn < 4 || (ksize.width <= 4 && ksize.height <= 4))
{
pxPerWorkItemX = size.width % 2 ? 1 : 2;
pxPerWorkItemY = size.height % 2 ? 1 : 2;
}
globalsize[0] = size.width / pxPerWorkItemX;
globalsize[1] = size.height / pxPerWorkItemY;
// Need some padding in the private array for pixels
int privDataWidth = roundUp(pxPerWorkItemX + ksize.width - 1, pxLoadNumPixels);
// Make the global size a nice round number so the runtime can pick
// from reasonable choices for the workgroup size
const int wgRound = 256;
globalsize[0] = roundUp(globalsize[0], wgRound);
char build_options[1024], cvt[2][40];
sprintf(build_options, "-D cn=%d "
"-D ANCHOR_X=%d -D ANCHOR_Y=%d -D KERNEL_SIZE_X=%d -D KERNEL_SIZE_Y=%d "
"-D PX_LOAD_VEC_SIZE=%d -D PX_LOAD_NUM_PX=%d "
"-D PX_PER_WI_X=%d -D PX_PER_WI_Y=%d -D PRIV_DATA_WIDTH=%d -D %s -D %s "
"-D PX_LOAD_X_ITERATIONS=%d -D PX_LOAD_Y_ITERATIONS=%d "
"-D srcT=%s -D srcT1=%s -D dstT=%s -D dstT1=%s -D WT=%s -D WT1=%s "
"-D convertToWT=%s -D convertToDstT=%s%s%s -D PX_LOAD_FLOAT_VEC_CONV=convert_%s -D OP_BOX_FILTER",
cn, anchor.x, anchor.y, ksize.width, ksize.height,
pxLoadVecSize, pxLoadNumPixels,
pxPerWorkItemX, pxPerWorkItemY, privDataWidth, borderMap[borderType],
isolated ? "BORDER_ISOLATED" : "NO_BORDER_ISOLATED",
privDataWidth / pxLoadNumPixels, pxPerWorkItemY + ksize.height - 1,
ocl::typeToStr(type), ocl::typeToStr(sdepth), ocl::typeToStr(dtype),
ocl::typeToStr(ddepth), ocl::typeToStr(wtype), ocl::typeToStr(wdepth),
ocl::convertTypeStr(sdepth, wdepth, cn, cvt[0]),
ocl::convertTypeStr(wdepth, ddepth, cn, cvt[1]),
normalize ? " -D NORMALIZE" : "", sqr ? " -D SQR" : "",
ocl::typeToStr(CV_MAKE_TYPE(wdepth, pxLoadVecSize)) //PX_LOAD_FLOAT_VEC_CONV
);
if (!kernel.create("filterSmall", cv::ocl::imgproc::filterSmall_oclsrc, build_options))
return false;
}
else
{
// General path ("boxFilter"): uses an explicit local size and loops until the
// chosen BLOCK_SIZE_X fits the work-group size reported by the built kernel.
localsize = localsize_general;
for ( ; ; )
{
int BLOCK_SIZE_X = tryWorkItems, BLOCK_SIZE_Y = std::min(ksize.height * 10, size.height);
// Shrink the block width for narrow images, grow the block height for tall ones.
while (BLOCK_SIZE_X > 32 && BLOCK_SIZE_X >= ksize.width * 2 && BLOCK_SIZE_X > size.width * 2)
BLOCK_SIZE_X /= 2;
while (BLOCK_SIZE_Y < BLOCK_SIZE_X / 8 && BLOCK_SIZE_Y * computeUnits * 32 < size.height)
BLOCK_SIZE_Y *= 2;
if (ksize.width > BLOCK_SIZE_X || w < ksize.width || h < ksize.height)
return false;
char cvt[2][50];
String opts = format("-D LOCAL_SIZE_X=%d -D BLOCK_SIZE_Y=%d -D ST=%s -D DT=%s -D WT=%s -D convertToDT=%s -D convertToWT=%s"
" -D ANCHOR_X=%d -D ANCHOR_Y=%d -D KERNEL_SIZE_X=%d -D KERNEL_SIZE_Y=%d -D %s%s%s%s%s"
" -D ST1=%s -D DT1=%s -D cn=%d",
BLOCK_SIZE_X, BLOCK_SIZE_Y, ocl::typeToStr(type), ocl::typeToStr(CV_MAKE_TYPE(ddepth, cn)),
ocl::typeToStr(CV_MAKE_TYPE(wdepth, cn)),
ocl::convertTypeStr(wdepth, ddepth, cn, cvt[0]),
ocl::convertTypeStr(sdepth, wdepth, cn, cvt[1]),
anchor.x, anchor.y, ksize.width, ksize.height, borderMap[borderType],
isolated ? " -D BORDER_ISOLATED" : "", doubleSupport ? " -D DOUBLE_SUPPORT" : "",
normalize ? " -D NORMALIZE" : "", sqr ? " -D SQR" : "",
ocl::typeToStr(sdepth), ocl::typeToStr(ddepth), cn);
localsize[0] = BLOCK_SIZE_X;
// Each work group of BLOCK_SIZE_X items covers BLOCK_SIZE_X - (ksize.width - 1) columns.
globalsize[0] = divUp(size.width, BLOCK_SIZE_X - (ksize.width - 1)) * BLOCK_SIZE_X;
globalsize[1] = divUp(size.height, BLOCK_SIZE_Y);
kernel.create("boxFilter", cv::ocl::imgproc::boxFilter_oclsrc, opts);
if (kernel.empty())
return false;
size_t kernelWorkGroupSize = kernel.workGroupSize();
// Done once the requested local size fits; otherwise rebuild with the
// kernel's own work-group size as the new starting point.
if (localsize[0] <= kernelWorkGroupSize)
break;
if (BLOCK_SIZE_X < (int)kernelWorkGroupSize)
return false;
tryWorkItems = (int)kernelWorkGroupSize;
}
}
_dst.create(size, CV_MAKETYPE(ddepth, cn));
UMat dst = _dst.getUMat();
// Kernel arguments, in positional order.
int idxArg = kernel.set(0, ocl::KernelArg::PtrReadOnly(src));
idxArg = kernel.set(idxArg, (int)src.step);
// Position of the source's first element, in elements (X) and rows (Y), derived from its byte offset.
int srcOffsetX = (int)((src.offset % src.step) / src.elemSize());
int srcOffsetY = (int)(src.offset / src.step);
// Readable extent: the ROI itself when isolated, the whole parent size otherwise.
int srcEndX = isolated ? srcOffsetX + size.width : wholeSize.width;
int srcEndY = isolated ? srcOffsetY + size.height : wholeSize.height;
idxArg = kernel.set(idxArg, srcOffsetX);
idxArg = kernel.set(idxArg, srcOffsetY);
idxArg = kernel.set(idxArg, srcEndX);
idxArg = kernel.set(idxArg, srcEndY);
idxArg = kernel.set(idxArg, ocl::KernelArg::WriteOnly(dst));
if (normalize)
idxArg = kernel.set(idxArg, (float)alpha);
return kernel.run(2, globalsize, localsize, false);
}
#endif
}
CV_INSTRUMENT_REGION();
cv::Ptr<cv::BaseRowFilter> cv::getRowSumFilter(int srcType, int sumType, int ksize, int anchor)
{
int sdepth = CV_MAT_DEPTH(srcType), ddepth = CV_MAT_DEPTH(sumType);
CV_Assert( CV_MAT_CN(sumType) == CV_MAT_CN(srcType) );
@ -1434,9 +1206,10 @@ cv::Ptr<cv::BaseRowFilter> cv::getRowSumFilter(int srcType, int sumType, int ksi
}
cv::Ptr<cv::BaseColumnFilter> cv::getColumnSumFilter(int sumType, int dstType, int ksize,
int anchor, double scale)
Ptr<BaseColumnFilter> getColumnSumFilter(int sumType, int dstType, int ksize, int anchor, double scale)
{
CV_INSTRUMENT_REGION();
int sdepth = CV_MAT_DEPTH(sumType), ddepth = CV_MAT_DEPTH(dstType);
CV_Assert( CV_MAT_CN(sumType) == CV_MAT_CN(dstType) );
@ -1474,9 +1247,11 @@ cv::Ptr<cv::BaseColumnFilter> cv::getColumnSumFilter(int sumType, int dstType, i
}
cv::Ptr<cv::FilterEngine> cv::createBoxFilter( int srcType, int dstType, Size ksize,
Point anchor, bool normalize, int borderType )
Ptr<FilterEngine> createBoxFilter(int srcType, int dstType, Size ksize,
Point anchor, bool normalize, int borderType)
{
CV_INSTRUMENT_REGION();
int sdepth = CV_MAT_DEPTH(srcType);
int cn = CV_MAT_CN(srcType), sumType = CV_64F;
if( sdepth == CV_8U && CV_MAT_DEPTH(dstType) == CV_8U &&
@ -1496,199 +1271,12 @@ cv::Ptr<cv::FilterEngine> cv::createBoxFilter( int srcType, int dstType, Size ks
srcType, dstType, sumType, borderType );
}
#ifdef HAVE_OPENVX
namespace cv
{
namespace ovx {
// Size threshold for the OpenVX 3x3 box kernel: reports true for images
// with fewer than 640 * 480 pixels.
template <>
inline bool skipSmallImages<VX_KERNEL_BOX_3x3>(int w, int h)
{
    const int pixelCount = w*h;
    return pixelCount < 640 * 480;
}
} // namespace ovx
// Attempts the box filter through OpenVX (vxuBox3x3).
//
// Only handles: CV_8UC1 source, 8-bit destination, normalized 3x3 kernel with
// the anchor at the centre (or left negative), images of at least 3x3 that
// are not rejected by ovx::skipSmallImages, and BORDER_CONSTANT or
// BORDER_REPLICATE borders. Sub-matrices are accepted only with
// BORDER_ISOLATED.
//
// Returns false when the request is outside that set; returns true after the
// OpenVX call sequence. ivx errors are forwarded to VX_DbgThrow.
static bool openvx_boxfilter(InputArray _src, OutputArray _dst, int ddepth,
Size ksize, Point anchor,
bool normalize, int borderType)
{
if (ddepth < 0)
ddepth = CV_8UC1;
if (_src.type() != CV_8UC1 || ddepth != CV_8U || !normalize ||
_src.cols() < 3 || _src.rows() < 3 ||
ksize.width != 3 || ksize.height != 3 ||
(anchor.x >= 0 && anchor.x != 1) ||
(anchor.y >= 0 && anchor.y != 1) ||
ovx::skipSmallImages<VX_KERNEL_BOX_3x3>(_src.cols(), _src.rows()))
return false;
Mat src = _src.getMat();
if ((borderType & BORDER_ISOLATED) == 0 && src.isSubmatrix())
return false; //Process isolated borders only
// Translate the OpenCV border mode; anything else is unsupported here.
vx_enum border;
switch (borderType & ~BORDER_ISOLATED)
{
case BORDER_CONSTANT:
border = VX_BORDER_CONSTANT;
break;
case BORDER_REPLICATE:
border = VX_BORDER_REPLICATE;
break;
default:
return false;
}
_dst.create(src.size(), CV_8UC1);
Mat dst = _dst.getMat();
try
{
ivx::Context ctx = ovx::getOpenVXContext();
// When source and destination share the same buffer, filter from a copy of the source.
Mat a;
if (dst.data != src.data)
a = src;
else
src.copyTo(a);
// Wrap the existing Mat buffers as OpenVX images (no pixel copy is made in this function).
ivx::Image
ia = ivx::Image::createFromHandle(ctx, VX_DF_IMAGE_U8,
ivx::Image::createAddressing(a.cols, a.rows, 1, (vx_int32)(a.step)), a.data),
ib = ivx::Image::createFromHandle(ctx, VX_DF_IMAGE_U8,
ivx::Image::createAddressing(dst.cols, dst.rows, 1, (vx_int32)(dst.step)), dst.data);
//ATTENTION: VX_CONTEXT_IMMEDIATE_BORDER attribute change could lead to strange issues in multi-threaded environments
//since OpenVX standard says nothing about thread-safety for now
// NOTE(review): if the vxuBox3x3 check throws, the restore call below is
// skipped and the context keeps the temporary border - confirm whether
// that matters for the shared context.
ivx::border_t prevBorder = ctx.immediateBorder();
ctx.setImmediateBorder(border, (vx_uint8)(0));
ivx::IVX_CHECK_STATUS(vxuBox3x3(ctx, ia, ib));
ctx.setImmediateBorder(prevBorder);
}
catch (const ivx::RuntimeError & e)
{
VX_DbgThrow(e.what());
}
catch (const ivx::WrapperError & e)
{
VX_DbgThrow(e.what());
}
return true;
}
}
#endif
#if defined(HAVE_IPP)
namespace cv
{
// Attempts the box filter through the IPP integration wrappers
// (::ipp::iwiFilterBox).
//
// src, dst   - source and destination matrices.
// ksize      - kernel size.
// anchor     - kernel anchor; must pass ippiCheckAnchor for this ksize.
// normalize  - only the normalized filter is handled here.
// borderType - border mode; must be convertible by ippiGetBorder.
//
// Returns true only when the IPP call was issued without an IwException;
// false for every unsupported configuration and when the wrappers are not
// available in this build.
static bool ipp_boxfilter(Mat &src, Mat &dst, Size ksize, Point anchor, bool normalize, int borderType)
{
#ifndef HAVE_IPP_IW
    // No wrappers in this build: nothing to try, just mark the parameters as used.
    CV_UNUSED(src); CV_UNUSED(dst); CV_UNUSED(ksize); CV_UNUSED(anchor); CV_UNUSED(normalize); CV_UNUSED(borderType);
    return false;
#else
    CV_INSTRUMENT_REGION_IPP();

#if IPP_VERSION_X100 < 201801
    {
        const int depth = src.depth();
        const int channels = src.channels();
        const bool is16Bit = (depth == CV_16S || depth == CV_16U);
        const bool is8Bit = (depth == CV_8U);
        const bool kernelAbove5 = (ksize.width > 5 || ksize.height > 5);

        // The SSE42 code path is avoided for 16-bit 3/4-channel images and for
        // 8-bit 3-channel images with kernels larger than 5.
        if (ipp::getIppTopFeatures() == ippCPUID_SSE42 &&
            ((is16Bit && (channels == 3 || channels == 4)) ||
             (is8Bit && channels == 3 && kernelAbove5)))
            return false;

        // Independently of the CPU features, 16-bit 4-channel images and 8-bit
        // single-channel images with kernels larger than 5 are not sent to IPP.
        if ((is16Bit && channels == 4) ||
            (is8Bit && channels == 1 && kernelAbove5))
            return false;
    }
#endif

    if (!normalize || !ippiCheckAnchor(anchor, ksize))
        return false;

    try
    {
        ::ipp::IwiImage srcImage = ippiGetImage(src);
        ::ipp::IwiImage dstImage = ippiGetImage(dst);
        ::ipp::IwiSize kernelSize = ippiGetSize(ksize);
        ::ipp::IwiBorderSize kernelBorder(kernelSize);
        ::ipp::IwiBorderType ippBorder(ippiGetBorder(srcImage, borderType, kernelBorder));
        if (!ippBorder)
            return false;

        CV_INSTRUMENT_FUN_IPP(::ipp::iwiFilterBox, srcImage, dstImage, kernelSize, ::ipp::IwDefault(), ippBorder);
    }
    catch (const ::ipp::IwException &)
    {
        return false;
    }

    return true;
#endif
}
}
#endif
// Box filter entry point.
//
// Tries a series of accelerated back ends in order (OpenCL 3x3 fast path,
// general OpenCL, HAL, OpenVX, IPP) and finally falls back to the
// FilterEngine returned by createBoxFilter().
//
// _src       - source image.
// _dst       - destination; created with the source size and
//              CV_MAKETYPE(ddepth, cn).
// ddepth     - destination depth; a negative value selects the source depth.
// ksize      - kernel size.
// anchor     - kernel anchor.
// normalize  - forwarded to the back ends / createBoxFilter().
// borderType - border mode, optionally combined with BORDER_ISOLATED.
void cv::boxFilter( InputArray _src, OutputArray _dst, int ddepth,
Size ksize, Point anchor,
bool normalize, int borderType )
{
CV_INSTRUMENT_REGION();
// NOTE(review): the CV_OCL_RUN / CALL_HAL / CV_OVX_RUN / CV_IPP_RUN_FAST macros
// are defined elsewhere; presumably each returns from this function when its
// back end succeeds - confirm against their definitions.
CV_OCL_RUN(_dst.isUMat() &&
(borderType == BORDER_REPLICATE || borderType == BORDER_CONSTANT ||
borderType == BORDER_REFLECT || borderType == BORDER_REFLECT_101),
ocl_boxFilter3x3_8UC1(_src, _dst, ddepth, ksize, anchor, borderType, normalize))
CV_OCL_RUN(_dst.isUMat(), ocl_boxFilter(_src, _dst, ddepth, ksize, anchor, borderType, normalize))
Mat src = _src.getMat();
int stype = src.type(), sdepth = CV_MAT_DEPTH(stype), cn = CV_MAT_CN(stype);
if( ddepth < 0 )
ddepth = sdepth;
_dst.create( src.size(), CV_MAKETYPE(ddepth, cn) );
Mat dst = _dst.getMat();
// For a normalized, isolated, non-constant border, a single-row or
// single-column image collapses the kernel along that dimension.
if( borderType != BORDER_CONSTANT && normalize && (borderType & BORDER_ISOLATED) != 0 )
{
if( src.rows == 1 )
ksize.height = 1;
if( src.cols == 1 )
ksize.width = 1;
}
// wsz/ofs describe the source within its parent matrix; they stay at the
// source's own size and a default-constructed offset when BORDER_ISOLATED is set.
Point ofs;
Size wsz(src.cols, src.rows);
if(!(borderType&BORDER_ISOLATED))
src.locateROI( wsz, ofs );
// The four values after cn are the left/top offsets and the remaining
// right/bottom extents of wsz around the source.
CALL_HAL(boxFilter, cv_hal_boxFilter, src.ptr(), src.step, dst.ptr(), dst.step, src.cols, src.rows, sdepth, ddepth, cn,
ofs.x, ofs.y, wsz.width - src.cols - ofs.x, wsz.height - src.rows - ofs.y, ksize.width, ksize.height,
anchor.x, anchor.y, normalize, borderType&~BORDER_ISOLATED);
CV_OVX_RUN(true,
openvx_boxfilter(src, dst, ddepth, ksize, anchor, normalize, borderType))
CV_IPP_RUN_FAST(ipp_boxfilter(src, dst, ksize, anchor, normalize, borderType));
// Generic fallback: the engine receives the border mode without the ISOLATED flag.
borderType = (borderType&~BORDER_ISOLATED);
Ptr<FilterEngine> f = createBoxFilter( src.type(), dst.type(),
ksize, anchor, normalize, borderType );
f->apply( src, dst, wsz, ofs );
}
// Normalized box blur: forwards to boxFilter() with a negative ddepth (so the
// destination keeps the source depth) and normalization switched on.
//
// src        - source image.
// dst        - destination image.
// ksize      - kernel size.
// anchor     - kernel anchor.
// borderType - border mode, passed through unchanged.
void cv::blur( InputArray src, OutputArray dst,
               Size ksize, Point anchor, int borderType )
{
    CV_INSTRUMENT_REGION();

    const int sameDepthAsSource = -1;
    const bool normalized = true;
    boxFilter( src, dst, sameDepthAsSource, ksize, anchor, normalized, borderType );
}
/****************************************************************************************\
Squared Box Filter
\****************************************************************************************/
namespace cv
{
namespace {
template<typename T, typename ST>
struct SqrRowSum :
@ -1703,6 +1291,8 @@ struct SqrRowSum :
virtual void operator()(const uchar* src, uchar* dst, int width, int cn) CV_OVERRIDE
{
CV_INSTRUMENT_REGION();
const T* S = (const T*)src;
ST* D = (ST*)dst;
int i = 0, k, ksz_cn = ksize*cn;
@ -1727,7 +1317,9 @@ struct SqrRowSum :
}
};
static Ptr<BaseRowFilter> getSqrRowSumFilter(int srcType, int sumType, int ksize, int anchor)
} // namespace anon
Ptr<BaseRowFilter> getSqrRowSumFilter(int srcType, int sumType, int ksize, int anchor)
{
int sdepth = CV_MAT_DEPTH(srcType), ddepth = CV_MAT_DEPTH(sumType);
CV_Assert( CV_MAT_CN(sumType) == CV_MAT_CN(srcType) );
@ -1753,52 +1345,6 @@ static Ptr<BaseRowFilter> getSqrRowSumFilter(int srcType, int sumType, int ksize
srcType, sumType));
}
}
// Squared box filter: a row filter from getSqrRowSumFilter() followed by a
// column filter from getColumnSumFilter(), run through a FilterEngine.
// An OpenCL path (ocl_boxFilter with sqr = true) is tried first for UMat
// destinations with at most two dimensions.
//
// _src       - source image.
// _dst       - destination; created with the source size and
//              CV_MAKETYPE(ddepth, cn).
// ddepth     - destination depth; a negative value selects CV_32F for
//              sources below CV_32F and CV_64F otherwise.
// ksize      - kernel size.
// anchor     - kernel anchor.
// normalize  - when true the column filter scales by 1/(ksize.width*ksize.height).
// borderType - border mode, handed to the FilterEngine as given.
void cv::sqrBoxFilter( InputArray _src, OutputArray _dst, int ddepth,
                       Size ksize, Point anchor,
                       bool normalize, int borderType )
{
    CV_INSTRUMENT_REGION();

    const int inputType = _src.type();
    const int inputDepth = CV_MAT_DEPTH(inputType);
    const int channels = CV_MAT_CN(inputType);
    const Size imageSize = _src.size();

    if( ddepth < 0 )
        ddepth = inputDepth >= CV_32F ? CV_64F : CV_32F;

    // A single-row / single-column image collapses the kernel along that
    // dimension for normalized, non-constant borders.
    const bool collapseKernel = normalize && borderType != BORDER_CONSTANT;
    if( collapseKernel && imageSize.height == 1 )
        ksize.height = 1;
    if( collapseKernel && imageSize.width == 1 )
        ksize.width = 1;

    CV_OCL_RUN(_dst.isUMat() && _src.dims() <= 2,
               ocl_boxFilter(_src, _dst, ddepth, ksize, anchor, borderType, normalize, true))

    // 8-bit sources accumulate in 32-bit integers, everything else in doubles.
    const int accDepth = (inputDepth == CV_8U) ? CV_32S : CV_64F;
    const int accType = CV_MAKETYPE( accDepth, channels );
    const int outputType = CV_MAKETYPE( ddepth, channels );

    Mat src = _src.getMat();
    _dst.create( imageSize, outputType );
    Mat dst = _dst.getMat();

    const double columnScale = normalize ? 1./(ksize.width*ksize.height) : 1;
    Ptr<BaseRowFilter> rowStage = getSqrRowSumFilter( inputType, accType, ksize.width, anchor.x );
    Ptr<BaseColumnFilter> columnStage = getColumnSumFilter( accType, outputType,
                                                            ksize.height, anchor.y, columnScale );

    Ptr<FilterEngine> engine = makePtr<FilterEngine>( Ptr<BaseFilter>(), rowStage, columnStage,
                                                      inputType, outputType, accType, borderType );

    // Size and offset of the source within its parent matrix.
    Point roiOffset;
    Size parentSize(src.cols, src.rows);
    src.locateROI( parentSize, roiOffset );

    engine->apply( src, dst, parentSize, roiOffset );
}
/* End of file. */
#endif
CV_CPU_OPTIMIZATION_NAMESPACE_END
} // namespace

Loading…
Cancel
Save