From e1efed1914853b22e6c04f39f2fae2f4cfc4a21e Mon Sep 17 00:00:00 2001
From: Ilya Lavrenov
Date: Fri, 28 Mar 2014 17:41:19 +0400
Subject: [PATCH] added support of ksize >= 5 to cv::Laplacian

ocl_Laplacian5 passes scale/delta to the sumConvert kernel with the same
scalar type the kernel is built with (coeffT), including double when the
working depth is CV_64F, and casts the global work sizes to size_t explicitly.
---
 modules/imgproc/src/deriv.cpp             | 92 +++++++++++++++++++----
 modules/imgproc/src/opencl/laplacian5.cl  | 38 ++++++++++
 modules/imgproc/test/ocl/test_filters.cpp |  2 +-
 3 files changed, 119 insertions(+), 13 deletions(-)
 create mode 100644 modules/imgproc/src/opencl/laplacian5.cl

diff --git a/modules/imgproc/src/deriv.cpp b/modules/imgproc/src/deriv.cpp
index 31a8b1b939..df2f371f55 100644
--- a/modules/imgproc/src/deriv.cpp
+++ b/modules/imgproc/src/deriv.cpp
@@ -11,6 +11,7 @@
 // For Open Source Computer Vision Library
 //
 // Copyright (C) 2000, Intel Corporation, all rights reserved.
+// Copyright (C) 2014, Itseez, Inc, all rights reserved.
 // Third party copyrights are property of their respective owners.
 //
 // Redistribution and use in source and binary forms, with or without modification,
@@ -40,6 +41,8 @@
 //M*/
 
 #include "precomp.hpp"
+#include "opencl_kernels.hpp"
+
 #if defined (HAVE_IPP) && (IPP_VERSION_MAJOR >= 7)
 static IppStatus sts = ippInit();
 #endif
@@ -495,6 +498,70 @@ void cv::Scharr( InputArray _src, OutputArray _dst, int ddepth, int dx, int dy,
     sepFilter2D( _src, _dst, ddepth, kx, ky, Point(-1, -1), delta, borderType );
 }
 
+#ifdef HAVE_OPENCL
+
+namespace cv {
+
+// OpenCL path that cv::Laplacian tries (through CV_OCL_RUN) before its stripe-based filtering.
+// Filters _src twice with sepFilter2D (kd/ks and ks/kd) at depth `depth`, then the
+// "sumConvert" kernel stores convertToDT(scale * (d2x + d2y) + delta) into _dst.
+// Returns false if the working depth is CV_64F without device double support,
+// if the kernel cannot be created, or if running it fails.
+static bool ocl_Laplacian5(InputArray _src, OutputArray _dst,
+                           const Mat & kd, const Mat & ks, double scale, double delta,
+                           int borderType, int depth, int ddepth)
+{
+    int iscale = cvRound(scale), idelta = cvRound(delta);
+    bool doubleSupport = ocl::Device::getDefault().doubleFPConfig() > 0,
+            floatCoeff = std::fabs(delta - idelta) > DBL_EPSILON || std::fabs(scale - iscale) > DBL_EPSILON;
+    // integer arithmetic is enough unless scale or delta has a fractional part
+    int cn = _src.channels(), wdepth = std::max(depth, floatCoeff ? CV_32F : CV_32S), kercn = 1;
+
+    if (!doubleSupport && wdepth == CV_64F)
+        return false;
+
+    char cvt[2][40];
+    ocl::Kernel k("sumConvert", ocl::imgproc::laplacian5_oclsrc,
+                  format("-D srcT=%s -D WT=%s -D dstT=%s -D coeffT=%s -D wdepth=%d "
+                         "-D convertToWT=%s -D convertToDT=%s%s",
+                         ocl::typeToStr(CV_MAKE_TYPE(depth, kercn)),
+                         ocl::typeToStr(CV_MAKE_TYPE(wdepth, kercn)),
+                         ocl::typeToStr(CV_MAKE_TYPE(ddepth, kercn)),
+                         ocl::typeToStr(wdepth), wdepth,
+                         ocl::convertTypeStr(depth, wdepth, kercn, cvt[0]),
+                         ocl::convertTypeStr(wdepth, ddepth, kercn, cvt[1]),
+                         doubleSupport ? " -D DOUBLE_SUPPORT" : ""));
+    if (k.empty())
+        return false;
+
+    UMat d2x, d2y;
+    sepFilter2D(_src, d2x, depth, kd, ks, Point(-1, -1), 0, borderType);
+    sepFilter2D(_src, d2y, depth, ks, kd, Point(-1, -1), 0, borderType);
+
+    UMat dst = _dst.getUMat();
+
+    ocl::KernelArg d2xarg = ocl::KernelArg::ReadOnlyNoSize(d2x),
+            d2yarg = ocl::KernelArg::ReadOnlyNoSize(d2y),
+            dstarg = ocl::KernelArg::WriteOnly(dst, cn, kercn);
+
+    // scale and delta are declared as coeffT in the kernel, i.e. the scalar type of
+    // wdepth (int, float or double), so the host-side arguments must have that exact type
+    if (wdepth == CV_64F)
+        k.args(d2xarg, d2yarg, dstarg, scale, delta);
+    else if (wdepth == CV_32F)
+        k.args(d2xarg, d2yarg, dstarg, (float)scale, (float)delta);
+    else
+        k.args(d2xarg, d2yarg, dstarg, iscale, idelta);
+
+    // one work-item per destination element; the casts avoid an int -> size_t
+    // narrowing conversion inside the braced initializer
+    size_t globalsize[] = { (size_t)dst.cols * cn / kercn, (size_t)dst.rows };
+    return k.run(2, globalsize, NULL, false);
+}
+
+}
+
+#endif
 
 void cv::Laplacian( InputArray _src, OutputArray _dst, int ddepth, int ksize,
                     double scale, double delta, int borderType )
@@ -531,27 +598,28 @@ void cv::Laplacian( InputArray _src, OutputArray _dst, int ddepth, int ksize,
     }
     else
     {
-        Mat src = _src.getMat(), dst = _dst.getMat();
-        const size_t STRIPE_SIZE = 1 << 14;
-
-        int depth = src.depth();
-        int ktype = std::max(CV_32F, std::max(ddepth, depth));
-        int wdepth = depth == CV_8U && ksize <= 5 ? CV_16S : depth <= CV_32F ? CV_32F : CV_64F;
-        int wtype = CV_MAKETYPE(wdepth, src.channels());
+        int ktype = std::max(CV_32F, std::max(ddepth, sdepth));
+        int wdepth = sdepth == CV_8U && ksize <= 5 ? CV_16S : sdepth <= CV_32F ? CV_32F : CV_64F;
+        int wtype = CV_MAKETYPE(wdepth, cn);
         Mat kd, ks;
         getSobelKernels( kd, ks, 2, 0, ksize, false, ktype );
-        int dtype = CV_MAKETYPE(ddepth, src.channels());
 
-        int dy0 = std::min(std::max((int)(STRIPE_SIZE/(getElemSize(src.type())*src.cols)), 1), src.rows);
-        Ptr<FilterEngine> fx = createSeparableLinearFilter(src.type(),
+        CV_OCL_RUN(_dst.isUMat(),
+                   ocl_Laplacian5(_src, _dst, kd, ks, scale,
+                                  delta, borderType, wdepth, ddepth))
+
+        const size_t STRIPE_SIZE = 1 << 14;
+        Ptr<FilterEngine> fx = createSeparableLinearFilter(stype,
             wtype, kd, ks, Point(-1,-1), 0, borderType, borderType, Scalar() );
-        Ptr<FilterEngine> fy = createSeparableLinearFilter(src.type(),
+        Ptr<FilterEngine> fy = createSeparableLinearFilter(stype,
             wtype, ks, kd, Point(-1,-1), 0, borderType, borderType, Scalar() );
 
+        Mat src = _src.getMat(), dst = _dst.getMat();
         int y = fx->start(src), dsty = 0, dy = 0;
         fy->start(src);
         const uchar* sptr = src.data + y*src.step;
 
+        int dy0 = std::min(std::max((int)(STRIPE_SIZE/(CV_ELEM_SIZE(stype)*src.cols)), 1), src.rows);
         Mat d2x( dy0 + kd.rows - 1, src.cols, wtype );
         Mat d2y( dy0 + kd.rows - 1, src.cols, wtype );
 
@@ -564,7 +632,7 @@ void cv::Laplacian( InputArray _src, OutputArray _dst, int ddepth, int ksize,
             Mat dstripe = dst.rowRange(dsty, dsty + dy);
             d2x.rows = d2y.rows = dy; // modify the headers, which should work
             d2x += d2y;
-            d2x.convertTo( dstripe, dtype, scale, delta );
+            d2x.convertTo( dstripe, ddepth, scale, delta );
         }
     }
 }
diff --git a/modules/imgproc/src/opencl/laplacian5.cl b/modules/imgproc/src/opencl/laplacian5.cl
new file mode 100644
index 0000000000..3e15e097cb
--- /dev/null
+++ b/modules/imgproc/src/opencl/laplacian5.cl
@@ -0,0 +1,38 @@
+// This file is part of OpenCV project.
+// It is subject to the license terms in the LICENSE file found in the top-level directory
+// of this distribution and at http://opencv.org/license.html.
+
+// Copyright (C) 2014, Itseez, Inc., all rights reserved.
+// Third party copyrights are property of their respective owners.
+
+// convertToWT / convertToDT may be passed as `noconvert` (presumably when no conversion is needed); expand it to nothing
+#define noconvert
+
+// Per element: dst = convertToDT(scale * (src1 + src2) + delta), evaluated in the working type WT.
+// srcT, WT, dstT, coeffT, wdepth, convertToWT and convertToDT are supplied as build options;
+// wdepth <= 4 selects the integer mad24 form, any other wdepth uses mad.
+__kernel void sumConvert(__global const uchar * src1ptr, int src1_step, int src1_offset,
+                         __global const uchar * src2ptr, int src2_step, int src2_offset,
+                         __global uchar * dstptr, int dst_step, int dst_offset, int dst_rows, int dst_cols,
+                         coeffT scale, coeffT delta)
+{
+    int x = get_global_id(0);
+    int y = get_global_id(1);
+
+    if (y < dst_rows && x < dst_cols)
+    {
+        int src1_index = mad24(y, src1_step, mad24(x, (int)sizeof(srcT), src1_offset));
+        int src2_index = mad24(y, src2_step, mad24(x, (int)sizeof(srcT), src2_offset));
+        int dst_index = mad24(y, dst_step, mad24(x, (int)sizeof(dstT), dst_offset));
+
+        __global const srcT * src1 = (__global const srcT *)(src1ptr + src1_index);
+        __global const srcT * src2 = (__global const srcT *)(src2ptr + src2_index);
+        __global dstT * dst = (__global dstT *)(dstptr + dst_index);
+
+#if wdepth <= 4
+        dst[0] = convertToDT( mad24((WT)(scale), convertToWT(src1[0]) + convertToWT(src2[0]), (WT)(delta)) );
+#else
+        dst[0] = convertToDT( mad((WT)(scale), convertToWT(src1[0]) + convertToWT(src2[0]), (WT)(delta)) );
+#endif
+    }
+}
diff --git a/modules/imgproc/test/ocl/test_filters.cpp b/modules/imgproc/test/ocl/test_filters.cpp
index 09b215108e..d2f5085168 100644
--- a/modules/imgproc/test/ocl/test_filters.cpp
+++ b/modules/imgproc/test/ocl/test_filters.cpp
@@ -316,7 +316,7 @@ OCL_INSTANTIATE_TEST_CASE_P(Filter, Bilateral, Combine(
 
 OCL_INSTANTIATE_TEST_CASE_P(Filter, LaplacianTest, Combine(
                             FILTER_TYPES,
-                            Values(1, 3), // kernel size
+                            Values(1, 3, 5), // kernel size
                             Values(Size(0, 0)), // not used
                             FILTER_BORDER_SET_NO_WRAP_NO_ISOLATED,
                             Values(1.0, 0.2, 3.0), // kernel scale