From 6fa41c5a6438e1391c6005ef0aa8154ce40a94a0 Mon Sep 17 00:00:00 2001 From: Ilya Lavrenov <ilya.lavrenov@itseez.com> Date: Thu, 30 Jan 2014 00:12:59 +0400 Subject: [PATCH 1/5] some experiments --- modules/imgproc/src/filter.cpp | 62 ++++++++++++++++++++-- modules/imgproc/src/opencl/filterSepCol.cl | 4 +- modules/imgproc/src/opencl/filterSepRow.cl | 14 +++-- 3 files changed, 66 insertions(+), 14 deletions(-) diff --git a/modules/imgproc/src/filter.cpp b/modules/imgproc/src/filter.cpp index 8c11c62dba..6b767329dc 100644 --- a/modules/imgproc/src/filter.cpp +++ b/modules/imgproc/src/filter.cpp @@ -3313,6 +3313,56 @@ static bool ocl_filter2D( InputArray _src, OutputArray _dst, int ddepth, return kernel.run(2, globalsize, localsize, true); } +template <typename T> +static std::string kerToStr(const Mat & k) +{ + int width = k.cols - 1, depth = k.depth(); + const T * const data = reinterpret_cast<const T *>(k.data); + + std::ostringstream stream; + stream.precision(10); + + if (depth <= CV_8S) + { + for (int i = 0; i < width; ++i) + stream << (int)data[i] << ","; + stream << (int)data[width]; + } + else if (depth == CV_32F) + { + for (int i = 0; i < width; ++i) + stream << data[i] << "f,"; + stream << data[width] << "f"; + } + else + { + for (int i = 0; i < width; ++i) + stream << data[i] << ","; + } + + return stream.str(); +} + +static String kernelToStr(InputArray _kernel, int ddepth = -1) +{ + Mat kernel = _kernel.getMat().reshape(1, 1); + + int depth = kernel.depth(); + if (ddepth < 0) + ddepth = depth; + + if (ddepth != depth) + kernel.convertTo(kernel, ddepth); + + typedef std::string (*func_t)(const Mat &); + static const func_t funcs[] = { kerToStr<uchar>, kerToStr<char>, kerToStr<ushort>,kerToStr<short>, + kerToStr<int>, kerToStr<float>, kerToStr<double>, 0 }; + const func_t func = funcs[depth]; + CV_Assert(func != 0); + + return cv::format(" -D COEFF=%s", func(kernel).c_str()); +} + static bool ocl_sepRowFilter2D( UMat &src, UMat &buf, Mat &kernelX, int anchor, int borderType, bool sync) { int type = src.type(); @@ -3378,6 +3428,7 @@ static bool ocl_sepRowFilter2D( UMat &src, UMat &buf, Mat &kernelX, int anchor, btype, extra_extrapolation ? "EXTRA_EXTRAPOLATION" : "NO_EXTRA_EXTRAPOLATION", isIsolatedBorder ? "BORDER_ISOLATED" : "NO_BORDER_ISOLATED"); + build_options += kernelToStr(kernelX, CV_32F); Size srcWholeSize; Point srcOffset; src.locateROI(srcWholeSize, srcOffset); @@ -3390,7 +3441,8 @@ static bool ocl_sepRowFilter2D( UMat &src, UMat &buf, Mat &kernelX, int anchor, strKernel << "_D" << sdepth; ocl::Kernel kernelRow; - if (!kernelRow.create(strKernel.str().c_str(), cv::ocl::imgproc::filterSepRow_oclsrc, build_options)) + if (!kernelRow.create(strKernel.str().c_str(), cv::ocl::imgproc::filterSepRow_oclsrc, + build_options)) return false; int idxArg = 0; @@ -3409,7 +3461,7 @@ static bool ocl_sepRowFilter2D( UMat &src, UMat &buf, Mat &kernelX, int anchor, idxArg = kernelRow.set(idxArg, buf.cols); idxArg = kernelRow.set(idxArg, buf.rows); idxArg = kernelRow.set(idxArg, radiusY); - idxArg = kernelRow.set(idxArg, ocl::KernelArg::PtrReadOnly(kernelX.getUMat(ACCESS_READ))); +// idxArg = kernelRow.set(idxArg, ocl::KernelArg::PtrReadOnly(kernelX.getUMat(ACCESS_READ))); return kernelRow.run(2, globalsize, localsize, sync); } @@ -3479,6 +3531,8 @@ static bool ocl_sepColFilter2D(UMat &buf, UMat &dst, Mat &kernelY, int anchor, b } } + build_options += kernelToStr(kernelY, CV_32F); + ocl::Kernel kernelCol; if (!kernelCol.create("col_filter", cv::ocl::imgproc::filterSepCol_oclsrc, build_options)) return false; @@ -3494,7 +3548,7 @@ static bool ocl_sepColFilter2D(UMat &buf, UMat &dst, Mat &kernelY, int anchor, b idxArg = kernelCol.set(idxArg, (int)(dst.step / dst.elemSize())); idxArg = kernelCol.set(idxArg, dst.cols); idxArg = kernelCol.set(idxArg, dst.rows); - idxArg = kernelCol.set(idxArg, ocl::KernelArg::PtrReadOnly(kernelY.getUMat(ACCESS_READ))); +// idxArg = kernelCol.set(idxArg, ocl::KernelArg::PtrReadOnly(kernelY.getUMat(ACCESS_READ))); return kernelCol.run(2, globalsize, localsize, sync); } @@ -3508,7 +3562,7 @@ static bool ocl_sepFilter2D( InputArray _src, OutputArray _dst, int ddepth, int type = _src.type(); if ( !( (CV_8UC1 == type || CV_8UC4 == type || CV_32FC1 == type || CV_32FC4 == type) && - (ddepth == CV_32F || ddepth == CV_8U) ) ) + (ddepth == CV_32F || ddepth == CV_8U || ddepth < 0) ) ) return false; int cn = CV_MAT_CN(type); diff --git a/modules/imgproc/src/opencl/filterSepCol.cl b/modules/imgproc/src/opencl/filterSepCol.cl index e99fa6ee03..721eb90097 100644 --- a/modules/imgproc/src/opencl/filterSepCol.cl +++ b/modules/imgproc/src/opencl/filterSepCol.cl @@ -60,6 +60,7 @@ Niko The info above maybe obsolete. ***********************************************************************************/ +__constant float mat_kernel[] = { COEFF }; __kernel __attribute__((reqd_work_group_size(LSIZE0,LSIZE1,1))) void col_filter (__global const GENTYPE_SRC * restrict src, @@ -70,8 +71,7 @@ __kernel __attribute__((reqd_work_group_size(LSIZE0,LSIZE1,1))) void col_filter const int dst_offset_in_pixel, const int dst_step_in_pixel, const int dst_cols, - const int dst_rows, - __constant float * mat_kernel) + const int dst_rows) { int x = get_global_id(0); int y = get_global_id(1); diff --git a/modules/imgproc/src/opencl/filterSepRow.cl b/modules/imgproc/src/opencl/filterSepRow.cl index dfbf300999..efb082e3e4 100644 --- a/modules/imgproc/src/opencl/filterSepRow.cl +++ b/modules/imgproc/src/opencl/filterSepRow.cl @@ -144,6 +144,8 @@ Niko The info above maybe obsolete. ***********************************************************************************/ +__constant float mat_kernel[] = { COEFF }; + __kernel __attribute__((reqd_work_group_size(LSIZE0,LSIZE1,1))) void row_filter_C1_D0 (__global uchar * restrict src, int src_step_in_pixel, @@ -153,8 +155,7 @@ __kernel __attribute__((reqd_work_group_size(LSIZE0,LSIZE1,1))) void row_filter_ __global float * dst, int dst_step_in_pixel, int dst_cols, int dst_rows, - int radiusy, - __constant float * mat_kernel) + int radiusy) { int x = get_global_id(0)<<2; int y = get_global_id(1); @@ -297,8 +298,7 @@ __kernel __attribute__((reqd_work_group_size(LSIZE0,LSIZE1,1))) void row_filter_ __global float4 * dst, int dst_step_in_pixel, int dst_cols, int dst_rows, - int radiusy, - __constant float * mat_kernel) + int radiusy) { int x = get_global_id(0); int y = get_global_id(1); @@ -391,8 +391,7 @@ __kernel __attribute__((reqd_work_group_size(LSIZE0,LSIZE1,1))) void row_filter_ __global float * dst, int dst_step_in_pixel, int dst_cols, int dst_rows, - int radiusy, - __constant float * mat_kernel) + int radiusy) { int x = get_global_id(0); int y = get_global_id(1); @@ -484,8 +483,7 @@ __kernel __attribute__((reqd_work_group_size(LSIZE0,LSIZE1,1))) void row_filter_ __global float4 * dst, int dst_step_in_pixel, int dst_cols, int dst_rows, - int radiusy, - __constant float * mat_kernel) + int radiusy) { int x = get_global_id(0); int y = get_global_id(1); From 6d64907f18a3bbfc91404f22ae1964ba1a9244ae Mon Sep 17 00:00:00 2001 From: Ilya Lavrenov <ilya.lavrenov@itseez.com> Date: Thu, 30 Jan 2014 00:19:18 +0400 Subject: [PATCH 2/5] async --- modules/core/src/ocl.cpp | 2 +- modules/imgproc/src/filter.cpp | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/modules/core/src/ocl.cpp b/modules/core/src/ocl.cpp index 7201fca714..66dbcbbab9 100644 --- a/modules/core/src/ocl.cpp +++ b/modules/core/src/ocl.cpp @@ -3299,7 +3299,7 @@ public: CV_Assert(u->handle != 0 && u->urefcount == 0); if(u->tempUMat()) { - UMatDataAutoLock lock(u); +// UMatDataAutoLock lock(u); if( u->hostCopyObsolete() && u->refcount > 0 ) { cl_command_queue q = (cl_command_queue)Queue::getDefault().ptr(); diff --git a/modules/imgproc/src/filter.cpp b/modules/imgproc/src/filter.cpp index 6b767329dc..9a4a085d73 100644 --- a/modules/imgproc/src/filter.cpp +++ b/modules/imgproc/src/filter.cpp @@ -3595,12 +3595,12 @@ static bool ocl_sepFilter2D( InputArray _src, OutputArray _dst, int ddepth, Size srcSize = src.size(); Size bufSize(srcSize.width, srcSize.height + kernelY.cols - 1); UMat buf; buf.create(bufSize, CV_MAKETYPE(CV_32F, cn)); - if (!ocl_sepRowFilter2D(src, buf, kernelX, anchor.x, borderType, true)) + if (!ocl_sepRowFilter2D(src, buf, kernelX, anchor.x, borderType, false)) return false; _dst.create(srcSize, CV_MAKETYPE(ddepth, cn)); UMat dst = _dst.getUMat(); - return ocl_sepColFilter2D(buf, dst, kernelY, anchor.y, true); + return ocl_sepColFilter2D(buf, dst, kernelY, anchor.y, false); } #endif From 1862dbc5ab95762ecfdf914e01ba784e2674d0f1 Mon Sep 17 00:00:00 2001 From: Ilya Lavrenov <ilya.lavrenov@itseez.com> Date: Thu, 30 Jan 2014 05:48:15 +0400 Subject: [PATCH 3/5] fixed fp problem --- modules/imgproc/src/filter.cpp | 1 + 1 file changed, 1 insertion(+) diff --git a/modules/imgproc/src/filter.cpp b/modules/imgproc/src/filter.cpp index 9a4a085d73..c3a096619e 100644 --- a/modules/imgproc/src/filter.cpp +++ b/modules/imgproc/src/filter.cpp @@ -3330,6 +3330,7 @@ static std::string kerToStr(const Mat & k) } else if (depth == CV_32F) { + stream.setf(std::ios_base::showpoint); for (int i = 0; i < width; ++i) stream << data[i] << "f,"; stream << data[width] << "f"; From 372cdac07cbd305d9d9ac94c8dcc518b98d87c61 Mon Sep 17 00:00:00 2001 From: Ilya Lavrenov <ilya.lavrenov@itseez.com> Date: Thu, 30 Jan 2014 15:23:11 +0400 Subject: [PATCH 4/5] moved kernelToStr to ocl.hpp --- modules/core/include/opencv2/core/ocl.hpp | 1 + modules/core/src/ocl.cpp | 51 ++++++++++++++++++++ modules/imgproc/src/filter.cpp | 57 +---------------------- 3 files changed, 54 insertions(+), 55 deletions(-) diff --git a/modules/core/include/opencv2/core/ocl.hpp b/modules/core/include/opencv2/core/ocl.hpp index 850a2e60ea..4d63e3f001 100644 --- a/modules/core/include/opencv2/core/ocl.hpp +++ b/modules/core/include/opencv2/core/ocl.hpp @@ -575,6 +575,7 @@ protected: CV_EXPORTS const char* convertTypeStr(int sdepth, int ddepth, int cn, char* buf); CV_EXPORTS const char* typeToStr(int t); CV_EXPORTS const char* memopTypeToStr(int t); +CV_EXPORTS String kernelToStr(InputArray _kernel, int ddepth = -1); CV_EXPORTS void getPlatfomsInfo(std::vector<PlatformInfo2>& platform_info); class CV_EXPORTS Image2D diff --git a/modules/core/src/ocl.cpp b/modules/core/src/ocl.cpp index 66dbcbbab9..03abc061b1 100644 --- a/modules/core/src/ocl.cpp +++ b/modules/core/src/ocl.cpp @@ -3814,6 +3814,57 @@ const char* convertTypeStr(int sdepth, int ddepth, int cn, char* buf) return buf; } +template <typename T> +static std::string kerToStr(const Mat & k) +{ + int width = k.cols - 1, depth = k.depth(); + const T * const data = reinterpret_cast<const T *>(k.data); + + std::ostringstream stream; + stream.precision(10); + + if (depth <= CV_8S) + { + for (int i = 0; i < width; ++i) + stream << (int)data[i] << ", "; + stream << (int)data[width]; + } + else if (depth == CV_32F) + { + stream.setf(std::ios_base::showpoint); + for (int i = 0; i < width; ++i) + stream << data[i] << "f, "; + stream << data[width] << "f"; + } + else + { + for (int i = 0; i < width; ++i) + stream << data[i] << ", "; + } + + return stream.str(); +} + +String kernelToStr(InputArray _kernel, int ddepth) +{ + Mat kernel = _kernel.getMat().reshape(1, 1); + + int depth = kernel.depth(); + if (ddepth < 0) + ddepth = depth; + + if (ddepth != depth) + kernel.convertTo(kernel, ddepth); + + typedef std::string (*func_t)(const Mat &); + static const func_t funcs[] = { kerToStr<uchar>, kerToStr<char>, kerToStr<ushort>,kerToStr<short>, + kerToStr<int>, kerToStr<float>, kerToStr<double>, 0 }; + const func_t func = funcs[depth]; + CV_Assert(func != 0); + + return cv::format(" -D COEFF=%s", func(kernel).c_str()); +} + /////////////////////////////////////////////////////////////////////////////////////////////// // deviceVersion has format // OpenCL<space><major_version.minor_version><space><vendor-specific information> diff --git a/modules/imgproc/src/filter.cpp b/modules/imgproc/src/filter.cpp index c3a096619e..09519e74d8 100644 --- a/modules/imgproc/src/filter.cpp +++ b/modules/imgproc/src/filter.cpp @@ -3313,57 +3313,6 @@ static bool ocl_filter2D( InputArray _src, OutputArray _dst, int ddepth, return kernel.run(2, globalsize, localsize, true); } -template <typename T> -static std::string kerToStr(const Mat & k) -{ - int width = k.cols - 1, depth = k.depth(); - const T * const data = reinterpret_cast<const T *>(k.data); - - std::ostringstream stream; - stream.precision(10); - - if (depth <= CV_8S) - { - for (int i = 0; i < width; ++i) - stream << (int)data[i] << ","; - stream << (int)data[width]; - } - else if (depth == CV_32F) - { - stream.setf(std::ios_base::showpoint); - for (int i = 0; i < width; ++i) - stream << data[i] << "f,"; - stream << data[width] << "f"; - } - else - { - for (int i = 0; i < width; ++i) - stream << data[i] << ","; - } - - return stream.str(); -} - -static String kernelToStr(InputArray _kernel, int ddepth = -1) -{ - Mat kernel = _kernel.getMat().reshape(1, 1); - - int depth = kernel.depth(); - if (ddepth < 0) - ddepth = depth; - - if (ddepth != depth) - kernel.convertTo(kernel, ddepth); - - typedef std::string (*func_t)(const Mat &); - static const func_t funcs[] = { kerToStr<uchar>, kerToStr<char>, kerToStr<ushort>,kerToStr<short>, - kerToStr<int>, kerToStr<float>, kerToStr<double>, 0 }; - const func_t func = funcs[depth]; - CV_Assert(func != 0); - - return cv::format(" -D COEFF=%s", func(kernel).c_str()); -} - static bool ocl_sepRowFilter2D( UMat &src, UMat &buf, Mat &kernelX, int anchor, int borderType, bool sync) { int type = src.type(); @@ -3429,7 +3378,7 @@ static bool ocl_sepRowFilter2D( UMat &src, UMat &buf, Mat &kernelX, int anchor, btype, extra_extrapolation ? "EXTRA_EXTRAPOLATION" : "NO_EXTRA_EXTRAPOLATION", isIsolatedBorder ? "BORDER_ISOLATED" : "NO_BORDER_ISOLATED"); - build_options += kernelToStr(kernelX, CV_32F); + build_options += ocl::kernelToStr(kernelX, CV_32F); Size srcWholeSize; Point srcOffset; src.locateROI(srcWholeSize, srcOffset); @@ -3462,7 +3411,6 @@ static bool ocl_sepRowFilter2D( UMat &src, UMat &buf, Mat &kernelX, int anchor, idxArg = kernelRow.set(idxArg, buf.cols); idxArg = kernelRow.set(idxArg, buf.rows); idxArg = kernelRow.set(idxArg, radiusY); -// idxArg = kernelRow.set(idxArg, ocl::KernelArg::PtrReadOnly(kernelX.getUMat(ACCESS_READ))); return kernelRow.run(2, globalsize, localsize, sync); } @@ -3532,7 +3480,7 @@ static bool ocl_sepColFilter2D(UMat &buf, UMat &dst, Mat &kernelY, int anchor, b } } - build_options += kernelToStr(kernelY, CV_32F); + build_options += ocl::kernelToStr(kernelY, CV_32F); ocl::Kernel kernelCol; if (!kernelCol.create("col_filter", cv::ocl::imgproc::filterSepCol_oclsrc, build_options)) @@ -3549,7 +3497,6 @@ static bool ocl_sepColFilter2D(UMat &buf, UMat &dst, Mat &kernelY, int anchor, b idxArg = kernelCol.set(idxArg, (int)(dst.step / dst.elemSize())); idxArg = kernelCol.set(idxArg, dst.cols); idxArg = kernelCol.set(idxArg, dst.rows); -// idxArg = kernelCol.set(idxArg, ocl::KernelArg::PtrReadOnly(kernelY.getUMat(ACCESS_READ))); return kernelCol.run(2, globalsize, localsize, sync); } From 1ab1594d00be987d18bc6a62151a93428a7b919b Mon Sep 17 00:00:00 2001 From: Ilya Lavrenov <ilya.lavrenov@itseez.com> Date: Thu, 30 Jan 2014 17:23:56 +0400 Subject: [PATCH 5/5] workaround --- modules/core/src/ocl.cpp | 11 ++++++----- modules/imgproc/src/opencl/filterSepCol.cl | 1 + modules/imgproc/src/opencl/filterSepRow.cl | 1 + 3 files changed, 8 insertions(+), 5 deletions(-) diff --git a/modules/core/src/ocl.cpp b/modules/core/src/ocl.cpp index 03abc061b1..7334670f5c 100644 --- a/modules/core/src/ocl.cpp +++ b/modules/core/src/ocl.cpp @@ -3826,20 +3826,21 @@ static std::string kerToStr(const Mat & k) if (depth <= CV_8S) { for (int i = 0; i < width; ++i) - stream << (int)data[i] << ", "; - stream << (int)data[width]; + stream << "DIG(" << (int)data[i] << ")"; + stream << "DIG(" << (int)data[width] << ")"; } else if (depth == CV_32F) { stream.setf(std::ios_base::showpoint); for (int i = 0; i < width; ++i) - stream << data[i] << "f, "; - stream << data[width] << "f"; + stream << "DIG(" << data[i] << "f)"; + stream << "DIG(" << data[width] << "f)"; } else { for (int i = 0; i < width; ++i) - stream << data[i] << ", "; + stream << "DIG(" << data[i] << ")"; + stream << "DIG(" << data[width] << ")"; } return stream.str(); diff --git a/modules/imgproc/src/opencl/filterSepCol.cl b/modules/imgproc/src/opencl/filterSepCol.cl index 721eb90097..2657ae9312 100644 --- a/modules/imgproc/src/opencl/filterSepCol.cl +++ b/modules/imgproc/src/opencl/filterSepCol.cl @@ -60,6 +60,7 @@ Niko The info above maybe obsolete. ***********************************************************************************/ +#define DIG(a) a, __constant float mat_kernel[] = { COEFF }; __kernel __attribute__((reqd_work_group_size(LSIZE0,LSIZE1,1))) void col_filter diff --git a/modules/imgproc/src/opencl/filterSepRow.cl b/modules/imgproc/src/opencl/filterSepRow.cl index efb082e3e4..d0623f5905 100644 --- a/modules/imgproc/src/opencl/filterSepRow.cl +++ b/modules/imgproc/src/opencl/filterSepRow.cl @@ -144,6 +144,7 @@ Niko The info above maybe obsolete. ***********************************************************************************/ +#define DIG(a) a, __constant float mat_kernel[] = { COEFF }; __kernel __attribute__((reqd_work_group_size(LSIZE0,LSIZE1,1))) void row_filter_C1_D0