From 6fa41c5a6438e1391c6005ef0aa8154ce40a94a0 Mon Sep 17 00:00:00 2001
From: Ilya Lavrenov <ilya.lavrenov@itseez.com>
Date: Thu, 30 Jan 2014 00:12:59 +0400
Subject: [PATCH 1/5] some experiments

---
 modules/imgproc/src/filter.cpp             | 62 ++++++++++++++++++++--
 modules/imgproc/src/opencl/filterSepCol.cl |  4 +-
 modules/imgproc/src/opencl/filterSepRow.cl | 14 +++--
 3 files changed, 66 insertions(+), 14 deletions(-)

diff --git a/modules/imgproc/src/filter.cpp b/modules/imgproc/src/filter.cpp
index 8c11c62dba..6b767329dc 100644
--- a/modules/imgproc/src/filter.cpp
+++ b/modules/imgproc/src/filter.cpp
@@ -3313,6 +3313,56 @@ static bool ocl_filter2D( InputArray _src, OutputArray _dst, int ddepth,
     return kernel.run(2, globalsize, localsize, true);
 }
 
+template <typename T>
+static std::string kerToStr(const Mat & k)
+{
+    int width = k.cols - 1, depth = k.depth();
+    const T * const data = reinterpret_cast<const T *>(k.data);
+
+    std::ostringstream stream;
+    stream.precision(10);
+
+    if (depth <= CV_8S)
+    {
+        for (int i = 0; i < width; ++i)
+            stream << (int)data[i] << ",";
+        stream << (int)data[width];
+    }
+    else if (depth == CV_32F)
+    {
+        for (int i = 0; i < width; ++i)
+            stream << data[i] << "f,";
+        stream << data[width] << "f";
+    }
+    else
+    {
+        for (int i = 0; i < width; ++i)
+            stream << data[i] << ",";
+    }
+
+    return stream.str();
+}
+
+static String kernelToStr(InputArray _kernel, int ddepth = -1)
+{
+    Mat kernel = _kernel.getMat().reshape(1, 1);
+
+    int depth = kernel.depth();
+    if (ddepth < 0)
+        ddepth = depth;
+
+    if (ddepth != depth)
+        kernel.convertTo(kernel, ddepth);
+
+    typedef std::string (*func_t)(const Mat &);
+    static const func_t funcs[] = { kerToStr<uchar>, kerToStr<char>, kerToStr<ushort>,kerToStr<short>,
+                                    kerToStr<int>, kerToStr<float>, kerToStr<double>, 0 };
+    const func_t func = funcs[depth];
+    CV_Assert(func != 0);
+
+    return cv::format(" -D COEFF=%s", func(kernel).c_str());
+}
+
 static bool ocl_sepRowFilter2D( UMat &src, UMat &buf, Mat &kernelX, int anchor, int borderType, bool sync)
 {
     int type = src.type();
@@ -3378,6 +3428,7 @@ static bool ocl_sepRowFilter2D( UMat &src, UMat &buf, Mat &kernelX, int anchor,
         btype,
         extra_extrapolation ? "EXTRA_EXTRAPOLATION" : "NO_EXTRA_EXTRAPOLATION",
         isIsolatedBorder ? "BORDER_ISOLATED" : "NO_BORDER_ISOLATED");
+    build_options += kernelToStr(kernelX, CV_32F);
 
     Size srcWholeSize; Point srcOffset;
     src.locateROI(srcWholeSize, srcOffset);
@@ -3390,7 +3441,8 @@ static bool ocl_sepRowFilter2D( UMat &src, UMat &buf, Mat &kernelX, int anchor,
         strKernel << "_D" << sdepth;
 
     ocl::Kernel kernelRow;
-    if (!kernelRow.create(strKernel.str().c_str(), cv::ocl::imgproc::filterSepRow_oclsrc, build_options))
+    if (!kernelRow.create(strKernel.str().c_str(), cv::ocl::imgproc::filterSepRow_oclsrc,
+                          build_options))
         return false;
 
     int idxArg = 0;
@@ -3409,7 +3461,7 @@ static bool ocl_sepRowFilter2D( UMat &src, UMat &buf, Mat &kernelX, int anchor,
     idxArg = kernelRow.set(idxArg, buf.cols);
     idxArg = kernelRow.set(idxArg, buf.rows);
     idxArg = kernelRow.set(idxArg, radiusY);
-    idxArg = kernelRow.set(idxArg, ocl::KernelArg::PtrReadOnly(kernelX.getUMat(ACCESS_READ)));
+//    idxArg = kernelRow.set(idxArg, ocl::KernelArg::PtrReadOnly(kernelX.getUMat(ACCESS_READ)));
 
     return kernelRow.run(2, globalsize, localsize, sync);
 }
@@ -3479,6 +3531,8 @@ static bool ocl_sepColFilter2D(UMat &buf, UMat &dst, Mat &kernelY, int anchor, b
         }
     }
 
+    build_options += kernelToStr(kernelY, CV_32F);
+
     ocl::Kernel kernelCol;
     if (!kernelCol.create("col_filter", cv::ocl::imgproc::filterSepCol_oclsrc, build_options))
         return false;
@@ -3494,7 +3548,7 @@ static bool ocl_sepColFilter2D(UMat &buf, UMat &dst, Mat &kernelY, int anchor, b
     idxArg = kernelCol.set(idxArg, (int)(dst.step / dst.elemSize()));
     idxArg = kernelCol.set(idxArg, dst.cols);
     idxArg = kernelCol.set(idxArg, dst.rows);
-    idxArg = kernelCol.set(idxArg, ocl::KernelArg::PtrReadOnly(kernelY.getUMat(ACCESS_READ)));
+//    idxArg = kernelCol.set(idxArg, ocl::KernelArg::PtrReadOnly(kernelY.getUMat(ACCESS_READ)));
 
     return kernelCol.run(2, globalsize, localsize, sync);
 }
@@ -3508,7 +3562,7 @@ static bool ocl_sepFilter2D( InputArray _src, OutputArray _dst, int ddepth,
 
     int type = _src.type();
     if ( !( (CV_8UC1 == type || CV_8UC4 == type || CV_32FC1 == type || CV_32FC4 == type) &&
-            (ddepth == CV_32F || ddepth == CV_8U) ) )
+            (ddepth == CV_32F || ddepth == CV_8U || ddepth < 0) ) )
         return false;
 
     int cn = CV_MAT_CN(type);
diff --git a/modules/imgproc/src/opencl/filterSepCol.cl b/modules/imgproc/src/opencl/filterSepCol.cl
index e99fa6ee03..721eb90097 100644
--- a/modules/imgproc/src/opencl/filterSepCol.cl
+++ b/modules/imgproc/src/opencl/filterSepCol.cl
@@ -60,6 +60,7 @@ Niko
 The info above maybe obsolete.
 ***********************************************************************************/
 
+__constant float mat_kernel[] = { COEFF };
 
 __kernel __attribute__((reqd_work_group_size(LSIZE0,LSIZE1,1))) void col_filter
                         (__global const GENTYPE_SRC * restrict src,
@@ -70,8 +71,7 @@ __kernel __attribute__((reqd_work_group_size(LSIZE0,LSIZE1,1))) void col_filter
                          const int dst_offset_in_pixel,
                          const int dst_step_in_pixel,
                          const int dst_cols,
-                         const int dst_rows,
-                         __constant float * mat_kernel)
+                         const int dst_rows)
 {
     int x = get_global_id(0);
     int y = get_global_id(1);
diff --git a/modules/imgproc/src/opencl/filterSepRow.cl b/modules/imgproc/src/opencl/filterSepRow.cl
index dfbf300999..efb082e3e4 100644
--- a/modules/imgproc/src/opencl/filterSepRow.cl
+++ b/modules/imgproc/src/opencl/filterSepRow.cl
@@ -144,6 +144,8 @@ Niko
 The info above maybe obsolete.
 ***********************************************************************************/
 
+__constant float mat_kernel[] = { COEFF };
+
 __kernel __attribute__((reqd_work_group_size(LSIZE0,LSIZE1,1))) void row_filter_C1_D0
     (__global uchar * restrict src,
      int src_step_in_pixel,
@@ -153,8 +155,7 @@ __kernel __attribute__((reqd_work_group_size(LSIZE0,LSIZE1,1))) void row_filter_
      __global float * dst,
      int dst_step_in_pixel,
      int dst_cols, int dst_rows,
-     int radiusy,
-     __constant float * mat_kernel)
+     int radiusy)
 {
     int x = get_global_id(0)<<2;
     int y = get_global_id(1);
@@ -297,8 +298,7 @@ __kernel __attribute__((reqd_work_group_size(LSIZE0,LSIZE1,1))) void row_filter_
      __global float4 * dst,
      int dst_step_in_pixel,
      int dst_cols, int dst_rows,
-     int radiusy,
-     __constant float * mat_kernel)
+     int radiusy)
 {
     int x = get_global_id(0);
     int y = get_global_id(1);
@@ -391,8 +391,7 @@ __kernel __attribute__((reqd_work_group_size(LSIZE0,LSIZE1,1))) void row_filter_
      __global float * dst,
      int dst_step_in_pixel,
      int dst_cols, int dst_rows,
-     int radiusy,
-     __constant float * mat_kernel)
+     int radiusy)
 {
     int x = get_global_id(0);
     int y = get_global_id(1);
@@ -484,8 +483,7 @@ __kernel __attribute__((reqd_work_group_size(LSIZE0,LSIZE1,1))) void row_filter_
      __global float4 * dst,
      int dst_step_in_pixel,
      int dst_cols, int dst_rows,
-     int radiusy,
-     __constant float * mat_kernel)
+     int radiusy)
 {
     int x = get_global_id(0);
     int y = get_global_id(1);

From 6d64907f18a3bbfc91404f22ae1964ba1a9244ae Mon Sep 17 00:00:00 2001
From: Ilya Lavrenov <ilya.lavrenov@itseez.com>
Date: Thu, 30 Jan 2014 00:19:18 +0400
Subject: [PATCH 2/5] async

---
 modules/core/src/ocl.cpp       | 2 +-
 modules/imgproc/src/filter.cpp | 4 ++--
 2 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/modules/core/src/ocl.cpp b/modules/core/src/ocl.cpp
index 7201fca714..66dbcbbab9 100644
--- a/modules/core/src/ocl.cpp
+++ b/modules/core/src/ocl.cpp
@@ -3299,7 +3299,7 @@ public:
         CV_Assert(u->handle != 0 && u->urefcount == 0);
         if(u->tempUMat())
         {
-            UMatDataAutoLock lock(u);
+//            UMatDataAutoLock lock(u);
             if( u->hostCopyObsolete() && u->refcount > 0 )
             {
                 cl_command_queue q = (cl_command_queue)Queue::getDefault().ptr();
diff --git a/modules/imgproc/src/filter.cpp b/modules/imgproc/src/filter.cpp
index 6b767329dc..9a4a085d73 100644
--- a/modules/imgproc/src/filter.cpp
+++ b/modules/imgproc/src/filter.cpp
@@ -3595,12 +3595,12 @@ static bool ocl_sepFilter2D( InputArray _src, OutputArray _dst, int ddepth,
     Size srcSize = src.size();
     Size bufSize(srcSize.width, srcSize.height + kernelY.cols - 1);
     UMat buf; buf.create(bufSize, CV_MAKETYPE(CV_32F, cn));
-    if (!ocl_sepRowFilter2D(src, buf, kernelX, anchor.x, borderType, true))
+    if (!ocl_sepRowFilter2D(src, buf, kernelX, anchor.x, borderType, false))
         return false;
 
     _dst.create(srcSize, CV_MAKETYPE(ddepth, cn));
     UMat dst = _dst.getUMat();
-    return ocl_sepColFilter2D(buf, dst, kernelY, anchor.y, true);
+    return ocl_sepColFilter2D(buf, dst, kernelY, anchor.y, false);
 }
 
 #endif

From 1862dbc5ab95762ecfdf914e01ba784e2674d0f1 Mon Sep 17 00:00:00 2001
From: Ilya Lavrenov <ilya.lavrenov@itseez.com>
Date: Thu, 30 Jan 2014 05:48:15 +0400
Subject: [PATCH 3/5] fixed fp problem

---
 modules/imgproc/src/filter.cpp | 1 +
 1 file changed, 1 insertion(+)

diff --git a/modules/imgproc/src/filter.cpp b/modules/imgproc/src/filter.cpp
index 9a4a085d73..c3a096619e 100644
--- a/modules/imgproc/src/filter.cpp
+++ b/modules/imgproc/src/filter.cpp
@@ -3330,6 +3330,7 @@ static std::string kerToStr(const Mat & k)
     }
     else if (depth == CV_32F)
     {
+        stream.setf(std::ios_base::showpoint);
         for (int i = 0; i < width; ++i)
             stream << data[i] << "f,";
         stream << data[width] << "f";

From 372cdac07cbd305d9d9ac94c8dcc518b98d87c61 Mon Sep 17 00:00:00 2001
From: Ilya Lavrenov <ilya.lavrenov@itseez.com>
Date: Thu, 30 Jan 2014 15:23:11 +0400
Subject: [PATCH 4/5] moved kernelToStr to ocl.hpp

---
 modules/core/include/opencv2/core/ocl.hpp |  1 +
 modules/core/src/ocl.cpp                  | 51 ++++++++++++++++++++
 modules/imgproc/src/filter.cpp            | 57 +----------------------
 3 files changed, 54 insertions(+), 55 deletions(-)

diff --git a/modules/core/include/opencv2/core/ocl.hpp b/modules/core/include/opencv2/core/ocl.hpp
index 850a2e60ea..4d63e3f001 100644
--- a/modules/core/include/opencv2/core/ocl.hpp
+++ b/modules/core/include/opencv2/core/ocl.hpp
@@ -575,6 +575,7 @@ protected:
 CV_EXPORTS const char* convertTypeStr(int sdepth, int ddepth, int cn, char* buf);
 CV_EXPORTS const char* typeToStr(int t);
 CV_EXPORTS const char* memopTypeToStr(int t);
+CV_EXPORTS String kernelToStr(InputArray _kernel, int ddepth = -1);
 CV_EXPORTS void getPlatfomsInfo(std::vector<PlatformInfo2>& platform_info);
 
 class CV_EXPORTS Image2D
diff --git a/modules/core/src/ocl.cpp b/modules/core/src/ocl.cpp
index 66dbcbbab9..03abc061b1 100644
--- a/modules/core/src/ocl.cpp
+++ b/modules/core/src/ocl.cpp
@@ -3814,6 +3814,57 @@ const char* convertTypeStr(int sdepth, int ddepth, int cn, char* buf)
     return buf;
 }
 
+template <typename T>
+static std::string kerToStr(const Mat & k)
+{
+    int width = k.cols - 1, depth = k.depth();
+    const T * const data = reinterpret_cast<const T *>(k.data);
+
+    std::ostringstream stream;
+    stream.precision(10);
+
+    if (depth <= CV_8S)
+    {
+        for (int i = 0; i < width; ++i)
+            stream << (int)data[i] << ", ";
+        stream << (int)data[width];
+    }
+    else if (depth == CV_32F)
+    {
+        stream.setf(std::ios_base::showpoint);
+        for (int i = 0; i < width; ++i)
+            stream << data[i] << "f, ";
+        stream << data[width] << "f";
+    }
+    else
+    {
+        for (int i = 0; i < width; ++i)
+            stream << data[i] << ", ";
+    }
+
+    return stream.str();
+}
+
+String kernelToStr(InputArray _kernel, int ddepth)
+{
+    Mat kernel = _kernel.getMat().reshape(1, 1);
+
+    int depth = kernel.depth();
+    if (ddepth < 0)
+        ddepth = depth;
+
+    if (ddepth != depth)
+        kernel.convertTo(kernel, ddepth);
+
+    typedef std::string (*func_t)(const Mat &);
+    static const func_t funcs[] = { kerToStr<uchar>, kerToStr<char>, kerToStr<ushort>,kerToStr<short>,
+                                    kerToStr<int>, kerToStr<float>, kerToStr<double>, 0 };
+    const func_t func = funcs[depth];
+    CV_Assert(func != 0);
+
+    return cv::format(" -D COEFF=%s", func(kernel).c_str());
+}
+
 ///////////////////////////////////////////////////////////////////////////////////////////////
 // deviceVersion has format
 //   OpenCL<space><major_version.minor_version><space><vendor-specific information>
diff --git a/modules/imgproc/src/filter.cpp b/modules/imgproc/src/filter.cpp
index c3a096619e..09519e74d8 100644
--- a/modules/imgproc/src/filter.cpp
+++ b/modules/imgproc/src/filter.cpp
@@ -3313,57 +3313,6 @@ static bool ocl_filter2D( InputArray _src, OutputArray _dst, int ddepth,
     return kernel.run(2, globalsize, localsize, true);
 }
 
-template <typename T>
-static std::string kerToStr(const Mat & k)
-{
-    int width = k.cols - 1, depth = k.depth();
-    const T * const data = reinterpret_cast<const T *>(k.data);
-
-    std::ostringstream stream;
-    stream.precision(10);
-
-    if (depth <= CV_8S)
-    {
-        for (int i = 0; i < width; ++i)
-            stream << (int)data[i] << ",";
-        stream << (int)data[width];
-    }
-    else if (depth == CV_32F)
-    {
-        stream.setf(std::ios_base::showpoint);
-        for (int i = 0; i < width; ++i)
-            stream << data[i] << "f,";
-        stream << data[width] << "f";
-    }
-    else
-    {
-        for (int i = 0; i < width; ++i)
-            stream << data[i] << ",";
-    }
-
-    return stream.str();
-}
-
-static String kernelToStr(InputArray _kernel, int ddepth = -1)
-{
-    Mat kernel = _kernel.getMat().reshape(1, 1);
-
-    int depth = kernel.depth();
-    if (ddepth < 0)
-        ddepth = depth;
-
-    if (ddepth != depth)
-        kernel.convertTo(kernel, ddepth);
-
-    typedef std::string (*func_t)(const Mat &);
-    static const func_t funcs[] = { kerToStr<uchar>, kerToStr<char>, kerToStr<ushort>,kerToStr<short>,
-                                    kerToStr<int>, kerToStr<float>, kerToStr<double>, 0 };
-    const func_t func = funcs[depth];
-    CV_Assert(func != 0);
-
-    return cv::format(" -D COEFF=%s", func(kernel).c_str());
-}
-
 static bool ocl_sepRowFilter2D( UMat &src, UMat &buf, Mat &kernelX, int anchor, int borderType, bool sync)
 {
     int type = src.type();
@@ -3429,7 +3378,7 @@ static bool ocl_sepRowFilter2D( UMat &src, UMat &buf, Mat &kernelX, int anchor,
         btype,
         extra_extrapolation ? "EXTRA_EXTRAPOLATION" : "NO_EXTRA_EXTRAPOLATION",
         isIsolatedBorder ? "BORDER_ISOLATED" : "NO_BORDER_ISOLATED");
-    build_options += kernelToStr(kernelX, CV_32F);
+    build_options += ocl::kernelToStr(kernelX, CV_32F);
 
     Size srcWholeSize; Point srcOffset;
     src.locateROI(srcWholeSize, srcOffset);
@@ -3462,7 +3411,6 @@ static bool ocl_sepRowFilter2D( UMat &src, UMat &buf, Mat &kernelX, int anchor,
     idxArg = kernelRow.set(idxArg, buf.cols);
     idxArg = kernelRow.set(idxArg, buf.rows);
     idxArg = kernelRow.set(idxArg, radiusY);
-//    idxArg = kernelRow.set(idxArg, ocl::KernelArg::PtrReadOnly(kernelX.getUMat(ACCESS_READ)));
 
     return kernelRow.run(2, globalsize, localsize, sync);
 }
@@ -3532,7 +3480,7 @@ static bool ocl_sepColFilter2D(UMat &buf, UMat &dst, Mat &kernelY, int anchor, b
         }
     }
 
-    build_options += kernelToStr(kernelY, CV_32F);
+    build_options += ocl::kernelToStr(kernelY, CV_32F);
 
     ocl::Kernel kernelCol;
     if (!kernelCol.create("col_filter", cv::ocl::imgproc::filterSepCol_oclsrc, build_options))
@@ -3549,7 +3497,6 @@ static bool ocl_sepColFilter2D(UMat &buf, UMat &dst, Mat &kernelY, int anchor, b
     idxArg = kernelCol.set(idxArg, (int)(dst.step / dst.elemSize()));
     idxArg = kernelCol.set(idxArg, dst.cols);
     idxArg = kernelCol.set(idxArg, dst.rows);
-//    idxArg = kernelCol.set(idxArg, ocl::KernelArg::PtrReadOnly(kernelY.getUMat(ACCESS_READ)));
 
     return kernelCol.run(2, globalsize, localsize, sync);
 }

From 1ab1594d00be987d18bc6a62151a93428a7b919b Mon Sep 17 00:00:00 2001
From: Ilya Lavrenov <ilya.lavrenov@itseez.com>
Date: Thu, 30 Jan 2014 17:23:56 +0400
Subject: [PATCH 5/5] workaround

---
 modules/core/src/ocl.cpp                   | 11 ++++++-----
 modules/imgproc/src/opencl/filterSepCol.cl |  1 +
 modules/imgproc/src/opencl/filterSepRow.cl |  1 +
 3 files changed, 8 insertions(+), 5 deletions(-)

diff --git a/modules/core/src/ocl.cpp b/modules/core/src/ocl.cpp
index 03abc061b1..7334670f5c 100644
--- a/modules/core/src/ocl.cpp
+++ b/modules/core/src/ocl.cpp
@@ -3826,20 +3826,21 @@ static std::string kerToStr(const Mat & k)
     if (depth <= CV_8S)
     {
         for (int i = 0; i < width; ++i)
-            stream << (int)data[i] << ", ";
-        stream << (int)data[width];
+            stream << "DIG(" << (int)data[i] << ")";
+        stream << "DIG(" << (int)data[width] << ")";
     }
     else if (depth == CV_32F)
     {
         stream.setf(std::ios_base::showpoint);
         for (int i = 0; i < width; ++i)
-            stream << data[i] << "f, ";
-        stream << data[width] << "f";
+            stream << "DIG(" << data[i] << "f)";
+        stream << "DIG(" << data[width] << "f)";
     }
     else
     {
         for (int i = 0; i < width; ++i)
-            stream << data[i] << ", ";
+            stream << "DIG(" << data[i] << ")";
+        stream << "DIG(" << data[width] << ")";
     }
 
     return stream.str();
diff --git a/modules/imgproc/src/opencl/filterSepCol.cl b/modules/imgproc/src/opencl/filterSepCol.cl
index 721eb90097..2657ae9312 100644
--- a/modules/imgproc/src/opencl/filterSepCol.cl
+++ b/modules/imgproc/src/opencl/filterSepCol.cl
@@ -60,6 +60,7 @@ Niko
 The info above maybe obsolete.
 ***********************************************************************************/
 
+#define DIG(a) a,
 __constant float mat_kernel[] = { COEFF };
 
 __kernel __attribute__((reqd_work_group_size(LSIZE0,LSIZE1,1))) void col_filter
diff --git a/modules/imgproc/src/opencl/filterSepRow.cl b/modules/imgproc/src/opencl/filterSepRow.cl
index efb082e3e4..d0623f5905 100644
--- a/modules/imgproc/src/opencl/filterSepRow.cl
+++ b/modules/imgproc/src/opencl/filterSepRow.cl
@@ -144,6 +144,7 @@ Niko
 The info above maybe obsolete.
 ***********************************************************************************/
 
+#define DIG(a) a,
 __constant float mat_kernel[] = { COEFF };
 
 __kernel __attribute__((reqd_work_group_size(LSIZE0,LSIZE1,1))) void row_filter_C1_D0