From 1f9ab2e0cac2d7bfbd2330d5a620eac4fd3e191e Mon Sep 17 00:00:00 2001
From: Ilya Lavrenov <ilya.lavrenov@itseez.com>
Date: Thu, 31 Oct 2013 23:23:56 +0400
Subject: [PATCH] fixed warnings in ocl kernels

---
 modules/ocl/src/opencl/arithm_bitwise_not.cl  |   2 -
 modules/ocl/src/opencl/arithm_cartToPolar.cl  |  22 +-
 modules/ocl/src/opencl/arithm_minMax.cl       | 200 ++++++++---------
 modules/ocl/src/opencl/arithm_minMaxLoc.cl    | 206 +++++++++---------
 .../ocl/src/opencl/arithm_minMaxLoc_mask.cl   | 182 ++++++++--------
 modules/ocl/src/opencl/arithm_nonzero.cl      |   8 +-
 modules/ocl/src/opencl/arithm_phase.cl        |  18 +-
 modules/ocl/src/opencl/arithm_polarToCart.cl  |  13 +-
 modules/ocl/src/opencl/arithm_sum.cl          |  58 ++---
 modules/ocl/src/opencl/brute_force_match.cl   |  18 +-
 modules/ocl/src/opencl/cvt_color.cl           |   6 +-
 modules/ocl/src/opencl/haarobjectdetect.cl    |   1 -
 .../src/opencl/haarobjectdetect_scaled2.cl    |   1 -
 modules/ocl/src/opencl/imgproc_calcHarris.cl  |   9 +-
 .../ocl/src/opencl/imgproc_calcMinEigenVal.cl |   9 +-
 modules/ocl/src/opencl/imgproc_canny.cl       |  12 +-
 modules/ocl/src/opencl/imgproc_clahe.cl       |  12 +-
 modules/ocl/src/opencl/imgproc_integral.cl    |  24 +-
 .../ocl/src/opencl/imgproc_integral_sum.cl    |  16 +-
 modules/ocl/src/opencl/imgproc_median.cl      |  32 +--
 modules/ocl/src/opencl/imgproc_remap.cl       |  18 +-
 modules/ocl/src/opencl/imgproc_resize.cl      |  24 +-
 modules/ocl/src/opencl/imgproc_threshold.cl   |  20 +-
 modules/ocl/src/opencl/imgproc_warpAffine.cl  |   8 +-
 .../ocl/src/opencl/imgproc_warpPerspective.cl |  34 +--
 modules/ocl/src/opencl/kernel_sort_by_key.cl  |   1 -
 .../src/opencl/kernel_stablesort_by_key.cl    |  21 +-
 modules/ocl/src/opencl/knearest.cl            |  73 +++----
 modules/ocl/src/opencl/match_template.cl      |   6 +-
 modules/ocl/src/opencl/meanShift.cl           |   3 +-
 modules/ocl/src/opencl/moments.cl             |  24 +-
 modules/ocl/src/opencl/objdetect_hog.cl       |   4 +-
 .../ocl/src/opencl/optical_flow_farneback.cl  |   4 +-
 modules/ocl/src/opencl/pyr_down.cl            |  12 +-
 modules/ocl/src/opencl/pyrlk.cl               |  29 +--
 modules/ocl/src/opencl/split_mat.cl           |   4 +-
 modules/ocl/src/opencl/stereobm.cl            |  14 +-
 modules/ocl/src/opencl/stereobp.cl            |   8 +-
 modules/ocl/src/opencl/stereocsbp.cl          |  26 ++-
 39 files changed, 578 insertions(+), 604 deletions(-)

diff --git a/modules/ocl/src/opencl/arithm_bitwise_not.cl b/modules/ocl/src/opencl/arithm_bitwise_not.cl
index e5b46c9368..5bc1839d6a 100644
--- a/modules/ocl/src/opencl/arithm_bitwise_not.cl
+++ b/modules/ocl/src/opencl/arithm_bitwise_not.cl
@@ -67,7 +67,6 @@ __kernel void arithm_bitwise_not_D0 (__global uchar *src1, int src1_step, int sr
         x = x << 2;
         int src1_index = mad24(y, src1_step, x + src1_offset);
 
-        int dst_start  = mad24(y, dst_step, dst_offset);
         int dst_end    = mad24(y, dst_step, dst_offset + dst_step1);
         int dst_index  = mad24(y, dst_step, dst_offset + x);
 
@@ -97,7 +96,6 @@ __kernel void arithm_bitwise_not_D1 (__global char *src1, int src1_step, int src
         x = x << 2;
         int src1_index = mad24(y, src1_step, x + src1_offset);
 
-        int dst_start  = mad24(y, dst_step, dst_offset);
         int dst_end    = mad24(y, dst_step, dst_offset + dst_step1);
         int dst_index  = mad24(y, dst_step, dst_offset + x);
 
diff --git a/modules/ocl/src/opencl/arithm_cartToPolar.cl b/modules/ocl/src/opencl/arithm_cartToPolar.cl
index 6c779ead90..e37818c40f 100644
--- a/modules/ocl/src/opencl/arithm_cartToPolar.cl
+++ b/modules/ocl/src/opencl/arithm_cartToPolar.cl
@@ -44,14 +44,18 @@
 //M*/
 
 #if defined (DOUBLE_SUPPORT)
-#pragma OPENCL EXTENSION cl_khr_fp64:enable
+    #pragma OPENCL EXTENSION cl_khr_fp64:enable
+    #define CV_PI   3.1415926535897932384626433832795
+    #ifndef DBL_EPSILON
+        #define DBL_EPSILON 0x1.0p-52
+    #endif
+#else
+    #define CV_PI   3.1415926535897932384626433832795f
+    #ifndef DBL_EPSILON
+        #define DBL_EPSILON 0x1.0p-52f
+    #endif
 #endif
 
-#define CV_PI   3.1415926535897932384626433832795
-
-#ifndef DBL_EPSILON
-#define DBL_EPSILON 0x1.0p-52
-#endif
 
 __kernel void arithm_cartToPolar_D5 (__global float *src1, int src1_step, int src1_offset,
                                      __global float *src2, int src2_step, int src2_offset,
@@ -82,9 +86,9 @@ __kernel void arithm_cartToPolar_D5 (__global float *src1, int src1_step, int sr
         float tmp = y >= 0 ? 0 : CV_PI*2;
         tmp = x < 0 ? CV_PI : tmp;
 
-        float tmp1 = y >= 0 ? CV_PI*0.5 : CV_PI*1.5;
-        cartToPolar = y2 <= x2 ? x*y/(x2 + 0.28f*y2 + (float)DBL_EPSILON)  + tmp :
-                                 tmp1 - x*y/(y2 + 0.28f*x2 + (float)DBL_EPSILON);
+        float tmp1 = y >= 0 ? CV_PI*0.5f : CV_PI*1.5f;
+        cartToPolar = y2 <= x2 ? x*y/(x2 + 0.28f*y2 + DBL_EPSILON)  + tmp :
+                                 tmp1 - x*y/(y2 + 0.28f*x2 + DBL_EPSILON);
 
         cartToPolar = angInDegree == 0 ? cartToPolar : cartToPolar * (float)(180/CV_PI);
 
diff --git a/modules/ocl/src/opencl/arithm_minMax.cl b/modules/ocl/src/opencl/arithm_minMax.cl
index 35f4cdd700..33a39d83f3 100644
--- a/modules/ocl/src/opencl/arithm_minMax.cl
+++ b/modules/ocl/src/opencl/arithm_minMax.cl
@@ -66,53 +66,53 @@
 __kernel void arithm_op_minMax(__global const T * src, __global T * dst,
     int cols, int invalid_cols, int offset, int elemnum, int groupnum)
 {
-   unsigned int lid = get_local_id(0);
-   unsigned int gid = get_group_id(0);
-   unsigned int id = get_global_id(0);
-
-   unsigned int idx = offset + id + (id / cols) * invalid_cols;
-
-   __local T localmem_max[128], localmem_min[128];
-   T minval = (T)(MAX_VAL), maxval = (T)(MIN_VAL), temp;
-
-   for (int grainSize = groupnum << 8; id < elemnum; id += grainSize)
-   {
-       idx = offset + id + (id / cols) * invalid_cols;
-       temp = src[idx];
-       minval = min(minval, temp);
-       maxval = max(maxval, temp);
-   }
-
-   if (lid > 127)
-   {
-       localmem_min[lid - 128] = minval;
-       localmem_max[lid - 128] = maxval;
-   }
-   barrier(CLK_LOCAL_MEM_FENCE);
-
-   if (lid < 128)
-   {
-       localmem_min[lid] = min(minval, localmem_min[lid]);
-       localmem_max[lid] = max(maxval, localmem_max[lid]);
-   }
-   barrier(CLK_LOCAL_MEM_FENCE);
-
-   for (int lsize = 64; lsize > 0; lsize >>= 1)
-   {
-       if (lid < lsize)
-       {
-           int lid2 = lsize + lid;
-           localmem_min[lid] = min(localmem_min[lid], localmem_min[lid2]);
-           localmem_max[lid] = max(localmem_max[lid], localmem_max[lid2]);
-       }
-       barrier(CLK_LOCAL_MEM_FENCE);
-   }
-
-   if (lid == 0)
-   {
-       dst[gid] = localmem_min[0];
-       dst[gid + groupnum] = localmem_max[0];
-   }
+    int lid = get_local_id(0);
+    int gid = get_group_id(0);
+    int id = get_global_id(0);
+
+    int idx = offset + id + (id / cols) * invalid_cols;
+
+    __local T localmem_max[128], localmem_min[128];
+    T minval = (T)(MAX_VAL), maxval = (T)(MIN_VAL), temp;
+
+    for (int grainSize = groupnum << 8; id < elemnum; id += grainSize)
+    {
+        idx = offset + id + (id / cols) * invalid_cols;
+        temp = src[idx];
+        minval = min(minval, temp);
+        maxval = max(maxval, temp);
+    }
+
+    if (lid > 127)
+    {
+        localmem_min[lid - 128] = minval;
+        localmem_max[lid - 128] = maxval;
+    }
+    barrier(CLK_LOCAL_MEM_FENCE);
+
+    if (lid < 128)
+    {
+        localmem_min[lid] = min(minval, localmem_min[lid]);
+        localmem_max[lid] = max(maxval, localmem_max[lid]);
+    }
+    barrier(CLK_LOCAL_MEM_FENCE);
+
+    for (int lsize = 64; lsize > 0; lsize >>= 1)
+    {
+        if (lid < lsize)
+        {
+            int lid2 = lsize + lid;
+            localmem_min[lid] = min(localmem_min[lid], localmem_min[lid2]);
+            localmem_max[lid] = max(localmem_max[lid], localmem_max[lid2]);
+        }
+        barrier(CLK_LOCAL_MEM_FENCE);
+    }
+
+    if (lid == 0)
+    {
+        dst[gid] = localmem_min[0];
+        dst[gid + groupnum] = localmem_max[0];
+    }
 }
 
 __kernel void arithm_op_minMax_mask(__global const T * src, __global T * dst,
@@ -120,57 +120,57 @@ __kernel void arithm_op_minMax_mask(__global const T * src, __global T * dst,
     int elemnum, int groupnum,
     const __global uchar * mask, int minvalid_cols, int moffset)
 {
-   unsigned int lid = get_local_id(0);
-   unsigned int gid = get_group_id(0);
-   unsigned int id = get_global_id(0);
-
-   unsigned int idx = offset + id + (id / cols) * invalid_cols;
-   unsigned int midx = moffset + id + (id / cols) * minvalid_cols;
-
-   __local T localmem_max[128], localmem_min[128];
-   T minval = (T)(MAX_VAL), maxval = (T)(MIN_VAL), temp;
-
-   for (int grainSize = groupnum << 8; id < elemnum; id += grainSize)
-   {
-       idx = offset + id + (id / cols) * invalid_cols;
-       midx = moffset + id + (id / cols) * minvalid_cols;
-
-       if (mask[midx])
-       {
-           temp = src[idx];
-           minval = min(minval, temp);
-           maxval = max(maxval, temp);
-       }
-   }
-
-   if (lid > 127)
-   {
-       localmem_min[lid - 128] = minval;
-       localmem_max[lid - 128] = maxval;
-   }
-   barrier(CLK_LOCAL_MEM_FENCE);
-
-   if (lid < 128)
-   {
-       localmem_min[lid] = min(minval, localmem_min[lid]);
-       localmem_max[lid] = max(maxval, localmem_max[lid]);
-   }
-   barrier(CLK_LOCAL_MEM_FENCE);
-
-   for (int lsize = 64; lsize > 0; lsize >>= 1)
-   {
-       if (lid < lsize)
-       {
-           int lid2 = lsize + lid;
-           localmem_min[lid] = min(localmem_min[lid], localmem_min[lid2]);
-           localmem_max[lid] = max(localmem_max[lid], localmem_max[lid2]);
-       }
-       barrier(CLK_LOCAL_MEM_FENCE);
-   }
-
-   if (lid == 0)
-   {
-       dst[gid] = localmem_min[0];
-       dst[gid + groupnum] = localmem_max[0];
-   }
+    int lid = get_local_id(0);
+    int gid = get_group_id(0);
+    int id = get_global_id(0);
+
+    int idx = offset + id + (id / cols) * invalid_cols;
+    int midx = moffset + id + (id / cols) * minvalid_cols;
+
+    __local T localmem_max[128], localmem_min[128];
+    T minval = (T)(MAX_VAL), maxval = (T)(MIN_VAL), temp;
+
+    for (int grainSize = groupnum << 8; id < elemnum; id += grainSize)
+    {
+        idx = offset + id + (id / cols) * invalid_cols;
+        midx = moffset + id + (id / cols) * minvalid_cols;
+
+        if (mask[midx])
+        {
+            temp = src[idx];
+            minval = min(minval, temp);
+            maxval = max(maxval, temp);
+        }
+    }
+
+    if (lid > 127)
+    {
+        localmem_min[lid - 128] = minval;
+        localmem_max[lid - 128] = maxval;
+    }
+    barrier(CLK_LOCAL_MEM_FENCE);
+
+    if (lid < 128)
+    {
+        localmem_min[lid] = min(minval, localmem_min[lid]);
+        localmem_max[lid] = max(maxval, localmem_max[lid]);
+    }
+    barrier(CLK_LOCAL_MEM_FENCE);
+
+    for (int lsize = 64; lsize > 0; lsize >>= 1)
+    {
+        if (lid < lsize)
+        {
+            int lid2 = lsize + lid;
+            localmem_min[lid] = min(localmem_min[lid], localmem_min[lid2]);
+            localmem_max[lid] = max(localmem_max[lid], localmem_max[lid2]);
+        }
+        barrier(CLK_LOCAL_MEM_FENCE);
+    }
+
+    if (lid == 0)
+    {
+        dst[gid] = localmem_min[0];
+        dst[gid + groupnum] = localmem_max[0];
+    }
 }
diff --git a/modules/ocl/src/opencl/arithm_minMaxLoc.cl b/modules/ocl/src/opencl/arithm_minMaxLoc.cl
index 21f95611b5..076fb06001 100644
--- a/modules/ocl/src/opencl/arithm_minMaxLoc.cl
+++ b/modules/ocl/src/opencl/arithm_minMaxLoc.cl
@@ -137,118 +137,114 @@
 #define repeat_e(a) a.s3 = a.s0;a.s2 = a.s0;a.s1 = a.s0;
 #endif
 
-
-#pragma OPENCL EXTENSION cl_khr_global_int32_base_atomics:enable
-#pragma OPENCL EXTENSION cl_khr_global_int32_extended_atomics:enable
-
 /**************************************Array minMax**************************************/
 
 __kernel void arithm_op_minMaxLoc(int cols, int invalid_cols, int offset, int elemnum, int groupnum,
                                   __global VEC_TYPE *src, __global RES_TYPE *dst)
 {
-   unsigned int lid = get_local_id(0);
-   unsigned int gid = get_group_id(0);
-   unsigned int  id = get_global_id(0);
-   unsigned int idx = offset + id + (id / cols) * invalid_cols;
-
-   __local VEC_TYPE localmem_max[128], localmem_min[128];
-   VEC_TYPE minval, maxval, temp;
-
-   __local VEC_TYPE_LOC localmem_maxloc[128], localmem_minloc[128];
-   VEC_TYPE_LOC minloc, maxloc, temploc, negative = -1;
-
-   int idx_c;
-
-   if (id < elemnum)
-   {
-       temp = src[idx];
-       idx_c = idx << 2;
-       temploc = (VEC_TYPE_LOC)(idx_c, idx_c + 1, idx_c + 2, idx_c + 3);
-
-       if (id % cols == 0 )
-       {
-           repeat_s(temp);
-           repeat_s(temploc);
-       }
-       if (id % cols == cols - 1)
-       {
-           repeat_e(temp);
-           repeat_e(temploc);
-       }
-       minval = temp;
-       maxval = temp;
-       minloc = temploc;
-       maxloc = temploc;
-   }
-   else
-   {
-       minval = MAX_VAL;
-       maxval = MIN_VAL;
-       minloc = negative;
-       maxloc = negative;
-   }
-
-   int grainSize = (groupnum << 8);
-   for (id = id + grainSize; id < elemnum; id = id + grainSize)
-   {
-       idx = offset + id + (id / cols) * invalid_cols;
-       temp = src[idx];
-       idx_c = idx << 2;
-       temploc = (VEC_TYPE_LOC)(idx_c, idx_c+1, idx_c+2, idx_c+3);
-
-       if (id % cols == 0 )
-       {
-               repeat_s(temp);
-               repeat_s(temploc);
-       }
-       if (id % cols == cols - 1)
-       {
-               repeat_e(temp);
-               repeat_e(temploc);
-       }
-
-       minval = min(minval, temp);
-       maxval = max(maxval, temp);
-       minloc = CONDITION_FUNC(minval == temp, temploc, minloc);
-       maxloc = CONDITION_FUNC(maxval == temp, temploc, maxloc);
-   }
-
-   if (lid > 127)
-   {
-       localmem_min[lid - 128] = minval;
-       localmem_max[lid - 128] = maxval;
-       localmem_minloc[lid - 128] = minloc;
-       localmem_maxloc[lid - 128] = maxloc;
-   }
-   barrier(CLK_LOCAL_MEM_FENCE);
-
-   if (lid < 128)
-   {
-       localmem_min[lid] = min(minval,localmem_min[lid]);
-       localmem_max[lid] = max(maxval,localmem_max[lid]);
-       localmem_minloc[lid] = CONDITION_FUNC(localmem_min[lid] == minval, minloc, localmem_minloc[lid]);
-       localmem_maxloc[lid] = CONDITION_FUNC(localmem_max[lid] == maxval, maxloc, localmem_maxloc[lid]);
-   }
-   barrier(CLK_LOCAL_MEM_FENCE);
-
-   for (int lsize = 64; lsize > 0; lsize >>= 1)
-   {
+    int lid = get_local_id(0);
+    int gid = get_group_id(0);
+    int  id = get_global_id(0);
+    int idx = offset + id + (id / cols) * invalid_cols;
+
+    __local VEC_TYPE localmem_max[128], localmem_min[128];
+    VEC_TYPE minval, maxval, temp;
+
+    __local VEC_TYPE_LOC localmem_maxloc[128], localmem_minloc[128];
+    VEC_TYPE_LOC minloc, maxloc, temploc, negative = -1;
+
+    int idx_c;
+
+    if (id < elemnum)
+    {
+        temp = src[idx];
+        idx_c = idx << 2;
+        temploc = (VEC_TYPE_LOC)(idx_c, idx_c + 1, idx_c + 2, idx_c + 3);
+
+        if (id % cols == 0 )
+        {
+            repeat_s(temp);
+            repeat_s(temploc);
+        }
+        if (id % cols == cols - 1)
+        {
+            repeat_e(temp);
+            repeat_e(temploc);
+        }
+        minval = temp;
+        maxval = temp;
+        minloc = temploc;
+        maxloc = temploc;
+    }
+    else
+    {
+        minval = MAX_VAL;
+        maxval = MIN_VAL;
+        minloc = negative;
+        maxloc = negative;
+    }
+
+    int grainSize = (groupnum << 8);
+    for (id = id + grainSize; id < elemnum; id = id + grainSize)
+    {
+        idx = offset + id + (id / cols) * invalid_cols;
+        temp = src[idx];
+        idx_c = idx << 2;
+        temploc = (VEC_TYPE_LOC)(idx_c, idx_c+1, idx_c+2, idx_c+3);
+
+        if (id % cols == 0 )
+        {
+            repeat_s(temp);
+            repeat_s(temploc);
+        }
+        if (id % cols == cols - 1)
+        {
+            repeat_e(temp);
+            repeat_e(temploc);
+        }
+
+        minval = min(minval, temp);
+        maxval = max(maxval, temp);
+        minloc = CONDITION_FUNC(minval == temp, temploc, minloc);
+        maxloc = CONDITION_FUNC(maxval == temp, temploc, maxloc);
+    }
+
+    if (lid > 127)
+    {
+        localmem_min[lid - 128] = minval;
+        localmem_max[lid - 128] = maxval;
+        localmem_minloc[lid - 128] = minloc;
+        localmem_maxloc[lid - 128] = maxloc;
+    }
+    barrier(CLK_LOCAL_MEM_FENCE);
+
+    if (lid < 128)
+    {
+        localmem_min[lid] = min(minval,localmem_min[lid]);
+        localmem_max[lid] = max(maxval,localmem_max[lid]);
+        localmem_minloc[lid] = CONDITION_FUNC(localmem_min[lid] == minval, minloc, localmem_minloc[lid]);
+        localmem_maxloc[lid] = CONDITION_FUNC(localmem_max[lid] == maxval, maxloc, localmem_maxloc[lid]);
+    }
+    barrier(CLK_LOCAL_MEM_FENCE);
+
+    for (int lsize = 64; lsize > 0; lsize >>= 1)
+    {
        if (lid < lsize)
        {
-           int lid2 = lsize + lid;
-           localmem_min[lid] = min(localmem_min[lid], localmem_min[lid2]);
-           localmem_max[lid] = max(localmem_max[lid], localmem_max[lid2]);
-           localmem_minloc[lid] = CONDITION_FUNC(localmem_min[lid] == localmem_min[lid2], localmem_minloc[lid2], localmem_minloc[lid]);
-           localmem_maxloc[lid] = CONDITION_FUNC(localmem_max[lid] == localmem_max[lid2], localmem_maxloc[lid2], localmem_maxloc[lid]);
+            int lid2 = lsize + lid;
+            localmem_min[lid] = min(localmem_min[lid], localmem_min[lid2]);
+            localmem_max[lid] = max(localmem_max[lid], localmem_max[lid2]);
+            localmem_minloc[lid] = CONDITION_FUNC(localmem_min[lid] == localmem_min[lid2], localmem_minloc[lid2], localmem_minloc[lid]);
+            localmem_maxloc[lid] = CONDITION_FUNC(localmem_max[lid] == localmem_max[lid2], localmem_maxloc[lid2], localmem_maxloc[lid]);
        }
        barrier(CLK_LOCAL_MEM_FENCE);
-   }
-
-   if ( lid == 0)
-   {
-       dst[gid] = CONVERT_RES_TYPE(localmem_min[0]);
-       dst[gid + groupnum] = CONVERT_RES_TYPE(localmem_max[0]);
-       dst[gid + 2 * groupnum] = CONVERT_RES_TYPE(localmem_minloc[0]);
-       dst[gid + 3 * groupnum] = CONVERT_RES_TYPE(localmem_maxloc[0]);
-   }
+    }
+
+    if ( lid == 0)
+    {
+        dst[gid] = CONVERT_RES_TYPE(localmem_min[0]);
+        dst[gid + groupnum] = CONVERT_RES_TYPE(localmem_max[0]);
+        dst[gid + 2 * groupnum] = CONVERT_RES_TYPE(localmem_minloc[0]);
+        dst[gid + 3 * groupnum] = CONVERT_RES_TYPE(localmem_maxloc[0]);
+    }
 }
diff --git a/modules/ocl/src/opencl/arithm_minMaxLoc_mask.cl b/modules/ocl/src/opencl/arithm_minMaxLoc_mask.cl
index 6d514e99d3..4d73be9541 100644
--- a/modules/ocl/src/opencl/arithm_minMaxLoc_mask.cl
+++ b/modules/ocl/src/opencl/arithm_minMaxLoc_mask.cl
@@ -147,96 +147,96 @@
 __kernel void arithm_op_minMaxLoc_mask (int cols,int invalid_cols,int offset,int elemnum,int groupnum,__global TYPE *src,
                                         int minvalid_cols,int moffset,__global uchar *mask,__global RES_TYPE  *dst)
 {
-   unsigned int lid = get_local_id(0);
-   unsigned int gid = get_group_id(0);
-   unsigned int  id = get_global_id(0);
-   unsigned int idx = id + (id / cols) * invalid_cols;
-   unsigned int midx = id + (id / cols) * minvalid_cols;
-   __local VEC_TYPE lm_max[128],lm_min[128];
-   VEC_TYPE minval,maxval,temp,m_temp;
-   __local VEC_TYPE_LOC lm_maxloc[128],lm_minloc[128];
-   VEC_TYPE_LOC minloc,maxloc,temploc,negative = -1,one = 1,zero = 0;
-   if(id < elemnum)
-   {
-       temp = vload4(idx, &src[offset]);
-       m_temp = CONVERT_TYPE(vload4(midx,&mask[moffset]));
-       int idx_c = (idx << 2) + offset;
-       temploc = (VEC_TYPE_LOC)(idx_c,idx_c+1,idx_c+2,idx_c+3);
-       if(id % cols == cols - 1)
-       {
-           repeat_me(m_temp);
-           repeat_e(temploc);
-       }
-       minval = m_temp != (VEC_TYPE)0 ? temp : (VEC_TYPE)MAX_VAL;
-       maxval = m_temp != (VEC_TYPE)0 ? temp : (VEC_TYPE)MIN_VAL;
-       minloc = CONDITION_FUNC(m_temp != (VEC_TYPE)0, temploc , negative);
-       maxloc = minloc;
-   }
-   else
-   {
-       minval = MAX_VAL;
-       maxval = MIN_VAL;
-       minloc = negative;
-       maxloc = negative;
-   }
-   for(id=id + (groupnum << 8); id < elemnum;id = id + (groupnum << 8))
-   {
-       idx = id + (id / cols) * invalid_cols;
-       midx = id + (id / cols) * minvalid_cols;
-       temp = vload4(idx, &src[offset]);
-       m_temp = CONVERT_TYPE(vload4(midx,&mask[moffset]));
-       int idx_c = (idx << 2) + offset;
-       temploc = (VEC_TYPE_LOC)(idx_c,idx_c+1,idx_c+2,idx_c+3);
-       if(id % cols == cols - 1)
-       {
-           repeat_me(m_temp);
-           repeat_e(temploc);
-       }
-       minval = min(minval,m_temp != (VEC_TYPE)0 ? temp : minval);
-       maxval = max(maxval,m_temp != (VEC_TYPE)0 ? temp : maxval);
+    int lid = get_local_id(0);
+    int gid = get_group_id(0);
+    int  id = get_global_id(0);
+    int idx = id + (id / cols) * invalid_cols;
+    int midx = id + (id / cols) * minvalid_cols;
+    __local VEC_TYPE lm_max[128],lm_min[128];
+    VEC_TYPE minval,maxval,temp,m_temp;
+    __local VEC_TYPE_LOC lm_maxloc[128],lm_minloc[128];
+    VEC_TYPE_LOC minloc,maxloc,temploc,negative = -1,one = 1,zero = 0;
+    if(id < elemnum)
+    {
+        temp = vload4(idx, &src[offset]);
+        m_temp = CONVERT_TYPE(vload4(midx,&mask[moffset]));
+        int idx_c = (idx << 2) + offset;
+        temploc = (VEC_TYPE_LOC)(idx_c,idx_c+1,idx_c+2,idx_c+3);
+        if(id % cols == cols - 1)
+        {
+            repeat_me(m_temp);
+            repeat_e(temploc);
+        }
+        minval = m_temp != (VEC_TYPE)0 ? temp : (VEC_TYPE)MAX_VAL;
+        maxval = m_temp != (VEC_TYPE)0 ? temp : (VEC_TYPE)MIN_VAL;
+        minloc = CONDITION_FUNC(m_temp != (VEC_TYPE)0, temploc , negative);
+        maxloc = minloc;
+    }
+    else
+    {
+        minval = MAX_VAL;
+        maxval = MIN_VAL;
+        minloc = negative;
+        maxloc = negative;
+    }
+    for(id=id + (groupnum << 8); id < elemnum;id = id + (groupnum << 8))
+    {
+        idx = id + (id / cols) * invalid_cols;
+        midx = id + (id / cols) * minvalid_cols;
+        temp = vload4(idx, &src[offset]);
+        m_temp = CONVERT_TYPE(vload4(midx,&mask[moffset]));
+        int idx_c = (idx << 2) + offset;
+        temploc = (VEC_TYPE_LOC)(idx_c,idx_c+1,idx_c+2,idx_c+3);
+        if(id % cols == cols - 1)
+        {
+            repeat_me(m_temp);
+            repeat_e(temploc);
+        }
+        minval = min(minval,m_temp != (VEC_TYPE)0 ? temp : minval);
+        maxval = max(maxval,m_temp != (VEC_TYPE)0 ? temp : maxval);
 
-       minloc = CONDITION_FUNC((minval == temp) && (m_temp != (VEC_TYPE)0), temploc , minloc);
-       maxloc = CONDITION_FUNC((maxval == temp) && (m_temp != (VEC_TYPE)0), temploc , maxloc);
-   }
-   if(lid > 127)
-   {
-       lm_min[lid - 128] = minval;
-       lm_max[lid - 128] = maxval;
-       lm_minloc[lid - 128] = minloc;
-       lm_maxloc[lid - 128] = maxloc;
-   }
-   barrier(CLK_LOCAL_MEM_FENCE);
-   if(lid < 128)
-   {
-       lm_min[lid] = min(minval,lm_min[lid]);
-       lm_max[lid] = max(maxval,lm_max[lid]);
-       VEC_TYPE con_min = CONVERT_TYPE(minloc != negative ? one : zero);
-       VEC_TYPE con_max = CONVERT_TYPE(maxloc != negative ? one : zero);
-       lm_minloc[lid] = CONDITION_FUNC((lm_min[lid] == minval) && (con_min != (VEC_TYPE)0), minloc , lm_minloc[lid]);
-       lm_maxloc[lid] = CONDITION_FUNC((lm_max[lid] == maxval) && (con_max != (VEC_TYPE)0), maxloc , lm_maxloc[lid]);
-   }
-   barrier(CLK_LOCAL_MEM_FENCE);
-   for(int lsize = 64; lsize > 0; lsize >>= 1)
-   {
-       if(lid < lsize)
-       {
-           int lid2 = lsize + lid;
-           lm_min[lid] = min(lm_min[lid] , lm_min[lid2]);
-           lm_max[lid] = max(lm_max[lid] , lm_max[lid2]);
-           VEC_TYPE con_min = CONVERT_TYPE(lm_minloc[lid2] != negative ? one : zero);
-           VEC_TYPE con_max = CONVERT_TYPE(lm_maxloc[lid2] != negative ? one : zero);
-           lm_minloc[lid] =
-              CONDITION_FUNC((lm_min[lid] == lm_min[lid2]) && (con_min != (VEC_TYPE)0), lm_minloc[lid2] , lm_minloc[lid]);
-           lm_maxloc[lid] =
-              CONDITION_FUNC((lm_max[lid] == lm_max[lid2]) && (con_max != (VEC_TYPE)0), lm_maxloc[lid2] , lm_maxloc[lid]);
-       }
-       barrier(CLK_LOCAL_MEM_FENCE);
-   }
-   if( lid == 0)
-   {
-       dst[gid] = CONVERT_RES_TYPE(lm_min[0]);
-       dst[gid + groupnum] = CONVERT_RES_TYPE(lm_max[0]);
-       dst[gid + 2 * groupnum] = CONVERT_RES_TYPE(lm_minloc[0]);
-       dst[gid + 3 * groupnum] = CONVERT_RES_TYPE(lm_maxloc[0]);
-   }
+        minloc = CONDITION_FUNC((minval == temp) && (m_temp != (VEC_TYPE)0), temploc , minloc);
+        maxloc = CONDITION_FUNC((maxval == temp) && (m_temp != (VEC_TYPE)0), temploc , maxloc);
+    }
+    if(lid > 127)
+    {
+        lm_min[lid - 128] = minval;
+        lm_max[lid - 128] = maxval;
+        lm_minloc[lid - 128] = minloc;
+        lm_maxloc[lid - 128] = maxloc;
+    }
+    barrier(CLK_LOCAL_MEM_FENCE);
+    if(lid < 128)
+    {
+        lm_min[lid] = min(minval,lm_min[lid]);
+        lm_max[lid] = max(maxval,lm_max[lid]);
+        VEC_TYPE con_min = CONVERT_TYPE(minloc != negative ? one : zero);
+        VEC_TYPE con_max = CONVERT_TYPE(maxloc != negative ? one : zero);
+        lm_minloc[lid] = CONDITION_FUNC((lm_min[lid] == minval) && (con_min != (VEC_TYPE)0), minloc , lm_minloc[lid]);
+        lm_maxloc[lid] = CONDITION_FUNC((lm_max[lid] == maxval) && (con_max != (VEC_TYPE)0), maxloc , lm_maxloc[lid]);
+    }
+    barrier(CLK_LOCAL_MEM_FENCE);
+    for(int lsize = 64; lsize > 0; lsize >>= 1)
+    {
+        if(lid < lsize)
+        {
+            int lid2 = lsize + lid;
+            lm_min[lid] = min(lm_min[lid] , lm_min[lid2]);
+            lm_max[lid] = max(lm_max[lid] , lm_max[lid2]);
+            VEC_TYPE con_min = CONVERT_TYPE(lm_minloc[lid2] != negative ? one : zero);
+            VEC_TYPE con_max = CONVERT_TYPE(lm_maxloc[lid2] != negative ? one : zero);
+            lm_minloc[lid] =
+                CONDITION_FUNC((lm_min[lid] == lm_min[lid2]) && (con_min != (VEC_TYPE)0), lm_minloc[lid2] , lm_minloc[lid]);
+            lm_maxloc[lid] =
+                CONDITION_FUNC((lm_max[lid] == lm_max[lid2]) && (con_max != (VEC_TYPE)0), lm_maxloc[lid2] , lm_maxloc[lid]);
+        }
+        barrier(CLK_LOCAL_MEM_FENCE);
+    }
+    if( lid == 0)
+    {
+        dst[gid] = CONVERT_RES_TYPE(lm_min[0]);
+        dst[gid + groupnum] = CONVERT_RES_TYPE(lm_max[0]);
+        dst[gid + 2 * groupnum] = CONVERT_RES_TYPE(lm_minloc[0]);
+        dst[gid + 3 * groupnum] = CONVERT_RES_TYPE(lm_maxloc[0]);
+    }
 }
diff --git a/modules/ocl/src/opencl/arithm_nonzero.cl b/modules/ocl/src/opencl/arithm_nonzero.cl
index 085386f5c3..fc98257962 100644
--- a/modules/ocl/src/opencl/arithm_nonzero.cl
+++ b/modules/ocl/src/opencl/arithm_nonzero.cl
@@ -55,11 +55,11 @@
 __kernel void arithm_op_nonzero(int cols, int invalid_cols, int offset, int elemnum, int groupnum,
                                   __global srcT *src, __global dstT *dst)
 {
-    unsigned int lid = get_local_id(0);
-    unsigned int gid = get_group_id(0);
-    unsigned int  id = get_global_id(0);
+    int lid = get_local_id(0);
+    int gid = get_group_id(0);
+    int  id = get_global_id(0);
 
-    unsigned int idx = offset + id + (id / cols) * invalid_cols;
+    int idx = offset + id + (id / cols) * invalid_cols;
     __local dstT localmem_nonzero[128];
     dstT nonzero = (dstT)(0);
     srcT zero = (srcT)(0), one = (srcT)(1);
diff --git a/modules/ocl/src/opencl/arithm_phase.cl b/modules/ocl/src/opencl/arithm_phase.cl
index b6bc7b42b4..f9835948c4 100644
--- a/modules/ocl/src/opencl/arithm_phase.cl
+++ b/modules/ocl/src/opencl/arithm_phase.cl
@@ -45,15 +45,17 @@
 //
 
 #if defined (DOUBLE_SUPPORT)
-#ifdef cl_khr_fp64
-#pragma OPENCL EXTENSION cl_khr_fp64:enable
-#elif defined (cl_amd_fp64)
-#pragma OPENCL EXTENSION cl_amd_fp64:enable
+    #ifdef cl_khr_fp64
+        #pragma OPENCL EXTENSION cl_khr_fp64:enable
+    #elif defined (cl_amd_fp64)
+        #pragma OPENCL EXTENSION cl_amd_fp64:enable
+    #endif
+    #define CV_PI 3.1415926535897932384626433832795
+    #define CV_2PI 2*CV_PI
+#else
+    #define CV_PI 3.1415926535897932384626433832795f
+    #define CV_2PI 2*CV_PI
 #endif
-#endif
-
-#define CV_PI 3.1415926535898
-#define CV_2PI 2*3.1415926535898
 
 /**************************************phase inradians**************************************/
 
diff --git a/modules/ocl/src/opencl/arithm_polarToCart.cl b/modules/ocl/src/opencl/arithm_polarToCart.cl
index 8af840db82..8469cdb097 100644
--- a/modules/ocl/src/opencl/arithm_polarToCart.cl
+++ b/modules/ocl/src/opencl/arithm_polarToCart.cl
@@ -43,12 +43,13 @@
 //
 //M*/
 
-#if defined (DOUBLE_SUPPORT)
-#pragma OPENCL EXTENSION cl_khr_fp64:enable
+#ifdef DOUBLE_SUPPORT
+    #pragma OPENCL EXTENSION cl_khr_fp64:enable
+    #define CV_PI   3.1415926535897932384626433832795
+#else
+    #define CV_PI   3.1415926535897932384626433832795f
 #endif
 
-#define CV_PI   3.1415926535897932384626433832795
-
 /////////////////////////////////////////////////////////////////////////////////////////////////////
 /////////////////////////////////////////polarToCart with magnitude//////////////////////////////
 ///////////////////////////////////////////////////////////////////////////////////////////////////
@@ -72,7 +73,7 @@ __kernel void arithm_polarToCart_mag_D5 (__global float *src1, int src1_step, in
         float x = *((__global float *)((__global char *)src1 + src1_index));
         float y = *((__global float *)((__global char *)src2 + src2_index));
 
-        float ascale = CV_PI/180.0;
+        float ascale = CV_PI/180.0f;
         float alpha  = angInDegree == 1 ? y * ascale : y;
         float a = cos(alpha) * x;
         float b = sin(alpha) * x;
@@ -134,7 +135,7 @@ __kernel void arithm_polarToCart_D5 (__global float *src,  int src_step,  int sr
 
         float y = *((__global float *)((__global char *)src + src_index));
 
-        float ascale = CV_PI/180.0;
+        float ascale = CV_PI/180.0f;
         float alpha  = angInDegree == 1 ? y * ascale : y;
         float a = cos(alpha);
         float b = sin(alpha);
diff --git a/modules/ocl/src/opencl/arithm_sum.cl b/modules/ocl/src/opencl/arithm_sum.cl
index 6eb6e48323..7ada5be4c1 100644
--- a/modules/ocl/src/opencl/arithm_sum.cl
+++ b/modules/ocl/src/opencl/arithm_sum.cl
@@ -66,39 +66,39 @@
 __kernel void arithm_op_sum(int cols,int invalid_cols,int offset,int elemnum,int groupnum,
                                 __global srcT *src, __global dstT *dst)
 {
-   unsigned int lid = get_local_id(0);
-   unsigned int gid = get_group_id(0);
-   unsigned int id = get_global_id(0);
-   unsigned int idx = offset + id + (id / cols) * invalid_cols;
+    int lid = get_local_id(0);
+    int gid = get_group_id(0);
+    int id = get_global_id(0);
+    int idx = offset + id + (id / cols) * invalid_cols;
 
-   __local dstT localmem_sum[128];
-   dstT sum = (dstT)(0), temp;
+    __local dstT localmem_sum[128];
+    dstT sum = (dstT)(0), temp;
 
-   for (int grainSize = groupnum << 8; id < elemnum; id += grainSize)
-   {
-       idx = offset + id + (id / cols) * invalid_cols;
-       temp = convertToDstT(src[idx]);
-       FUNC(temp, sum);
-   }
+    for (int grainSize = groupnum << 8; id < elemnum; id += grainSize)
+    {
+        idx = offset + id + (id / cols) * invalid_cols;
+        temp = convertToDstT(src[idx]);
+        FUNC(temp, sum);
+    }
 
-   if (lid > 127)
-       localmem_sum[lid - 128] = sum;
-   barrier(CLK_LOCAL_MEM_FENCE);
+    if (lid > 127)
+        localmem_sum[lid - 128] = sum;
+    barrier(CLK_LOCAL_MEM_FENCE);
 
-   if (lid < 128)
-       localmem_sum[lid] = sum + localmem_sum[lid];
-   barrier(CLK_LOCAL_MEM_FENCE);
+    if (lid < 128)
+        localmem_sum[lid] = sum + localmem_sum[lid];
+    barrier(CLK_LOCAL_MEM_FENCE);
 
-   for (int lsize = 64; lsize > 0; lsize >>= 1)
-   {
-       if (lid < lsize)
-       {
-           int lid2 = lsize + lid;
-           localmem_sum[lid] = localmem_sum[lid] + localmem_sum[lid2];
-       }
-       barrier(CLK_LOCAL_MEM_FENCE);
-   }
+    for (int lsize = 64; lsize > 0; lsize >>= 1)
+    {
+        if (lid < lsize)
+        {
+            int lid2 = lsize + lid;
+            localmem_sum[lid] = localmem_sum[lid] + localmem_sum[lid2];
+        }
+        barrier(CLK_LOCAL_MEM_FENCE);
+    }
 
-   if (lid == 0)
-       dst[gid] = localmem_sum[0];
+    if (lid == 0)
+        dst[gid] = localmem_sum[0];
 }
diff --git a/modules/ocl/src/opencl/brute_force_match.cl b/modules/ocl/src/opencl/brute_force_match.cl
index 8f85f7d936..ce0d86e8a4 100644
--- a/modules/ocl/src/opencl/brute_force_match.cl
+++ b/modules/ocl/src/opencl/brute_force_match.cl
@@ -64,7 +64,7 @@
 #endif
 
 //http://graphics.stanford.edu/~seander/bithacks.html#CountBitsSetParallel
-int bit1Count(int v)
+static int bit1Count(int v)
 {
     v = v - ((v >> 1) & 0x55555555);                    // reuse input as temporary
     v = (v & 0x33333333) + ((v >> 2) & 0x33333333);     // temp
@@ -95,7 +95,7 @@ typedef int result_type;
 #define DIST_RES(x) (x)
 #endif
 
-result_type reduce_block(
+static result_type reduce_block(
     __local value_type *s_query,
     __local value_type *s_train,
     int lidx,
@@ -113,7 +113,7 @@ result_type reduce_block(
     return DIST_RES(result);
 }
 
-result_type reduce_block_match(
+static result_type reduce_block_match(
     __local value_type *s_query,
     __local value_type *s_train,
     int lidx,
@@ -131,7 +131,7 @@ result_type reduce_block_match(
     return (result);
 }
 
-result_type reduce_multi_block(
+static result_type reduce_multi_block(
     __local value_type *s_query,
     __local value_type *s_train,
     int block_index,
@@ -187,7 +187,6 @@ __kernel void BruteForceMatch_UnrollMatch(
     int myBestTrainIdx = -1;
 
     // loopUnrolledCached to find the best trainIdx and best distance.
-    volatile int imgIdx = 0;
     for (int t = 0, endt = (train_rows + BLOCK_SIZE - 1) / BLOCK_SIZE; t < endt; t++)
     {
         result_type result = 0;
@@ -212,7 +211,6 @@ __kernel void BruteForceMatch_UnrollMatch(
 
         if (queryIdx < query_rows && trainIdx < train_rows && result < myBestDistance/* && mask(queryIdx, trainIdx)*/)
         {
-            //bestImgIdx = imgIdx;
             myBestDistance = result;
             myBestTrainIdx = trainIdx;
         }
@@ -304,7 +302,6 @@ __kernel void BruteForceMatch_Match(
 
         if (queryIdx < query_rows && trainIdx < train_rows && result < myBestDistance /*&& mask(queryIdx, trainIdx)*/)
         {
-            //myBestImgidx = imgIdx;
             myBestDistance = result;
             myBestTrainIdx = trainIdx;
         }
@@ -390,11 +387,10 @@ __kernel void BruteForceMatch_RadiusUnrollMatch(
     if (queryIdx < query_rows && trainIdx < train_rows &&
         convert_float(result) < maxDistance/* && mask(queryIdx, trainIdx)*/)
     {
-        unsigned int ind = atom_inc(nMatches + queryIdx/*, (unsigned int) -1*/);
+        int ind = atom_inc(nMatches + queryIdx/*, (unsigned int) -1*/);
 
         if(ind < bestTrainIdx_cols)
         {
-            //bestImgIdx = imgIdx;
             bestTrainIdx[queryIdx * (ostep / sizeof(int)) + ind] = trainIdx;
             bestDistance[queryIdx * (ostep / sizeof(float)) + ind] = result;
         }
@@ -451,11 +447,10 @@ __kernel void BruteForceMatch_RadiusMatch(
     if (queryIdx < query_rows && trainIdx < train_rows &&
         convert_float(result) < maxDistance/* && mask(queryIdx, trainIdx)*/)
     {
-        unsigned int ind = atom_inc(nMatches + queryIdx);
+        int ind = atom_inc(nMatches + queryIdx);
 
         if(ind < bestTrainIdx_cols)
         {
-            //bestImgIdx = imgIdx;
             bestTrainIdx[queryIdx * (ostep / sizeof(int)) + ind] = trainIdx;
             bestDistance[queryIdx * (ostep / sizeof(float)) + ind] = result;
         }
@@ -498,7 +493,6 @@ __kernel void BruteForceMatch_knnUnrollMatch(
     int myBestTrainIdx2 = -1;
 
     //loopUnrolledCached
-    volatile int imgIdx = 0;
     for (int t = 0 ; t < (train_rows + BLOCK_SIZE - 1) / BLOCK_SIZE ; t++)
     {
         result_type result = 0;
diff --git a/modules/ocl/src/opencl/cvt_color.cl b/modules/ocl/src/opencl/cvt_color.cl
index fcbf67ca7a..01286f7ad7 100644
--- a/modules/ocl/src/opencl/cvt_color.cl
+++ b/modules/ocl/src/opencl/cvt_color.cl
@@ -50,8 +50,6 @@
 #pragma OPENCL EXTENSION cl_khr_fp64:enable
 #endif
 
-#define DATA_TYPE UNDEFINED
-
 #if defined (DEPTH_0)
 #define DATA_TYPE uchar
 #define MAX_NUM  255
@@ -73,6 +71,10 @@
 #define SAT_CAST(num) (num)
 #endif
 
+#ifndef DATA_TYPE
+    #define DATA_TYPE UNDEFINED
+#endif
+
 #define CV_DESCALE(x,n) (((x) + (1 << ((n)-1))) >> (n))
 
 enum
diff --git a/modules/ocl/src/opencl/haarobjectdetect.cl b/modules/ocl/src/opencl/haarobjectdetect.cl
index 1d53f2b880..9e4ab2fe71 100644
--- a/modules/ocl/src/opencl/haarobjectdetect.cl
+++ b/modules/ocl/src/opencl/haarobjectdetect.cl
@@ -37,7 +37,6 @@
 //
 //
 
-#pragma OPENCL EXTENSION cl_amd_printf : enable
 #define CV_HAAR_FEATURE_MAX           3
 
 #define calc_sum(rect,offset)        (sum[(rect).p0+offset] - sum[(rect).p1+offset] - sum[(rect).p2+offset] + sum[(rect).p3+offset])
diff --git a/modules/ocl/src/opencl/haarobjectdetect_scaled2.cl b/modules/ocl/src/opencl/haarobjectdetect_scaled2.cl
index 17e95b4e4a..b7a8ce1379 100644
--- a/modules/ocl/src/opencl/haarobjectdetect_scaled2.cl
+++ b/modules/ocl/src/opencl/haarobjectdetect_scaled2.cl
@@ -120,7 +120,6 @@ __kernel void gpuRunHaarClassifierCascade_scaled2(
     int grpidx = get_group_id(0);
     int lclidx = get_local_id(0);
     int lclidy = get_local_id(1);
-    int lcl_sz = mul24(grpszx, grpszy);
     int lcl_id = mad24(lclidy, grpszx, lclidx);
     __local int glboutindex[1];
     __local int lclcount[1];
diff --git a/modules/ocl/src/opencl/imgproc_calcHarris.cl b/modules/ocl/src/opencl/imgproc_calcHarris.cl
index bf54d3867d..0a981e12e8 100644
--- a/modules/ocl/src/opencl/imgproc_calcHarris.cl
+++ b/modules/ocl/src/opencl/imgproc_calcHarris.cl
@@ -99,7 +99,6 @@ __kernel void calcHarris(__global const float *Dx, __global const float *Dy, __g
     int col = get_local_id(0);
     int gX = get_group_id(0);
     int gY = get_group_id(1);
-    int glx = get_global_id(0);
     int gly = get_global_id(1);
 
     int dx_x_off = (dx_offset % dx_step) >> 2;
@@ -126,11 +125,11 @@ __kernel void calcHarris(__global const float *Dx, __global const float *Dy, __g
     {
         dx_con = dx_startX+col >= 0 && dx_startX+col < dx_whole_cols && dx_startY+i >= 0 && dx_startY+i < dx_whole_rows;
         dx_s = Dx[(dx_startY+i)*(dx_step>>2)+(dx_startX+col)];
-        dx_data[i] = dx_con ? dx_s : 0.0;
+        dx_data[i] = dx_con ? dx_s : 0.0f;
 
         dy_con = dy_startX+col >= 0 && dy_startX+col < dy_whole_cols && dy_startY+i >= 0 && dy_startY+i < dy_whole_rows;
         dy_s = Dy[(dy_startY+i)*(dy_step>>2)+(dy_startX+col)];
-        dy_data[i] = dy_con ? dy_s : 0.0;
+        dy_data[i] = dy_con ? dy_s : 0.0f;
 
         data[0][i] = dx_data[i] * dx_data[i];
         data[1][i] = dx_data[i] * dy_data[i];
@@ -155,7 +154,7 @@ __kernel void calcHarris(__global const float *Dx, __global const float *Dy, __g
         data[2][i] = dy_data[i] * dy_data[i];
     }
 #endif
-    float sum0 = 0.0, sum1 = 0.0, sum2 = 0.0;
+    float sum0 = 0.0f, sum1 = 0.0f, sum2 = 0.0f;
     for (int i=1; i < ksY; i++)
     {
         sum0 += data[0][i];
@@ -183,7 +182,7 @@ __kernel void calcHarris(__global const float *Dx, __global const float *Dy, __g
         int posX = dst_startX - dst_x_off + col - anX;
         int posY = (gly << 1);
         int till = (ksX + 1)%2;
-        float tmp_sum[6] = { 0.0, 0.0 , 0.0, 0.0, 0.0, 0.0 };
+        float tmp_sum[6] = { 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f };
         for (int k=0; k<6; k++)
             for (int i=-anX; i<=anX - till; i++)
                 tmp_sum[k] += temp[k][col+i];
diff --git a/modules/ocl/src/opencl/imgproc_calcMinEigenVal.cl b/modules/ocl/src/opencl/imgproc_calcMinEigenVal.cl
index 5f39176e99..110d204a59 100644
--- a/modules/ocl/src/opencl/imgproc_calcMinEigenVal.cl
+++ b/modules/ocl/src/opencl/imgproc_calcMinEigenVal.cl
@@ -98,7 +98,6 @@ __kernel void calcMinEigenVal(__global const float *Dx,__global const float *Dy,
     int col = get_local_id(0);
     int gX = get_group_id(0);
     int gY = get_group_id(1);
-    int glx = get_global_id(0);
     int gly = get_global_id(1);
 
     int dx_x_off = (dx_offset % dx_step) >> 2;
@@ -125,10 +124,10 @@ __kernel void calcMinEigenVal(__global const float *Dx,__global const float *Dy,
     {
         dx_con = dx_startX+col >= 0 && dx_startX+col < dx_whole_cols && dx_startY+i >= 0 && dx_startY+i < dx_whole_rows;
         dx_s = Dx[(dx_startY+i)*(dx_step>>2)+(dx_startX+col)];
-        dx_data[i] = dx_con ? dx_s : 0.0;
+        dx_data[i] = dx_con ? dx_s : 0.0f;
         dy_con = dy_startX+col >= 0 && dy_startX+col < dy_whole_cols && dy_startY+i >= 0 && dy_startY+i < dy_whole_rows;
         dy_s = Dy[(dy_startY+i)*(dy_step>>2)+(dy_startX+col)];
-        dy_data[i] = dy_con ? dy_s : 0.0;
+        dy_data[i] = dy_con ? dy_s : 0.0f;
         data[0][i] = dx_data[i] * dx_data[i];
         data[1][i] = dx_data[i] * dy_data[i];
         data[2][i] = dy_data[i] * dy_data[i];
@@ -152,7 +151,7 @@ __kernel void calcMinEigenVal(__global const float *Dx,__global const float *Dy,
         data[2][i] = dy_data[i] * dy_data[i];
     }
 #endif
-    float sum0 = 0.0, sum1 = 0.0, sum2 = 0.0;
+    float sum0 = 0.0f, sum1 = 0.0f, sum2 = 0.0f;
     for (int i=1; i < ksY; i++)
     {
         sum0 += (data[0][i]);
@@ -180,7 +179,7 @@ __kernel void calcMinEigenVal(__global const float *Dx,__global const float *Dy,
         int posX = dst_startX - dst_x_off + col - anX;
         int posY = (gly << 1);
         int till = (ksX + 1)%2;
-        float tmp_sum[6] = { 0.0, 0.0 , 0.0, 0.0, 0.0, 0.0 };
+        float tmp_sum[6] = { 0.0f, 0.0f , 0.0f, 0.0f, 0.0f, 0.0f };
         for (int k=0; k<6; k++)
             for (int i=-anX; i<=anX - till; i++)
                 tmp_sum[k] += temp[k][col+i];
diff --git a/modules/ocl/src/opencl/imgproc_canny.cl b/modules/ocl/src/opencl/imgproc_canny.cl
index c77cae99a3..0a54f1468c 100644
--- a/modules/ocl/src/opencl/imgproc_canny.cl
+++ b/modules/ocl/src/opencl/imgproc_canny.cl
@@ -43,9 +43,6 @@
 //
 //M*/
 
-#pragma OPENCL EXTENSION cl_khr_global_int32_base_atomics : enable
-#pragma OPENCL EXTENSION cl_khr_local_int32_base_atomics : enable
-
 #ifdef L2GRAD
 inline float calc(int x, int y)
 {
@@ -248,7 +245,12 @@ void calcMagnitude
 //////////////////////////////////////////////////////////////////////////////////////////
 // 0.4142135623730950488016887242097 is tan(22.5)
 #define CANNY_SHIFT 15
-#define TG22        (int)(0.4142135623730950488016887242097*(1<<CANNY_SHIFT) + 0.5)
+
+#ifdef DOUBLE_SUPPORT
+    #define TG22        (int)(0.4142135623730950488016887242097*(1<<CANNY_SHIFT) + 0.5)
+#else
+    #define TG22        (int)(0.4142135623730950488016887242097f*(1<<CANNY_SHIFT) + 0.5f)
+#endif
 
 //First pass of edge detection and non-maximum suppression
 // edgetype is set to for each pixel:
@@ -681,7 +683,7 @@ edgesHysteresisGlobal
 
             ind = s_ind;
 
-            for (int i = lidx; i < s_counter; i += get_local_size(0))
+            for (int i = lidx; i < (int)s_counter; i += get_local_size(0))
             {
                 st2[ind + i] = s_st[i];
             }
diff --git a/modules/ocl/src/opencl/imgproc_clahe.cl b/modules/ocl/src/opencl/imgproc_clahe.cl
index 16c68fd474..57d945e21c 100644
--- a/modules/ocl/src/opencl/imgproc_clahe.cl
+++ b/modules/ocl/src/opencl/imgproc_clahe.cl
@@ -47,7 +47,7 @@
 #define WAVE_SIZE 1
 #endif
 
-int calc_lut(__local int* smem, int val, int tid)
+static int calc_lut(__local int* smem, int val, int tid)
 {
     smem[tid] = val;
     barrier(CLK_LOCAL_MEM_FENCE);
@@ -61,7 +61,7 @@ int calc_lut(__local int* smem, int val, int tid)
 }
 
 #ifdef CPU
-void reduce(volatile __local int* smem, int val, int tid)
+static void reduce(volatile __local int* smem, int val, int tid)
 {
     smem[tid] = val;
     barrier(CLK_LOCAL_MEM_FENCE);
@@ -101,7 +101,7 @@ void reduce(volatile __local int* smem, int val, int tid)
 
 #else
 
-void reduce(__local volatile int* smem, int val, int tid)
+static void reduce(__local volatile int* smem, int val, int tid)
 {
     smem[tid] = val;
     barrier(CLK_LOCAL_MEM_FENCE);
@@ -147,9 +147,9 @@ __kernel void calcLut(__global __const uchar * src, __global uchar * lut,
 {
     __local int smem[512];
 
-    const int tx = get_group_id(0);
-    const int ty = get_group_id(1);
-    const unsigned int tid = get_local_id(1) * get_local_size(0)
+    int tx = get_group_id(0);
+    int ty = get_group_id(1);
+    int tid = get_local_id(1) * get_local_size(0)
                              + get_local_id(0);
 
     smem[tid] = 0;
diff --git a/modules/ocl/src/opencl/imgproc_integral.cl b/modules/ocl/src/opencl/imgproc_integral.cl
index f10b184e55..05e76f9647 100644
--- a/modules/ocl/src/opencl/imgproc_integral.cl
+++ b/modules/ocl/src/opencl/imgproc_integral.cl
@@ -63,8 +63,8 @@
 kernel void integral_cols_D4(__global uchar4 *src,__global int *sum ,__global float *sqsum,
                           int src_offset,int pre_invalid,int rows,int cols,int src_step,int dst_step)
 {
-    unsigned int lid = get_local_id(0);
-    unsigned int gid = get_group_id(0);
+    int lid = get_local_id(0);
+    int gid = get_group_id(0);
     int4 src_t[2], sum_t[2];
     float4 sqsum_t[2];
     __local int4 lm_sum[2][LSIZE + LOG_LSIZE];
@@ -75,8 +75,8 @@ kernel void integral_cols_D4(__global uchar4 *src,__global int *sum ,__global fl
     gid = gid << 1;
     for(int i = 0; i < rows; i =i + LSIZE_1)
     {
-        src_t[0] = (i + lid < rows ? convert_int4(src[src_offset + (lid+i) * src_step + min(gid, (uint)cols - 1)]) : 0);
-        src_t[1] = (i + lid < rows ? convert_int4(src[src_offset + (lid+i) * src_step + min(gid + 1, (uint)cols - 1)]) : 0);
+        src_t[0] = (i + lid < rows ? convert_int4(src[src_offset + (lid+i) * src_step + min(gid, cols - 1)]) : 0);
+        src_t[1] = (i + lid < rows ? convert_int4(src[src_offset + (lid+i) * src_step + min(gid + 1, cols - 1)]) : 0);
 
         sum_t[0] = (i == 0 ? 0 : lm_sum[0][LSIZE_2 + LOG_LSIZE]);
         sqsum_t[0] = (i == 0 ? (float4)0 : lm_sqsum[0][LSIZE_2 + LOG_LSIZE]);
@@ -163,8 +163,8 @@ kernel void integral_rows_D4(__global int4 *srcsum,__global float4 * srcsqsum,__
                           __global float *sqsum,int rows,int cols,int src_step,int sum_step,
                           int sqsum_step,int sum_offset,int sqsum_offset)
 {
-    unsigned int lid = get_local_id(0);
-    unsigned int gid = get_group_id(0);
+    int lid = get_local_id(0);
+    int gid = get_group_id(0);
     int4 src_t[2], sum_t[2];
     float4 sqsrc_t[2],sqsum_t[2];
     __local int4 lm_sum[2][LSIZE + LOG_LSIZE];
@@ -279,8 +279,8 @@ kernel void integral_rows_D4(__global int4 *srcsum,__global float4 * srcsqsum,__
 kernel void integral_cols_D5(__global uchar4 *src,__global float *sum ,__global float *sqsum,
                           int src_offset,int pre_invalid,int rows,int cols,int src_step,int dst_step)
 {
-    unsigned int lid = get_local_id(0);
-    unsigned int gid = get_group_id(0);
+    int lid = get_local_id(0);
+    int gid = get_group_id(0);
     float4 src_t[2], sum_t[2];
     float4 sqsum_t[2];
     __local float4 lm_sum[2][LSIZE + LOG_LSIZE];
@@ -291,8 +291,8 @@ kernel void integral_cols_D5(__global uchar4 *src,__global float *sum ,__global
     gid = gid << 1;
     for(int i = 0; i < rows; i =i + LSIZE_1)
     {
-        src_t[0] = (i + lid < rows ? convert_float4(src[src_offset + (lid+i) * src_step + min(gid, (uint)cols - 1)]) : (float4)0);
-        src_t[1] = (i + lid < rows ? convert_float4(src[src_offset + (lid+i) * src_step + min(gid + 1, (uint)cols - 1)]) : (float4)0);
+        src_t[0] = (i + lid < rows ? convert_float4(src[src_offset + (lid+i) * src_step + min(gid, cols - 1)]) : (float4)0);
+        src_t[1] = (i + lid < rows ? convert_float4(src[src_offset + (lid+i) * src_step + min(gid + 1, cols - 1)]) : (float4)0);
 
         sum_t[0] = (i == 0 ? (float4)0 : lm_sum[0][LSIZE_2 + LOG_LSIZE]);
         sqsum_t[0] = (i == 0 ? (float4)0 : lm_sqsum[0][LSIZE_2 + LOG_LSIZE]);
@@ -379,8 +379,8 @@ kernel void integral_rows_D5(__global float4 *srcsum,__global float4 * srcsqsum,
                           __global float *sqsum,int rows,int cols,int src_step,int sum_step,
                           int sqsum_step,int sum_offset,int sqsum_offset)
 {
-    unsigned int lid = get_local_id(0);
-    unsigned int gid = get_group_id(0);
+    int lid = get_local_id(0);
+    int gid = get_group_id(0);
     float4 src_t[2], sum_t[2];
     float4 sqsrc_t[2],sqsum_t[2];
     __local float4 lm_sum[2][LSIZE + LOG_LSIZE];
diff --git a/modules/ocl/src/opencl/imgproc_integral_sum.cl b/modules/ocl/src/opencl/imgproc_integral_sum.cl
index ee063a558a..a6f73c748d 100644
--- a/modules/ocl/src/opencl/imgproc_integral_sum.cl
+++ b/modules/ocl/src/opencl/imgproc_integral_sum.cl
@@ -64,8 +64,8 @@
 kernel void integral_sum_cols_D4(__global uchar4 *src,__global int *sum ,
                               int src_offset,int pre_invalid,int rows,int cols,int src_step,int dst_step)
 {
-    unsigned int lid = get_local_id(0);
-    unsigned int gid = get_group_id(0);
+    int lid = get_local_id(0);
+    int gid = get_group_id(0);
     int4 src_t[2], sum_t[2];
     __local int4 lm_sum[2][LSIZE + LOG_LSIZE];
     __local int* sum_p;
@@ -146,8 +146,8 @@ kernel void integral_sum_rows_D4(__global int4 *srcsum,__global int *sum ,
                               int rows,int cols,int src_step,int sum_step,
                               int sum_offset)
 {
-    unsigned int lid = get_local_id(0);
-    unsigned int gid = get_group_id(0);
+    int lid = get_local_id(0);
+    int gid = get_group_id(0);
     int4 src_t[2], sum_t[2];
     __local int4 lm_sum[2][LSIZE + LOG_LSIZE];
     __local int *sum_p;
@@ -239,8 +239,8 @@ kernel void integral_sum_rows_D4(__global int4 *srcsum,__global int *sum ,
 kernel void integral_sum_cols_D5(__global uchar4 *src,__global float *sum ,
                               int src_offset,int pre_invalid,int rows,int cols,int src_step,int dst_step)
 {
-    unsigned int lid = get_local_id(0);
-    unsigned int gid = get_group_id(0);
+    int lid = get_local_id(0);
+    int gid = get_group_id(0);
     float4 src_t[2], sum_t[2];
     __local float4 lm_sum[2][LSIZE + LOG_LSIZE];
     __local float* sum_p;
@@ -321,8 +321,8 @@ kernel void integral_sum_rows_D5(__global float4 *srcsum,__global float *sum ,
                               int rows,int cols,int src_step,int sum_step,
                               int sum_offset)
 {
-    unsigned int lid = get_local_id(0);
-    unsigned int gid = get_group_id(0);
+    int lid = get_local_id(0);
+    int gid = get_group_id(0);
     float4 src_t[2], sum_t[2];
     __local float4 lm_sum[2][LSIZE + LOG_LSIZE];
     __local float *sum_p;
diff --git a/modules/ocl/src/opencl/imgproc_median.cl b/modules/ocl/src/opencl/imgproc_median.cl
index ccb529957b..5fa7a17b8e 100644
--- a/modules/ocl/src/opencl/imgproc_median.cl
+++ b/modules/ocl/src/opencl/imgproc_median.cl
@@ -106,10 +106,10 @@ __kernel void medianFilter3_C4_D0(__global uchar4 * src, __global uchar4 * dst,
     op(p3, p6); op(p1, p4); op(p2, p5); op(p4, p7);
     op(p4, p2); op(p6, p4); op(p4, p2);
 
-    if(get_global_id(1)<rows && get_global_id(0)<cols)
+    if((int)get_global_id(1)<rows && (int)get_global_id(0)<cols)
         dst[dstOffset + get_global_id(1)*dstStep + get_global_id(0)]=p4;
 }
-#undef op(a,b)
+#undef op
 
 #define op(a,b) {mid=a; a=min(a,b); b=max(mid,b);}
 __kernel void medianFilter3_C1_D0(__global uchar * src, __global uchar * dst,  int srcOffset, int dstOffset, int cols,
@@ -148,10 +148,10 @@ __kernel void medianFilter3_C1_D0(__global uchar * src, __global uchar * dst,  i
     op(p3, p6); op(p1, p4); op(p2, p5); op(p4, p7);
     op(p4, p2); op(p6, p4); op(p4, p2);
 
-    if(get_global_id(1)<rows && get_global_id(0)<cols)
+    if((int)get_global_id(1)<rows && (int)get_global_id(0)<cols)
         dst[dstOffset + get_global_id(1)*dstStep + get_global_id(0)]=p4;
 }
-#undef op(a,b)
+#undef op
 
 #define op(a,b) {mid=a; a=min(a,b); b=max(mid,b);}
 __kernel void medianFilter3_C1_D5(__global float * src, __global float * dst,  int srcOffset, int dstOffset, int cols,
@@ -190,10 +190,10 @@ __kernel void medianFilter3_C1_D5(__global float * src, __global float * dst,  i
     op(p3, p6); op(p1, p4); op(p2, p5); op(p4, p7);
     op(p4, p2); op(p6, p4); op(p4, p2);
 
-    if(get_global_id(1)<rows && get_global_id(0)<cols)
+    if((int)get_global_id(1)<rows && (int)get_global_id(0)<cols)
         dst[dstOffset + get_global_id(1)*dstStep + get_global_id(0)]=p4;
 }
-#undef op(a,b)
+#undef op
 
 #define op(a,b) {mid=a; a=min(a,b); b=max(mid,b);}
 __kernel void medianFilter3_C4_D5(__global float4 * src, __global float4 * dst,  int srcOffset, int dstOffset, int cols,
@@ -232,10 +232,10 @@ __kernel void medianFilter3_C4_D5(__global float4 * src, __global float4 * dst,
     op(p3, p6); op(p1, p4); op(p2, p5); op(p4, p7);
     op(p4, p2); op(p6, p4); op(p4, p2);
 
-    if(get_global_id(1)<rows && get_global_id(0)<cols)
+    if((int)get_global_id(1)<rows && (int)get_global_id(0)<cols)
         dst[dstOffset + get_global_id(1)*dstStep + get_global_id(0)]=p4;
 }
-#undef op(a,b)
+#undef op
 
 #define op(a,b) {mid=a; a=min(a,b); b=max(mid,b);}
 __kernel void medianFilter5_C4_D0(__global uchar4 * src, __global uchar4 * dst,  int srcOffset, int dstOffset, int cols,
@@ -294,10 +294,10 @@ __kernel void medianFilter5_C4_D0(__global uchar4 * src, __global uchar4 * dst,
     op(p13, p17); op(p3, p15); op(p11, p23); op(p11, p15); op(p7, p19);
     op(p7, p11); op(p11, p13); op(p11, p12);
 
-    if(get_global_id(1)<rows && get_global_id(0)<cols)
+    if((int)get_global_id(1)<rows && (int)get_global_id(0)<cols)
         dst[dstOffset + get_global_id(1)*dstStep + get_global_id(0)]=p12;
 }
-#undef op(a,b)
+#undef op
 
 #define op(a,b) {mid=a; a=min(a,b); b=max(mid,b);}
 __kernel void medianFilter5_C1_D0(__global uchar * src, __global uchar * dst,  int srcOffset, int dstOffset, int cols,
@@ -356,10 +356,10 @@ __kernel void medianFilter5_C1_D0(__global uchar * src, __global uchar * dst,  i
     op(p13, p17); op(p3, p15); op(p11, p23); op(p11, p15); op(p7, p19);
     op(p7, p11); op(p11, p13); op(p11, p12);
 
-    if(get_global_id(1)<rows && get_global_id(0)<cols)
+    if((int)get_global_id(1)<rows && (int)get_global_id(0)<cols)
         dst[dstOffset + get_global_id(1)*dstStep + get_global_id(0)]=p12;
 }
-#undef op(a,b)
+#undef op
 
 #define op(a,b) {mid=a; a=min(a,b); b=max(mid,b);}
 __kernel void medianFilter5_C4_D5(__global float4 * src, __global float4 * dst,  int srcOffset, int dstOffset, int cols,
@@ -418,10 +418,10 @@ __kernel void medianFilter5_C4_D5(__global float4 * src, __global float4 * dst,
     op(p13, p17); op(p3, p15); op(p11, p23); op(p11, p15); op(p7, p19);
     op(p7, p11); op(p11, p13); op(p11, p12);
 
-    if(get_global_id(1)<rows && get_global_id(0)<cols)
+    if((int)get_global_id(1)<rows && (int)get_global_id(0)<cols)
         dst[dstOffset + get_global_id(1)*dstStep + get_global_id(0)]=p12;
 }
-#undef op(a,b)
+#undef op
 
 #define op(a,b) {mid=a; a=min(a,b); b=max(mid,b);}
 __kernel void medianFilter5_C1_D5(__global float * src, __global float * dst,  int srcOffset, int dstOffset, int cols,
@@ -480,7 +480,7 @@ __kernel void medianFilter5_C1_D5(__global float * src, __global float * dst,  i
     op(p13, p17); op(p3, p15); op(p11, p23); op(p11, p15); op(p7, p19);
     op(p7, p11); op(p11, p13); op(p11, p12);
 
-    if(get_global_id(1)<rows && get_global_id(0)<cols)
+    if((int)get_global_id(1)<rows && (int)get_global_id(0)<cols)
         dst[dstOffset + get_global_id(1)*dstStep + get_global_id(0)]=p12;
 }
-#undef op(a,b)
+#undef op
diff --git a/modules/ocl/src/opencl/imgproc_remap.cl b/modules/ocl/src/opencl/imgproc_remap.cl
index d545497f0f..53c053947f 100644
--- a/modules/ocl/src/opencl/imgproc_remap.cl
+++ b/modules/ocl/src/opencl/imgproc_remap.cl
@@ -60,7 +60,7 @@
 #elif defined BORDER_REPLICATE
 #define EXTRAPOLATE(v2, v) \
     { \
-        v2 = max(min(v2, (int2)(src_cols - 1, src_rows - 1)), zero); \
+        v2 = max(min(v2, (int2)(src_cols - 1, src_rows - 1)), (int2)(0)); \
         v = convertToWT(src[mad24(v2.y, src_step, v2.x + src_offset)]); \
     }
 #elif defined BORDER_WRAP
@@ -139,7 +139,9 @@ __kernel void remap_2_32FC1(__global const T * restrict src, __global T * dst,
 
         if (NEED_EXTRAPOLATION(gx, gy))
         {
-            int2 gxy = (int2)(gx, gy), zero = (int2)(0);
+#ifndef BORDER_CONSTANT
+            int2 gxy = (int2)(gx, gy);
+#endif
             EXTRAPOLATE(gxy, dst[dstIdx]);
         }
         else
@@ -167,10 +169,7 @@ __kernel void remap_32FC2(__global const T * restrict src, __global T * dst, __g
         int gx = gxy.x, gy = gxy.y;
 
         if (NEED_EXTRAPOLATION(gx, gy))
-        {
-            int2 zero = (int2)(0);
-            EXTRAPOLATE(gxy, dst[dstIdx]);
-        }
+            EXTRAPOLATE(gxy, dst[dstIdx])
         else
         {
             int srcIdx = mad24(gy, src_step, gx + src_offset);
@@ -196,10 +195,7 @@ __kernel void remap_16SC2(__global const T * restrict src, __global T * dst, __g
         int gx = gxy.x, gy = gxy.y;
 
         if (NEED_EXTRAPOLATION(gx, gy))
-        {
-            int2 zero = (int2)(0);
-            EXTRAPOLATE(gxy, dst[dstIdx]);
-        }
+            EXTRAPOLATE(gxy, dst[dstIdx])
         else
         {
             int srcIdx = mad24(gy, src_step, gx + src_offset);
@@ -231,7 +227,6 @@ __kernel void remap_2_32FC1(__global T const * restrict  src, __global T * dst,
         int2 map_dataB = (int2)(map_dataA.x + 1, map_dataA.y);
         int2 map_dataC = (int2)(map_dataA.x, map_dataA.y + 1);
         int2 map_dataD = (int2)(map_dataA.x + 1, map_dataA.y +1);
-        int2 zero = (int2)(0);
 
         float2 _u = map_data - convert_float2(map_dataA);
         WT2 u = convertToWT2(convert_int2_rte(convertToWT2(_u) * (WT2)32)) / (WT2)32;
@@ -285,7 +280,6 @@ __kernel void remap_32FC2(__global T const * restrict  src, __global T * dst,
         int2 map_dataB = (int2)(map_dataA.x + 1, map_dataA.y);
         int2 map_dataC = (int2)(map_dataA.x, map_dataA.y + 1);
         int2 map_dataD = (int2)(map_dataA.x + 1, map_dataA.y + 1);
-        int2 zero = (int2)(0);
 
         float2 _u = map_data - convert_float2(map_dataA);
         WT2 u = convertToWT2(convert_int2_rte(convertToWT2(_u) * (WT2)32)) / (WT2)32;
diff --git a/modules/ocl/src/opencl/imgproc_resize.cl b/modules/ocl/src/opencl/imgproc_resize.cl
index 4c258d8f58..2bb75b90cf 100644
--- a/modules/ocl/src/opencl/imgproc_resize.cl
+++ b/modules/ocl/src/opencl/imgproc_resize.cl
@@ -182,10 +182,10 @@ __kernel void resizeLN_C4_D0(__global uchar4 * dst, __global uchar4 * src,
     int x = floor(sx), y = floor(sy);
     float u = sx - x, v = sy - y;
 
-    x<0 ? x=0,u=0 : x,u;
-    x>=src_cols ? x=src_cols-1,u=0 : x,u;
-    y<0 ? y=0,v=0 : y,v;
-    y>=src_rows ? y=src_rows-1,v=0 : y,v;
+    if ( x<0 ) x=0,u=0;
+    if ( x>=src_cols ) x=src_cols-1,u=0;
+    if ( y<0 ) y=0,v=0;
+    if (y>=src_rows ) y=src_rows-1,v=0;
 
     u = u * INTER_RESIZE_COEF_SCALE;
     v = v * INTER_RESIZE_COEF_SCALE;
@@ -225,10 +225,10 @@ __kernel void resizeLN_C1_D5(__global float * dst, __global float * src,
     int x = floor(sx), y = floor(sy);
     float u = sx - x, v = sy - y;
 
-    x<0 ? x=0,u=0 : x,u;
-    x>=src_cols ? x=src_cols-1,u=0 : x,u;
-    y<0 ? y=0,v=0 : y,v;
-    y>=src_rows ? y=src_rows-1,v=0 : y,v;
+    if ( x<0 ) x=0,u=0;
+    if ( x>=src_cols ) x=src_cols-1,u=0;
+    if ( y<0 ) y=0,v=0;
+    if (y>=src_rows ) y=src_rows-1,v=0;
 
     int y_ = INC(y,src_rows);
     int x_ = INC(x,src_cols);
@@ -264,10 +264,10 @@ __kernel void resizeLN_C4_D5(__global float4 * dst, __global float4 * src,
     int x = floor(sx), y = floor(sy);
     float u = sx - x, v = sy - y;
 
-    x<0 ? x=0,u=0 : x;
-    x>=src_cols ? x=src_cols-1,u=0 : x;
-    y<0 ? y=0,v=0 : y;
-    y>=src_rows ? y=src_rows-1,v=0 : y;
+    if ( x<0 ) x=0,u=0;
+    if ( x>=src_cols ) x=src_cols-1,u=0;
+    if ( y<0 ) y=0,v=0;
+    if (y>=src_rows ) y=src_rows-1,v=0;
 
     int y_ = INC(y,src_rows);
     int x_ = INC(x,src_cols);
diff --git a/modules/ocl/src/opencl/imgproc_threshold.cl b/modules/ocl/src/opencl/imgproc_threshold.cl
index 6b847c83f8..400ac806cf 100644
--- a/modules/ocl/src/opencl/imgproc_threshold.cl
+++ b/modules/ocl/src/opencl/imgproc_threshold.cl
@@ -71,18 +71,18 @@ __kernel void threshold(__global const T * restrict src, int src_offset, int src
 #else
         VT sdata = VLOADN(0, src + src_index);
 #endif
-        VT vthresh = (VT)(thresh), zero = (VT)(0);
+        VT vthresh = (VT)(thresh);
 
 #ifdef THRESH_BINARY
-        VT vecValue = sdata > vthresh ? max_val : zero;
+        VT vecValue = sdata > vthresh ? max_val : (VT)(0);
 #elif defined THRESH_BINARY_INV
-        VT vecValue = sdata > vthresh ? zero : max_val;
+        VT vecValue = sdata > vthresh ? (VT)(0) : max_val;
 #elif defined THRESH_TRUNC
         VT vecValue = sdata > vthresh ? thresh : sdata;
 #elif defined THRESH_TOZERO
-        VT vecValue = sdata > vthresh ? sdata : zero;
+        VT vecValue = sdata > vthresh ? sdata : (VT)(0);
 #elif defined THRESH_TOZERO_INV
-        VT vecValue = sdata > vthresh ? zero : sdata;
+        VT vecValue = sdata > vthresh ? (VT)(0) : sdata;
 #endif
 
         if (gx + VECSIZE <= max_index)
@@ -117,18 +117,18 @@ __kernel void threshold(__global const T * restrict src, int src_offset, int src
         int src_index = mad24(gy, src_step, src_offset + gx);
         int dst_index = mad24(gy, dst_step, dst_offset + gx);
 
-        T sdata = src[src_index], zero = (T)(0);
+        T sdata = src[src_index];
 
 #ifdef THRESH_BINARY
-        dst[dst_index] = sdata > thresh ? max_val : zero;
+        dst[dst_index] = sdata > thresh ? max_val : (T)(0);
 #elif defined THRESH_BINARY_INV
-        dst[dst_index] = sdata > thresh ? zero : max_val;
+        dst[dst_index] = sdata > thresh ? (T)(0) : max_val;
 #elif defined THRESH_TRUNC
         dst[dst_index] = sdata > thresh ? thresh : sdata;
 #elif defined THRESH_TOZERO
-        dst[dst_index] = sdata > thresh ? sdata : zero;
+        dst[dst_index] = sdata > thresh ? sdata : (T)(0);
 #elif defined THRESH_TOZERO_INV
-        dst[dst_index] = sdata > thresh ? zero : sdata;
+        dst[dst_index] = sdata > thresh ? (T)(0) : sdata;
 #endif
     }
 }
diff --git a/modules/ocl/src/opencl/imgproc_warpAffine.cl b/modules/ocl/src/opencl/imgproc_warpAffine.cl
index caafdfb92c..a5050bbf03 100644
--- a/modules/ocl/src/opencl/imgproc_warpAffine.cl
+++ b/modules/ocl/src/opencl/imgproc_warpAffine.cl
@@ -537,9 +537,9 @@ __kernel void warpAffineLinear_C1_D5(__global float * src, __global float * dst,
 
         float tab[4];
         float taby[2], tabx[2];
-        taby[0] = 1.0 - 1.f/INTER_TAB_SIZE*ay0;
+        taby[0] = 1.0f - 1.f/INTER_TAB_SIZE*ay0;
         taby[1] = 1.f/INTER_TAB_SIZE*ay0;
-        tabx[0] = 1.0 - 1.f/INTER_TAB_SIZE*ax0;
+        tabx[0] = 1.0f - 1.f/INTER_TAB_SIZE*ax0;
         tabx[1] = 1.f/INTER_TAB_SIZE*ax0;
 
         tab[0] = taby[0] * tabx[0];
@@ -680,9 +680,9 @@ __kernel void warpAffineLinear_C4_D5(__global float4 * src, __global float4 * ds
 
         float tab[4];
         float taby[2], tabx[2];
-        taby[0] = 1.0 - 1.f/INTER_TAB_SIZE*ay0;
+        taby[0] = 1.0f - 1.f/INTER_TAB_SIZE*ay0;
         taby[1] = 1.f/INTER_TAB_SIZE*ay0;
-        tabx[0] = 1.0 - 1.f/INTER_TAB_SIZE*ax0;
+        tabx[0] = 1.0f - 1.f/INTER_TAB_SIZE*ax0;
         tabx[1] = 1.f/INTER_TAB_SIZE*ax0;
 
         tab[0] = taby[0] * tabx[0];
diff --git a/modules/ocl/src/opencl/imgproc_warpPerspective.cl b/modules/ocl/src/opencl/imgproc_warpPerspective.cl
index dc37c1f04d..eee1c81750 100644
--- a/modules/ocl/src/opencl/imgproc_warpPerspective.cl
+++ b/modules/ocl/src/opencl/imgproc_warpPerspective.cl
@@ -133,7 +133,7 @@ __kernel void warpPerspectiveLinear_C1_D0(__global const uchar * restrict src, _
         F X0 = M[0]*dx + M[1]*dy + M[2];
         F Y0 = M[3]*dx + M[4]*dy + M[5];
         F W = M[6]*dx + M[7]*dy + M[8];
-        W = (W != 0.0) ? INTER_TAB_SIZE/W : 0.0;
+        W = (W != 0.0f) ? INTER_TAB_SIZE/W : 0.0f;
         int X = rint(X0*W);
         int Y = rint(Y0*W);
 
@@ -150,9 +150,9 @@ __kernel void warpPerspectiveLinear_C1_D0(__global const uchar * restrict src, _
 
         short itab[4];
         float tab1y[2], tab1x[2];
-        tab1y[0] = 1.0 - 1.f/INTER_TAB_SIZE*ay;
+        tab1y[0] = 1.0f - 1.f/INTER_TAB_SIZE*ay;
         tab1y[1] = 1.f/INTER_TAB_SIZE*ay;
-        tab1x[0] = 1.0 - 1.f/INTER_TAB_SIZE*ax;
+        tab1x[0] = 1.0f - 1.f/INTER_TAB_SIZE*ax;
         tab1x[1] = 1.f/INTER_TAB_SIZE*ax;
 
 #pragma unroll 4
@@ -185,7 +185,7 @@ __kernel void warpPerspectiveCubic_C1_D0(__global uchar * src, __global uchar *
         F X0 = M[0]*dx + M[1]*dy + M[2];
         F Y0 = M[3]*dx + M[4]*dy + M[5];
         F W = M[6]*dx + M[7]*dy + M[8];
-        W = (W != 0.0) ? INTER_TAB_SIZE/W : 0.0;
+        W = (W != 0.0f) ? INTER_TAB_SIZE/W : 0.0f;
         int X = rint(X0*W);
         int Y = rint(Y0*W);
 
@@ -265,7 +265,7 @@ __kernel void warpPerspectiveNN_C4_D0(__global uchar4 const * restrict src, __gl
         F X0 = M[0]*dx + M[1]*dy + M[2];
         F Y0 = M[3]*dx + M[4]*dy + M[5];
         F W = M[6]*dx + M[7]*dy + M[8];
-        W = (W != 0.0) ? 1./W : 0.0;
+        W = (W != 0.0f) ? 1.f/W : 0.0f;
         short sx = convert_short_sat_rte(X0*W);
         short sy = convert_short_sat_rte(Y0*W);
 
@@ -289,7 +289,7 @@ __kernel void warpPerspectiveLinear_C4_D0(__global uchar4 const * restrict src,
         F X0 = M[0]*dx + M[1]*dy + M[2];
         F Y0 = M[3]*dx + M[4]*dy + M[5];
         F W = M[6]*dx + M[7]*dy + M[8];
-        W = (W != 0.0) ? INTER_TAB_SIZE/W : 0.0;
+        W = (W != 0.0f) ? INTER_TAB_SIZE/W : 0.0f;
         int X = rint(X0*W);
         int Y = rint(Y0*W);
 
@@ -341,7 +341,7 @@ __kernel void warpPerspectiveCubic_C4_D0(__global uchar4 const * restrict src, _
         F X0 = M[0]*dx + M[1]*dy + M[2];
         F Y0 = M[3]*dx + M[4]*dy + M[5];
         F W = M[6]*dx + M[7]*dy + M[8];
-        W = (W != 0.0) ? INTER_TAB_SIZE/W : 0.0;
+        W = (W != 0.0f) ? INTER_TAB_SIZE/W : 0.0f;
         int X = rint(X0*W);
         int Y = rint(Y0*W);
 
@@ -424,7 +424,7 @@ __kernel void warpPerspectiveNN_C1_D5(__global float * src, __global float * dst
         F X0 = M[0]*dx + M[1]*dy + M[2];
         F Y0 = M[3]*dx + M[4]*dy + M[5];
         F W = M[6]*dx + M[7]*dy + M[8];
-        W = (W != 0.0) ? 1./W : 0.0;
+        W = (W != 0.0f) ? 1.f/W : 0.0f;
         short sx = convert_short_sat_rte(X0*W);
         short sy = convert_short_sat_rte(Y0*W);
 
@@ -447,7 +447,7 @@ __kernel void warpPerspectiveLinear_C1_D5(__global float * src, __global float *
         F X0 = M[0]*dx + M[1]*dy + M[2];
         F Y0 = M[3]*dx + M[4]*dy + M[5];
         F W = M[6]*dx + M[7]*dy + M[8];
-        W = (W != 0.0) ? INTER_TAB_SIZE/W : 0.0;
+        W = (W != 0.0f) ? INTER_TAB_SIZE/W : 0.0f;
         int X = rint(X0*W);
         int Y = rint(Y0*W);
 
@@ -465,9 +465,9 @@ __kernel void warpPerspectiveLinear_C1_D5(__global float * src, __global float *
 
         float tab[4];
         float taby[2], tabx[2];
-        taby[0] = 1.0 - 1.f/INTER_TAB_SIZE*ay;
+        taby[0] = 1.0f - 1.f/INTER_TAB_SIZE*ay;
         taby[1] = 1.f/INTER_TAB_SIZE*ay;
-        tabx[0] = 1.0 - 1.f/INTER_TAB_SIZE*ax;
+        tabx[0] = 1.0f - 1.f/INTER_TAB_SIZE*ax;
         tabx[1] = 1.f/INTER_TAB_SIZE*ax;
 
         tab[0] = taby[0] * tabx[0];
@@ -497,7 +497,7 @@ __kernel void warpPerspectiveCubic_C1_D5(__global float * src, __global float *
         F X0 = M[0]*dx + M[1]*dy + M[2];
         F Y0 = M[3]*dx + M[4]*dy + M[5];
         F W = M[6]*dx + M[7]*dy + M[8];
-        W = (W != 0.0) ? INTER_TAB_SIZE/W : 0.0;
+        W = (W != 0.0f) ? INTER_TAB_SIZE/W : 0.0f;
         int X = rint(X0*W);
         int Y = rint(Y0*W);
 
@@ -557,7 +557,7 @@ __kernel void warpPerspectiveNN_C4_D5(__global float4 * src, __global float4 * d
         F X0 = M[0]*dx + M[1]*dy + M[2];
         F Y0 = M[3]*dx + M[4]*dy + M[5];
         F W = M[6]*dx + M[7]*dy + M[8];
-        W =(W != 0.0)? 1./W : 0.0;
+        W =(W != 0.0f)? 1.f/W : 0.0f;
         short sx = convert_short_sat_rte(X0*W);
         short sy = convert_short_sat_rte(Y0*W);
 
@@ -583,7 +583,7 @@ __kernel void warpPerspectiveLinear_C4_D5(__global float4 * src, __global float4
         F X0 = M[0]*dx + M[1]*dy + M[2];
         F Y0 = M[3]*dx + M[4]*dy + M[5];
         F W = M[6]*dx + M[7]*dy + M[8];
-        W = (W != 0.0) ? INTER_TAB_SIZE/W : 0.0;
+        W = (W != 0.0f) ? INTER_TAB_SIZE/W : 0.0f;
         int X = rint(X0*W);
         int Y = rint(Y0*W);
 
@@ -602,9 +602,9 @@ __kernel void warpPerspectiveLinear_C4_D5(__global float4 * src, __global float4
 
         float tab[4];
         float taby[2], tabx[2];
-        taby[0] = 1.0 - 1.f/INTER_TAB_SIZE*ay0;
+        taby[0] = 1.0f - 1.f/INTER_TAB_SIZE*ay0;
         taby[1] = 1.f/INTER_TAB_SIZE*ay0;
-        tabx[0] = 1.0 - 1.f/INTER_TAB_SIZE*ax0;
+        tabx[0] = 1.0f - 1.f/INTER_TAB_SIZE*ax0;
         tabx[1] = 1.f/INTER_TAB_SIZE*ax0;
 
         tab[0] = taby[0] * tabx[0];
@@ -636,7 +636,7 @@ __kernel void warpPerspectiveCubic_C4_D5(__global float4 * src, __global float4
         F X0 = M[0]*dx + M[1]*dy + M[2];
         F Y0 = M[3]*dx + M[4]*dy + M[5];
         F W = M[6]*dx + M[7]*dy + M[8];
-        W = (W != 0.0) ? INTER_TAB_SIZE/W : 0.0;
+        W = (W != 0.0f) ? INTER_TAB_SIZE/W : 0.0f;
         int X = rint(X0*W);
         int Y = rint(Y0*W);
 
diff --git a/modules/ocl/src/opencl/kernel_sort_by_key.cl b/modules/ocl/src/opencl/kernel_sort_by_key.cl
index 0ad11b8bcf..0e8d581b74 100644
--- a/modules/ocl/src/opencl/kernel_sort_by_key.cl
+++ b/modules/ocl/src/opencl/kernel_sort_by_key.cl
@@ -192,7 +192,6 @@ __kernel
 {
     const int          i  = get_local_id(0); // index in workgroup
     const int numOfGroups = get_num_groups(0); // index in workgroup
-    const int groupID     = get_group_id(0);
     const int         wg  = get_local_size(0); // workgroup size = block size
     int pos = 0, same = 0;
     const int offset = get_group_id(0) * wg;
diff --git a/modules/ocl/src/opencl/kernel_stablesort_by_key.cl b/modules/ocl/src/opencl/kernel_stablesort_by_key.cl
index 2d2c0a19cd..2d38fbf2f7 100644
--- a/modules/ocl/src/opencl/kernel_stablesort_by_key.cl
+++ b/modules/ocl/src/opencl/kernel_stablesort_by_key.cl
@@ -63,7 +63,7 @@
 
 ///////////// parallel merge sort ///////////////
 // ported from https://github.com/HSA-Libraries/Bolt/blob/master/include/bolt/cl/stablesort_by_key_kernels.cl
-uint lowerBoundLinear( global K_T* data, uint left, uint right, K_T searchVal)
+static uint lowerBoundLinear( global K_T* data, uint left, uint right, K_T searchVal)
 {
     //  The values firstIndex and lastIndex get modified within the loop, narrowing down the potential sequence
     uint firstIndex = left;
@@ -94,7 +94,7 @@ uint lowerBoundLinear( global K_T* data, uint left, uint right, K_T searchVal)
 //  by a base pointer and left and right index for a particular candidate value.  The comparison operator is
 //  passed as a functor parameter my_comp
 //  This function returns an index that is the first index whos value would be equal to the searched value
-uint lowerBoundBinary( global K_T* data, uint left, uint right, K_T searchVal)
+static uint lowerBoundBinary( global K_T* data, uint left, uint right, K_T searchVal)
 {
     //  The values firstIndex and lastIndex get modified within the loop, narrowing down the potential sequence
     uint firstIndex = left;
@@ -130,7 +130,7 @@ uint lowerBoundBinary( global K_T* data, uint left, uint right, K_T searchVal)
 //  passed as a functor parameter my_comp
 //  This function returns an index that is the first index whos value would be greater than the searched value
 //  If the search value is not found in the sequence, upperbound returns the same result as lowerbound
-uint upperBoundBinary( global K_T* data, uint left, uint right, K_T searchVal)
+static uint upperBoundBinary( global K_T* data, uint left, uint right, K_T searchVal)
 {
     uint upperBound = lowerBoundBinary( data, left, right, searchVal );
 
@@ -167,9 +167,6 @@ kernel void merge(
 )
 {
     size_t globalID     = get_global_id( 0 );
-    size_t groupID      = get_group_id( 0 );
-    size_t localID      = get_local_id( 0 );
-    size_t wgSize       = get_local_size( 0 );
 
     //  Abort threads that are passed the end of the input vector
     if( globalID >= srcVecSize )
@@ -230,12 +227,12 @@ kernel void blockInsertionSort(
     local V_T*    val_lds
 )
 {
-    size_t gloId    = get_global_id( 0 );
-    size_t groId    = get_group_id( 0 );
-    size_t locId    = get_local_id( 0 );
-    size_t wgSize   = get_local_size( 0 );
+    int gloId    = get_global_id( 0 );
+    int groId    = get_group_id( 0 );
+    int locId    = get_local_id( 0 );
+    int wgSize   = get_local_size( 0 );
 
-    bool in_range = gloId < vecSize;
+    bool in_range = gloId < (int)vecSize;
     K_T key;
     V_T val;
     //  Abort threads that are passed the end of the input vector
@@ -254,7 +251,7 @@ kernel void blockInsertionSort(
     {
         //  The last workgroup may have an irregular size, so we calculate a per-block endIndex
         //  endIndex is essentially emulating a mod operator with subtraction and multiply
-        size_t endIndex = vecSize - ( groId * wgSize );
+        int endIndex = vecSize - ( groId * wgSize );
         endIndex = min( endIndex, wgSize );
 
         // printf( "Debug: endIndex[%i]=%i\n", groId, endIndex );
diff --git a/modules/ocl/src/opencl/knearest.cl b/modules/ocl/src/opencl/knearest.cl
index e670df7e6f..bc0ae89a83 100644
--- a/modules/ocl/src/opencl/knearest.cl
+++ b/modules/ocl/src/opencl/knearest.cl
@@ -129,58 +129,53 @@ __kernel void knn_find_nearest(__global float* sample, int sample_row, int sampl
     }
     /*! find_nearest_neighbor done!*/
     /*! write_results start!*/
-    switch (regression)
+    if (regression)
     {
-    case true:
-        {
-            TYPE s;
+        TYPE s;
 #ifdef DOUBLE_SUPPORT
-            s = 0.0;
+        s = 0.0;
 #else
-            s = 0.0f;
+        s = 0.0f;
 #endif
-            for(j = 0; j < K1; j++)
-                s += nr[j * nThreads + threadY];
+        for(j = 0; j < K1; j++)
+            s += nr[j * nThreads + threadY];
 
-            _results[y * _results_step] = (float)(s * inv_scale);
-        }
-        break;
-    case false:
-        {
-            int prev_start = 0, best_count = 0, cur_count;
-            float best_val;
+        _results[y * _results_step] = (float)(s * inv_scale);
+    }
+    else
+    {
+        int prev_start = 0, best_count = 0, cur_count;
+        float best_val;
 
-            for(j = K1 - 1; j > 0; j--)
+        for(j = K1 - 1; j > 0; j--)
+        {
+            bool swap_f1 = false;
+            for(j1 = 0; j1 < j; j1++)
             {
-                bool swap_f1 = false;
-                for(j1 = 0; j1 < j; j1++)
+                if(nr[j1 * nThreads + threadY] > nr[(j1 + 1) * nThreads + threadY])
                 {
-                    if(nr[j1 * nThreads + threadY] > nr[(j1 + 1) * nThreads + threadY])
-                    {
-                        int t;
-                        CV_SWAP(nr[j1 * nThreads + threadY], nr[(j1 + 1) * nThreads + threadY], t);
-                        swap_f1 = true;
-                    }
+                    int t;
+                    CV_SWAP(nr[j1 * nThreads + threadY], nr[(j1 + 1) * nThreads + threadY], t);
+                    swap_f1 = true;
                 }
-                if(!swap_f1)
-                    break;
             }
+            if(!swap_f1)
+                break;
+        }
 
-            best_val = 0;
-            for(j = 1; j <= K1; j++)
-                if(j == K1 || nr[j * nThreads + threadY] != nr[(j - 1) * nThreads + threadY])
+        best_val = 0;
+        for(j = 1; j <= K1; j++)
+            if(j == K1 || nr[j * nThreads + threadY] != nr[(j - 1) * nThreads + threadY])
+            {
+                cur_count = j - prev_start;
+                if(best_count < cur_count)
                 {
-                    cur_count = j - prev_start;
-                    if(best_count < cur_count)
-                    {
-                        best_count = cur_count;
-                        best_val = nr[(j - 1) * nThreads + threadY];
-                    }
-                    prev_start = j;
+                    best_count = cur_count;
+                    best_val = nr[(j - 1) * nThreads + threadY];
                 }
-                _results[y * _results_step] = best_val;
-        }
-        break;
+                prev_start = j;
+            }
+            _results[y * _results_step] = best_val;
     }
     ///*! write_results done!*/
 }
diff --git a/modules/ocl/src/opencl/match_template.cl b/modules/ocl/src/opencl/match_template.cl
index 6fc4c748cf..8b63c3bd2d 100644
--- a/modules/ocl/src/opencl/match_template.cl
+++ b/modules/ocl/src/opencl/match_template.cl
@@ -43,8 +43,6 @@
 //
 //M*/
 
-#pragma OPENCL EXTENSION cl_amd_printf : enable
-
 #if defined (DOUBLE_SUPPORT)
 
 #ifdef cl_khr_fp64
@@ -70,7 +68,7 @@
 #define SUMS_PTR(ox, oy) mad24(gidy + oy, img_sums_step, gidx + img_sums_offset + ox)
 // normAcc* are accurate normalization routines which make GPU matchTemplate
 // consistent with CPU one
-float normAcc(float num, float denum)
+inline float normAcc(float num, float denum)
 {
     if(fabs(num) < denum)
     {
@@ -83,7 +81,7 @@ float normAcc(float num, float denum)
     return 0;
 }
 
-float normAcc_SQDIFF(float num, float denum)
+inline float normAcc_SQDIFF(float num, float denum)
 {
     if(fabs(num) < denum)
     {
diff --git a/modules/ocl/src/opencl/meanShift.cl b/modules/ocl/src/opencl/meanShift.cl
index 728e2f9695..ea5060e467 100644
--- a/modules/ocl/src/opencl/meanShift.cl
+++ b/modules/ocl/src/opencl/meanShift.cl
@@ -46,7 +46,7 @@
 //
 //M*/
 
-short2 do_mean_shift(int x0, int y0, __global uchar4* out,int out_step,
+static short2 do_mean_shift(int x0, int y0, __global uchar4* out,int out_step,
                __global uchar4* in, int in_step, int dst_off, int src_off,
                int cols, int rows, int sp, int sr, int maxIter, float eps)
 {
@@ -56,7 +56,6 @@ short2 do_mean_shift(int x0, int y0, __global uchar4* out,int out_step,
     src_off = src_off >> 2;
     dst_off = dst_off >> 2;
     int idx = src_off + y0 * in_step + x0;
-//    uchar4 c = vload4(0, (__global uchar*)in+idx);
     uchar4 c = in[idx];
     int base = dst_off + get_global_id(1)*out_step + get_global_id(0) ;
 
diff --git a/modules/ocl/src/opencl/moments.cl b/modules/ocl/src/opencl/moments.cl
index 602ebd1c1d..31c4c85ec7 100644
--- a/modules/ocl/src/opencl/moments.cl
+++ b/modules/ocl/src/opencl/moments.cl
@@ -162,7 +162,6 @@ __kernel void CvMoments(__global TT* src_data, int src_rows, int src_cols, int s
     WT4 x3 = (WT4)(0.f);
 
     __global TT* row = src_data + gidy * src_step + ly * src_step + gidx * 256;
-    bool switchFlag = false;
 
     WT4 p;
     WT4 x;
@@ -173,7 +172,7 @@ __kernel void CvMoments(__global TT* src_data, int src_rows, int src_cols, int s
 
     if(dy < src_rows)
     {
-        if((x_rest > 0) && (gidx == (get_num_groups(0) - 1)))
+        if((x_rest > 0) && (gidx == ((int)get_num_groups(0) - 1)))
         {
             int i;
             for(i = 0; i < x_rest - 4; i += 4)
@@ -190,11 +189,8 @@ __kernel void CvMoments(__global TT* src_data, int src_rows, int src_cols, int s
             }
 
             x0.s0 = x0.s0 + x0.s1 + x0.s2 + x0.s3;
-
             x1.s0 = x1.s0 + x1.s1 + x1.s2 + x1.s3;
-
             x2.s0 = x2.s0 + x2.s1 + x2.s2 + x2.s3;
-
             x3.s0 = x3.s0 + x3.s1 + x3.s2 + x3.s3;
 
             WT x0_ = 0;
@@ -238,11 +234,8 @@ __kernel void CvMoments(__global TT* src_data, int src_rows, int src_cols, int s
             }
 
             x0.s0 = x0.s0 + x0.s1 + x0.s2 + x0.s3;
-
             x1.s0 = x1.s0 + x1.s1 + x1.s2 + x1.s3;
-
             x2.s0 = x2.s0 + x2.s1 + x2.s2 + x2.s3;
-
             x3.s0 = x3.s0 + x3.s1 + x3.s2 + x3.s3;
         }
 
@@ -251,7 +244,7 @@ __kernel void CvMoments(__global TT* src_data, int src_rows, int src_cols, int s
     }
     __local WT mom[10][256];
 
-    if((y_rest > 0) && (gidy == (get_num_groups(1) - 1)))
+    if((y_rest > 0) && (gidy == ((int)get_num_groups(1) - 1)))
     {
         if(ly < y_rest)
         {
@@ -268,13 +261,10 @@ __kernel void CvMoments(__global TT* src_data, int src_rows, int src_cols, int s
         }
         barrier(CLK_LOCAL_MEM_FENCE);
         if(ly < 10)
-        {
             for(int i = 1; i < y_rest; i++)
-            {
                 mom[ly][0] = mom[ly][i] + mom[ly][0];
-            }
-        }
-    }else
+    }
+    else
     {
         mom[9][ly] = py * sy;
         mom[8][ly] = x1.s0 * sy;
@@ -413,11 +403,9 @@ __kernel void CvMoments(__global TT* src_data, int src_rows, int src_cols, int s
 
     if(binary)
     {
-        WT s = 1./255;
+        WT s = 1.0f/255;
         if(ly < 10)
-        {
             mom[ly][0] *= s;
-        }
         barrier(CLK_LOCAL_MEM_FENCE);
     }
     WT xm = (gidx * 256) * mom[0][0];
@@ -440,7 +428,5 @@ __kernel void CvMoments(__global TT* src_data, int src_rows, int src_cols, int s
     barrier(CLK_LOCAL_MEM_FENCE);
 
     if(ly < 10)
-    {
         dst_m[10 * gidy * dst_step + ly * dst_step + gidx] = mom[ly][1];
-    }
 }
diff --git a/modules/ocl/src/opencl/objdetect_hog.cl b/modules/ocl/src/opencl/objdetect_hog.cl
index 685eccf688..0d2f26f966 100644
--- a/modules/ocl/src/opencl/objdetect_hog.cl
+++ b/modules/ocl/src/opencl/objdetect_hog.cl
@@ -200,7 +200,7 @@ __kernel void normalize_hists_36_kernel(__global float* block_hists,
 //-------------------------------------------------------------
 //  Normalization of histograms via L2Hys_norm
 //
-float reduce_smem(volatile __local float* smem, int size)
+static float reduce_smem(volatile __local float* smem, int size)
 {
     unsigned int tid = get_local_id(0);
     float sum = smem[tid];
@@ -564,7 +564,6 @@ __kernel void compute_gradients_8UC4_kernel(
     const int x = get_global_id(0);
     const int tid = get_local_id(0);
     const int gSizeX = get_local_size(0);
-    const int gidX = get_group_id(0);
     const int gidY = get_group_id(1);
 
     __global const uchar4* row = img + gidY * img_step;
@@ -667,7 +666,6 @@ __kernel void compute_gradients_8UC1_kernel(
     const int x = get_global_id(0);
     const int tid = get_local_id(0);
     const int gSizeX = get_local_size(0);
-    const int gidX = get_group_id(0);
     const int gidY = get_group_id(1);
 
     __global const uchar* row = img + gidY * img_step;
diff --git a/modules/ocl/src/opencl/optical_flow_farneback.cl b/modules/ocl/src/opencl/optical_flow_farneback.cl
index 917f7f215d..4725662c60 100644
--- a/modules/ocl/src/opencl/optical_flow_farneback.cl
+++ b/modules/ocl/src/opencl/optical_flow_farneback.cl
@@ -44,10 +44,10 @@
 //M*/
 
 
-#define tx  get_local_id(0)
+#define tx  (int)get_local_id(0)
 #define ty  get_local_id(1)
 #define bx  get_group_id(0)
-#define bdx get_local_size(0)
+#define bdx (int)get_local_size(0)
 
 #define BORDER_SIZE 5
 #define MAX_KSIZE_HALF 100
diff --git a/modules/ocl/src/opencl/pyr_down.cl b/modules/ocl/src/opencl/pyr_down.cl
index e09846457c..6f10067e9f 100644
--- a/modules/ocl/src/opencl/pyr_down.cl
+++ b/modules/ocl/src/opencl/pyr_down.cl
@@ -43,32 +43,32 @@
 //
 //M*/
 
-int idx_row_low(int y, int last_row)
+inline int idx_row_low(int y, int last_row)
 {
     return abs(y) % (last_row + 1);
 }
 
-int idx_row_high(int y, int last_row)
+inline int idx_row_high(int y, int last_row)
 {
     return abs(last_row - (int)abs(last_row - y)) % (last_row + 1);
 }
 
-int idx_row(int y, int last_row)
+inline int idx_row(int y, int last_row)
 {
     return idx_row_low(idx_row_high(y, last_row), last_row);
 }
 
-int idx_col_low(int x, int last_col)
+inline int idx_col_low(int x, int last_col)
 {
     return abs(x) % (last_col + 1);
 }
 
-int idx_col_high(int x, int last_col)
+inline int idx_col_high(int x, int last_col)
 {
     return abs(last_col - (int)abs(last_col - x)) % (last_col + 1);
 }
 
-int idx_col(int x, int last_col)
+inline int idx_col(int x, int last_col)
 {
     return idx_col_low(idx_col_high(x, last_col), last_col);
 }
diff --git a/modules/ocl/src/opencl/pyrlk.cl b/modules/ocl/src/opencl/pyrlk.cl
index 85f4d39343..a7fc27838b 100644
--- a/modules/ocl/src/opencl/pyrlk.cl
+++ b/modules/ocl/src/opencl/pyrlk.cl
@@ -53,7 +53,8 @@
 #define WAVE_SIZE 1
 #endif
 #ifdef CPU
-void reduce3(float val1, float val2, float val3,  __local float* smem1,  __local float* smem2,  __local float* smem3, int tid)
+
+static void reduce3(float val1, float val2, float val3,  __local float* smem1,  __local float* smem2,  __local float* smem3, int tid)
 {
     smem1[tid] = val1;
     smem2[tid] = val2;
@@ -72,7 +73,7 @@ void reduce3(float val1, float val2, float val3,  __local float* smem1,  __local
     }
 }
 
-void reduce2(float val1, float val2, volatile __local float* smem1, volatile __local float* smem2, int tid)
+static void reduce2(float val1, float val2, volatile __local float* smem1, volatile __local float* smem2, int tid)
 {
     smem1[tid] = val1;
     smem2[tid] = val2;
@@ -89,7 +90,7 @@ void reduce2(float val1, float val2, volatile __local float* smem1, volatile __l
     }
 }
 
-void reduce1(float val1, volatile __local float* smem1, int tid)
+static void reduce1(float val1, volatile __local float* smem1, int tid)
 {
     smem1[tid] = val1;
     barrier(CLK_LOCAL_MEM_FENCE);
@@ -104,7 +105,7 @@ void reduce1(float val1, volatile __local float* smem1, int tid)
     }
 }
 #else
-void reduce3(float val1, float val2, float val3,
+static void reduce3(float val1, float val2, float val3,
              __local volatile float* smem1, __local volatile float* smem2, __local volatile float* smem3, int tid)
 {
     smem1[tid] = val1;
@@ -151,7 +152,7 @@ void reduce3(float val1, float val2, float val3,
     barrier(CLK_LOCAL_MEM_FENCE);
 }
 
-void reduce2(float val1, float val2, __local volatile float* smem1, __local volatile float* smem2, int tid)
+static void reduce2(float val1, float val2, __local volatile float* smem1, __local volatile float* smem2, int tid)
 {
     smem1[tid] = val1;
     smem2[tid] = val2;
@@ -190,7 +191,7 @@ void reduce2(float val1, float val2, __local volatile float* smem1, __local vola
     barrier(CLK_LOCAL_MEM_FENCE);
 }
 
-void reduce1(float val1, __local volatile float* smem1, int tid)
+static void reduce1(float val1, __local volatile float* smem1, int tid)
 {
     smem1[tid] = val1;
     barrier(CLK_LOCAL_MEM_FENCE);
@@ -226,7 +227,7 @@ void reduce1(float val1, __local volatile float* smem1, int tid)
 // Image read mode
 __constant sampler_t sampler    = CLK_NORMALIZED_COORDS_FALSE | CLK_ADDRESS_CLAMP_TO_EDGE | CLK_FILTER_LINEAR;
 
-void SetPatch(image2d_t I, float x, float y,
+static void SetPatch(image2d_t I, float x, float y,
               float* Pch, float* Dx, float* Dy,
               float* A11, float* A12, float* A22)
 {
@@ -247,7 +248,7 @@ void SetPatch(image2d_t I, float x, float y,
     *A22 += dIdy * dIdy;
 }
 
-void GetPatch(image2d_t J, float x, float y,
+inline void GetPatch(image2d_t J, float x, float y,
               float* Pch, float* Dx, float* Dy,
               float* b1, float* b2)
 {
@@ -257,13 +258,13 @@ void GetPatch(image2d_t J, float x, float y,
     *b2 += diff**Dy;
 }
 
-void GetError(image2d_t J, const float x, const float y, const float* Pch, float* errval)
+inline void GetError(image2d_t J, const float x, const float y, const float* Pch, float* errval)
 {
     float diff = read_imagef(J, sampler, (float2)(x,y)).x-*Pch;
     *errval += fabs(diff);
 }
 
-void SetPatch4(image2d_t I, const float x, const float y,
+static void SetPatch4(image2d_t I, const float x, const float y,
                float4* Pch, float4* Dx, float4* Dy,
                float* A11, float* A12, float* A22)
 {
@@ -286,7 +287,7 @@ void SetPatch4(image2d_t I, const float x, const float y,
     *A22 += sqIdx.x + sqIdx.y + sqIdx.z;
 }
 
-void GetPatch4(image2d_t J, const float x, const float y,
+static void GetPatch4(image2d_t J, const float x, const float y,
                const float4* Pch, const float4* Dx, const float4* Dy,
                float* b1, float* b2)
 {
@@ -298,7 +299,7 @@ void GetPatch4(image2d_t J, const float x, const float y,
     *b2 += xdiff.x + xdiff.y + xdiff.z;
 }
 
-void GetError4(image2d_t J, const float x, const float y, const float4* Pch, float* errval)
+static void GetError4(image2d_t J, const float x, const float y, const float4* Pch, float* errval)
 {
     float4 diff = read_imagef(J, sampler, (float2)(x,y))-*Pch;
     *errval += fabs(diff.x) + fabs(diff.y) + fabs(diff.z);
@@ -318,7 +319,7 @@ __kernel void lkSparse_C1_D5(image2d_t I, image2d_t J,
     unsigned int gid=get_group_id(0);
     unsigned int xsize=get_local_size(0);
     unsigned int ysize=get_local_size(1);
-    int xBase, yBase, i, j, k;
+    int xBase, yBase, k;
 
     float2 c_halfWin = (float2)((c_winSize_x - 1)>>1, (c_winSize_y - 1)>>1);
 
@@ -597,7 +598,7 @@ __kernel void lkSparse_C4_D5(image2d_t I, image2d_t J,
     unsigned int gid=get_group_id(0);
     unsigned int xsize=get_local_size(0);
     unsigned int ysize=get_local_size(1);
-    int xBase, yBase, i, j, k;
+    int xBase, yBase, k;
 
     float2 c_halfWin = (float2)((c_winSize_x - 1)>>1, (c_winSize_y - 1)>>1);
 
diff --git a/modules/ocl/src/opencl/split_mat.cl b/modules/ocl/src/opencl/split_mat.cl
index 7e1b15c994..b9aa048b07 100644
--- a/modules/ocl/src/opencl/split_mat.cl
+++ b/modules/ocl/src/opencl/split_mat.cl
@@ -183,7 +183,7 @@ __kernel void split_vector(
         int dst ## xOffsetLimitBytes = dst ## Offset.x + size.x * sizeof(TYPE); \
         int dst ## xOffsetBytes = dst ## Offset.x + x * sizeof(TYPE); \
         int dst ## yOffsetBytes = (dst ## Offset.y + y) * dst ## StepBytes; \
-        if (!BYPASS_VSTORE && dst ## xOffsetBytes + sizeof(DST_VEC_TYPE) <= dst ## xOffsetLimitBytes) \
+        if (!BYPASS_VSTORE && dst ## xOffsetBytes + (int)sizeof(DST_VEC_TYPE) <= dst ## xOffsetLimitBytes) \
         { \
             VSTORE_ ## dst(((__global char*)dst + dst ## yOffsetBytes + dst ## xOffsetBytes), vecValue); \
         } \
@@ -192,7 +192,7 @@ __kernel void split_vector(
             VEC_TO_ARRAY(vecValue, vecValue##Array); \
             for (int i = 0; i < VEC_SIZE; i++, dst ## xOffsetBytes += sizeof(TYPE)) \
             { \
-                if (dst ## xOffsetBytes + sizeof(TYPE) <= dst ## xOffsetLimitBytes) \
+                if (dst ## xOffsetBytes + (int)sizeof(TYPE) <= dst ## xOffsetLimitBytes) \
                     *(__global TYPE*)((__global char*)dst + dst ## yOffsetBytes + dst ## xOffsetBytes) = vecValue##Array[i]; \
                 else \
                     break; \
diff --git a/modules/ocl/src/opencl/stereobm.cl b/modules/ocl/src/opencl/stereobm.cl
index 773aee618f..207bf0047f 100644
--- a/modules/ocl/src/opencl/stereobm.cl
+++ b/modules/ocl/src/opencl/stereobm.cl
@@ -56,7 +56,7 @@
 #define radius 64
 #endif
 
-unsigned int CalcSSD(__local unsigned int *col_ssd)
+static unsigned int CalcSSD(__local unsigned int *col_ssd)
 {
     unsigned int cache = col_ssd[0];
 
@@ -67,7 +67,7 @@ unsigned int CalcSSD(__local unsigned int *col_ssd)
     return cache;
 }
 
-uint2 MinSSD(__local unsigned int *col_ssd)
+static uint2 MinSSD(__local unsigned int *col_ssd)
 {
     unsigned int ssd[N_DISPARITIES];
     const int win_size = (radius << 1);
@@ -95,7 +95,7 @@ uint2 MinSSD(__local unsigned int *col_ssd)
     return (uint2)(mssd, bestIdx);
 }
 
-void StepDown(int idx1, int idx2, __global unsigned char* imageL,
+static void StepDown(int idx1, int idx2, __global unsigned char* imageL,
               __global unsigned char* imageR, int d,   __local unsigned int *col_ssd)
 {
     uint8 imgR1 = convert_uint8(vload8(0, imageR + (idx1 - d - 7)));
@@ -114,7 +114,7 @@ void StepDown(int idx1, int idx2, __global unsigned char* imageL,
     col_ssd[7 * (BLOCK_W + win_size)] += res.s0;
 }
 
-void InitColSSD(int x_tex, int y_tex, int im_pitch, __global unsigned char* imageL,
+static void InitColSSD(int x_tex, int y_tex, int im_pitch, __global unsigned char* imageL,
                 __global unsigned char* imageR, int d,
                  __local unsigned int *col_ssd)
 {
@@ -153,7 +153,7 @@ __kernel void stereoKernel(__global unsigned char *left, __global unsigned char
 
     int X = get_group_id(0) * BLOCK_W + get_local_id(0) + maxdisp + radius;
 
-#define Y (get_group_id(1) * ROWSperTHREAD + radius)
+#define Y (int)(get_group_id(1) * ROWSperTHREAD + radius)
 
     __global unsigned int* minSSDImage = cminSSDImage + X + Y * cminSSD_step;
     __global unsigned char* disparImage = disp + X + Y * disp_step;
@@ -241,7 +241,7 @@ __kernel void prefilter_xsobel(__global unsigned char *input, __global unsigned
 /////////////////////////////////// Textureness filtering ////////////////////////////////////////
 //////////////////////////////////////////////////////////////////////////////////////////////////
 
-float sobel(__global unsigned char *input, int x, int y, int rows, int cols)
+static float sobel(__global unsigned char *input, int x, int y, int rows, int cols)
 {
     float conv = 0;
     int y1 = y==0? 0 : y-1;
@@ -256,7 +256,7 @@ float sobel(__global unsigned char *input, int x, int y, int rows, int cols)
     return fabs(conv);
 }
 
-float CalcSums(__local float *cols, __local float *cols_cache, int winsz)
+static float CalcSums(__local float *cols, __local float *cols_cache, int winsz)
 {
     unsigned int cache = cols[0];
 
diff --git a/modules/ocl/src/opencl/stereobp.cl b/modules/ocl/src/opencl/stereobp.cl
index 4818399c57..ec02f827a9 100644
--- a/modules/ocl/src/opencl/stereobp.cl
+++ b/modules/ocl/src/opencl/stereobp.cl
@@ -65,7 +65,7 @@
 ///////////////////////////////////////////////////////////////
 /////////////////common///////////////////////////////////////
 /////////////////////////////////////////////////////////////
-T saturate_cast(float v){
+inline T saturate_cast(float v){
 #ifdef T_SHORT
     return convert_short_sat_rte(v);
 #else
@@ -73,7 +73,7 @@ T saturate_cast(float v){
 #endif
 }
 
-T4 saturate_cast4(float4 v){
+inline T4 saturate_cast4(float4 v){
 #ifdef T_SHORT
     return convert_short4_sat_rte(v);
 #else
@@ -99,7 +99,7 @@ inline float pix_diff_1(const uchar4 l, __global const uchar *rs)
     return abs((int)(l.x) - *rs);
 }
 
-float pix_diff_4(const uchar4 l, __global const uchar *rs)
+static float pix_diff_4(const uchar4 l, __global const uchar *rs)
 {
     uchar4 r;
     r = *((__global uchar4 *)rs);
@@ -235,7 +235,7 @@ __kernel void level_up_message(__global T *src, int src_rows, int src_step,
 ///////////////////////////////////////////////////////////////
 ////////////////////  calc all iterations /////////////////////
 ///////////////////////////////////////////////////////////////
-void message(__global T *us_, __global T *ds_, __global T *ls_, __global T *rs_,
+static void message(__global T *us_, __global T *ds_, __global T *ls_, __global T *rs_,
               const __global T *dt,
               int u_step, int msg_disp_step, int data_disp_step,
               float4 cmax_disc_term, float4 cdisc_single_jump)
diff --git a/modules/ocl/src/opencl/stereocsbp.cl b/modules/ocl/src/opencl/stereocsbp.cl
index 50aabaca68..13a201cc1c 100644
--- a/modules/ocl/src/opencl/stereocsbp.cl
+++ b/modules/ocl/src/opencl/stereocsbp.cl
@@ -248,7 +248,7 @@ __kernel void get_first_k_initial_local_1(__global float *data_cost_selected_, _
 ///////////////////////////////////////////////////////////////
 /////////////////////// init data cost ////////////////////////
 ///////////////////////////////////////////////////////////////
-float compute_3(__global uchar* left, __global uchar* right,
+inline float compute_3(__global uchar* left, __global uchar* right,
     float cdata_weight,  float cmax_data_term)
 {
     float tb = 0.114f * abs((int)left[0] - right[0]);
@@ -257,17 +257,21 @@ float compute_3(__global uchar* left, __global uchar* right,
 
     return fmin(cdata_weight * (tr + tg + tb), cdata_weight * cmax_data_term);
 }
-float compute_1(__global uchar* left, __global uchar* right,
+inline float compute_1(__global uchar* left, __global uchar* right,
     float cdata_weight,  float cmax_data_term)
 {
     return fmin(cdata_weight * abs((int)*left - (int)*right), cdata_weight * cmax_data_term);
 }
-short round_short(float v){
+
+inline short round_short(float v)
+{
     return convert_short_sat_rte(v);
 }
+
 ///////////////////////////////////////////////////////////////////////////////////////////////
 ///////////////////////////////////init_data_cost///////////////////////////////////////////////
 ///////////////////////////////////////////////////////////////////////////////////////////////
+
 __kernel void init_data_cost_0(__global short *ctemp, __global uchar *cleft, __global uchar *cright,
     int h, int w, int level, int channels,
     int cmsg_step1, float cdata_weight, float cmax_data_term, int cdisp_step1,
@@ -993,7 +997,8 @@ __kernel void compute_data_cost_reduce_1(__global const float *selected_disp_pyr
 ///////////////////////////////////////////////////////////////
 //////////////////////// init message /////////////////////////
 ///////////////////////////////////////////////////////////////
-void get_first_k_element_increase_0(__global short* u_new, __global short *d_new, __global short *l_new,
+
+static void get_first_k_element_increase_0(__global short* u_new, __global short *d_new, __global short *l_new,
     __global short *r_new, __global const short *u_cur, __global const short *d_cur,
     __global const short *l_cur, __global const short *r_cur,
     __global short *data_cost_selected, __global short *disparity_selected_new,
@@ -1027,7 +1032,8 @@ void get_first_k_element_increase_0(__global short* u_new, __global short *d_new
         data_cost_new[id * cdisp_step1] = SHRT_MAX;
     }
 }
-void get_first_k_element_increase_1(__global float *u_new, __global float *d_new, __global float *l_new,
+
+static void get_first_k_element_increase_1(__global float *u_new, __global float *d_new, __global float *l_new,
     __global float *r_new, __global const float *u_cur, __global const float *d_cur,
     __global const float *l_cur, __global const float *r_cur,
     __global float *data_cost_selected, __global float *disparity_selected_new,
@@ -1190,7 +1196,8 @@ __kernel void init_message_1(__global float *u_new_, __global float *d_new_, __g
 ///////////////////////////////////////////////////////////////
 ////////////////////  calc all iterations /////////////////////
 ///////////////////////////////////////////////////////////////
-void message_per_pixel_0(__global const short *data, __global short *msg_dst, __global const short *msg1,
+
+static void message_per_pixel_0(__global const short *data, __global short *msg_dst, __global const short *msg1,
     __global const short *msg2, __global const short *msg3,
     __global const short *dst_disp, __global const short *src_disp,
     int nr_plane, __global short *temp,
@@ -1226,7 +1233,8 @@ void message_per_pixel_0(__global const short *data, __global short *msg_dst, __
     for(int d = 0; d < nr_plane; d++)
         msg_dst[d * cdisp_step1] = convert_short_sat_rte(temp[d * cdisp_step1] - sum);
 }
-void message_per_pixel_1(__global const float *data, __global float *msg_dst, __global const float *msg1,
+
+static void message_per_pixel_1(__global const float *data, __global float *msg_dst, __global const float *msg1,
     __global const float *msg2, __global const float *msg3,
     __global const float *dst_disp, __global const float *src_disp,
     int nr_plane, __global float *temp,
@@ -1262,6 +1270,7 @@ void message_per_pixel_1(__global const float *data, __global float *msg_dst, __
     for(int d = 0; d < nr_plane; d++)
         msg_dst[d * cdisp_step1] = temp[d * cdisp_step1] - sum;
 }
+
 __kernel void compute_message_0(__global short *u_, __global short *d_, __global short *l_, __global short *r_,
     __global const short *data_cost_selected, __global const short *selected_disp_pyr_cur,
     __global short *ctemp, int h, int w, int nr_plane, int i,
@@ -1293,6 +1302,7 @@ __kernel void compute_message_0(__global short *u_, __global short *d_, __global
             cmax_disc_term, cdisp_step1, cdisc_single_jump);
     }
 }
+
 __kernel void compute_message_1(__global float *u_, __global float *d_, __global float *l_, __global float *r_,
     __global const float *data_cost_selected, __global const float *selected_disp_pyr_cur,
     __global float *ctemp, int h, int w, int nr_plane, int i,
@@ -1327,6 +1337,7 @@ __kernel void compute_message_1(__global float *u_, __global float *d_, __global
 ///////////////////////////////////////////////////////////////
 /////////////////////////// output ////////////////////////////
 ///////////////////////////////////////////////////////////////
+
 __kernel void compute_disp_0(__global const short *u_, __global const short *d_, __global const short *l_,
     __global const short *r_, __global const short * data_cost_selected,
     __global const short *disp_selected_pyr,
@@ -1364,6 +1375,7 @@ __kernel void compute_disp_0(__global const short *u_, __global const short *d_,
         disp[res_step * y + x] = best;
     }
 }
+
 __kernel void compute_disp_1(__global const float *u_, __global const float *d_, __global const float *l_,
     __global const float *r_, __global const float *data_cost_selected,
     __global const float *disp_selected_pyr,