From b597c87bedc303377494ae11f3bceadbd0e2052e Mon Sep 17 00:00:00 2001
From: Alexander Alekhin <alexander.alekhin@intel.com>
Date: Tue, 24 Jul 2018 15:49:59 +0300
Subject: [PATCH] dnn(ocl): avoid memory access violation

---
 modules/dnn/src/opencl/conv_layer_spatial.cl | 35 ++++++++++----------
 1 file changed, 17 insertions(+), 18 deletions(-)

diff --git a/modules/dnn/src/opencl/conv_layer_spatial.cl b/modules/dnn/src/opencl/conv_layer_spatial.cl
index adeb38574e..c60b8fcdbb 100644
--- a/modules/dnn/src/opencl/conv_layer_spatial.cl
+++ b/modules/dnn/src/opencl/conv_layer_spatial.cl
@@ -248,39 +248,38 @@ convolve_simd(
 
   int curr_y = or * STRIDE_Y;
   int curr_x = oc * STRIDE_X + lid;
-#if INPUT_PAD_W != 0 || INPUT_PAD_H != 0 || INPUT_PAD_BOTTOM != 0 || INPUT_PAD_RIGHT != 0
-  int saved_y = curr_y;
-#endif
+
   int in_addr = input_batch_offset
                 +  (curr_y - INPUT_PAD_H) * INPUT_WIDTH          // y tile offset
                 +   curr_x - INPUT_PAD_W;                        // x tile offset
 
+  const int in_limit = (get_global_size(2) / ALIGNED_NUM_FILTERS) * TOTAL_INPUT_DEPTH_SIZE * INPUT_PITCH - 1;
+
   Dtype in_buf[INVEC_SIZE];
 
   for(int kd = 0; kd < INPUT_DEPTH; kd++)
   {
+#if INPUT_PAD_W != 0 || INPUT_PAD_H != 0 || INPUT_PAD_BOTTOM != 0 || INPUT_PAD_RIGHT != 0
+    const bool cx_out_of_range = !(curr_x >= INPUT_PAD_W && curr_x < INPUT_WIDTH + INPUT_PAD_W);
     int in_offset = in_addr;
     __attribute__((opencl_unroll_hint(INVEC_SIZE)))
-    for (int reg = 0; reg < INVEC_SIZE; reg++)
+    for (int reg = 0; reg < INVEC_SIZE; reg++, in_offset += INPUT_WIDTH)
     {
-        in_buf[reg] = inputs[in_offset];
-#if INPUT_PAD_W != 0 || INPUT_PAD_H != 0 || INPUT_PAD_BOTTOM != 0 || INPUT_PAD_RIGHT != 0
-        if (!(curr_y >= INPUT_PAD_H && curr_y < INPUT_HEIGHT + INPUT_PAD_H &&
-              curr_x >= INPUT_PAD_W && curr_x < INPUT_WIDTH + INPUT_PAD_W))
-        {
-          in_buf[reg] = 0;
-        }
-#endif
-        curr_y += 1;
-        in_offset += INPUT_WIDTH;
+      Dtype input = inputs[clamp(in_offset, 0, in_limit)];
+      int cy = curr_y + reg;
+      in_buf[reg] = (cx_out_of_range || cy < INPUT_PAD_H || cy >= INPUT_HEIGHT + INPUT_PAD_H) ? 0 : input;
     }
+#else
+    int in_offset = in_addr;
+    __attribute__((opencl_unroll_hint(INVEC_SIZE)))
+    for (int reg = 0; reg < INVEC_SIZE; reg++, in_offset += INPUT_WIDTH)
+    {
+      in_buf[reg] = inputs[min(in_offset, in_limit)];
+    }
+#endif
 
     in_addr += INPUT_PITCH;
 
-#if INPUT_PAD_W != 0 || INPUT_PAD_H != 0 || INPUT_PAD_BOTTOM != 0 || INPUT_PAD_RIGHT != 0
-    curr_y = saved_y;
-#endif
-
     Dtype weight_buf[WEIGHT_PREF];
     int w_idx=0;