diff --git a/modules/dnn/src/layers/convolution_layer.cpp b/modules/dnn/src/layers/convolution_layer.cpp
index af5e3d4c4..4bd47a61e 100644
--- a/modules/dnn/src/layers/convolution_layer.cpp
+++ b/modules/dnn/src/layers/convolution_layer.cpp
@@ -1,4 +1,5 @@
 #include "../precomp.hpp"
+#include <opencv2/core/ocl.hpp>
 #include "layers_common.hpp"
 #include "convolution_layer.hpp"
 #include "im2col.hpp"
@@ -107,6 +108,14 @@ namespace dnn
             return;
         }
 
+        if (ocl::useOpenCL() && inpBlob.type() == CV_32F)
+        {
+            UMat src = inpBlob.getMatRef().getUMat(ACCESS_READ);
+            UMat dst(colMat.size(), colMat.type());
+            im2col_ocl(src, inpGroupCn, inpH, inpW, kerH, kerW, padH, padW, strideH, strideW, dst);
+            dst.copyTo(colMat);
+        }
+
         if (inpBlob.type() == CV_32F)
             im2col_cpu((float *)srcPtr, inpGroupCn, inpH, inpW, kerH, kerW, padH, padW, strideH, strideW, (float *)colMat.ptr());
         if (inpBlob.type() == CV_64F)
diff --git a/modules/dnn/src/layers/im2col.cpp b/modules/dnn/src/layers/im2col.cpp
new file mode 100644
index 000000000..519c9c66f
--- /dev/null
+++ b/modules/dnn/src/layers/im2col.cpp
@@ -0,0 +1,41 @@
+#include "../precomp.hpp"
+#include <opencv2/core/ocl.hpp>
+#include "im2col.hpp"
+#include "opencl_kernels_dnn.hpp"
+
+namespace cv
+{
+namespace dnn
+{
+
+void im2col_ocl(UMat &img,
+                int channels, int height, int width, 
+                int kernel_h, int kernel_w,
+                int pad_h, int pad_w,
+                int stride_h, int stride_w, 
+                UMat &col)
+{
+    int h_out = (height + 2 * pad_h - kernel_h) / stride_h + 1;
+    int w_out = (width + 2 * pad_w - kernel_w) / stride_w + 1;
+
+    CV_Assert(img.isContinuous() && col.isContinuous());
+    CV_Assert(img.total() == (size_t)channels * height * width);
+    CV_Assert(col.total() == (size_t)h_out * w_out * kernel_h * kernel_w);
+
+    ocl::Kernel im2col_ker("im2col", ocl::dnn::im2col_oclsrc);
+
+    im2col_ker.args(ocl::KernelArg::PtrReadOnly(img), 
+             channels, height, width,
+             kernel_h, kernel_w, pad_h, pad_w, stride_h, stride_w,
+             h_out, w_out,
+             ocl::KernelArg::PtrWriteOnly(col)
+        );
+
+    size_t globalSize[] = { (size_t)channels * h_out * w_out };
+    size_t localSize[] = { ocl::Device::getDefault().maxWorkGroupSize() };
+
+    CV_Assert(im2col_ker.run(1, globalSize, localSize, false));
+}
+
+}
+}
\ No newline at end of file
diff --git a/modules/dnn/src/layers/im2col.hpp b/modules/dnn/src/layers/im2col.hpp
index b356a0550..bdc253751 100644
--- a/modules/dnn/src/layers/im2col.hpp
+++ b/modules/dnn/src/layers/im2col.hpp
@@ -7,11 +7,12 @@ namespace dnn
 {
 
 template <typename Dtype>
-void im2col_cpu(const Dtype* data_im, const int channels,
-    const int height, const int width, const int kernel_h, const int kernel_w,
-    const int pad_h, const int pad_w,
-    const int stride_h, const int stride_w,
-    Dtype* data_col)
+void im2col_cpu(const Dtype* data_im, 
+                int channels, int height, int width, 
+                int kernel_h, int kernel_w,
+                int pad_h, int pad_w,
+                int stride_h, int stride_w,
+                Dtype* data_col)
 {
     int height_col = (height + 2 * pad_h - kernel_h) / stride_h + 1;
     int width_col = (width + 2 * pad_w - kernel_w) / stride_w + 1;
@@ -35,11 +36,12 @@ void im2col_cpu(const Dtype* data_im, const int channels,
 }
 
 template <typename Dtype>
-void col2im_cpu(const Dtype* data_col, const int channels,
-    const int height, const int width, const int patch_h, const int patch_w,
-    const int pad_h, const int pad_w,
-    const int stride_h, const int stride_w,
-    Dtype* data_im)
+void col2im_cpu(const Dtype* data_col, 
+                int channels, int height, int width,
+                int patch_h, int patch_w,
+                int pad_h, int pad_w,
+                int stride_h, int stride_w,
+                Dtype* data_im)
 {
     memset(data_im, 0, height * width * channels * sizeof(Dtype));
 
@@ -68,6 +70,13 @@ void col2im_cpu(const Dtype* data_col, const int channels,
     }
 }
 
+void im2col_ocl(UMat &img,
+                int channels, int height, int width,
+                int kernel_h, int kernel_w,
+                int pad_h, int pad_w,
+                int stride_h, int stride_w,
+                UMat &col);
+
 }
 }
 
diff --git a/modules/dnn/src/opencl/im2col.cl b/modules/dnn/src/opencl/im2col.cl
new file mode 100644
index 000000000..301826c40
--- /dev/null
+++ b/modules/dnn/src/opencl/im2col.cl
@@ -0,0 +1,30 @@
+__kernel void im2col(__global const float *im_src,
+                     int channels, int height_inp, int width_inp, 
+                     int kernel_h, int kernel_w, int pad_h, int pad_w, int stride_h, int stride_w,
+                     int height_out, int width_out,
+                     __global float *im_col
+                    )
+{
+    int index = get_global_id(0);
+    int j_out = index % width_out;
+    int i_out = (index / width_out) % height_out;
+    int c_inp = (index / width_out) / height_out;
+    
+    int c_out = c_inp * kernel_h * kernel_w;
+    int i_inp = i_out * stride_h - pad_h;
+    int j_inp = j_out * stride_w - pad_w;
+
+    im_col += (c_out * height_out + i_out) * width_out + j_out;
+    im_src += (c_inp * height_inp + i_inp) * width_inp + j_inp;
+
+    for (int ki = 0; ki < kernel_h; ++ki)
+        for (int kj = 0; kj < kernel_w; ++kj) {
+            int i = i_inp + ki;
+            int j = j_inp + kj;
+            *im_col = (h >= 0 && w >= 0 && h < height_inp && w < width_inp) ?
+                im_src[i * width_inp + j] : 0;
+            im_col += height_out * width_out;
+      }
+}
+
+}
\ No newline at end of file
diff --git a/modules/dnn/test/test_caffe_importer.cpp b/modules/dnn/test/test_caffe_importer.cpp
index 9f51ebdca..d021468e5 100644
--- a/modules/dnn/test/test_caffe_importer.cpp
+++ b/modules/dnn/test/test_caffe_importer.cpp
@@ -12,7 +12,7 @@ static std::string _tf(TString filename)
     return (getOpenCVExtraDir() + "/dnn/") + filename;
 }
 
-TEST(ReadCaffe_GTSRB, Accuracy)
+TEST(Test_Caffe, read_gtsrb)
 {
     Net net;
     {
@@ -22,7 +22,7 @@ TEST(ReadCaffe_GTSRB, Accuracy)
     }
 }
 
-TEST(ReadCaffe_GoogLeNet, Accuracy)
+TEST(Test_Caffe, read_googlenet)
 {
     Net net;
     {
diff --git a/modules/dnn/test/test_layers.cpp b/modules/dnn/test/test_layers.cpp
index a272e2bd2..e0de390ab 100644
--- a/modules/dnn/test/test_layers.cpp
+++ b/modules/dnn/test/test_layers.cpp
@@ -1,4 +1,5 @@
 #include "test_precomp.hpp"
+#include <opencv2/core/ocl.hpp>
 #include <iostream>
 #include "npy_blob.hpp"
 
@@ -56,6 +57,17 @@ TEST(Layer_Test_Convolution, Accuracy)
      testLayer("layer_convolution", true);
 }
 
+//TODO: move this test into separate file
+TEST(Layer_Test_Convolution, AccuracyOCL)
+{
+    if (cv::ocl::haveOpenCL())
+    {
+        cv::ocl::setUseOpenCL(true);
+        testLayer("layer_convolution", true);
+        cv::ocl::setUseOpenCL(false);
+    }
+}
+
 TEST(Layer_Test_InnerProduct, Accuracy)
 {
      testLayer("layer_inner_product", true);
diff --git a/modules/dnn/test/test_torch_importer.cpp b/modules/dnn/test/test_torch_importer.cpp
index 514c46d5b..0b17a1c56 100644
--- a/modules/dnn/test/test_torch_importer.cpp
+++ b/modules/dnn/test/test_torch_importer.cpp
@@ -58,8 +58,7 @@ TEST(Torch_Importer, run_pool_max)
 
 TEST(Torch_Importer, run_pool_ave)
 {
-    //TODO: fix
-    //runTorchNet("net_pool_ave", "l1_Pooling", false);
+    runTorchNet("net_pool_ave", "l1_Pooling", false);
 }
 
 TEST(Torch_Importer, run_reshape)