Added OpenCL im2col

10 years ago · 23d3ede6ab
parent 06f949a590
commit 23d3ede6ab
7 changed files with 114 additions and 14 deletions
--- a/modules/dnn/src/layers/convolution_layer.cpp
+++ b/modules/dnn/src/layers/convolution_layer.cpp
@ -1,4 +1,5 @@
 #include "../precomp.hpp"
+#include <opencv2/core/ocl.hpp>
 #include "layers_common.hpp"
 #include "convolution_layer.hpp"
 #include "im2col.hpp"
@ -107,6 +108,14 @@ namespace dnn
            return;
        }

+        if (ocl::useOpenCL() && inpBlob.type() == CV_32F)
+        {
+            UMat src = inpBlob.getMatRef().getUMat(ACCESS_READ);
+            UMat dst(colMat.size(), colMat.type());
+            im2col_ocl(src, inpGroupCn, inpH, inpW, kerH, kerW, padH, padW, strideH, strideW, dst);
+            dst.copyTo(colMat);
+        }
+
        if (inpBlob.type() == CV_32F)
            im2col_cpu((float *)srcPtr, inpGroupCn, inpH, inpW, kerH, kerW, padH, padW, strideH, strideW, (float *)colMat.ptr());
        if (inpBlob.type() == CV_64F)
--- a/modules/dnn/src/layers/im2col.cpp
+++ b/modules/dnn/src/layers/im2col.cpp
@ -0,0 +1,41 @@
+#include "../precomp.hpp"
+#include <opencv2/core/ocl.hpp>
+#include "im2col.hpp"
+#include "opencl_kernels_dnn.hpp"
+
+namespace cv
+{
+namespace dnn
+{
+
+void im2col_ocl(UMat &img,
+                int channels, int height, int width, 
+                int kernel_h, int kernel_w,
+                int pad_h, int pad_w,
+                int stride_h, int stride_w, 
+                UMat &col)
+{
+    int h_out = (height + 2 * pad_h - kernel_h) / stride_h + 1;
+    int w_out = (width + 2 * pad_w - kernel_w) / stride_w + 1;
+
+    CV_Assert(img.isContinuous() && col.isContinuous());
+    CV_Assert(img.total() == (size_t)channels * height * width);
+    CV_Assert(col.total() == (size_t)h_out * w_out * kernel_h * kernel_w);
+
+    ocl::Kernel im2col_ker("im2col", ocl::dnn::im2col_oclsrc);
+
+    im2col_ker.args(ocl::KernelArg::PtrReadOnly(img), 
+             channels, height, width,
+             kernel_h, kernel_w, pad_h, pad_w, stride_h, stride_w,
+             h_out, w_out,
+             ocl::KernelArg::PtrWriteOnly(col)
+        );
+
+    size_t globalSize[] = { (size_t)channels * h_out * w_out };
+    size_t localSize[] = { ocl::Device::getDefault().maxWorkGroupSize() };
+
+    CV_Assert(im2col_ker.run(1, globalSize, localSize, false));
+}
+
+}
+}
--- a/modules/dnn/src/layers/im2col.hpp
+++ b/modules/dnn/src/layers/im2col.hpp
@ -7,10 +7,11 @@ namespace dnn
 {

 template <typename Dtype>
-void im2col_cpu(const Dtype* data_im, const int channels,
-    const int height, const int width, const int kernel_h, const int kernel_w,
-    const int pad_h, const int pad_w,
-    const int stride_h, const int stride_w,
+void im2col_cpu(const Dtype* data_im, 
+                int channels, int height, int width, 
+                int kernel_h, int kernel_w,
+                int pad_h, int pad_w,
+                int stride_h, int stride_w,
                Dtype* data_col)
 {
    int height_col = (height + 2 * pad_h - kernel_h) / stride_h + 1;
@ -35,10 +36,11 @@ void im2col_cpu(const Dtype* data_im, const int channels,
 }

 template <typename Dtype>
-void col2im_cpu(const Dtype* data_col, const int channels,
-    const int height, const int width, const int patch_h, const int patch_w,
-    const int pad_h, const int pad_w,
-    const int stride_h, const int stride_w,
+void col2im_cpu(const Dtype* data_col, 
+                int channels, int height, int width,
+                int patch_h, int patch_w,
+                int pad_h, int pad_w,
+                int stride_h, int stride_w,
                Dtype* data_im)
 {
    memset(data_im, 0, height * width * channels * sizeof(Dtype));
@ -68,6 +70,13 @@ void col2im_cpu(const Dtype* data_col, const int channels,
    }
 }

+void im2col_ocl(UMat &img,
+                int channels, int height, int width,
+                int kernel_h, int kernel_w,
+                int pad_h, int pad_w,
+                int stride_h, int stride_w,
+                UMat &col);
+
 }
 }

--- a/modules/dnn/src/opencl/im2col.cl
+++ b/modules/dnn/src/opencl/im2col.cl
@ -0,0 +1,30 @@
+__kernel void im2col(__global const float *im_src,
+                     int channels, int height_inp, int width_inp, 
+                     int kernel_h, int kernel_w, int pad_h, int pad_w, int stride_h, int stride_w,
+                     int height_out, int width_out,
+                     __global float *im_col
+                    )
+{
+    int index = get_global_id(0);
+    int j_out = index % width_out;
+    int i_out = (index / width_out) % height_out;
+    int c_inp = (index / width_out) / height_out;
+    
+    int c_out = c_inp * kernel_h * kernel_w;
+    int i_inp = i_out * stride_h - pad_h;
+    int j_inp = j_out * stride_w - pad_w;
+
+    im_col += (c_out * height_out + i_out) * width_out + j_out;
+    im_src += (c_inp * height_inp + i_inp) * width_inp + j_inp;
+
+    for (int ki = 0; ki < kernel_h; ++ki)
+        for (int kj = 0; kj < kernel_w; ++kj) {
+            int i = i_inp + ki;
+            int j = j_inp + kj;
+            *im_col = (h >= 0 && w >= 0 && h < height_inp && w < width_inp) ?
+                im_src[i * width_inp + j] : 0;
+            im_col += height_out * width_out;
+      }
+}
+
+}
--- a/modules/dnn/test/test_caffe_importer.cpp
+++ b/modules/dnn/test/test_caffe_importer.cpp
@ -12,7 +12,7 @@ static std::string _tf(TString filename)
    return (getOpenCVExtraDir() + "/dnn/") + filename;
 }

-TEST(ReadCaffe_GTSRB, Accuracy)
+TEST(Test_Caffe, read_gtsrb)
 {
    Net net;
    {
@ -22,7 +22,7 @@ TEST(ReadCaffe_GTSRB, Accuracy)
    }
 }

-TEST(ReadCaffe_GoogLeNet, Accuracy)
+TEST(Test_Caffe, read_googlenet)
 {
    Net net;
    {
--- a/modules/dnn/test/test_layers.cpp
+++ b/modules/dnn/test/test_layers.cpp
@ -1,4 +1,5 @@
 #include "test_precomp.hpp"
+#include <opencv2/core/ocl.hpp>
 #include <iostream>
 #include "npy_blob.hpp"

@ -56,6 +57,17 @@ TEST(Layer_Test_Convolution, Accuracy)
     testLayer("layer_convolution", true);
 }

+//TODO: move this test into separate file
+TEST(Layer_Test_Convolution, AccuracyOCL)
+{
+    if (cv::ocl::haveOpenCL())
+    {
+        cv::ocl::setUseOpenCL(true);
+        testLayer("layer_convolution", true);
+        cv::ocl::setUseOpenCL(false);
+    }
+}
+
 TEST(Layer_Test_InnerProduct, Accuracy)
 {
     testLayer("layer_inner_product", true);
--- a/modules/dnn/test/test_torch_importer.cpp
+++ b/modules/dnn/test/test_torch_importer.cpp
@ -58,8 +58,7 @@ TEST(Torch_Importer, run_pool_max)

 TEST(Torch_Importer, run_pool_ave)
 {
-    //TODO: fix
-    //runTorchNet("net_pool_ave", "l1_Pooling", false);
+    runTorchNet("net_pool_ave", "l1_Pooling", false);
 }

 TEST(Torch_Importer, run_reshape)