Added OpenCL im2col

pull/265/head
Vitaliy Lyudvichenko 10 years ago
parent 06f949a590
commit 23d3ede6ab
  1. modules/dnn/src/layers/convolution_layer.cpp (9 changes)
  2. modules/dnn/src/layers/im2col.cpp (41 changes)
  3. modules/dnn/src/layers/im2col.hpp (29 changes)
  4. modules/dnn/src/opencl/im2col.cl (30 changes)
  5. modules/dnn/test/test_caffe_importer.cpp (4 changes)
  6. modules/dnn/test/test_layers.cpp (12 changes)
  7. modules/dnn/test/test_torch_importer.cpp (3 changes)
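For orientation, im2col unrolls each kernel-sized window of the input into one column of a matrix, so the convolution itself becomes a single matrix multiplication. A minimal sketch of the bookkeeping shared by the CPU and the new OpenCL path (plain C++, names chosen here for illustration only):

    // Sketch only: output spatial size and column-buffer size that
    // im2col_cpu / im2col_ocl assume (same formulas as in the code below).
    #include <cstddef>

    size_t im2colBufferSize(int channels, int height, int width,
                            int kernel_h, int kernel_w,
                            int pad_h, int pad_w,
                            int stride_h, int stride_w)
    {
        int h_out = (height + 2 * pad_h - kernel_h) / stride_h + 1;
        int w_out = (width  + 2 * pad_w - kernel_w) / stride_w + 1;
        // one row per (channel, ky, kx) triple, one column per output pixel
        return (size_t)channels * kernel_h * kernel_w * h_out * w_out;
    }

    // Example: channels = 3, 8x8 input, 3x3 kernel, pad = 1, stride = 1
    //   -> h_out = w_out = 8, buffer = 3 * 3 * 3 * 8 * 8 = 1728 floats.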

@@ -1,4 +1,5 @@
 #include "../precomp.hpp"
+#include <opencv2/core/ocl.hpp>
 #include "layers_common.hpp"
 #include "convolution_layer.hpp"
 #include "im2col.hpp"
@@ -107,6 +108,14 @@ namespace dnn
             return;
         }
 
+        if (ocl::useOpenCL() && inpBlob.type() == CV_32F)
+        {
+            UMat src = inpBlob.getMatRef().getUMat(ACCESS_READ);
+            UMat dst(colMat.size(), colMat.type());
+            im2col_ocl(src, inpGroupCn, inpH, inpW, kerH, kerW, padH, padW, strideH, strideW, dst);
+            dst.copyTo(colMat);
+        }
+
         if (inpBlob.type() == CV_32F)
             im2col_cpu((float *)srcPtr, inpGroupCn, inpH, inpW, kerH, kerW, padH, padW, strideH, strideW, (float *)colMat.ptr());
         if (inpBlob.type() == CV_64F)
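Read as a whole, the hunk above runs im2col on the GPU when OpenCL is usable and then falls through to the scalar code. A minimal sketch of the intended dispatch, assuming the OpenCL result is meant to replace the CPU im2col for CV_32F inputs rather than be overwritten by it (variable names follow the surrounding code; the flag is added here purely for illustration):

    // Sketch only: GPU im2col when available, CPU im2col otherwise.
    bool gpuDone = false;
    if (cv::ocl::useOpenCL() && inpBlob.type() == CV_32F)
    {
        cv::UMat src = inpBlob.getMatRef().getUMat(cv::ACCESS_READ);
        cv::UMat dst(colMat.size(), colMat.type());
        im2col_ocl(src, inpGroupCn, inpH, inpW, kerH, kerW,
                   padH, padW, strideH, strideW, dst);
        dst.copyTo(colMat);   // bring the result back into the column matrix
        gpuDone = true;
    }
    if (!gpuDone && inpBlob.type() == CV_32F)
        im2col_cpu((float *)srcPtr, inpGroupCn, inpH, inpW, kerH, kerW,
                   padH, padW, strideH, strideW, (float *)colMat.ptr());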

@@ -0,0 +1,41 @@
+#include "../precomp.hpp"
+#include <opencv2/core/ocl.hpp>
+#include "im2col.hpp"
+#include "opencl_kernels_dnn.hpp"
+
+namespace cv
+{
+namespace dnn
+{
+
+void im2col_ocl(UMat &img,
+                int channels, int height, int width,
+                int kernel_h, int kernel_w,
+                int pad_h, int pad_w,
+                int stride_h, int stride_w,
+                UMat &col)
+{
+    int h_out = (height + 2 * pad_h - kernel_h) / stride_h + 1;
+    int w_out = (width + 2 * pad_w - kernel_w) / stride_w + 1;
+
+    CV_Assert(img.isContinuous() && col.isContinuous());
+    CV_Assert(img.total() == (size_t)channels * height * width);
+    CV_Assert(col.total() == (size_t)channels * h_out * w_out * kernel_h * kernel_w);
+
+    ocl::Kernel im2col_ker("im2col", ocl::dnn::im2col_oclsrc);
+
+    im2col_ker.args(ocl::KernelArg::PtrReadOnly(img),
+                    channels, height, width,
+                    kernel_h, kernel_w, pad_h, pad_w, stride_h, stride_w,
+                    h_out, w_out,
+                    ocl::KernelArg::PtrWriteOnly(col)
+                    );
+
+    size_t globalSize[] = { (size_t)channels * h_out * w_out };
+    size_t localSize[] = { ocl::Device::getDefault().maxWorkGroupSize() };
+
+    CV_Assert(im2col_ker.run(1, globalSize, localSize, false));
+}
+
+}
+}
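A hypothetical way to exercise the new host function against the existing CPU implementation (this check is not part of the commit; it assumes the internal header im2col.hpp is visible, e.g. when built inside the dnn module):

    #include <opencv2/core.hpp>
    #include <opencv2/core/ocl.hpp>

    // Hypothetical check: CPU vs OpenCL im2col on a small random input.
    static void compareIm2col()
    {
        const int channels = 3, height = 8, width = 8;
        const int k = 3, pad = 1, stride = 1;
        const int h_out = (height + 2 * pad - k) / stride + 1;
        const int w_out = (width  + 2 * pad - k) / stride + 1;

        cv::Mat img(1, channels * height * width, CV_32F);
        cv::randu(img, -1.0f, 1.0f);

        cv::Mat colCpu(1, channels * k * k * h_out * w_out, CV_32F);
        cv::dnn::im2col_cpu(img.ptr<float>(), channels, height, width,
                            k, k, pad, pad, stride, stride, colCpu.ptr<float>());

        cv::UMat uimg = img.getUMat(cv::ACCESS_READ);
        cv::UMat ucol(colCpu.size(), CV_32F);
        cv::dnn::im2col_ocl(uimg, channels, height, width,
                            k, k, pad, pad, stride, stride, ucol);

        double maxDiff = cv::norm(colCpu, ucol.getMat(cv::ACCESS_READ), cv::NORM_INF);
        CV_Assert(maxDiff < 1e-5);   // the two paths should agree to float precision
    }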

@@ -7,11 +7,12 @@ namespace dnn
 {
 
 template <typename Dtype>
-void im2col_cpu(const Dtype* data_im, const int channels,
-    const int height, const int width, const int kernel_h, const int kernel_w,
-    const int pad_h, const int pad_w,
-    const int stride_h, const int stride_w,
-    Dtype* data_col)
+void im2col_cpu(const Dtype* data_im,
+                int channels, int height, int width,
+                int kernel_h, int kernel_w,
+                int pad_h, int pad_w,
+                int stride_h, int stride_w,
+                Dtype* data_col)
 {
     int height_col = (height + 2 * pad_h - kernel_h) / stride_h + 1;
     int width_col = (width + 2 * pad_w - kernel_w) / stride_w + 1;
@@ -35,11 +36,12 @@ void im2col_cpu(const Dtype* data_im, const int channels,
 }
 
 template <typename Dtype>
-void col2im_cpu(const Dtype* data_col, const int channels,
-    const int height, const int width, const int patch_h, const int patch_w,
-    const int pad_h, const int pad_w,
-    const int stride_h, const int stride_w,
-    Dtype* data_im)
+void col2im_cpu(const Dtype* data_col,
+                int channels, int height, int width,
+                int patch_h, int patch_w,
+                int pad_h, int pad_w,
+                int stride_h, int stride_w,
+                Dtype* data_im)
 {
     memset(data_im, 0, height * width * channels * sizeof(Dtype));
@@ -68,6 +70,13 @@ void col2im_cpu(const Dtype* data_col, const int channels,
     }
 }
 
+void im2col_ocl(UMat &img,
+                int channels, int height, int width,
+                int kernel_h, int kernel_w,
+                int pad_h, int pad_w,
+                int stride_h, int stride_w,
+                UMat &col);
+
 }
 }

@@ -0,0 +1,30 @@
+__kernel void im2col(__global const float *im_src,
+                     int channels, int height_inp, int width_inp,
+                     int kernel_h, int kernel_w, int pad_h, int pad_w, int stride_h, int stride_w,
+                     int height_out, int width_out,
+                     __global float *im_col
+                     )
+{
+    int index = get_global_id(0);
+    // the host may round the global size up to the work-group size
+    if (index >= channels * height_out * width_out)
+        return;
+
+    int j_out = index % width_out;
+    int i_out = (index / width_out) % height_out;
+    int c_inp = (index / width_out) / height_out;
+
+    int c_out = c_inp * kernel_h * kernel_w;
+    int i_inp = i_out * stride_h - pad_h;
+    int j_inp = j_out * stride_w - pad_w;
+
+    im_col += (c_out * height_out + i_out) * width_out + j_out;
+    im_src += (c_inp * height_inp + i_inp) * width_inp + j_inp;
+
+    for (int ki = 0; ki < kernel_h; ++ki)
+        for (int kj = 0; kj < kernel_w; ++kj)
+        {
+            int i = i_inp + ki;
+            int j = j_inp + kj;
+            // im_src already points at (c_inp, i_inp, j_inp), so offset by the kernel indices
+            *im_col = (i >= 0 && j >= 0 && i < height_inp && j < width_inp) ?
+                im_src[ki * width_inp + kj] : 0;
+            im_col += height_out * width_out;
+        }
+}

@@ -12,7 +12,7 @@ static std::string _tf(TString filename)
     return (getOpenCVExtraDir() + "/dnn/") + filename;
 }
 
-TEST(ReadCaffe_GTSRB, Accuracy)
+TEST(Test_Caffe, read_gtsrb)
 {
     Net net;
     {
@@ -22,7 +22,7 @@ TEST(ReadCaffe_GTSRB, Accuracy)
     }
 }
 
-TEST(ReadCaffe_GoogLeNet, Accuracy)
+TEST(Test_Caffe, read_googlenet)
 {
     Net net;
     {

@@ -1,4 +1,5 @@
 #include "test_precomp.hpp"
+#include <opencv2/core/ocl.hpp>
 #include <iostream>
 #include "npy_blob.hpp"
@@ -56,6 +57,17 @@ TEST(Layer_Test_Convolution, Accuracy)
     testLayer("layer_convolution", true);
 }
 
+//TODO: move this test into separate file
+TEST(Layer_Test_Convolution, AccuracyOCL)
+{
+    if (cv::ocl::haveOpenCL())
+    {
+        cv::ocl::setUseOpenCL(true);
+        testLayer("layer_convolution", true);
+        cv::ocl::setUseOpenCL(false);
+    }
+}
+
 TEST(Layer_Test_InnerProduct, Accuracy)
 {
     testLayer("layer_inner_product", true);

@@ -58,8 +58,7 @@ TEST(Torch_Importer, run_pool_max)
 
 TEST(Torch_Importer, run_pool_ave)
 {
-    //TODO: fix
-    //runTorchNet("net_pool_ave", "l1_Pooling", false);
+    runTorchNet("net_pool_ave", "l1_Pooling", false);
 }
 
 TEST(Torch_Importer, run_reshape)
