Added OpenCL im2col

pull/265/head
Vitaliy Lyudvichenko 10 years ago
parent 06f949a590
commit 23d3ede6ab
  1. modules/dnn/src/layers/convolution_layer.cpp (9 changes)
  2. modules/dnn/src/layers/im2col.cpp (41 changes)
  3. modules/dnn/src/layers/im2col.hpp (29 changes)
  4. modules/dnn/src/opencl/im2col.cl (30 changes)
  5. modules/dnn/test/test_caffe_importer.cpp (4 changes)
  6. modules/dnn/test/test_layers.cpp (12 changes)
  7. modules/dnn/test/test_torch_importer.cpp (3 changes)
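For orientation, im2col unrolls each kernel-sized window of the input into one column of a matrix, so the convolution itself becomes a single matrix multiplication. A minimal sketch of the bookkeeping shared by the CPU and the new OpenCL path (plain C++, names chosen here for illustration only):

    // Sketch only: output spatial size and column-buffer size that
    // im2col_cpu / im2col_ocl assume (same formulas as in the code below).
    #include <cstddef>

    size_t im2colBufferSize(int channels, int height, int width,
                            int kernel_h, int kernel_w,
                            int pad_h, int pad_w,
                            int stride_h, int stride_w)
    {
        int h_out = (height + 2 * pad_h - kernel_h) / stride_h + 1;
        int w_out = (width  + 2 * pad_w - kernel_w) / stride_w + 1;
        // one row per (channel, ky, kx) triple, one column per output pixel
        return (size_t)channels * kernel_h * kernel_w * h_out * w_out;
    }

    // Example: channels = 3, 8x8 input, 3x3 kernel, pad = 1, stride = 1
    //   -> h_out = w_out = 8, buffer = 3 * 3 * 3 * 8 * 8 = 1728 floats.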

@@ -1,4 +1,5 @@
 #include "../precomp.hpp"
+#include <opencv2/core/ocl.hpp>
 #include "layers_common.hpp"
 #include "convolution_layer.hpp"
 #include "im2col.hpp"
@@ -107,6 +108,14 @@ namespace dnn
             return;
         }
 
+        if (ocl::useOpenCL() && inpBlob.type() == CV_32F)
+        {
+            UMat src = inpBlob.getMatRef().getUMat(ACCESS_READ);
+            UMat dst(colMat.size(), colMat.type());
+            im2col_ocl(src, inpGroupCn, inpH, inpW, kerH, kerW, padH, padW, strideH, strideW, dst);
+            dst.copyTo(colMat);
+        }
+
         if (inpBlob.type() == CV_32F)
             im2col_cpu((float *)srcPtr, inpGroupCn, inpH, inpW, kerH, kerW, padH, padW, strideH, strideW, (float *)colMat.ptr());
         if (inpBlob.type() == CV_64F)
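Read as a whole, the hunk above runs im2col on the GPU when OpenCL is usable and then falls through to the scalar code. A minimal sketch of the intended dispatch, assuming the OpenCL result is meant to replace the CPU im2col for CV_32F inputs rather than be overwritten by it (variable names follow the surrounding code; the flag is added here purely for illustration):

    // Sketch only: GPU im2col when available, CPU im2col otherwise.
    bool gpuDone = false;
    if (cv::ocl::useOpenCL() && inpBlob.type() == CV_32F)
    {
        cv::UMat src = inpBlob.getMatRef().getUMat(cv::ACCESS_READ);
        cv::UMat dst(colMat.size(), colMat.type());
        im2col_ocl(src, inpGroupCn, inpH, inpW, kerH, kerW,
                   padH, padW, strideH, strideW, dst);
        dst.copyTo(colMat);   // bring the result back into the column matrix
        gpuDone = true;
    }
    if (!gpuDone && inpBlob.type() == CV_32F)
        im2col_cpu((float *)srcPtr, inpGroupCn, inpH, inpW, kerH, kerW,
                   padH, padW, strideH, strideW, (float *)colMat.ptr());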

@@ -0,0 +1,41 @@
+#include "../precomp.hpp"
+#include <opencv2/core/ocl.hpp>
+#include "im2col.hpp"
+#include "opencl_kernels_dnn.hpp"
+
+namespace cv
+{
+namespace dnn
+{
+
+void im2col_ocl(UMat &img,
+                int channels, int height, int width,
+                int kernel_h, int kernel_w,
+                int pad_h, int pad_w,
+                int stride_h, int stride_w,
+                UMat &col)
+{
+    int h_out = (height + 2 * pad_h - kernel_h) / stride_h + 1;
+    int w_out = (width + 2 * pad_w - kernel_w) / stride_w + 1;
+
+    CV_Assert(img.isContinuous() && col.isContinuous());
+    CV_Assert(img.total() == (size_t)channels * height * width);
+    CV_Assert(col.total() == (size_t)channels * h_out * w_out * kernel_h * kernel_w);
+
+    ocl::Kernel im2col_ker("im2col", ocl::dnn::im2col_oclsrc);
+
+    im2col_ker.args(ocl::KernelArg::PtrReadOnly(img),
+                    channels, height, width,
+                    kernel_h, kernel_w, pad_h, pad_w, stride_h, stride_w,
+                    h_out, w_out,
+                    ocl::KernelArg::PtrWriteOnly(col)
+                    );
+
+    size_t globalSize[] = { (size_t)channels * h_out * w_out };
+    size_t localSize[] = { ocl::Device::getDefault().maxWorkGroupSize() };
+
+    CV_Assert(im2col_ker.run(1, globalSize, localSize, false));
+}
+
+}
+}
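A hypothetical way to exercise the new host function against the existing CPU implementation (this check is not part of the commit; it assumes the internal header im2col.hpp is visible, e.g. when built inside the dnn module):

    #include <opencv2/core.hpp>
    #include <opencv2/core/ocl.hpp>

    // Hypothetical check: CPU vs OpenCL im2col on a small random input.
    static void compareIm2col()
    {
        const int channels = 3, height = 8, width = 8;
        const int k = 3, pad = 1, stride = 1;
        const int h_out = (height + 2 * pad - k) / stride + 1;
        const int w_out = (width  + 2 * pad - k) / stride + 1;

        cv::Mat img(1, channels * height * width, CV_32F);
        cv::randu(img, -1.0f, 1.0f);

        cv::Mat colCpu(1, channels * k * k * h_out * w_out, CV_32F);
        cv::dnn::im2col_cpu(img.ptr<float>(), channels, height, width,
                            k, k, pad, pad, stride, stride, colCpu.ptr<float>());

        cv::UMat uimg = img.getUMat(cv::ACCESS_READ);
        cv::UMat ucol(colCpu.size(), CV_32F);
        cv::dnn::im2col_ocl(uimg, channels, height, width,
                            k, k, pad, pad, stride, stride, ucol);

        double maxDiff = cv::norm(colCpu, ucol.getMat(cv::ACCESS_READ), cv::NORM_INF);
        CV_Assert(maxDiff < 1e-5);   // the two paths should agree to float precision
    }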

@@ -7,11 +7,12 @@ namespace dnn
 {
 
 template <typename Dtype>
-void im2col_cpu(const Dtype* data_im, const int channels,
-    const int height, const int width, const int kernel_h, const int kernel_w,
-    const int pad_h, const int pad_w,
-    const int stride_h, const int stride_w,
-    Dtype* data_col)
+void im2col_cpu(const Dtype* data_im,
+                int channels, int height, int width,
+                int kernel_h, int kernel_w,
+                int pad_h, int pad_w,
+                int stride_h, int stride_w,
+                Dtype* data_col)
 {
     int height_col = (height + 2 * pad_h - kernel_h) / stride_h + 1;
     int width_col = (width + 2 * pad_w - kernel_w) / stride_w + 1;
@@ -35,11 +36,12 @@ void im2col_cpu(const Dtype* data_im, const int channels,
 }
 
 template <typename Dtype>
-void col2im_cpu(const Dtype* data_col, const int channels,
-    const int height, const int width, const int patch_h, const int patch_w,
-    const int pad_h, const int pad_w,
-    const int stride_h, const int stride_w,
-    Dtype* data_im)
+void col2im_cpu(const Dtype* data_col,
+                int channels, int height, int width,
+                int patch_h, int patch_w,
+                int pad_h, int pad_w,
+                int stride_h, int stride_w,
+                Dtype* data_im)
 {
     memset(data_im, 0, height * width * channels * sizeof(Dtype));
@@ -68,6 +70,13 @@ void col2im_cpu(const Dtype* data_col, const int channels,
     }
 }
 
+void im2col_ocl(UMat &img,
+                int channels, int height, int width,
+                int kernel_h, int kernel_w,
+                int pad_h, int pad_w,
+                int stride_h, int stride_w,
+                UMat &col);
+
 }
 }

@@ -0,0 +1,30 @@
+__kernel void im2col(__global const float *im_src,
+                     int channels, int height_inp, int width_inp,
+                     int kernel_h, int kernel_w, int pad_h, int pad_w, int stride_h, int stride_w,
+                     int height_out, int width_out,
+                     __global float *im_col
+                     )
+{
+    int index = get_global_id(0);
+    // the host may round the global size up to the work-group size
+    if (index >= channels * height_out * width_out)
+        return;
+
+    int j_out = index % width_out;
+    int i_out = (index / width_out) % height_out;
+    int c_inp = (index / width_out) / height_out;
+
+    int c_out = c_inp * kernel_h * kernel_w;
+    int i_inp = i_out * stride_h - pad_h;
+    int j_inp = j_out * stride_w - pad_w;
+
+    im_col += (c_out * height_out + i_out) * width_out + j_out;
+    im_src += (c_inp * height_inp + i_inp) * width_inp + j_inp;
+
+    for (int ki = 0; ki < kernel_h; ++ki)
+        for (int kj = 0; kj < kernel_w; ++kj)
+        {
+            int i = i_inp + ki;
+            int j = j_inp + kj;
+            // im_src already points at (c_inp, i_inp, j_inp), so offset by the kernel indices
+            *im_col = (i >= 0 && j >= 0 && i < height_inp && j < width_inp) ?
+                im_src[ki * width_inp + kj] : 0;
+            im_col += height_out * width_out;
+        }
+}

@@ -12,7 +12,7 @@ static std::string _tf(TString filename)
     return (getOpenCVExtraDir() + "/dnn/") + filename;
 }
 
-TEST(ReadCaffe_GTSRB, Accuracy)
+TEST(Test_Caffe, read_gtsrb)
 {
     Net net;
     {
@@ -22,7 +22,7 @@ TEST(ReadCaffe_GTSRB, Accuracy)
     }
 }
 
-TEST(ReadCaffe_GoogLeNet, Accuracy)
+TEST(Test_Caffe, read_googlenet)
 {
     Net net;
     {

@@ -1,4 +1,5 @@
 #include "test_precomp.hpp"
+#include <opencv2/core/ocl.hpp>
 #include <iostream>
 #include "npy_blob.hpp"
@@ -56,6 +57,17 @@ TEST(Layer_Test_Convolution, Accuracy)
     testLayer("layer_convolution", true);
 }
 
+//TODO: move this test into separate file
+TEST(Layer_Test_Convolution, AccuracyOCL)
+{
+    if (cv::ocl::haveOpenCL())
+    {
+        cv::ocl::setUseOpenCL(true);
+        testLayer("layer_convolution", true);
+        cv::ocl::setUseOpenCL(false);
+    }
+}
+
 TEST(Layer_Test_InnerProduct, Accuracy)
 {
     testLayer("layer_inner_product", true);

@@ -58,8 +58,7 @@ TEST(Torch_Importer, run_pool_max)
 
 TEST(Torch_Importer, run_pool_ave)
 {
-    //TODO: fix
-    //runTorchNet("net_pool_ave", "l1_Pooling", false);
+    runTorchNet("net_pool_ave", "l1_Pooling", false);
 }
 
 TEST(Torch_Importer, run_reshape)
