Added OpenCL im2col

pull/265/head
Vitaliy Lyudvichenko 10 years ago
parent 06f949a590
commit 23d3ede6ab
  1. 9
      modules/dnn/src/layers/convolution_layer.cpp
  2. 41
      modules/dnn/src/layers/im2col.cpp
  3. 25
      modules/dnn/src/layers/im2col.hpp
  4. 30
      modules/dnn/src/opencl/im2col.cl
  5. 4
      modules/dnn/test/test_caffe_importer.cpp
  6. 12
      modules/dnn/test/test_layers.cpp
  7. 3
      modules/dnn/test/test_torch_importer.cpp

@ -1,4 +1,5 @@
#include "../precomp.hpp"
#include <opencv2/core/ocl.hpp>
#include "layers_common.hpp"
#include "convolution_layer.hpp"
#include "im2col.hpp"
@ -107,6 +108,14 @@ namespace dnn
return;
}
if (ocl::useOpenCL() && inpBlob.type() == CV_32F)
{
UMat src = inpBlob.getMatRef().getUMat(ACCESS_READ);
UMat dst(colMat.size(), colMat.type());
im2col_ocl(src, inpGroupCn, inpH, inpW, kerH, kerW, padH, padW, strideH, strideW, dst);
dst.copyTo(colMat);
}
if (inpBlob.type() == CV_32F)
im2col_cpu((float *)srcPtr, inpGroupCn, inpH, inpW, kerH, kerW, padH, padW, strideH, strideW, (float *)colMat.ptr());
if (inpBlob.type() == CV_64F)

@ -0,0 +1,41 @@
#include "../precomp.hpp"
#include <opencv2/core/ocl.hpp>
#include "im2col.hpp"
#include "opencl_kernels_dnn.hpp"
namespace cv
{
namespace dnn
{
void im2col_ocl(UMat &img,
int channels, int height, int width,
int kernel_h, int kernel_w,
int pad_h, int pad_w,
int stride_h, int stride_w,
UMat &col)
{
int h_out = (height + 2 * pad_h - kernel_h) / stride_h + 1;
int w_out = (width + 2 * pad_w - kernel_w) / stride_w + 1;
CV_Assert(img.isContinuous() && col.isContinuous());
CV_Assert(img.total() == (size_t)channels * height * width);
CV_Assert(col.total() == (size_t)h_out * w_out * kernel_h * kernel_w);
ocl::Kernel im2col_ker("im2col", ocl::dnn::im2col_oclsrc);
im2col_ker.args(ocl::KernelArg::PtrReadOnly(img),
channels, height, width,
kernel_h, kernel_w, pad_h, pad_w, stride_h, stride_w,
h_out, w_out,
ocl::KernelArg::PtrWriteOnly(col)
);
size_t globalSize[] = { (size_t)channels * h_out * w_out };
size_t localSize[] = { ocl::Device::getDefault().maxWorkGroupSize() };
CV_Assert(im2col_ker.run(1, globalSize, localSize, false));
}
}
}

@ -7,10 +7,11 @@ namespace dnn
{
template <typename Dtype>
void im2col_cpu(const Dtype* data_im, const int channels,
const int height, const int width, const int kernel_h, const int kernel_w,
const int pad_h, const int pad_w,
const int stride_h, const int stride_w,
void im2col_cpu(const Dtype* data_im,
int channels, int height, int width,
int kernel_h, int kernel_w,
int pad_h, int pad_w,
int stride_h, int stride_w,
Dtype* data_col)
{
int height_col = (height + 2 * pad_h - kernel_h) / stride_h + 1;
@ -35,10 +36,11 @@ void im2col_cpu(const Dtype* data_im, const int channels,
}
template <typename Dtype>
void col2im_cpu(const Dtype* data_col, const int channels,
const int height, const int width, const int patch_h, const int patch_w,
const int pad_h, const int pad_w,
const int stride_h, const int stride_w,
void col2im_cpu(const Dtype* data_col,
int channels, int height, int width,
int patch_h, int patch_w,
int pad_h, int pad_w,
int stride_h, int stride_w,
Dtype* data_im)
{
memset(data_im, 0, height * width * channels * sizeof(Dtype));
@ -68,6 +70,13 @@ void col2im_cpu(const Dtype* data_col, const int channels,
}
}
void im2col_ocl(UMat &img,
int channels, int height, int width,
int kernel_h, int kernel_w,
int pad_h, int pad_w,
int stride_h, int stride_w,
UMat &col);
}
}

@ -0,0 +1,30 @@
__kernel void im2col(__global const float *im_src,
int channels, int height_inp, int width_inp,
int kernel_h, int kernel_w, int pad_h, int pad_w, int stride_h, int stride_w,
int height_out, int width_out,
__global float *im_col
)
{
int index = get_global_id(0);
int j_out = index % width_out;
int i_out = (index / width_out) % height_out;
int c_inp = (index / width_out) / height_out;
int c_out = c_inp * kernel_h * kernel_w;
int i_inp = i_out * stride_h - pad_h;
int j_inp = j_out * stride_w - pad_w;
im_col += (c_out * height_out + i_out) * width_out + j_out;
im_src += (c_inp * height_inp + i_inp) * width_inp + j_inp;
for (int ki = 0; ki < kernel_h; ++ki)
for (int kj = 0; kj < kernel_w; ++kj) {
int i = i_inp + ki;
int j = j_inp + kj;
*im_col = (h >= 0 && w >= 0 && h < height_inp && w < width_inp) ?
im_src[i * width_inp + j] : 0;
im_col += height_out * width_out;
}
}
}

@ -12,7 +12,7 @@ static std::string _tf(TString filename)
return (getOpenCVExtraDir() + "/dnn/") + filename;
}
TEST(ReadCaffe_GTSRB, Accuracy)
TEST(Test_Caffe, read_gtsrb)
{
Net net;
{
@ -22,7 +22,7 @@ TEST(ReadCaffe_GTSRB, Accuracy)
}
}
TEST(ReadCaffe_GoogLeNet, Accuracy)
TEST(Test_Caffe, read_googlenet)
{
Net net;
{

@ -1,4 +1,5 @@
#include "test_precomp.hpp"
#include <opencv2/core/ocl.hpp>
#include <iostream>
#include "npy_blob.hpp"
@ -56,6 +57,17 @@ TEST(Layer_Test_Convolution, Accuracy)
testLayer("layer_convolution", true);
}
//TODO: move this test into separate file
TEST(Layer_Test_Convolution, AccuracyOCL)
{
if (cv::ocl::haveOpenCL())
{
cv::ocl::setUseOpenCL(true);
testLayer("layer_convolution", true);
cv::ocl::setUseOpenCL(false);
}
}
TEST(Layer_Test_InnerProduct, Accuracy)
{
testLayer("layer_inner_product", true);

@ -58,8 +58,7 @@ TEST(Torch_Importer, run_pool_max)
TEST(Torch_Importer, run_pool_ave)
{
//TODO: fix
//runTorchNet("net_pool_ave", "l1_Pooling", false);
runTorchNet("net_pool_ave", "l1_Pooling", false);
}
TEST(Torch_Importer, run_reshape)

Loading…
Cancel
Save