diff --git a/modules/dnn/src/layers/convolution_layer.cpp b/modules/dnn/src/layers/convolution_layer.cpp index af5e3d4c4..4bd47a61e 100644 --- a/modules/dnn/src/layers/convolution_layer.cpp +++ b/modules/dnn/src/layers/convolution_layer.cpp @@ -1,4 +1,5 @@ #include "../precomp.hpp" +#include #include "layers_common.hpp" #include "convolution_layer.hpp" #include "im2col.hpp" @@ -107,6 +108,14 @@ namespace dnn return; } + if (ocl::useOpenCL() && inpBlob.type() == CV_32F) + { + UMat src = inpBlob.getMatRef().getUMat(ACCESS_READ); + UMat dst(colMat.size(), colMat.type()); + im2col_ocl(src, inpGroupCn, inpH, inpW, kerH, kerW, padH, padW, strideH, strideW, dst); + dst.copyTo(colMat); + } + if (inpBlob.type() == CV_32F) im2col_cpu((float *)srcPtr, inpGroupCn, inpH, inpW, kerH, kerW, padH, padW, strideH, strideW, (float *)colMat.ptr()); if (inpBlob.type() == CV_64F) diff --git a/modules/dnn/src/layers/im2col.cpp b/modules/dnn/src/layers/im2col.cpp new file mode 100644 index 000000000..519c9c66f --- /dev/null +++ b/modules/dnn/src/layers/im2col.cpp @@ -0,0 +1,41 @@ +#include "../precomp.hpp" +#include +#include "im2col.hpp" +#include "opencl_kernels_dnn.hpp" + +namespace cv +{ +namespace dnn +{ + +void im2col_ocl(UMat &img, + int channels, int height, int width, + int kernel_h, int kernel_w, + int pad_h, int pad_w, + int stride_h, int stride_w, + UMat &col) +{ + int h_out = (height + 2 * pad_h - kernel_h) / stride_h + 1; + int w_out = (width + 2 * pad_w - kernel_w) / stride_w + 1; + + CV_Assert(img.isContinuous() && col.isContinuous()); + CV_Assert(img.total() == (size_t)channels * height * width); + CV_Assert(col.total() == (size_t)h_out * w_out * kernel_h * kernel_w); + + ocl::Kernel im2col_ker("im2col", ocl::dnn::im2col_oclsrc); + + im2col_ker.args(ocl::KernelArg::PtrReadOnly(img), + channels, height, width, + kernel_h, kernel_w, pad_h, pad_w, stride_h, stride_w, + h_out, w_out, + ocl::KernelArg::PtrWriteOnly(col) + ); + + size_t globalSize[] = { (size_t)channels * h_out * w_out }; + size_t localSize[] = { ocl::Device::getDefault().maxWorkGroupSize() }; + + CV_Assert(im2col_ker.run(1, globalSize, localSize, false)); +} + +} +} \ No newline at end of file diff --git a/modules/dnn/src/layers/im2col.hpp b/modules/dnn/src/layers/im2col.hpp index b356a0550..bdc253751 100644 --- a/modules/dnn/src/layers/im2col.hpp +++ b/modules/dnn/src/layers/im2col.hpp @@ -7,11 +7,12 @@ namespace dnn { template -void im2col_cpu(const Dtype* data_im, const int channels, - const int height, const int width, const int kernel_h, const int kernel_w, - const int pad_h, const int pad_w, - const int stride_h, const int stride_w, - Dtype* data_col) +void im2col_cpu(const Dtype* data_im, + int channels, int height, int width, + int kernel_h, int kernel_w, + int pad_h, int pad_w, + int stride_h, int stride_w, + Dtype* data_col) { int height_col = (height + 2 * pad_h - kernel_h) / stride_h + 1; int width_col = (width + 2 * pad_w - kernel_w) / stride_w + 1; @@ -35,11 +36,12 @@ void im2col_cpu(const Dtype* data_im, const int channels, } template -void col2im_cpu(const Dtype* data_col, const int channels, - const int height, const int width, const int patch_h, const int patch_w, - const int pad_h, const int pad_w, - const int stride_h, const int stride_w, - Dtype* data_im) +void col2im_cpu(const Dtype* data_col, + int channels, int height, int width, + int patch_h, int patch_w, + int pad_h, int pad_w, + int stride_h, int stride_w, + Dtype* data_im) { memset(data_im, 0, height * width * channels * sizeof(Dtype)); @@ -68,6 +70,13 @@ void col2im_cpu(const Dtype* data_col, const int channels, } } +void im2col_ocl(UMat &img, + int channels, int height, int width, + int kernel_h, int kernel_w, + int pad_h, int pad_w, + int stride_h, int stride_w, + UMat &col); + } } diff --git a/modules/dnn/src/opencl/im2col.cl b/modules/dnn/src/opencl/im2col.cl new file mode 100644 index 000000000..301826c40 --- /dev/null +++ b/modules/dnn/src/opencl/im2col.cl @@ -0,0 +1,30 @@ +__kernel void im2col(__global const float *im_src, + int channels, int height_inp, int width_inp, + int kernel_h, int kernel_w, int pad_h, int pad_w, int stride_h, int stride_w, + int height_out, int width_out, + __global float *im_col + ) +{ + int index = get_global_id(0); + int j_out = index % width_out; + int i_out = (index / width_out) % height_out; + int c_inp = (index / width_out) / height_out; + + int c_out = c_inp * kernel_h * kernel_w; + int i_inp = i_out * stride_h - pad_h; + int j_inp = j_out * stride_w - pad_w; + + im_col += (c_out * height_out + i_out) * width_out + j_out; + im_src += (c_inp * height_inp + i_inp) * width_inp + j_inp; + + for (int ki = 0; ki < kernel_h; ++ki) + for (int kj = 0; kj < kernel_w; ++kj) { + int i = i_inp + ki; + int j = j_inp + kj; + *im_col = (h >= 0 && w >= 0 && h < height_inp && w < width_inp) ? + im_src[i * width_inp + j] : 0; + im_col += height_out * width_out; + } +} + +} \ No newline at end of file diff --git a/modules/dnn/test/test_caffe_importer.cpp b/modules/dnn/test/test_caffe_importer.cpp index 9f51ebdca..d021468e5 100644 --- a/modules/dnn/test/test_caffe_importer.cpp +++ b/modules/dnn/test/test_caffe_importer.cpp @@ -12,7 +12,7 @@ static std::string _tf(TString filename) return (getOpenCVExtraDir() + "/dnn/") + filename; } -TEST(ReadCaffe_GTSRB, Accuracy) +TEST(Test_Caffe, read_gtsrb) { Net net; { @@ -22,7 +22,7 @@ TEST(ReadCaffe_GTSRB, Accuracy) } } -TEST(ReadCaffe_GoogLeNet, Accuracy) +TEST(Test_Caffe, read_googlenet) { Net net; { diff --git a/modules/dnn/test/test_layers.cpp b/modules/dnn/test/test_layers.cpp index a272e2bd2..e0de390ab 100644 --- a/modules/dnn/test/test_layers.cpp +++ b/modules/dnn/test/test_layers.cpp @@ -1,4 +1,5 @@ #include "test_precomp.hpp" +#include #include #include "npy_blob.hpp" @@ -56,6 +57,17 @@ TEST(Layer_Test_Convolution, Accuracy) testLayer("layer_convolution", true); } +//TODO: move this test into separate file +TEST(Layer_Test_Convolution, AccuracyOCL) +{ + if (cv::ocl::haveOpenCL()) + { + cv::ocl::setUseOpenCL(true); + testLayer("layer_convolution", true); + cv::ocl::setUseOpenCL(false); + } +} + TEST(Layer_Test_InnerProduct, Accuracy) { testLayer("layer_inner_product", true); diff --git a/modules/dnn/test/test_torch_importer.cpp b/modules/dnn/test/test_torch_importer.cpp index 514c46d5b..0b17a1c56 100644 --- a/modules/dnn/test/test_torch_importer.cpp +++ b/modules/dnn/test/test_torch_importer.cpp @@ -58,8 +58,7 @@ TEST(Torch_Importer, run_pool_max) TEST(Torch_Importer, run_pool_ave) { - //TODO: fix - //runTorchNet("net_pool_ave", "l1_Pooling", false); + runTorchNet("net_pool_ave", "l1_Pooling", false); } TEST(Torch_Importer, run_reshape)