diff --git a/modules/dnn/src/dnn.cpp b/modules/dnn/src/dnn.cpp index 6eac0849a..472455a8c 100644 --- a/modules/dnn/src/dnn.cpp +++ b/modules/dnn/src/dnn.cpp @@ -173,7 +173,7 @@ struct Net::Impl MapIdToLayerData::iterator it = layers.find(id); if (it == layers.end()) - CV_Error(Error::StsError, "Layer with requested id=" + toString(id) + " not found"); + CV_Error(Error::StsObjectNotFound, format("Layer with requested id=%d not found", id)); return it->second; } @@ -536,5 +536,10 @@ int Net::getLayerId(LayerId) return -1; } +void Net::deleteLayer(LayerId) +{ + CV_Error(Error::StsNotImplemented, ""); +} + } } diff --git a/modules/dnn/src/layers/convolution_layer.cpp b/modules/dnn/src/layers/convolution_layer.cpp index 4bd47a61e..b3de8e00d 100644 --- a/modules/dnn/src/layers/convolution_layer.cpp +++ b/modules/dnn/src/layers/convolution_layer.cpp @@ -3,6 +3,7 @@ #include "layers_common.hpp" #include "convolution_layer.hpp" #include "im2col.hpp" +#include namespace cv { @@ -28,6 +29,9 @@ namespace dnn Blob &biasBlob = learnedParams[1]; CV_Assert(biasBlob.total() == (size_t)numOutput); } + + //TBD + useOpenCL = params.has("use_opencl"); } void ConvolutionLayer::allocate(const std::vector &inputs, std::vector &outputs) @@ -108,12 +112,17 @@ namespace dnn return; } - if (ocl::useOpenCL() && inpBlob.type() == CV_32F) + if (useOpenCL && ocl::useOpenCL() && inpBlob.type() == CV_32F && !is1x1()) { - UMat src = inpBlob.getMatRef().getUMat(ACCESS_READ); + std::vector ranges(4, Range::all()); + ranges[0] = Range(imNum, imNum+1); + ranges[1] = Range(cnGroup*inpGroupCn, (cnGroup + 1)*inpGroupCn); + + UMat src = inpBlob.getMatRef()(&ranges[0]).getUMat(ACCESS_READ); UMat dst(colMat.size(), colMat.type()); im2col_ocl(src, inpGroupCn, inpH, inpW, kerH, kerW, padH, padW, strideH, strideW, dst); dst.copyTo(colMat); + return; } if (inpBlob.type() == CV_32F) diff --git a/modules/dnn/src/layers/convolution_layer.hpp b/modules/dnn/src/layers/convolution_layer.hpp index 54c15a251..0e33f857e 100644 --- a/modules/dnn/src/layers/convolution_layer.hpp +++ b/modules/dnn/src/layers/convolution_layer.hpp @@ -22,6 +22,7 @@ namespace dnn int inpGroupCn, outGroupCn; int ksize; + bool useOpenCL; Mat colMat, biasOnesMat; inline bool is1x1() const; @@ -47,4 +48,4 @@ namespace dnn }; } } -#endif \ No newline at end of file +#endif diff --git a/modules/dnn/src/layers/im2col.cpp b/modules/dnn/src/layers/im2col.cpp index 519c9c66f..6a400bd9c 100644 --- a/modules/dnn/src/layers/im2col.cpp +++ b/modules/dnn/src/layers/im2col.cpp @@ -20,22 +20,23 @@ void im2col_ocl(UMat &img, CV_Assert(img.isContinuous() && col.isContinuous()); CV_Assert(img.total() == (size_t)channels * height * width); - CV_Assert(col.total() == (size_t)h_out * w_out * kernel_h * kernel_w); + CV_Assert(col.total() == (size_t)channels * kernel_h * kernel_w * h_out * w_out); ocl::Kernel im2col_ker("im2col", ocl::dnn::im2col_oclsrc); + CV_Assert(!im2col_ker.empty()); - im2col_ker.args(ocl::KernelArg::PtrReadOnly(img), + im2col_ker.args(ocl::KernelArg::PtrReadOnly(img), (int)img.offset, channels, height, width, kernel_h, kernel_w, pad_h, pad_w, stride_h, stride_w, h_out, w_out, - ocl::KernelArg::PtrWriteOnly(col) + ocl::KernelArg::PtrWriteOnly(col), (int)col.offset ); - size_t globalSize[] = { (size_t)channels * h_out * w_out }; - size_t localSize[] = { ocl::Device::getDefault().maxWorkGroupSize() }; + size_t localSize = ocl::Device::getDefault().maxWorkGroupSize(); + size_t globalSize = (size_t)channels * h_out * w_out; - CV_Assert(im2col_ker.run(1, globalSize, localSize, false)); + CV_Assert(im2col_ker.run(1, &globalSize, &localSize, true)); } } -} \ No newline at end of file +} diff --git a/modules/dnn/src/layers/im2col.hpp b/modules/dnn/src/layers/im2col.hpp index bdc253751..b82ef706e 100644 --- a/modules/dnn/src/layers/im2col.hpp +++ b/modules/dnn/src/layers/im2col.hpp @@ -80,4 +80,4 @@ void im2col_ocl(UMat &img, } } -#endif \ No newline at end of file +#endif diff --git a/modules/dnn/src/opencl/im2col.cl b/modules/dnn/src/opencl/im2col.cl index 301826c40..653eb9f21 100644 --- a/modules/dnn/src/opencl/im2col.cl +++ b/modules/dnn/src/opencl/im2col.cl @@ -1,11 +1,13 @@ -__kernel void im2col(__global const float *im_src, - int channels, int height_inp, int width_inp, +__kernel void im2col(__global const float *im_src, int im_src_offset, + int channels, int height_inp, int width_inp, int kernel_h, int kernel_w, int pad_h, int pad_w, int stride_h, int stride_w, int height_out, int width_out, - __global float *im_col + __global float *im_col, int im_col_offset ) { int index = get_global_id(0); + if (index >= height_out * width_out * channels) + return; int j_out = index % width_out; int i_out = (index / width_out) % height_out; int c_inp = (index / width_out) / height_out; @@ -14,17 +16,15 @@ __kernel void im2col(__global const float *im_src, int i_inp = i_out * stride_h - pad_h; int j_inp = j_out * stride_w - pad_w; - im_col += (c_out * height_out + i_out) * width_out + j_out; - im_src += (c_inp * height_inp + i_inp) * width_inp + j_inp; + im_src += (c_inp * height_inp + i_inp) * width_inp + j_inp + im_src_offset / sizeof(float); + im_col += (c_out * height_out + i_out) * width_out + j_out + im_col_offset / sizeof(float); for (int ki = 0; ki < kernel_h; ++ki) for (int kj = 0; kj < kernel_w; ++kj) { int i = i_inp + ki; int j = j_inp + kj; - *im_col = (h >= 0 && w >= 0 && h < height_inp && w < width_inp) ? - im_src[i * width_inp + j] : 0; + *im_col = (i >= 0 && j >= 0 && i < height_inp && j < width_inp) ? + im_src[ki * width_inp + kj] : 0; im_col += height_out * width_out; } } - -} \ No newline at end of file