Fixed im2col_ocl bug caused non-zero UMat offset

pull/265/head
Vitaliy Lyudvichenko 10 years ago
parent 23d3ede6ab
commit 7acfda2c11
  1. 7
      modules/dnn/src/dnn.cpp
  2. 13
      modules/dnn/src/layers/convolution_layer.cpp
  3. 3
      modules/dnn/src/layers/convolution_layer.hpp
  4. 15
      modules/dnn/src/layers/im2col.cpp
  5. 2
      modules/dnn/src/layers/im2col.hpp
  6. 18
      modules/dnn/src/opencl/im2col.cl

@ -173,7 +173,7 @@ struct Net::Impl
MapIdToLayerData::iterator it = layers.find(id); MapIdToLayerData::iterator it = layers.find(id);
if (it == layers.end()) if (it == layers.end())
CV_Error(Error::StsError, "Layer with requested id=" + toString(id) + " not found"); CV_Error(Error::StsObjectNotFound, format("Layer with requested id=%d not found", id));
return it->second; return it->second;
} }
@ -536,5 +536,10 @@ int Net::getLayerId(LayerId)
return -1; return -1;
} }
void Net::deleteLayer(LayerId)
{
CV_Error(Error::StsNotImplemented, "");
}
} }
} }

@ -3,6 +3,7 @@
#include "layers_common.hpp" #include "layers_common.hpp"
#include "convolution_layer.hpp" #include "convolution_layer.hpp"
#include "im2col.hpp" #include "im2col.hpp"
#include <iostream>
namespace cv namespace cv
{ {
@ -28,6 +29,9 @@ namespace dnn
Blob &biasBlob = learnedParams[1]; Blob &biasBlob = learnedParams[1];
CV_Assert(biasBlob.total() == (size_t)numOutput); CV_Assert(biasBlob.total() == (size_t)numOutput);
} }
//TBD
useOpenCL = params.has("use_opencl");
} }
void ConvolutionLayer::allocate(const std::vector<Blob*> &inputs, std::vector<Blob> &outputs) void ConvolutionLayer::allocate(const std::vector<Blob*> &inputs, std::vector<Blob> &outputs)
@ -108,12 +112,17 @@ namespace dnn
return; return;
} }
if (ocl::useOpenCL() && inpBlob.type() == CV_32F) if (useOpenCL && ocl::useOpenCL() && inpBlob.type() == CV_32F && !is1x1())
{ {
UMat src = inpBlob.getMatRef().getUMat(ACCESS_READ); std::vector<Range> ranges(4, Range::all());
ranges[0] = Range(imNum, imNum+1);
ranges[1] = Range(cnGroup*inpGroupCn, (cnGroup + 1)*inpGroupCn);
UMat src = inpBlob.getMatRef()(&ranges[0]).getUMat(ACCESS_READ);
UMat dst(colMat.size(), colMat.type()); UMat dst(colMat.size(), colMat.type());
im2col_ocl(src, inpGroupCn, inpH, inpW, kerH, kerW, padH, padW, strideH, strideW, dst); im2col_ocl(src, inpGroupCn, inpH, inpW, kerH, kerW, padH, padW, strideH, strideW, dst);
dst.copyTo(colMat); dst.copyTo(colMat);
return;
} }
if (inpBlob.type() == CV_32F) if (inpBlob.type() == CV_32F)

@ -22,6 +22,7 @@ namespace dnn
int inpGroupCn, outGroupCn; int inpGroupCn, outGroupCn;
int ksize; int ksize;
bool useOpenCL;
Mat colMat, biasOnesMat; Mat colMat, biasOnesMat;
inline bool is1x1() const; inline bool is1x1() const;
@ -47,4 +48,4 @@ namespace dnn
}; };
} }
} }
#endif #endif

@ -20,22 +20,23 @@ void im2col_ocl(UMat &img,
CV_Assert(img.isContinuous() && col.isContinuous()); CV_Assert(img.isContinuous() && col.isContinuous());
CV_Assert(img.total() == (size_t)channels * height * width); CV_Assert(img.total() == (size_t)channels * height * width);
CV_Assert(col.total() == (size_t)h_out * w_out * kernel_h * kernel_w); CV_Assert(col.total() == (size_t)channels * kernel_h * kernel_w * h_out * w_out);
ocl::Kernel im2col_ker("im2col", ocl::dnn::im2col_oclsrc); ocl::Kernel im2col_ker("im2col", ocl::dnn::im2col_oclsrc);
CV_Assert(!im2col_ker.empty());
im2col_ker.args(ocl::KernelArg::PtrReadOnly(img), im2col_ker.args(ocl::KernelArg::PtrReadOnly(img), (int)img.offset,
channels, height, width, channels, height, width,
kernel_h, kernel_w, pad_h, pad_w, stride_h, stride_w, kernel_h, kernel_w, pad_h, pad_w, stride_h, stride_w,
h_out, w_out, h_out, w_out,
ocl::KernelArg::PtrWriteOnly(col) ocl::KernelArg::PtrWriteOnly(col), (int)col.offset
); );
size_t globalSize[] = { (size_t)channels * h_out * w_out }; size_t localSize = ocl::Device::getDefault().maxWorkGroupSize();
size_t localSize[] = { ocl::Device::getDefault().maxWorkGroupSize() }; size_t globalSize = (size_t)channels * h_out * w_out;
CV_Assert(im2col_ker.run(1, globalSize, localSize, false)); CV_Assert(im2col_ker.run(1, &globalSize, &localSize, true));
} }
} }
} }

@ -80,4 +80,4 @@ void im2col_ocl(UMat &img,
} }
} }
#endif #endif

@ -1,11 +1,13 @@
__kernel void im2col(__global const float *im_src, __kernel void im2col(__global const float *im_src, int im_src_offset,
int channels, int height_inp, int width_inp, int channels, int height_inp, int width_inp,
int kernel_h, int kernel_w, int pad_h, int pad_w, int stride_h, int stride_w, int kernel_h, int kernel_w, int pad_h, int pad_w, int stride_h, int stride_w,
int height_out, int width_out, int height_out, int width_out,
__global float *im_col __global float *im_col, int im_col_offset
) )
{ {
int index = get_global_id(0); int index = get_global_id(0);
if (index >= height_out * width_out * channels)
return;
int j_out = index % width_out; int j_out = index % width_out;
int i_out = (index / width_out) % height_out; int i_out = (index / width_out) % height_out;
int c_inp = (index / width_out) / height_out; int c_inp = (index / width_out) / height_out;
@ -14,17 +16,15 @@ __kernel void im2col(__global const float *im_src,
int i_inp = i_out * stride_h - pad_h; int i_inp = i_out * stride_h - pad_h;
int j_inp = j_out * stride_w - pad_w; int j_inp = j_out * stride_w - pad_w;
im_col += (c_out * height_out + i_out) * width_out + j_out; im_src += (c_inp * height_inp + i_inp) * width_inp + j_inp + im_src_offset / sizeof(float);
im_src += (c_inp * height_inp + i_inp) * width_inp + j_inp; im_col += (c_out * height_out + i_out) * width_out + j_out + im_col_offset / sizeof(float);
for (int ki = 0; ki < kernel_h; ++ki) for (int ki = 0; ki < kernel_h; ++ki)
for (int kj = 0; kj < kernel_w; ++kj) { for (int kj = 0; kj < kernel_w; ++kj) {
int i = i_inp + ki; int i = i_inp + ki;
int j = j_inp + kj; int j = j_inp + kj;
*im_col = (h >= 0 && w >= 0 && h < height_inp && w < width_inp) ? *im_col = (i >= 0 && j >= 0 && i < height_inp && j < width_inp) ?
im_src[i * width_inp + j] : 0; im_src[ki * width_inp + kj] : 0;
im_col += height_out * width_out; im_col += height_out * width_out;
} }
} }
}
Loading…
Cancel
Save