Merge pull request #11851 from pengli:3.4

pull/11859/head
Alexander Alekhin 6 years ago
commit f40231af5d
  1. 2
      modules/dnn/src/dnn.cpp
  2. 1
      modules/dnn/src/layers/pooling_layer.cpp
  3. 3
      modules/dnn/src/ocl4dnn/include/ocl4dnn.hpp
  4. 8
      modules/dnn/src/ocl4dnn/src/ocl4dnn_pool.cpp
  5. 115
      modules/dnn/src/opencl/ocl4dnn_pooling.cl

@ -1446,7 +1446,7 @@ struct Net::Impl
// TODO: OpenCL target support more fusion styles.
if ( preferableBackend == DNN_BACKEND_OPENCV && IS_DNN_OPENCL_TARGET(preferableTarget) &&
(!cv::ocl::useOpenCL() || (ld.layerInstance->type != "Convolution" &&
ld.layerInstance->type != "MVN")) )
ld.layerInstance->type != "MVN" && ld.layerInstance->type != "Pooling")) )
continue;
Ptr<Layer>& currLayer = ld.layerInstance;

@ -165,6 +165,7 @@ public:
(type == AVE ? LIBDNN_POOLING_METHOD_AVE :
LIBDNN_POOLING_METHOD_STO);
config.avePoolPaddedArea = avePoolPaddedArea;
config.computeMaxIdx = computeMaxIdx;
config.use_half = use_half;
poolOp = Ptr<OCL4DNNPool<float> >(new OCL4DNNPool<float>(config));
}

@ -352,6 +352,7 @@ struct OCL4DNNPoolConfig
pool_method(LIBDNN_POOLING_METHOD_MAX),
global_pooling(false),
avePoolPaddedArea(true),
computeMaxIdx(true),
use_half(false)
{}
MatShape in_shape;
@ -365,6 +366,7 @@ struct OCL4DNNPoolConfig
ocl4dnnPoolingMethod_t pool_method; // = LIBDNN_POOLING_METHOD_MAX;
bool global_pooling; // = false;
bool avePoolPaddedArea;
bool computeMaxIdx;
bool use_half;
};
@ -399,6 +401,7 @@ class OCL4DNNPool
int32_t pooled_height_;
int32_t pooled_width_;
bool avePoolPaddedArea;
bool computeMaxIdx;
bool use_half;
};

@ -56,6 +56,7 @@ OCL4DNNPool<Dtype>::OCL4DNNPool(OCL4DNNPoolConfig config)
channels_ = config.channels;
pool_method_ = config.pool_method;
avePoolPaddedArea = config.avePoolPaddedArea;
computeMaxIdx = config.computeMaxIdx;
use_half = config.use_half;
for (int i = 0; i < spatial_dims; ++i)
@ -97,7 +98,7 @@ bool OCL4DNNPool<Dtype>::Forward(const UMat& bottom,
UMat& top_mask)
{
bool ret = true;
size_t global[] = { 128 * 128 };
size_t global[] = { (size_t)count_ };
size_t local[] = { 128 };
// support 2D case
@ -105,8 +106,7 @@ bool OCL4DNNPool<Dtype>::Forward(const UMat& bottom,
{
case LIBDNN_POOLING_METHOD_MAX:
{
bool haveMask = !top_mask.empty();
String kname = haveMask ? "max_pool_forward_mask" : "max_pool_forward";
String kname = computeMaxIdx ? "max_pool_forward_mask" : "max_pool_forward";
kname += (use_half) ? "_half" : "_float";
ocl::Kernel oclk_max_pool_forward(
kname.c_str(),
@ -118,7 +118,7 @@ bool OCL4DNNPool<Dtype>::Forward(const UMat& bottom,
kernel_w_, kernel_h_,
stride_w_, stride_h_,
pad_w_, pad_h_,
haveMask ? " -D HAVE_MASK=1" : ""
computeMaxIdx ? " -D HAVE_MASK=1" : ""
));
if (oclk_max_pool_forward.empty())

@ -65,36 +65,40 @@ __kernel void
#endif
)
{
for (int index = get_global_id(0); index < nthreads;
index += get_global_size(0))
int index = get_global_id(0);
if (index >= nthreads)
return;
const int pw = index % pooled_width;
const int xx = index / pooled_width;
const int ph = xx % pooled_height;
const int ch = xx / pooled_height;
int hstart = ph * STRIDE_H - PAD_H;
int wstart = pw * STRIDE_W - PAD_W;
Dtype maxval = -FLT_MAX;
int maxidx = -1;
int in_offset = ch * height * width;
for (int h = 0; h < KERNEL_H; ++h)
{
const int pw = index % pooled_width;
const int ph = (index / pooled_width) % pooled_height;
const int c = (index / pooled_width / pooled_height) % channels;
const int n = index / pooled_width / pooled_height / channels;
int hstart = ph * STRIDE_H - PAD_H;
int wstart = pw * STRIDE_W - PAD_W;
const int hend = min(hstart + KERNEL_H, height);
const int wend = min(wstart + KERNEL_W, width);
hstart = max(hstart, (int)0);
wstart = max(wstart, (int)0);
Dtype maxval = -FLT_MAX;
int maxidx = -1;
__global const Dtype* bottom_slice = bottom_data
+ (n * channels + c) * height * width;
for (int h = hstart; h < hend; ++h) {
for (int w = wstart; w < wend; ++w) {
if (bottom_slice[h * width + w] > maxval) {
maxidx = h * width + w;
maxval = bottom_slice[maxidx];
int off_y = hstart + h;
if (off_y >= 0 && off_y < height)
{
for (int w = 0; w < KERNEL_W; ++w)
{
int off_x = wstart + w;
if (off_x >= 0 && off_x < width)
{
Dtype val = bottom_data[in_offset + off_y * width + off_x];
maxidx = (val > maxval) ? (off_y * width + off_x) : maxidx;
maxval = fmax(val, maxval);
}
}
}
top_data[index] = maxval;
}
top_data[index] = maxval;
#ifdef HAVE_MASK
mask[index] = maxidx;
mask[index] = maxidx;
#endif
}
}
#elif defined KERNEL_AVE_POOL
@ -105,43 +109,42 @@ __kernel void TEMPLATE(ave_pool_forward, Dtype)(
const int pooled_height, const int pooled_width,
__global Dtype* top_data)
{
for (int index = get_global_id(0); index < nthreads;
index += get_global_size(0))
{
{
const int pw = index % pooled_width;
const int ph = (index / pooled_width) % pooled_height;
const int c = (index / pooled_width / pooled_height) % channels;
const int n = index / pooled_width / pooled_height / channels;
int hstart = ph * STRIDE_H - PAD_H;
int wstart = pw * STRIDE_W - PAD_W;
int hend = min(hstart + KERNEL_H, height + PAD_H);
int wend = min(wstart + KERNEL_W, width + PAD_W);
int pool_size;
int index = get_global_id(0);
if (index >= nthreads)
return;
const int pw = index % pooled_width;
const int xx = index / pooled_width;
const int ph = xx % pooled_height;
const int ch = xx / pooled_height;
int hstart = ph * STRIDE_H - PAD_H;
int wstart = pw * STRIDE_W - PAD_W;
int hend = min(hstart + KERNEL_H, height + PAD_H);
int wend = min(wstart + KERNEL_W, width + PAD_W);
int pool_size;
#ifdef AVE_POOL_PADDING_AREA
pool_size = (hend - hstart) * (wend - wstart);
hstart = max(hstart, (int)0);
wstart = max(wstart, (int)0);
hend = min(hend, height);
wend = min(wend, width);
pool_size = (hend - hstart) * (wend - wstart);
hstart = max(hstart, (int)0);
wstart = max(wstart, (int)0);
hend = min(hend, height);
wend = min(wend, width);
#else
hstart = max(hstart, (int)0);
wstart = max(wstart, (int)0);
hend = min(hend, height);
wend = min(wend, width);
pool_size = (hend - hstart) * (wend - wstart);
hstart = max(hstart, (int)0);
wstart = max(wstart, (int)0);
hend = min(hend, height);
wend = min(wend, width);
pool_size = (hend - hstart) * (wend - wstart);
#endif
Dtype aveval = 0;
__global const Dtype* bottom_slice = bottom_data
+ (n * channels + c) * height * width;
for (int h = hstart; h < hend; ++h) {
for (int w = wstart; w < wend; ++w) {
aveval += bottom_slice[h * width + w];
}
}
top_data[index] = aveval / pool_size;
Dtype aveval = 0;
int in_offset = ch * height * width;
for (int h = hstart; h < hend; ++h)
{
for (int w = wstart; w < wend; ++w)
{
aveval += bottom_data[in_offset + h * width + w];
}
}
top_data[index] = aveval / pool_size;
}
#elif defined KERNEL_STO_POOL

Loading…
Cancel
Save