Merge pull request #11851 from pengli:3.4

pull/11859/head
Alexander Alekhin 6 years ago
commit f40231af5d
  1. 2
      modules/dnn/src/dnn.cpp
  2. 1
      modules/dnn/src/layers/pooling_layer.cpp
  3. 3
      modules/dnn/src/ocl4dnn/include/ocl4dnn.hpp
  4. 8
      modules/dnn/src/ocl4dnn/src/ocl4dnn_pool.cpp
  5. 115
      modules/dnn/src/opencl/ocl4dnn_pooling.cl

@ -1446,7 +1446,7 @@ struct Net::Impl
// TODO: OpenCL target support more fusion styles. // TODO: OpenCL target support more fusion styles.
if ( preferableBackend == DNN_BACKEND_OPENCV && IS_DNN_OPENCL_TARGET(preferableTarget) && if ( preferableBackend == DNN_BACKEND_OPENCV && IS_DNN_OPENCL_TARGET(preferableTarget) &&
(!cv::ocl::useOpenCL() || (ld.layerInstance->type != "Convolution" && (!cv::ocl::useOpenCL() || (ld.layerInstance->type != "Convolution" &&
ld.layerInstance->type != "MVN")) ) ld.layerInstance->type != "MVN" && ld.layerInstance->type != "Pooling")) )
continue; continue;
Ptr<Layer>& currLayer = ld.layerInstance; Ptr<Layer>& currLayer = ld.layerInstance;

@ -165,6 +165,7 @@ public:
(type == AVE ? LIBDNN_POOLING_METHOD_AVE : (type == AVE ? LIBDNN_POOLING_METHOD_AVE :
LIBDNN_POOLING_METHOD_STO); LIBDNN_POOLING_METHOD_STO);
config.avePoolPaddedArea = avePoolPaddedArea; config.avePoolPaddedArea = avePoolPaddedArea;
config.computeMaxIdx = computeMaxIdx;
config.use_half = use_half; config.use_half = use_half;
poolOp = Ptr<OCL4DNNPool<float> >(new OCL4DNNPool<float>(config)); poolOp = Ptr<OCL4DNNPool<float> >(new OCL4DNNPool<float>(config));
} }

@ -352,6 +352,7 @@ struct OCL4DNNPoolConfig
pool_method(LIBDNN_POOLING_METHOD_MAX), pool_method(LIBDNN_POOLING_METHOD_MAX),
global_pooling(false), global_pooling(false),
avePoolPaddedArea(true), avePoolPaddedArea(true),
computeMaxIdx(true),
use_half(false) use_half(false)
{} {}
MatShape in_shape; MatShape in_shape;
@ -365,6 +366,7 @@ struct OCL4DNNPoolConfig
ocl4dnnPoolingMethod_t pool_method; // = LIBDNN_POOLING_METHOD_MAX; ocl4dnnPoolingMethod_t pool_method; // = LIBDNN_POOLING_METHOD_MAX;
bool global_pooling; // = false; bool global_pooling; // = false;
bool avePoolPaddedArea; bool avePoolPaddedArea;
bool computeMaxIdx;
bool use_half; bool use_half;
}; };
@ -399,6 +401,7 @@ class OCL4DNNPool
int32_t pooled_height_; int32_t pooled_height_;
int32_t pooled_width_; int32_t pooled_width_;
bool avePoolPaddedArea; bool avePoolPaddedArea;
bool computeMaxIdx;
bool use_half; bool use_half;
}; };

@ -56,6 +56,7 @@ OCL4DNNPool<Dtype>::OCL4DNNPool(OCL4DNNPoolConfig config)
channels_ = config.channels; channels_ = config.channels;
pool_method_ = config.pool_method; pool_method_ = config.pool_method;
avePoolPaddedArea = config.avePoolPaddedArea; avePoolPaddedArea = config.avePoolPaddedArea;
computeMaxIdx = config.computeMaxIdx;
use_half = config.use_half; use_half = config.use_half;
for (int i = 0; i < spatial_dims; ++i) for (int i = 0; i < spatial_dims; ++i)
@ -97,7 +98,7 @@ bool OCL4DNNPool<Dtype>::Forward(const UMat& bottom,
UMat& top_mask) UMat& top_mask)
{ {
bool ret = true; bool ret = true;
size_t global[] = { 128 * 128 }; size_t global[] = { (size_t)count_ };
size_t local[] = { 128 }; size_t local[] = { 128 };
// support 2D case // support 2D case
@ -105,8 +106,7 @@ bool OCL4DNNPool<Dtype>::Forward(const UMat& bottom,
{ {
case LIBDNN_POOLING_METHOD_MAX: case LIBDNN_POOLING_METHOD_MAX:
{ {
bool haveMask = !top_mask.empty(); String kname = computeMaxIdx ? "max_pool_forward_mask" : "max_pool_forward";
String kname = haveMask ? "max_pool_forward_mask" : "max_pool_forward";
kname += (use_half) ? "_half" : "_float"; kname += (use_half) ? "_half" : "_float";
ocl::Kernel oclk_max_pool_forward( ocl::Kernel oclk_max_pool_forward(
kname.c_str(), kname.c_str(),
@ -118,7 +118,7 @@ bool OCL4DNNPool<Dtype>::Forward(const UMat& bottom,
kernel_w_, kernel_h_, kernel_w_, kernel_h_,
stride_w_, stride_h_, stride_w_, stride_h_,
pad_w_, pad_h_, pad_w_, pad_h_,
haveMask ? " -D HAVE_MASK=1" : "" computeMaxIdx ? " -D HAVE_MASK=1" : ""
)); ));
if (oclk_max_pool_forward.empty()) if (oclk_max_pool_forward.empty())

@ -65,36 +65,40 @@ __kernel void
#endif #endif
) )
{ {
for (int index = get_global_id(0); index < nthreads; int index = get_global_id(0);
index += get_global_size(0)) if (index >= nthreads)
return;
const int pw = index % pooled_width;
const int xx = index / pooled_width;
const int ph = xx % pooled_height;
const int ch = xx / pooled_height;
int hstart = ph * STRIDE_H - PAD_H;
int wstart = pw * STRIDE_W - PAD_W;
Dtype maxval = -FLT_MAX;
int maxidx = -1;
int in_offset = ch * height * width;
for (int h = 0; h < KERNEL_H; ++h)
{ {
const int pw = index % pooled_width; int off_y = hstart + h;
const int ph = (index / pooled_width) % pooled_height; if (off_y >= 0 && off_y < height)
const int c = (index / pooled_width / pooled_height) % channels; {
const int n = index / pooled_width / pooled_height / channels; for (int w = 0; w < KERNEL_W; ++w)
int hstart = ph * STRIDE_H - PAD_H; {
int wstart = pw * STRIDE_W - PAD_W; int off_x = wstart + w;
const int hend = min(hstart + KERNEL_H, height); if (off_x >= 0 && off_x < width)
const int wend = min(wstart + KERNEL_W, width); {
hstart = max(hstart, (int)0); Dtype val = bottom_data[in_offset + off_y * width + off_x];
wstart = max(wstart, (int)0); maxidx = (val > maxval) ? (off_y * width + off_x) : maxidx;
Dtype maxval = -FLT_MAX; maxval = fmax(val, maxval);
int maxidx = -1;
__global const Dtype* bottom_slice = bottom_data
+ (n * channels + c) * height * width;
for (int h = hstart; h < hend; ++h) {
for (int w = wstart; w < wend; ++w) {
if (bottom_slice[h * width + w] > maxval) {
maxidx = h * width + w;
maxval = bottom_slice[maxidx];
} }
} }
} }
top_data[index] = maxval; }
top_data[index] = maxval;
#ifdef HAVE_MASK #ifdef HAVE_MASK
mask[index] = maxidx; mask[index] = maxidx;
#endif #endif
}
} }
#elif defined KERNEL_AVE_POOL #elif defined KERNEL_AVE_POOL
@ -105,43 +109,42 @@ __kernel void TEMPLATE(ave_pool_forward, Dtype)(
const int pooled_height, const int pooled_width, const int pooled_height, const int pooled_width,
__global Dtype* top_data) __global Dtype* top_data)
{ {
for (int index = get_global_id(0); index < nthreads; int index = get_global_id(0);
index += get_global_size(0)) if (index >= nthreads)
{ return;
{
const int pw = index % pooled_width; const int pw = index % pooled_width;
const int ph = (index / pooled_width) % pooled_height; const int xx = index / pooled_width;
const int c = (index / pooled_width / pooled_height) % channels; const int ph = xx % pooled_height;
const int n = index / pooled_width / pooled_height / channels; const int ch = xx / pooled_height;
int hstart = ph * STRIDE_H - PAD_H; int hstart = ph * STRIDE_H - PAD_H;
int wstart = pw * STRIDE_W - PAD_W; int wstart = pw * STRIDE_W - PAD_W;
int hend = min(hstart + KERNEL_H, height + PAD_H); int hend = min(hstart + KERNEL_H, height + PAD_H);
int wend = min(wstart + KERNEL_W, width + PAD_W); int wend = min(wstart + KERNEL_W, width + PAD_W);
int pool_size; int pool_size;
#ifdef AVE_POOL_PADDING_AREA #ifdef AVE_POOL_PADDING_AREA
pool_size = (hend - hstart) * (wend - wstart); pool_size = (hend - hstart) * (wend - wstart);
hstart = max(hstart, (int)0); hstart = max(hstart, (int)0);
wstart = max(wstart, (int)0); wstart = max(wstart, (int)0);
hend = min(hend, height); hend = min(hend, height);
wend = min(wend, width); wend = min(wend, width);
#else #else
hstart = max(hstart, (int)0); hstart = max(hstart, (int)0);
wstart = max(wstart, (int)0); wstart = max(wstart, (int)0);
hend = min(hend, height); hend = min(hend, height);
wend = min(wend, width); wend = min(wend, width);
pool_size = (hend - hstart) * (wend - wstart); pool_size = (hend - hstart) * (wend - wstart);
#endif #endif
Dtype aveval = 0; Dtype aveval = 0;
__global const Dtype* bottom_slice = bottom_data int in_offset = ch * height * width;
+ (n * channels + c) * height * width; for (int h = hstart; h < hend; ++h)
for (int h = hstart; h < hend; ++h) { {
for (int w = wstart; w < wend; ++w) { for (int w = wstart; w < wend; ++w)
aveval += bottom_slice[h * width + w]; {
} aveval += bottom_data[in_offset + h * width + w];
}
top_data[index] = aveval / pool_size;
} }
} }
top_data[index] = aveval / pool_size;
} }
#elif defined KERNEL_STO_POOL #elif defined KERNEL_STO_POOL

Loading…
Cancel
Save