OpenCL (ocl) fix for the detection_output and prior_box layers

Signed-off-by: Li Peng <peng.li@intel.com>
pull/10854/head
Li Peng 7 years ago
parent 7474ad81d9
commit 00d2f34888
  1. 3
      modules/dnn/src/layers/detection_output_layer.cpp
  2. 10
      modules/dnn/src/layers/prior_box_layer.cpp
  3. 18
      modules/dnn/src/opencl/detection_output.cl

@ -249,7 +249,8 @@ public:
kernel.set(6, (int)num_loc_classes); kernel.set(6, (int)num_loc_classes);
kernel.set(7, (int)background_label_id); kernel.set(7, (int)background_label_id);
kernel.set(8, (int)clip); kernel.set(8, (int)clip);
kernel.set(9, ocl::KernelArg::PtrWriteOnly(outmat)); kernel.set(9, (int)_locPredTransposed);
kernel.set(10, ocl::KernelArg::PtrWriteOnly(outmat));
if (!kernel.run(1, &nthreads, NULL, false)) if (!kernel.run(1, &nthreads, NULL, false))
return false; return false;

@ -317,8 +317,18 @@ public:
variance.copyTo(umat_variance); variance.copyTo(umat_variance);
int real_numPriors = _numPriors >> (_offsetsX.size() - 1); int real_numPriors = _numPriors >> (_offsetsX.size() - 1);
if (_scales.empty())
{
_scales.resize(real_numPriors, 1.0f);
umat_scales = UMat(1, &real_numPriors, CV_32F, 1.0f); umat_scales = UMat(1, &real_numPriors, CV_32F, 1.0f);
} }
else
{
CV_Assert(_scales.size() == real_numPriors);
Mat scales(1, _scales.size(), CV_32FC1, &_scales[0]);
scales.copyTo(umat_scales);
}
}
size_t nthreads = _layerHeight * _layerWidth; size_t nthreads = _layerHeight * _layerWidth;

@ -51,6 +51,7 @@ __kernel void DecodeBBoxesCORNER(const int nthreads,
const int num_loc_classes, const int num_loc_classes,
const int background_label_id, const int background_label_id,
const int clip_bbox, const int clip_bbox,
const int locPredTransposed,
__global Dtype* bbox_data) __global Dtype* bbox_data)
{ {
for (int index = get_global_id(0); index < nthreads; index += get_global_size(0)) for (int index = get_global_id(0); index < nthreads; index += get_global_size(0))
@ -75,10 +76,18 @@ __kernel void DecodeBBoxesCORNER(const int nthreads,
bbox_vec = loc_vec * prior_variance; bbox_vec = loc_vec * prior_variance;
} }
if (locPredTransposed)
{
bbox_ymin = bbox_vec.x;
bbox_xmin = bbox_vec.y;
bbox_ymax = bbox_vec.z;
bbox_xmax = bbox_vec.w;
} else {
bbox_xmin = bbox_vec.x; bbox_xmin = bbox_vec.x;
bbox_ymin = bbox_vec.y; bbox_ymin = bbox_vec.y;
bbox_xmax = bbox_vec.z; bbox_xmax = bbox_vec.z;
bbox_ymax = bbox_vec.w; bbox_ymax = bbox_vec.w;
}
Dtype4 prior_vec = vload4(0, prior_data + p); Dtype4 prior_vec = vload4(0, prior_data + p);
Dtype val; Dtype val;
@ -114,6 +123,7 @@ __kernel void DecodeBBoxesCENTER_SIZE(const int nthreads,
const int num_loc_classes, const int num_loc_classes,
const int background_label_id, const int background_label_id,
const int clip_bbox, const int clip_bbox,
const int locPredTransposed,
__global Dtype* bbox_data) __global Dtype* bbox_data)
{ {
for (int index = get_global_id(0); index < nthreads; index += get_global_size(0)) for (int index = get_global_id(0); index < nthreads; index += get_global_size(0))
@ -138,10 +148,18 @@ __kernel void DecodeBBoxesCENTER_SIZE(const int nthreads,
bbox_vec = loc_vec * prior_variance; bbox_vec = loc_vec * prior_variance;
} }
if (locPredTransposed)
{
bbox_ymin = bbox_vec.x;
bbox_xmin = bbox_vec.y;
bbox_ymax = bbox_vec.z;
bbox_xmax = bbox_vec.w;
} else {
bbox_xmin = bbox_vec.x; bbox_xmin = bbox_vec.x;
bbox_ymin = bbox_vec.y; bbox_ymin = bbox_vec.y;
bbox_xmax = bbox_vec.z; bbox_xmax = bbox_vec.z;
bbox_ymax = bbox_vec.w; bbox_ymax = bbox_vec.w;
}
Dtype4 prior_vec = vload4(0, prior_data + p); Dtype4 prior_vec = vload4(0, prior_data + p);
Dtype prior_width = prior_vec.z - prior_vec.x; Dtype prior_width = prior_vec.z - prior_vec.x;

Loading…
Cancel
Save