Merge pull request #21025 from alalek:issue_21004

* dnn(ocl4dnn): fix LRN layer accuracy problems

- FP16 intermediate computation is not accurate and may produce NaN values (see the overflow sketch below the file list)

* dnn(test): update tolerance for FP16
Author: Alexander Alekhin (committed via GitHub, 3 years ago)
Commit: 8041ab8a61 (parent: cb286a66be)
Files changed (2):
  modules/dnn/src/opencl/ocl4dnn_lrn.cl    (25 changed lines)
  modules/dnn/test/test_caffe_importer.cpp (2 changed lines)
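
Why FP16 intermediates fail here: half precision has a maximum finite value of 65504, so a single squared activation above roughly 256 already overflows to +inf, and a long sum of squares across channels can overflow even when every individual value is modest. The kernel's sliding window later subtracts a previously added square, and inf - inf yields NaN. A minimal sketch of this failure mode, assuming a compiler that supports the _Float16 extension (recent GCC or Clang); this is an illustration, not code from the patch:

#include <cstdio>

int main()
{
    // FP16 max finite value is 65504; 260^2 = 67600 overflows to +inf.
    _Float16 v  = (_Float16)260.0f;
    _Float16 sq = v * v;     // rounds to +inf when stored in FP16
    _Float16 accum_scale = 0;
    accum_scale += sq;       // accumulator is now +inf
    accum_scale -= sq;       // inf - inf = NaN, as in the sliding-window update
    std::printf("fp16: sq=%g accum_scale=%g\n",
                (double)(float)sq, (double)(float)accum_scale);

    float fv = 260.0f;       // the same math stays finite in fp32
    std::printf("fp32: sq=%g\n", (double)(fv * fv));
    return 0;
}

This is why the patch below moves the accumulator and the scale computation to float and only casts the final result back to Dtype.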

@@ -64,36 +64,37 @@ __kernel void TEMPLATE(lrn_full_no_scale,Dtype)(const int nthreads, __global con
     const int step = height * width;
     __global const Dtype* in_off = in + offset;
     __global Dtype* out_off = out + offset;
-    KERNEL_ARG_DTYPE scale_val;
     int head = 0;
     const int pre_pad = (size - 1) / 2;
     const int post_pad = size - pre_pad - 1;
-    KERNEL_ARG_DTYPE accum_scale = 0;
+    float accum_scale = 0;
     // fill the scale at [n, :, h, w]
     // accumulate values
     while (head < post_pad && head < channels) {
-      accum_scale += in_off[head * step] * in_off[head * step];
+      float v = in_off[head * step];
+      accum_scale += v * v;
       ++head;
     }
     // both add and subtract
     while (head < channels) {
-      accum_scale += in_off[head * step] * in_off[head * step];
+      float v = in_off[head * step];
+      accum_scale += v * v;
       if (head - size >= 0) {
-        accum_scale -= in_off[(head - size) * step]
-                       * in_off[(head - size) * step];
+        v = in_off[(head - size) * step];
+        accum_scale -= v * v;
       }
-      scale_val = k + accum_scale * alpha_over_size;
-      out_off[(head - post_pad) * step] = in_off[(head - post_pad) * step] * (Dtype)native_powr(scale_val, negative_beta);
+      float scale_val = k + accum_scale * alpha_over_size;
+      out_off[(head - post_pad) * step] = (Dtype)((float)in_off[(head - post_pad) * step] * native_powr(scale_val, negative_beta));
       ++head;
     }
     // subtract only
     while (head < channels + post_pad) {
       if (head - size >= 0) {
-        accum_scale -= in_off[(head - size) * step]
-                       * in_off[(head - size) * step];
+        float v = in_off[(head - size) * step];
+        accum_scale -= v * v;
       }
-      scale_val = k + accum_scale * alpha_over_size;
-      out_off[(head - post_pad) * step] = in_off[(head - post_pad) * step] * (Dtype)native_powr(scale_val, negative_beta);
+      float scale_val = k + accum_scale * alpha_over_size;
+      out_off[(head - post_pad) * step] = (Dtype)((float)in_off[(head - post_pad) * step] * native_powr(scale_val, negative_beta));
       ++head;
     }
   }
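
For context, this kernel computes Caffe-style cross-channel LRN: out[c] = in[c] * (k + alpha/size * sum of in[c']^2 over a window of size channels around c)^(-beta), maintaining the window sum incrementally instead of recomputing it per channel. A host-side C++ sketch of the same sliding-window logic with the float accumulator (a hypothetical reference helper, not part of the patch; std::pow stands in for native_powr):

#include <cmath>
#include <vector>

// Hypothetical reference: cross-channel LRN over one (n, h, w) column,
// mirroring the patched kernel's sliding-window float accumulator.
// Assumes channels >= post_pad, as the kernel does for typical LRN sizes.
static void lrnAcrossChannels(const std::vector<float>& in, std::vector<float>& out,
                              int size, float k, float alpha, float beta)
{
    const int channels = (int)in.size();
    const int pre_pad  = (size - 1) / 2;
    const int post_pad = size - pre_pad - 1;
    const float alpha_over_size = alpha / size;

    out.assign(channels, 0.f);
    float accum_scale = 0.f;
    int head = 0;
    // accumulate values (leading edge of the window)
    while (head < post_pad && head < channels) {
        float v = in[head];
        accum_scale += v * v;
        ++head;
    }
    // both add and subtract (window fully sliding)
    while (head < channels) {
        float v = in[head];
        accum_scale += v * v;
        if (head - size >= 0) {
            v = in[head - size];
            accum_scale -= v * v;
        }
        float scale_val = k + accum_scale * alpha_over_size;
        out[head - post_pad] = in[head - post_pad] * std::pow(scale_val, -beta);
        ++head;
    }
    // subtract only (trailing edge of the window)
    while (head < channels + post_pad) {
        if (head - size >= 0) {
            float v = in[head - size];
            accum_scale -= v * v;
        }
        float scale_val = k + accum_scale * alpha_over_size;
        out[head - post_pad] = in[head - post_pad] * std::pow(scale_val, -beta);
        ++head;
    }
}

Keeping accum_scale, v, and scale_val in float mirrors the patched kernel; on an FP16 target only the stored output is converted back to half.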

@@ -198,7 +198,7 @@ TEST_P(Reproducibility_AlexNet, Accuracy)
     ASSERT_EQ(inLayerShapes[0][3], 227);
     const float l1 = 1e-5;
-    const float lInf = (targetId == DNN_TARGET_OPENCL_FP16) ? 3e-3 : 1e-4;
+    const float lInf = (targetId == DNN_TARGET_OPENCL_FP16) ? 4e-3 : 1e-4;
     net.setPreferableBackend(DNN_BACKEND_OPENCV);
     net.setPreferableTarget(targetId);
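
The l1/lInf values feed the dnn test suite's blob comparison, which bounds the mean absolute and maximum absolute differences between reference and computed outputs. Even with float intermediates, the FP16 path still rounds inputs and outputs to half, which is presumably why the max-error bound is raised from 3e-3 to 4e-3 rather than tightened. A standalone sketch of that style of check (hypothetical helper, not the actual normAssert from the test harness):

#include <algorithm>
#include <cassert>
#include <cmath>
#include <cstdio>
#include <vector>

// Hypothetical analogue of the test's tolerance check:
// bound the mean absolute (l1) and max absolute (lInf) differences.
static bool checkTolerance(const std::vector<float>& ref,
                           const std::vector<float>& out,
                           float l1, float lInf)
{
    assert(ref.size() == out.size() && !ref.empty());
    double sumAbs = 0.0, maxAbs = 0.0;
    for (size_t i = 0; i < ref.size(); ++i)
    {
        double d = std::fabs((double)ref[i] - (double)out[i]);
        sumAbs += d;
        maxAbs = std::max(maxAbs, d);
    }
    double meanAbs = sumAbs / (double)ref.size();
    std::printf("l1=%g (<=%g)  lInf=%g (<=%g)\n",
                meanAbs, (double)l1, maxAbs, (double)lInf);
    return meanAbs <= (double)l1 && maxAbs <= (double)lInf;
}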
