diff --git a/modules/dnn/src/opencl/ocl4dnn_lrn.cl b/modules/dnn/src/opencl/ocl4dnn_lrn.cl index 31c9f49451..22370c7303 100644 --- a/modules/dnn/src/opencl/ocl4dnn_lrn.cl +++ b/modules/dnn/src/opencl/ocl4dnn_lrn.cl @@ -64,36 +64,37 @@ __kernel void TEMPLATE(lrn_full_no_scale,Dtype)(const int nthreads, __global con const int step = height * width; __global const Dtype* in_off = in + offset; __global Dtype* out_off = out + offset; - KERNEL_ARG_DTYPE scale_val; int head = 0; const int pre_pad = (size - 1) / 2; const int post_pad = size - pre_pad - 1; - KERNEL_ARG_DTYPE accum_scale = 0; + float accum_scale = 0; // fill the scale at [n, :, h, w] // accumulate values while (head < post_pad && head < channels) { - accum_scale += in_off[head * step] * in_off[head * step]; + float v = in_off[head * step]; + accum_scale += v * v; ++head; } // both add and subtract while (head < channels) { - accum_scale += in_off[head * step] * in_off[head * step]; + float v = in_off[head * step]; + accum_scale += v * v; if (head - size >= 0) { - accum_scale -= in_off[(head - size) * step] - * in_off[(head - size) * step]; + v = in_off[(head - size) * step]; + accum_scale -= v * v; } - scale_val = k + accum_scale * alpha_over_size; - out_off[(head - post_pad) * step] = in_off[(head - post_pad) * step] * (Dtype)native_powr(scale_val, negative_beta); + float scale_val = k + accum_scale * alpha_over_size; + out_off[(head - post_pad) * step] = (Dtype)((float)in_off[(head - post_pad) * step] * native_powr(scale_val, negative_beta)); ++head; } // subtract only while (head < channels + post_pad) { if (head - size >= 0) { - accum_scale -= in_off[(head - size) * step] - * in_off[(head - size) * step]; + float v = in_off[(head - size) * step]; + accum_scale -= v * v; } - scale_val = k + accum_scale * alpha_over_size; - out_off[(head - post_pad) * step] = in_off[(head - post_pad) * step] * (Dtype)native_powr(scale_val, negative_beta); + float scale_val = k + accum_scale * alpha_over_size; + out_off[(head - post_pad) * step] = (Dtype)((float)in_off[(head - post_pad) * step] * native_powr(scale_val, negative_beta)); ++head; } } diff --git a/modules/dnn/test/test_caffe_importer.cpp b/modules/dnn/test/test_caffe_importer.cpp index 91a68099ce..1f809f5b95 100644 --- a/modules/dnn/test/test_caffe_importer.cpp +++ b/modules/dnn/test/test_caffe_importer.cpp @@ -198,7 +198,7 @@ TEST_P(Reproducibility_AlexNet, Accuracy) ASSERT_EQ(inLayerShapes[0][3], 227); const float l1 = 1e-5; - const float lInf = (targetId == DNN_TARGET_OPENCL_FP16) ? 3e-3 : 1e-4; + const float lInf = (targetId == DNN_TARGET_OPENCL_FP16) ? 4e-3 : 1e-4; net.setPreferableBackend(DNN_BACKEND_OPENCV); net.setPreferableTarget(targetId);