From fd0941356627794efc43ceff5ce6116b871d369a Mon Sep 17 00:00:00 2001 From: Alexander Alekhin Date: Wed, 4 Mar 2020 22:28:04 +0300 Subject: [PATCH] Merge pull request #16731 from alalek:issue_16708 * imgproc(integral): avoid OOB access * imgproc(test): fix integral perf check - FP32 computation is not accurate * imgproc(integral): tune loop limits --- modules/imgproc/perf/perf_integral.cpp | 15 ++++++++++++++- modules/imgproc/src/sumpixels.simd.hpp | 18 +++++++++++++++--- 2 files changed, 29 insertions(+), 4 deletions(-) diff --git a/modules/imgproc/perf/perf_integral.cpp b/modules/imgproc/perf/perf_integral.cpp index 59a5060937..2b1ab381e7 100644 --- a/modules/imgproc/perf/perf_integral.cpp +++ b/modules/imgproc/perf/perf_integral.cpp @@ -39,10 +39,23 @@ PERF_TEST_P(Size_MatType_OutMatDepth, integral, Mat sum(sz, sdepth); declare.in(src, WARMUP_RNG).out(sum); + if (sdepth == CV_32F) + src *= (1 << 23) / (double)(sz.area() * 256); // FP32 calculations are not accurate (mantissa is 23-bit) TEST_CYCLE() integral(src, sum, sdepth); - SANITY_CHECK(sum, 1e-6); + Mat src_roi; src(Rect(src.cols - 4, src.rows - 4, 4, 4)).convertTo(src_roi, sdepth); + Mat restored_src_roi = + sum(Rect(sum.cols - 4, sum.rows - 4, 4, 4)) + sum(Rect(sum.cols - 5, sum.rows - 5, 4, 4)) - + sum(Rect(sum.cols - 4, sum.rows - 5, 4, 4)) - sum(Rect(sum.cols - 5, sum.rows - 4, 4, 4)); + EXPECT_EQ(0, cvtest::norm(restored_src_roi, src_roi, NORM_INF)) + << src_roi << endl << restored_src_roi << endl + << sum(Rect(sum.cols - 4, sum.rows - 4, 4, 4)); + + if (sdepth == CV_32F) + SANITY_CHECK_NOTHING(); + else + SANITY_CHECK(sum, 1e-6); } PERF_TEST_P(Size_MatType_OutMatDepth, integral_sqsum, diff --git a/modules/imgproc/src/sumpixels.simd.hpp b/modules/imgproc/src/sumpixels.simd.hpp index 2ac02a0c3c..f5f3a92d85 100644 --- a/modules/imgproc/src/sumpixels.simd.hpp +++ b/modules/imgproc/src/sumpixels.simd.hpp @@ -237,7 +237,11 @@ struct Integral_SIMD v_int32 prev_1 = vx_setzero_s32(), prev_2 = vx_setzero_s32(), prev_3 = vx_setzero_s32(); int j = 0; - for ( ; j + v_uint16::nlanes * cn <= width; j += v_uint16::nlanes * cn) + const int j_max = + ((_srcstep * i + (width - v_uint16::nlanes * cn + v_uint8::nlanes * cn)) >= _srcstep * height) + ? width - v_uint8::nlanes * cn // uint8 in v_load_deinterleave() + : width - v_uint16::nlanes * cn; // v_expand_low + for ( ; j <= j_max; j += v_uint16::nlanes * cn) { v_uint8 v_src_row_1, v_src_row_2, v_src_row_3; v_load_deinterleave(src_row + j, v_src_row_1, v_src_row_2, v_src_row_3); @@ -546,7 +550,11 @@ struct Integral_SIMD v_float32 prev_1 = vx_setzero_f32(), prev_2 = vx_setzero_f32(), prev_3 = vx_setzero_f32(); int j = 0; - for (; j + v_uint16::nlanes * cn <= width; j += v_uint16::nlanes * cn) + const int j_max = + ((_srcstep * i + (width - v_uint16::nlanes * cn + v_uint8::nlanes * cn)) >= _srcstep * height) + ? width - v_uint8::nlanes * cn // uint8 in v_load_deinterleave() + : width - v_uint16::nlanes * cn; // v_expand_low + for ( ; j <= j_max; j += v_uint16::nlanes * cn) { v_uint8 v_src_row_1, v_src_row_2, v_src_row_3; v_load_deinterleave(src_row + j, v_src_row_1, v_src_row_2, v_src_row_3); @@ -896,7 +904,11 @@ struct Integral_SIMD v_float64 prev_1 = vx_setzero_f64(), prev_2 = vx_setzero_f64(), prev_3 = vx_setzero_f64(); int j = 0; - for (; j + v_uint16::nlanes * cn <= width; j += v_uint16::nlanes * cn) + const int j_max = + ((_srcstep * i + (width - v_uint16::nlanes * cn + v_uint8::nlanes * cn)) >= _srcstep * height) + ? width - v_uint8::nlanes * cn // uint8 in v_load_deinterleave() + : width - v_uint16::nlanes * cn; // v_expand_low + for ( ; j <= j_max; j += v_uint16::nlanes * cn) { v_uint8 v_src_row_1, v_src_row_2, v_src_row_3; v_load_deinterleave(src_row + j, v_src_row_1, v_src_row_2, v_src_row_3);