Merge pull request #16731 from alalek:issue_16708

* imgproc(integral): avoid OOB access
* imgproc(test): fix integral perf check
  - FP32 computation is not accurate
* imgproc(integral): tune loop limits
5 years ago · fd09413566
parent da6ad1c640
commit fd09413566
2 changed files with 29 additions and 4 deletions
--- a/modules/imgproc/perf/perf_integral.cpp
+++ b/modules/imgproc/perf/perf_integral.cpp
@@ -39,9 +39,22 @@ PERF_TEST_P(Size_MatType_OutMatDepth, integral,
    Mat sum(sz, sdepth);

    declare.in(src, WARMUP_RNG).out(sum);
+    if (sdepth == CV_32F)
+        src *= (1 << 23) / (double)(sz.area() * 256);  // FP32 calculations are not accurate (mantissa is 23-bit)

    TEST_CYCLE() integral(src, sum, sdepth);

+    Mat src_roi; src(Rect(src.cols - 4, src.rows - 4, 4, 4)).convertTo(src_roi, sdepth);
+    Mat restored_src_roi =
+           sum(Rect(sum.cols - 4, sum.rows - 4, 4, 4)) + sum(Rect(sum.cols - 5, sum.rows - 5, 4, 4)) -
+           sum(Rect(sum.cols - 4, sum.rows - 5, 4, 4)) - sum(Rect(sum.cols - 5, sum.rows - 4, 4, 4));
+    EXPECT_EQ(0, cvtest::norm(restored_src_roi, src_roi, NORM_INF))
+        << src_roi << endl << restored_src_roi << endl
+        << sum(Rect(sum.cols - 4, sum.rows - 4, 4, 4));
+
+    if (sdepth == CV_32F)
+        SANITY_CHECK_NOTHING();
+    else
        SANITY_CHECK(sum, 1e-6);
 }

--- a/modules/imgproc/src/sumpixels.simd.hpp
+++ b/modules/imgproc/src/sumpixels.simd.hpp
@@ -237,7 +237,11 @@ struct Integral_SIMD<uchar, int, double>
                v_int32 prev_1 = vx_setzero_s32(), prev_2 = vx_setzero_s32(),
                        prev_3 = vx_setzero_s32();
                int j = 0;
-                for ( ; j + v_uint16::nlanes * cn <= width; j += v_uint16::nlanes * cn)
+                const int j_max =
+                        ((_srcstep * i + (width - v_uint16::nlanes * cn + v_uint8::nlanes * cn)) >= _srcstep * height)
+                        ? width - v_uint8::nlanes * cn    // uint8 in v_load_deinterleave()
+                        : width - v_uint16::nlanes * cn;  // v_expand_low
+                for ( ; j <= j_max; j += v_uint16::nlanes * cn)
                {
                    v_uint8 v_src_row_1, v_src_row_2, v_src_row_3;
                    v_load_deinterleave(src_row + j, v_src_row_1, v_src_row_2, v_src_row_3);
@@ -546,7 +550,11 @@ struct Integral_SIMD<uchar, float, double>
                v_float32 prev_1 = vx_setzero_f32(), prev_2 = vx_setzero_f32(),
                          prev_3 = vx_setzero_f32();
                int j = 0;
-                for (; j + v_uint16::nlanes * cn <= width; j += v_uint16::nlanes * cn)
+                const int j_max =
+                        ((_srcstep * i + (width - v_uint16::nlanes * cn + v_uint8::nlanes * cn)) >= _srcstep * height)
+                        ? width - v_uint8::nlanes * cn    // uint8 in v_load_deinterleave()
+                        : width - v_uint16::nlanes * cn;  // v_expand_low
+                for ( ; j <= j_max; j += v_uint16::nlanes * cn)
                {
                    v_uint8 v_src_row_1, v_src_row_2, v_src_row_3;
                    v_load_deinterleave(src_row + j, v_src_row_1, v_src_row_2, v_src_row_3);
@@ -896,7 +904,11 @@ struct Integral_SIMD<uchar, double, double>
                v_float64 prev_1 = vx_setzero_f64(), prev_2 = vx_setzero_f64(),
                          prev_3 = vx_setzero_f64();
                int j = 0;
-                for (; j + v_uint16::nlanes * cn <= width; j += v_uint16::nlanes * cn)
+                const int j_max =
+                        ((_srcstep * i + (width - v_uint16::nlanes * cn + v_uint8::nlanes * cn)) >= _srcstep * height)
+                        ? width - v_uint8::nlanes * cn    // uint8 in v_load_deinterleave()
+                        : width - v_uint16::nlanes * cn;  // v_expand_low
+                for ( ; j <= j_max; j += v_uint16::nlanes * cn)
                {
                    v_uint8 v_src_row_1, v_src_row_2, v_src_row_3;
                    v_load_deinterleave(src_row + j, v_src_row_1, v_src_row_2, v_src_row_3);