diff --git a/modules/imgproc/src/pyramids.cpp b/modules/imgproc/src/pyramids.cpp
index 0261dd4de3..a5f8a71c2c 100644
--- a/modules/imgproc/src/pyramids.cpp
+++ b/modules/imgproc/src/pyramids.cpp
@@ -82,6 +82,8 @@ template<typename T1, typename T2> int PyrDownVecV(T1**, T2*, int) { return 0; }
 template<typename T1, typename T2> int PyrUpVecV(T1**, T2**, int) { return 0; }
 
+template<typename T1, typename T2> int PyrUpVecVOneRow(T1**, T2*, int) { return 0; }
+
 #if CV_SIMD
 
 template<> int PyrDownVecH<uchar, int, 1>(const uchar* src, int* row, int width)
@@ -717,6 +719,120 @@ template <> int PyrUpVecV<float, float>(float** src, float** dst, int width)
     return x;
 }
 
+template <> int PyrUpVecVOneRow<int, uchar>(int** src, uchar* dst, int width)
+{
+    int x = 0;
+    const int *row0 = src[0], *row1 = src[1], *row2 = src[2];
+
+    for( ; x <= width - v_uint8::nlanes; x += v_uint8::nlanes)
+    {
+        v_int16 v_r00 = v_pack(vx_load(row0 + x), vx_load(row0 + x + v_int32::nlanes)),
+                v_r01 = v_pack(vx_load(row0 + x + 2 * v_int32::nlanes), vx_load(row0 + x + 3 * v_int32::nlanes)),
+                v_r10 = v_pack(vx_load(row1 + x), vx_load(row1 + x + v_int32::nlanes)),
+                v_r11 = v_pack(vx_load(row1 + x + 2 * v_int32::nlanes), vx_load(row1 + x + 3 * v_int32::nlanes)),
+                v_r20 = v_pack(vx_load(row2 + x), vx_load(row2 + x + v_int32::nlanes)),
+                v_r21 = v_pack(vx_load(row2 + x + 2 * v_int32::nlanes), vx_load(row2 + x + 3 * v_int32::nlanes));
+        v_int16 v_2r10 = v_r10 + v_r10, v_2r11 = (v_r11 + v_r11);
+        v_store(dst + x, v_rshr_pack_u<6>(v_r00 + v_r20 + (v_2r10 + v_2r10 + v_2r10), v_r01 + v_r21 + (v_2r11 + v_2r11 + v_2r11)));
+    }
+    if(x <= width - v_uint16::nlanes)
+    {
+        v_int16 v_r00 = v_pack(vx_load(row0 + x), vx_load(row0 + x + v_int32::nlanes)),
+                v_r10 = v_pack(vx_load(row1 + x), vx_load(row1 + x + v_int32::nlanes)),
+                v_r20 = v_pack(vx_load(row2 + x), vx_load(row2 + x + v_int32::nlanes));
+        v_int16 v_2r10 = v_r10 + v_r10;
+        v_rshr_pack_u_store<6>(dst + x, v_r00 + v_r20 + (v_2r10 + v_2r10 + v_2r10));
+        x += v_uint16::nlanes;
+    }
+    typedef int CV_DECL_ALIGNED(1) unaligned_int;
+    for (; x <= width - v_int32x4::nlanes; x += v_int32x4::nlanes)
+    {
+        v_int32 v_r00 = vx_load(row0 + x),
+                v_r10 = vx_load(row1 + x),
+                v_r20 = vx_load(row2 + x);
+        v_int32 v_2r10 = v_r10 + v_r10;
+        v_int16 d = v_pack(v_r00 + v_r20 + (v_2r10 + v_2r10 + v_2r10), (v_r10 + v_r20) << 2);
+        *(unaligned_int*)(dst + x) = v_reinterpret_as_s32(v_rshr_pack_u<6>(d, vx_setzero_s16())).get0();
+    }
+    vx_cleanup();
+
+    return x;
+}
+
+template <> int PyrUpVecVOneRow<int, short>(int** src, short* dst, int width)
+{
+    int x = 0;
+    const int *row0 = src[0], *row1 = src[1], *row2 = src[2];
+
+    for( ; x <= width - v_int16::nlanes; x += v_int16::nlanes)
+    {
+        v_int32 v_r00 = vx_load(row0 + x),
+                v_r01 = vx_load(row0 + x + v_int32::nlanes),
+                v_r10 = vx_load(row1 + x),
+                v_r11 = vx_load(row1 + x + v_int32::nlanes),
+                v_r20 = vx_load(row2 + x),
+                v_r21 = vx_load(row2 + x + v_int32::nlanes);
+        v_store(dst + x, v_rshr_pack<6>(v_r00 + v_r20 + ((v_r10 << 1) + (v_r10 << 2)), v_r01 + v_r21 + ((v_r11 << 1) + (v_r11 << 2))));
+    }
+    if(x <= width - v_int32::nlanes)
+    {
+        v_int32 v_r00 = vx_load(row0 + x),
+                v_r10 = vx_load(row1 + x),
+                v_r20 = vx_load(row2 + x);
+        v_rshr_pack_store<6>(dst + x, v_r00 + v_r20 + ((v_r10 << 1) + (v_r10 << 2)));
+        x += v_int32::nlanes;
+    }
+    vx_cleanup();
+
+    return x;
+}
+
+template <> int PyrUpVecVOneRow<int, ushort>(int** src, ushort* dst, int width)
+{
+    int x = 0;
+    const int *row0 = src[0], *row1 = src[1], *row2 = src[2];
+
+    for( ; x <= width - v_uint16::nlanes; x += v_uint16::nlanes)
+    {
+        v_int32 v_r00 = vx_load(row0 + x),
+                v_r01 = vx_load(row0 + x + v_int32::nlanes),
+                v_r10 = vx_load(row1 + x),
+                v_r11 = vx_load(row1 + x + v_int32::nlanes),
+                v_r20 = vx_load(row2 + x),
+                v_r21 = vx_load(row2 + x + v_int32::nlanes);
+        v_store(dst + x, v_rshr_pack_u<6>(v_r00 + v_r20 + ((v_r10 << 1) + (v_r10 << 2)), v_r01 + v_r21 + ((v_r11 << 1) + (v_r11 << 2))));
+    }
+    if(x <= width - v_int32::nlanes)
+    {
+        v_int32 v_r00 = vx_load(row0 + x),
+                v_r10 = vx_load(row1 + x),
+                v_r20 = vx_load(row2 + x);
+        v_rshr_pack_u_store<6>(dst + x, v_r00 + v_r20 + ((v_r10 << 1) + (v_r10 << 2)));
+        x += v_int32::nlanes;
+    }
+    vx_cleanup();
+
+    return x;
+}
+
+template <> int PyrUpVecVOneRow<float, float>(float** src, float* dst, int width)
+{
+    int x = 0;
+    const float *row0 = src[0], *row1 = src[1], *row2 = src[2];
+
+    v_float32 v_6 = vx_setall_f32(6.0f), v_scale = vx_setall_f32(1.f/64.f);
+    for( ; x <= width - v_float32::nlanes; x += v_float32::nlanes)
+    {
+        v_float32 v_r0 = vx_load(row0 + x),
+                  v_r1 = vx_load(row1 + x),
+                  v_r2 = vx_load(row2 + x);
+        v_store(dst + x, v_scale * (v_muladd(v_6, v_r1, v_r0) + v_r2));
+    }
+    vx_cleanup();
+
+    return x;
+}
+
 #endif
 
 template<class CastOp> void
@@ -963,7 +1079,7 @@ pyrUp_( const Mat& _src, Mat& _dst, int)
             if (dsize.width > ssize.width*2)
             {
-                row[(_dst.cols-1) * cn + x] = row[dx + cn];
+                row[(_dst.cols-1) + x] = row[dx + cn];
             }
         }
 
@@ -983,12 +1099,24 @@ pyrUp_( const Mat& _src, Mat& _dst, int)
         row0 = rows[0]; row1 = rows[1]; row2 = rows[2];
         dsts[0] = dst0; dsts[1] = dst1;
 
-        x = PyrUpVecV(rows, dsts, dsize.width);
-        for( ; x < dsize.width; x++ )
+        if (dst0 != dst1)
         {
-            T t1 = castOp((row1[x] + row2[x])*4);
-            T t0 = castOp(row0[x] + row1[x]*6 + row2[x]);
-            dst1[x] = t1; dst0[x] = t0;
+            x = PyrUpVecV(rows, dsts, dsize.width);
+            for( ; x < dsize.width; x++ )
+            {
+                T t1 = castOp((row1[x] + row2[x])*4);
+                T t0 = castOp(row0[x] + row1[x]*6 + row2[x]);
+                dst1[x] = t1; dst0[x] = t0;
+            }
+        }
+        else
+        {
+            x = PyrUpVecVOneRow(rows, dst0, dsize.width);
+            for( ; x < dsize.width; x++ )
+            {
+                T t0 = castOp(row0[x] + row1[x]*6 + row2[x]);
+                dst0[x] = t0;
+            }
         }
     }
 
diff --git a/modules/imgproc/test/test_pyramid.cpp b/modules/imgproc/test/test_pyramid.cpp
index 343d7a2321..e02e5e343d 100644
--- a/modules/imgproc/test/test_pyramid.cpp
+++ b/modules/imgproc/test/test_pyramid.cpp
@@ -8,12 +8,41 @@ namespace opencv_test { namespace {
 
 TEST(Imgproc_PyrUp, pyrUp_regression_22184)
 {
-    Mat src(100, 100, CV_16UC3, Scalar::all(255));
-    Mat dst(100 * 2 + 1, 100 * 2 + 1, CV_16UC3, Scalar::all(0));
+    Mat src(100,100,CV_16UC3,Scalar(255,255,255));
+    Mat dst(100 * 2 + 1, 100 * 2 + 1, CV_16UC3, Scalar(0,0,0));
     pyrUp(src, dst, Size(dst.cols, dst.rows));
-    double min_val = 0;
+    double min_val;
     minMaxLoc(dst, &min_val);
     ASSERT_GT(cvRound(min_val), 0);
 }
 
-}} // namespace
+TEST(Imgproc_PyrUp, pyrUp_regression_22194)
+{
+    Mat src(13, 13, CV_16UC3, Scalar(0,0,0));
+    {
+        int swidth = src.cols;
+        int sheight = src.rows;
+        int cn = src.channels();
+        int count = 0;
+        for (int y = 0; y < sheight; y++)
+        {
+            ushort *src_c = src.ptr<ushort>(y);
+            for (int x = 0; x < swidth * cn; x++)
+            {
+                src_c[x] = (count++) % 10;
+            }
+        }
+    }
+    Mat dst(src.cols * 2 - 1, src.rows * 2 - 1, CV_16UC3, Scalar(0,0,0));
+    pyrUp(src, dst, Size(dst.cols, dst.rows));
+
+    {
+        ushort *dst_c = dst.ptr<ushort>(dst.rows - 1);
+        ASSERT_EQ(dst_c[0], 6);
+        ASSERT_EQ(dst_c[1], 6);
+        ASSERT_EQ(dst_c[2], 1);
+    }
+}
+
+}
+}
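
Note on the new one-row vertical kernels: when the two output row pointers alias (dst0 == dst1), which pyrUp_ can produce at the bottom border of the destination, the patched loop calls PyrUpVecVOneRow and finishes the remaining columns with castOp(row0[x] + row1[x]*6 + row2[x]), storing a single row instead of the interleaved even/odd pair handled by PyrUpVecV. Below is a minimal scalar sketch of that one-row pass for the int -> uchar case; the function and helper names are illustrative only (not OpenCV API), and the rounding shift mirrors the v_rshr_pack_u<6> used in the SIMD body above.

    // Scalar sketch of the single-row vertical pass that
    // PyrUpVecVOneRow<int, uchar> vectorizes. row0/row1/row2 are the
    // horizontally filtered rows prepared earlier in pyrUp_; only the
    // "even" result row0 + 6*row1 + row2 is written, the odd-row value
    // (row1 + row2)*4 is not needed when dst0 and dst1 are the same row.
    static inline unsigned char saturate_u8(int v)   // clamp to [0, 255]
    {
        return (unsigned char)(v < 0 ? 0 : (v > 255 ? 255 : v));
    }

    static void pyrUpOneRowScalar(const int* row0, const int* row1, const int* row2,
                                  unsigned char* dst0, int width)
    {
        for (int x = 0; x < width; x++)
        {
            int v = row0[x] + row1[x] * 6 + row2[x];
            // Rounding right shift by 6 with saturation, matching the
            // v_rshr_pack_u<6> fixed-point conversion in the SIMD code.
            dst0[x] = saturate_u8((v + (1 << 5)) >> 6);
        }
    }

The float specialization normalizes the same sum by multiplying with 1.f/64.f instead of shifting, as in the PyrUpVecVOneRow<float, float> body above.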