From 1b01e1fe6830cb485c0a82d545c362f623c798d2 Mon Sep 17 00:00:00 2001
From: Ilya Lavrenov
Date: Sun, 12 Oct 2014 10:45:46 -0700
Subject: [PATCH] cv::resize (INTER_AREA CV_16S, CV_32F)

---
 modules/imgproc/src/imgwarp.cpp       | 112 +++++++++++++++++++++++++-
 modules/imgproc/test/test_imgwarp.cpp |  43 +++++++---
 2 files changed, 144 insertions(+), 11 deletions(-)

Reviewer notes (not part of the commit message; free-form text between the
separator above and the first file header below is skipped when applying):

 * This copy of the patch had been flattened onto a few very long lines.
   The line structure was restored so that every hunk matches its header
   counts and the diffstat above (110 insertions / 2 deletions in imgwarp.cpp,
   34 insertions / 9 deletions in test_imgwarp.cpp).
 * TODO(review): every angle-bracket template argument list was missing from
   the flattened copy and has been re-inserted from the surrounding code.
   The ones on added lines are constrained by the code around them; the ones
   on removed and context lines (the old areafast_tab entries, the old
   resizeArea calls, "template <...>" headers) could not be cross-checked.
   Indentation and the position of blank context lines (including the blank
   context line that closes the last hunk) are likewise reconstructed.
   Compare against the upstream commit before applying.
 * TODO(review): the author's e-mail address is missing from the "From:"
   header above and was not guessed.
 * ResizeAreaFastVec_SIMD_16s: for cn == 1 it writes 8 outputs per iteration,
   for cn == 4 it writes 4 (one pixel); each output is (a + b + c + d + 2) >> 2
   over a 2x2 source neighbourhood. Any other cn returns 0 elements processed.
   The class itself does not look at the scale factors - presumably the
   wrapping ResizeAreaFastVec does; confirm against the caller.
 * ResizeAreaFastVec_SIMD_32f: returns 0 unless scale_x == 2, scale_y == 2 and
   cn is 1, 3 or 4; results are (a + b + c + d) * 0.25f. cn == 3 passes the
   fast_mode test but has no vector branch, so it also returns 0.
 * Test: image size grows from 10 to 1000, CV_16S/CV_32F types (1 and 4
   channels) are added, input range becomes [-1000, 1000), and the comparison
   tolerance is 0 for depth <= CV_32S and 5e-5 otherwise.

diff --git a/modules/imgproc/src/imgwarp.cpp b/modules/imgproc/src/imgwarp.cpp
index b8833a9b81..c19707d76a 100644
--- a/modules/imgproc/src/imgwarp.cpp
+++ b/modules/imgproc/src/imgwarp.cpp
@@ -1645,6 +1645,107 @@ private:
     int cn, step;
 };
 
+class ResizeAreaFastVec_SIMD_16s
+{
+public:
+    ResizeAreaFastVec_SIMD_16s(int _cn, int _step) :
+        cn(_cn), step(_step)
+    {
+    }
+
+    int operator() (const short * S, short * D, int w) const
+    {
+        int dx = 0;
+        const short * S0 = S, * S1 = (const short *)((const uchar *)(S0) + step);
+
+        int32x4_t v_2 = vdupq_n_s32(2);
+
+        if (cn == 1)
+        {
+            for ( ; dx <= w - 8; dx += 8, S0 += 16, S1 += 16, D += 8)
+            {
+                int16x8x2_t v_row0 = vld2q_s16(S0), v_row1 = vld2q_s16(S1);
+
+                int32x4_t v_dst0 = vaddl_s16(vget_low_s16(v_row0.val[0]), vget_low_s16(v_row0.val[1]));
+                v_dst0 = vaddq_s32(v_dst0, vaddl_s16(vget_low_s16(v_row1.val[0]), vget_low_s16(v_row1.val[1])));
+                v_dst0 = vshrq_n_s32(vaddq_s32(v_dst0, v_2), 2);
+
+                int32x4_t v_dst1 = vaddl_s16(vget_high_s16(v_row0.val[0]), vget_high_s16(v_row0.val[1]));
+                v_dst1 = vaddq_s32(v_dst1, vaddl_s16(vget_high_s16(v_row1.val[0]), vget_high_s16(v_row1.val[1])));
+                v_dst1 = vshrq_n_s32(vaddq_s32(v_dst1, v_2), 2);
+
+                vst1q_s16(D, vcombine_s16(vmovn_s32(v_dst0), vmovn_s32(v_dst1)));
+            }
+        }
+        else if (cn == 4)
+        {
+            for ( ; dx <= w - 4; dx += 4, S0 += 8, S1 += 8, D += 4)
+            {
+                int16x8_t v_row0 = vld1q_s16(S0), v_row1 = vld1q_s16(S1);
+                int32x4_t v_dst = vaddq_s32(vaddl_s16(vget_low_s16(v_row0), vget_high_s16(v_row0)),
+                                            vaddl_s16(vget_low_s16(v_row1), vget_high_s16(v_row1)));
+                vst1_s16(D, vmovn_s32(vshrq_n_s32(vaddq_s32(v_dst, v_2), 2)));
+            }
+        }
+
+        return dx;
+    }
+
+private:
+    int cn, step;
+};
+
+struct ResizeAreaFastVec_SIMD_32f
+{
+    ResizeAreaFastVec_SIMD_32f(int _scale_x, int _scale_y, int _cn, int _step) :
+        scale_x(_scale_x), scale_y(_scale_y), cn(_cn), step(_step)
+    {
+        fast_mode = scale_x == 2 && scale_y == 2 && (cn == 1 || cn == 3 || cn == 4);
+    }
+
+    int operator() (const float * S, float * D, int w) const
+    {
+        if (!fast_mode)
+            return 0;
+
+        const float * S0 = S, * S1 = (const float *)((const uchar *)(S0) + step);
+        int dx = 0;
+
+        float32x4_t v_025 = vdupq_n_f32(0.25f);
+
+        if (cn == 1)
+        {
+            for ( ; dx <= w - 4; dx += 4, S0 += 8, S1 += 8, D += 4)
+            {
+                float32x4x2_t v_row0 = vld2q_f32(S0), v_row1 = vld2q_f32(S1);
+
+                float32x4_t v_dst0 = vaddq_f32(v_row0.val[0], v_row0.val[1]);
+                float32x4_t v_dst1 = vaddq_f32(v_row1.val[0], v_row1.val[1]);
+
+                vst1q_f32(D, vmulq_f32(vaddq_f32(v_dst0, v_dst1), v_025));
+            }
+        }
+        else if (cn == 4)
+        {
+            for ( ; dx <= w - 4; dx += 4, S0 += 8, S1 += 8, D += 4)
+            {
+                float32x4_t v_dst0 = vaddq_f32(vld1q_f32(S0), vld1q_f32(S0 + 4));
+                float32x4_t v_dst1 = vaddq_f32(vld1q_f32(S1), vld1q_f32(S1 + 4));
+
+                vst1q_f32(D, vmulq_f32(vaddq_f32(v_dst0, v_dst1), v_025));
+            }
+        }
+
+        return dx;
+    }
+
+private:
+    int scale_x, scale_y;
+    int cn;
+    bool fast_mode;
+    int step;
+};
+
 #elif CV_SSE2
 
 class ResizeAreaFastVec_SIMD_8u
@@ -1834,9 +1935,16 @@ private:
     bool use_simd;
 };
 
+typedef ResizeAreaFastNoVec<short, short> ResizeAreaFastVec_SIMD_16s;
+typedef ResizeAreaFastNoVec<float, float> ResizeAreaFastVec_SIMD_32f;
+
 #else
+
 typedef ResizeAreaFastNoVec<uchar, uchar> ResizeAreaFastVec_SIMD_8u;
 typedef ResizeAreaFastNoVec<ushort, ushort> ResizeAreaFastVec_SIMD_16u;
+typedef ResizeAreaFastNoVec<short, short> ResizeAreaFastVec_SIMD_16s;
+typedef ResizeAreaFastNoVec<float, float> ResizeAreaFastVec_SIMD_32f;
+
 #endif
 
 template<typename T, typename SIMDVecOp>
@@ -2679,9 +2787,9 @@ void cv::resize( InputArray _src, OutputArray _dst, Size dsize,
             resizeAreaFast_<uchar, int, ResizeAreaFastVec<uchar, ResizeAreaFastVec_SIMD_8u> >,
             0,
             resizeAreaFast_<ushort, float, ResizeAreaFastVec<ushort, ResizeAreaFastVec_SIMD_16u> >,
-            resizeAreaFast_<short, float, ResizeAreaFastVec<short, ResizeAreaFastNoVec<short, short> > >,
+            resizeAreaFast_<short, float, ResizeAreaFastVec<short, ResizeAreaFastVec_SIMD_16s> >,
             0,
-            resizeAreaFast_<float, float, ResizeAreaFastNoVec<float, float> >,
+            resizeAreaFast_<float, float, ResizeAreaFastVec_SIMD_32f>,
             resizeAreaFast_<double, double, ResizeAreaFastNoVec<double, double> >,
             0
         };
diff --git a/modules/imgproc/test/test_imgwarp.cpp b/modules/imgproc/test/test_imgwarp.cpp
index eb13d35402..71c932ae16 100644
--- a/modules/imgproc/test/test_imgwarp.cpp
+++ b/modules/imgproc/test/test_imgwarp.cpp
@@ -1548,9 +1548,28 @@ TEST(Imgproc_GetQuadSubPix, accuracy) { CV_GetQuadSubPixTest test; test.safe_run
 //////////////////////////////////////////////////////////////////////////
 
 template <typename T, typename WT>
+struct IntCast
+{
+    T operator() (WT val) const
+    {
+        return cv::saturate_cast<T>(val >> 2);
+    }
+};
+
+template <typename T, typename WT>
+struct FltCast
+{
+    T operator() (WT val) const
+    {
+        return cv::saturate_cast<T>(val * 0.25);
+    }
+};
+
+template <typename T, typename WT, int one, typename CastOp>
 void resizeArea(const cv::Mat & src, cv::Mat & dst)
 {
     int cn = src.channels();
+    CastOp castOp;
 
     for (int y = 0; y < dst.rows; ++y)
     {
@@ -1565,9 +1584,9 @@ void resizeArea(const cv::Mat & src, cv::Mat & dst)
             for (int c = 0; c < cn; ++c)
             {
                 WT sum = WT(sptr0[x1 + c]) + WT(sptr0[x1 + c + cn]);
-                sum += WT(sptr1[x1 + c]) + WT(sptr1[x1 + c + cn]) + (WT)(2);
+                sum += WT(sptr1[x1 + c]) + WT(sptr1[x1 + c + cn]) + (WT)(one);
 
-                dptr[x + c] = cv::saturate_cast<T>(sum >> 2);
+                dptr[x + c] = castOp(sum);
             }
         }
     }
@@ -1575,32 +1594,38 @@ void resizeArea(const cv::Mat & src, cv::Mat & dst)
 
 TEST(Resize, Area_half)
 {
-    const int size = 10;
-    int types[] = { CV_8UC1, CV_8UC4, CV_16UC1, CV_16UC4 };
+    const int size = 1000;
+    int types[] = { CV_8UC1, CV_8UC4, CV_16UC1, CV_16UC4, CV_16SC1, CV_16SC4, CV_32FC1, CV_32FC4 };
 
     cv::RNG rng(17);
 
     for (int i = 0, _size = sizeof(types) / sizeof(types[0]); i < _size; ++i)
     {
-        int type = types[i], depth = CV_MAT_DEPTH(type);
+        int type = types[i], depth = CV_MAT_DEPTH(type), cn = CV_MAT_CN(type);
+        const float eps = depth <= CV_32S ? 0 : 5e-5;
 
         SCOPED_TRACE(depth);
+        SCOPED_TRACE(cn);
 
         cv::Mat src(size, size, type), dst_actual(size >> 1, size >> 1, type),
             dst_reference(size >> 1, size >> 1, type);
 
-        rng.fill(src, cv::RNG::UNIFORM, 0, 1000, true);
+        rng.fill(src, cv::RNG::UNIFORM, -1000, 1000, true);
 
         if (depth == CV_8U)
-            resizeArea<uchar, ushort>(src, dst_reference);
+            resizeArea<uchar, ushort, 2, IntCast<uchar, ushort> >(src, dst_reference);
         else if (depth == CV_16U)
-            resizeArea<ushort, uint>(src, dst_reference);
+            resizeArea<ushort, uint, 2, IntCast<ushort, uint> >(src, dst_reference);
+        else if (depth == CV_16S)
+            resizeArea<short, int, 2, IntCast<short, int> >(src, dst_reference);
+        else if (depth == CV_32F)
+            resizeArea<float, float, 0, FltCast<float, float> >(src, dst_reference);
         else
             CV_Assert(0);
 
         cv::resize(src, dst_actual, dst_actual.size(), 0, 0, cv::INTER_AREA);
 
-        ASSERT_EQ(0, cvtest::norm(dst_reference, dst_actual, cv::NORM_INF));
+        ASSERT_GE(eps, cvtest::norm(dst_reference, dst_actual, cv::NORM_INF));
     }
 }
 