cv::resize area 2x

pull/3326/head
Ilya Lavrenov 10 years ago
parent 12001a42f9
commit c0b702a994
  1. 16
      modules/imgproc/src/canny.cpp
  2. 18
      modules/imgproc/src/corner.cpp
  3. 67
      modules/imgproc/src/imgwarp.cpp
  4. 48
      modules/imgproc/test/test_imgwarp.cpp

@ -365,8 +365,10 @@ void cv::Canny( InputArray _src, OutputArray _dst,
for ( ; j <= width - 8; j += 8)
{
int16x8_t v_dx = vld1q_s16(_dx + j), v_dy = vld1q_s16(_dy + j);
vst1q_s32(_norm + j, vaddq_s32(vmovl_s16(vget_low_s16(v_dx)), vmovl_s16(vget_low_s16(v_dy))));
vst1q_s32(_norm + j + 4, vaddq_s32(vmovl_s16(vget_high_s16(v_dx)), vmovl_s16(vget_high_s16(v_dy))));
vst1q_s32(_norm + j, vaddq_s32(vabsq_s32(vmovl_s16(vget_low_s16(v_dx))),
vabsq_s32(vmovl_s16(vget_low_s16(v_dy)))));
vst1q_s32(_norm + j + 4, vaddq_s32(vabsq_s32(vmovl_s16(vget_high_s16(v_dx))),
vabsq_s32(vmovl_s16(vget_high_s16(v_dy)))));
}
#endif
for ( ; j < width; ++j)
@ -397,13 +399,13 @@ void cv::Canny( InputArray _src, OutputArray _dst,
for ( ; j <= width - 8; j += 8)
{
int16x8_t v_dx = vld1q_s16(_dx + j), v_dy = vld1q_s16(_dy + j);
int32x4_t v_dxp = vmovl_s16(vget_low_s16(v_dx)), v_dyp = vmovl_s16(vget_low_s16(v_dy));
int32x4_t v_dst = vaddq_s32(vmulq_s32(v_dxp, v_dxp), vmulq_s32(v_dyp, v_dyp));
int16x4_t v_dxp = vget_low_s16(v_dx), v_dyp = vget_low_s16(v_dy);
int32x4_t v_dst = vmlal_s16(vmull_s16(v_dxp, v_dxp), v_dyp, v_dyp);
vst1q_s32(_norm + j, v_dst);
v_dxp = vmovl_s16(vget_high_s16(v_dx)), v_dyp = vmovl_s16(vget_high_s16(v_dy));
v_dst = vaddq_s32(vmulq_s32(v_dxp, v_dxp), vmulq_s32(v_dyp, v_dyp));
vst1q_s32(_norm + j, v_dst);
v_dxp = vget_high_s16(v_dx), v_dyp = vget_high_s16(v_dy);
v_dst = vmlal_s16(vmull_s16(v_dxp, v_dxp), v_dyp, v_dyp);
vst1q_s32(_norm + j + 4, v_dst);
}
#endif
for ( ; j < width; ++j)

@ -147,16 +147,15 @@ static void calcHarris( const Mat& _cov, Mat& _dst, double k )
}
}
#elif CV_NEON
float32x4_t v_k = vdupq_n_f32((float)k));
float32x4_t v_k = vdupq_n_f32((float)k);
for( ; j <= size.width - 4; j += 4 )
{
float32x4x3_t v_src = vld3q_f32(cov + j + 3);
float32x4_t v_a = v_src.val[0], v_b = v_src.val[1], v_c = v_src.val[2];
float32x4_t v_ac_bb = vsubq_f32(vmulq_f32(v_a, v_c), vmulq_f32(v_b, v_b));
float32x4_t v_ac_bb = vmlsq_f32(vmulq_f32(v_a, v_c), v_b, v_b);
float32x4_t v_ac = vaddq_f32(v_a, v_c);
float32x4_t v_prod = vmulq_f32(v_k, vmulq_f32(v_ac, v_ac));
vst1q_f32(dst + j, vsubq_f32(v_ac_bb, v_prod));
vst1q_f32(dst + j, vmlsq_f32(v_ac_bb, v_k, vmulq_f32(v_ac, v_ac)));
}
#endif
@ -619,10 +618,11 @@ void cv::preCornerDetect( InputArray _src, OutputArray _dst, int ksize, int bord
if( src.depth() == CV_8U )
factor *= 255;
factor = 1./(factor * factor * factor);
float factor_f = (float)factor;
#if CV_SSE2
volatile bool haveSSE2 = cv::checkHardwareSupport(CV_CPU_SSE2);
__m128 v_factor = _mm_set1_ps((float)factor), v_m2 = _mm_set1_ps(-2.0f);
__m128 v_factor = _mm_set1_ps(factor_f), v_m2 = _mm_set1_ps(-2.0f);
#endif
Size size = src.size();
@ -657,10 +657,10 @@ void cv::preCornerDetect( InputArray _src, OutputArray _dst, int ksize, int bord
for( ; j <= size.width - 4; j += 4 )
{
float32x4_t v_dx = vld1q_f32(dxdata + j), v_dy = vld1q_f32(dydata + j);
float32x4_t v_s1 = vmulq_f32(v_dx, vmulq_f32(v_dx, vld1q_f32(d2ydata + j)));
float32x4_t v_s2 = vmulq_f32(v_dy, vmulq_f32(v_dy, vld1q_f32(d2xdata + j)));
float32x4_t v_s3 = vmulq_f32(v_dx, vmulq_f32(v_dy, vld1q_f32(dxydata + j)));
vst1q_f32(dstdata + j, vaddq_f32(vaddq_f32(v_s1, v_s2), vmulq_n_f32(v_s3, -2.0f)));
float32x4_t v_s = vmulq_f32(v_dx, vmulq_f32(v_dx, vld1q_f32(d2ydata + j)));
v_s = vmlaq_f32(v_s, vld1q_f32(d2xdata + j), vmulq_f32(v_dy, v_dy));
v_s = vmlaq_f32(v_s, vld1q_f32(dxydata + j), vmulq_n_f32(vmulq_f32(v_dy, v_dx), -2));
vst1q_f32(dstdata + j, vmulq_n_f32(v_s, factor_f));
}
#endif

@ -1322,7 +1322,72 @@ struct ResizeAreaFastNoVec
{ return 0; }
};
#if CV_SSE2
#if CV_NEON
class ResizeAreaFastVec_SIMD_8u
{
public:
ResizeAreaFastVec_SIMD_8u(int _cn, int _step) :
cn(_cn), step(_step)
{
}
int operator() (const uchar* S, uchar* D, int w) const
{
int dx = 0;
const uchar* S0 = S, * S1 = S0 + step;
uint16x8_t v_2 = vdupq_n_u16(2);
if (cn == 1)
{
for ( ; dx <= w - 16; dx += 16, S0 += 32, S1 += 32, D += 16)
{
uint8x16x2_t v_row0 = vld2q_u8(S0), v_row1 = vld2q_u8(S1);
uint16x8_t v_dst0 = vaddl_u8(vget_low_u8(v_row0.val[0]), vget_low_u8(v_row0.val[1]));
v_dst0 = vaddq_u16(v_dst0, vaddl_u8(vget_low_u8(v_row1.val[0]), vget_low_u8(v_row1.val[1])));
v_dst0 = vshrq_n_u16(vaddq_u16(v_dst0, v_2), 2);
uint16x8_t v_dst1 = vaddl_u8(vget_high_u8(v_row0.val[0]), vget_high_u8(v_row0.val[1]));
v_dst1 = vaddq_u16(v_dst1, vaddl_u8(vget_high_u8(v_row1.val[0]), vget_high_u8(v_row1.val[1])));
v_dst1 = vshrq_n_u16(vaddq_u16(v_dst1, v_2), 2);
vst1q_u8(D, vcombine_u8(vmovn_u16(v_dst0), vmovn_u16(v_dst1)));
}
}
else if (cn == 4)
{
for ( ; dx <= w - 8; dx += 8, S0 += 16, S1 += 16, D += 8)
{
uint8x16_t v_row0 = vld1q_u8(S0), v_row1 = vld1q_u8(S1);
uint16x8_t v_row00 = vmovl_u8(vget_low_u8(v_row0));
uint16x8_t v_row01 = vmovl_u8(vget_high_u8(v_row0));
uint16x8_t v_row10 = vmovl_u8(vget_low_u8(v_row1));
uint16x8_t v_row11 = vmovl_u8(vget_high_u8(v_row1));
uint16x4_t v_p0 = vadd_u16(vadd_u16(vget_low_u16(v_row00), vget_high_u16(v_row00)),
vadd_u16(vget_low_u16(v_row10), vget_high_u16(v_row10)));
uint16x4_t v_p1 = vadd_u16(vadd_u16(vget_low_u16(v_row01), vget_high_u16(v_row01)),
vadd_u16(vget_low_u16(v_row11), vget_high_u16(v_row11)));
uint16x8_t v_dst = vshrq_n_u16(vaddq_u16(vcombine_u16(v_p0, v_p1), v_2), 2);
vst1_u8(D, vmovn_u16(v_dst));
}
}
return dx;
}
private:
int cn, step;
};
// In the CV_NEON branch the 16-bit unsigned variant is just an alias of the
// ResizeAreaFastNoVec functor; no NEON-specific class is defined for it here.
typedef ResizeAreaFastNoVec<ushort, ushort> ResizeAreaFastVec_SIMD_16u;
#elif CV_SSE2
class ResizeAreaFastVec_SIMD_8u
{
public:

@ -1545,4 +1545,52 @@ TEST(Imgproc_InitUndistortMap, accuracy) { CV_UndistortMapTest test; test.safe_r
TEST(Imgproc_GetRectSubPix, accuracy) { CV_GetRectSubPixTest test; test.safe_run(); }
TEST(Imgproc_GetQuadSubPix, accuracy) { CV_GetQuadSubPixTest test; test.safe_run(); }
//////////////////////////////////////////////////////////////////////////
// Scalar reference for a 2x downscale: every destination element is the
// rounded mean, (a + b + c + d + 2) >> 2, of the matching 2x2 source block,
// computed independently for each channel.
//   T  - element type stored in the matrices
//   WT - wider type used to accumulate the four samples
// Rows 2*y and 2*y + 1 of src are read for each row y of dst.
template <typename T, typename WT>
void resizeArea(const cv::Mat & src, cv::Mat & dst)
{
    const int cn = src.channels();
    const int dstRowLen = dst.cols * cn;

    for (int row = 0; row < dst.rows; ++row)
    {
        const T * upper = src.ptr<T>(2 * row);
        const T * lower = src.ptr<T>(2 * row + 1);
        T * out = dst.ptr<T>(row);

        for (int px = 0; px < dstRowLen; px += cn)
        {
            // first element of the left pixel of the 2x2 block in each source row
            const T * upperPx = upper + 2 * px;
            const T * lowerPx = lower + 2 * px;

            for (int ch = 0; ch < cn; ++ch)
            {
                WT acc = WT(upperPx[ch]) + WT(upperPx[ch + cn]);
                acc += WT(lowerPx[ch]) + WT(lowerPx[ch + cn]) + (WT)(2);
                out[px + ch] = cv::saturate_cast<T>(acc >> 2);
            }
        }
    }
}
// Verifies that cv::resize with INTER_AREA, when shrinking by exactly 2x,
// produces the same pixels as the scalar reference resizeArea() for 8-bit
// 1- and 4-channel images.
TEST(Resize, Area_half)
{
    int types[] = { CV_8UC1, CV_8UC4 };

    // Fixed seed so the generated input is identical on every run.
    cv::RNG rng(17);

    for (int i = 0, size = sizeof(types) / sizeof(types[0]); i < size; ++i)
    {
        int type = types[i];
        cv::Mat src(100, 100, type), dst_actual(50, 50, type), dst_reference(50, 50, type);

        // A freshly allocated Mat holds indeterminate bytes. Fill it with
        // uniformly distributed values so the comparison exercises real data
        // instead of whatever happened to be in the buffer.
        rng.fill(src, cv::RNG::UNIFORM, cv::Scalar::all(0), cv::Scalar::all(256));

        if (CV_MAT_DEPTH(type) == CV_8U)
            resizeArea<uchar, ushort>(src, dst_reference);
        else
            CV_Assert(0);

        cv::resize(src, dst_actual, dst_actual.size(), 0, 0, cv::INTER_AREA);

        // Integer path: the results must match exactly.
        ASSERT_EQ(0, cvtest::norm(dst_reference, dst_actual, cv::NORM_INF));
    }
}
/* End of file. */

Loading…
Cancel
Save