Merge pull request #26071 from tingboliao:4.x

Remove the redundant codes of cv::convertMaps and mRGBA2RGBA<uchar> #26071

(1) cv::convertMaps: the branch [else if( m1type == CV_32FC2 && dstm1type == CV_16SC2 ) if( nninterpolate )] is unreachable,
    as the condition is satisfied in lines 1959 to 1961, calculated in advance and return directly.
(2) mRGBA2RGBA<uchar>: dst[0], dst[1], dst[2] and dst[3] is calculated repeatedly. Introduced in https://github.com/opencv/opencv/pull/13440

### Pull Request Readiness Checklist

See details at https://github.com/opencv/opencv/wiki/How_to_contribute#making-a-good-pull-request

- [ ] I agree to contribute to the project under Apache 2 License.
- [ ] To the best of my knowledge, the proposed patch is not based on a code under GPL or another license that is incompatible with OpenCV
- [ ] The PR is proposed to the proper branch
- [ ] There is a reference to the original bug report and related work
- [ ] There is accuracy test, performance test and test data in opencv_extra repository, if applicable
      Patch to opencv_extra has the same branch name.
- [ ] The feature is well documented and sample code can be built with the project CMake
pull/26105/head
tingboliao 3 months ago committed by GitHub
parent e9c3e1acb5
commit 88f99edc65
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
  1. 5
      modules/imgproc/src/color_rgb.simd.hpp
  2. 85
      modules/imgproc/src/imgwarp.cpp

@ -1088,11 +1088,6 @@ struct mRGBA2RGBA<uchar>
uchar v3_half = v3 / 2;
dst[0] = (v3==0)? 0 : (v0 * max_val + v3_half) / v3;
dst[1] = (v3==0)? 0 : (v1 * max_val + v3_half) / v3;
dst[2] = (v3==0)? 0 : (v2 * max_val + v3_half) / v3;
dst[3] = v3;
dst[0] = (v3==0)? 0 : saturate_cast<uchar>((v0 * max_val + v3_half) / v3);
dst[1] = (v3==0)? 0 : saturate_cast<uchar>((v1 * max_val + v3_half) / v3);
dst[2] = (v3==0)? 0 : saturate_cast<uchar>((v2 * max_val + v3_half) / v3);

@ -2082,65 +2082,46 @@ void cv::convertMaps( InputArray _map1, InputArray _map2,
}
else if( m1type == CV_32FC2 && dstm1type == CV_16SC2 )
{
if( nninterpolate )
#if CV_TRY_SSE4_1
if( useSSE4_1 )
opt_SSE4_1::convertMaps_32f2c16s_SSE41(src1f, dst1, dst2, size.width);
else
#endif
{
#if CV_SIMD128
int span = VTraits<v_float32x4>::vlanes();
{
for( ; x <= (size.width << 1) - span * 2; x += span * 2 )
v_store(dst1 + x, v_pack(v_round(v_load(src1f + x)),
v_round(v_load(src1f + x + span))));
v_float32x4 v_scale = v_setall_f32((float)INTER_TAB_SIZE);
v_int32x4 v_mask = v_setall_s32(INTER_TAB_SIZE - 1);
v_int32x4 v_scale3 = v_setall_s32(INTER_TAB_SIZE);
int span = VTraits<v_uint16x8>::vlanes();
for (; x <= size.width - span; x += span )
{
v_float32x4 v_src0[2], v_src1[2];
v_load_deinterleave(src1f + (x << 1), v_src0[0], v_src0[1]);
v_load_deinterleave(src1f + (x << 1) + span, v_src1[0], v_src1[1]);
v_int32x4 v_ix0 = v_round(v_mul(v_src0[0], v_scale));
v_int32x4 v_ix1 = v_round(v_mul(v_src1[0], v_scale));
v_int32x4 v_iy0 = v_round(v_mul(v_src0[1], v_scale));
v_int32x4 v_iy1 = v_round(v_mul(v_src1[1], v_scale));
v_int16x8 v_dst[2];
v_dst[0] = v_pack(v_shr<INTER_BITS>(v_ix0), v_shr<INTER_BITS>(v_ix1));
v_dst[1] = v_pack(v_shr<INTER_BITS>(v_iy0), v_shr<INTER_BITS>(v_iy1));
v_store_interleave(dst1 + (x << 1), v_dst[0], v_dst[1]);
v_store(dst2 + x, v_pack_u(
v_muladd(v_scale3, (v_and(v_iy0, v_mask)), (v_and(v_ix0, v_mask))),
v_muladd(v_scale3, (v_and(v_iy1, v_mask)), (v_and(v_ix1, v_mask)))));
}
}
#endif
for( ; x < size.width; x++ )
{
dst1[x*2] = saturate_cast<short>(src1f[x*2]);
dst1[x*2+1] = saturate_cast<short>(src1f[x*2+1]);
}
}
else
{
#if CV_TRY_SSE4_1
if( useSSE4_1 )
opt_SSE4_1::convertMaps_32f2c16s_SSE41(src1f, dst1, dst2, size.width);
else
#endif
{
#if CV_SIMD128
{
v_float32x4 v_scale = v_setall_f32((float)INTER_TAB_SIZE);
v_int32x4 v_mask = v_setall_s32(INTER_TAB_SIZE - 1);
v_int32x4 v_scale3 = v_setall_s32(INTER_TAB_SIZE);
int span = VTraits<v_uint16x8>::vlanes();
for (; x <= size.width - span; x += span )
{
v_float32x4 v_src0[2], v_src1[2];
v_load_deinterleave(src1f + (x << 1), v_src0[0], v_src0[1]);
v_load_deinterleave(src1f + (x << 1) + span, v_src1[0], v_src1[1]);
v_int32x4 v_ix0 = v_round(v_mul(v_src0[0], v_scale));
v_int32x4 v_ix1 = v_round(v_mul(v_src1[0], v_scale));
v_int32x4 v_iy0 = v_round(v_mul(v_src0[1], v_scale));
v_int32x4 v_iy1 = v_round(v_mul(v_src1[1], v_scale));
v_int16x8 v_dst[2];
v_dst[0] = v_pack(v_shr<INTER_BITS>(v_ix0), v_shr<INTER_BITS>(v_ix1));
v_dst[1] = v_pack(v_shr<INTER_BITS>(v_iy0), v_shr<INTER_BITS>(v_iy1));
v_store_interleave(dst1 + (x << 1), v_dst[0], v_dst[1]);
v_store(dst2 + x, v_pack_u(
v_muladd(v_scale3, (v_and(v_iy0, v_mask)), (v_and(v_ix0, v_mask))),
v_muladd(v_scale3, (v_and(v_iy1, v_mask)), (v_and(v_ix1, v_mask)))));
}
}
#endif
for( ; x < size.width; x++ )
{
int ix = saturate_cast<int>(src1f[x*2]*INTER_TAB_SIZE);
int iy = saturate_cast<int>(src1f[x*2+1]*INTER_TAB_SIZE);
dst1[x*2] = saturate_cast<short>(ix >> INTER_BITS);
dst1[x*2+1] = saturate_cast<short>(iy >> INTER_BITS);
dst2[x] = (ushort)((iy & (INTER_TAB_SIZE-1))*INTER_TAB_SIZE + (ix & (INTER_TAB_SIZE-1)));
}
int ix = saturate_cast<int>(src1f[x*2]*INTER_TAB_SIZE);
int iy = saturate_cast<int>(src1f[x*2+1]*INTER_TAB_SIZE);
dst1[x*2] = saturate_cast<short>(ix >> INTER_BITS);
dst1[x*2+1] = saturate_cast<short>(iy >> INTER_BITS);
dst2[x] = (ushort)((iy & (INTER_TAB_SIZE-1))*INTER_TAB_SIZE + (ix & (INTER_TAB_SIZE-1)));
}
}
}

Loading…
Cancel
Save