diff --git a/modules/imgproc/src/color_rgb.simd.hpp b/modules/imgproc/src/color_rgb.simd.hpp index ca39d8a908..40e3854460 100644 --- a/modules/imgproc/src/color_rgb.simd.hpp +++ b/modules/imgproc/src/color_rgb.simd.hpp @@ -1088,11 +1088,6 @@ struct mRGBA2RGBA uchar v3_half = v3 / 2; - dst[0] = (v3==0)? 0 : (v0 * max_val + v3_half) / v3; - dst[1] = (v3==0)? 0 : (v1 * max_val + v3_half) / v3; - dst[2] = (v3==0)? 0 : (v2 * max_val + v3_half) / v3; - dst[3] = v3; - dst[0] = (v3==0)? 0 : saturate_cast((v0 * max_val + v3_half) / v3); dst[1] = (v3==0)? 0 : saturate_cast((v1 * max_val + v3_half) / v3); dst[2] = (v3==0)? 0 : saturate_cast((v2 * max_val + v3_half) / v3); diff --git a/modules/imgproc/src/imgwarp.cpp b/modules/imgproc/src/imgwarp.cpp index 4e4d718da3..289d09febd 100644 --- a/modules/imgproc/src/imgwarp.cpp +++ b/modules/imgproc/src/imgwarp.cpp @@ -2082,65 +2082,46 @@ void cv::convertMaps( InputArray _map1, InputArray _map2, } else if( m1type == CV_32FC2 && dstm1type == CV_16SC2 ) { - if( nninterpolate ) + #if CV_TRY_SSE4_1 + if( useSSE4_1 ) + opt_SSE4_1::convertMaps_32f2c16s_SSE41(src1f, dst1, dst2, size.width); + else + #endif { #if CV_SIMD128 - int span = VTraits::vlanes(); { - for( ; x <= (size.width << 1) - span * 2; x += span * 2 ) - v_store(dst1 + x, v_pack(v_round(v_load(src1f + x)), - v_round(v_load(src1f + x + span)))); + v_float32x4 v_scale = v_setall_f32((float)INTER_TAB_SIZE); + v_int32x4 v_mask = v_setall_s32(INTER_TAB_SIZE - 1); + v_int32x4 v_scale3 = v_setall_s32(INTER_TAB_SIZE); + int span = VTraits::vlanes(); + for (; x <= size.width - span; x += span ) + { + v_float32x4 v_src0[2], v_src1[2]; + v_load_deinterleave(src1f + (x << 1), v_src0[0], v_src0[1]); + v_load_deinterleave(src1f + (x << 1) + span, v_src1[0], v_src1[1]); + v_int32x4 v_ix0 = v_round(v_mul(v_src0[0], v_scale)); + v_int32x4 v_ix1 = v_round(v_mul(v_src1[0], v_scale)); + v_int32x4 v_iy0 = v_round(v_mul(v_src0[1], v_scale)); + v_int32x4 v_iy1 = v_round(v_mul(v_src1[1], v_scale)); + + v_int16x8 v_dst[2]; + v_dst[0] = v_pack(v_shr(v_ix0), v_shr(v_ix1)); + v_dst[1] = v_pack(v_shr(v_iy0), v_shr(v_iy1)); + v_store_interleave(dst1 + (x << 1), v_dst[0], v_dst[1]); + + v_store(dst2 + x, v_pack_u( + v_muladd(v_scale3, (v_and(v_iy0, v_mask)), (v_and(v_ix0, v_mask))), + v_muladd(v_scale3, (v_and(v_iy1, v_mask)), (v_and(v_ix1, v_mask))))); + } } #endif for( ; x < size.width; x++ ) { - dst1[x*2] = saturate_cast(src1f[x*2]); - dst1[x*2+1] = saturate_cast(src1f[x*2+1]); - } - } - else - { - #if CV_TRY_SSE4_1 - if( useSSE4_1 ) - opt_SSE4_1::convertMaps_32f2c16s_SSE41(src1f, dst1, dst2, size.width); - else - #endif - { - #if CV_SIMD128 - { - v_float32x4 v_scale = v_setall_f32((float)INTER_TAB_SIZE); - v_int32x4 v_mask = v_setall_s32(INTER_TAB_SIZE - 1); - v_int32x4 v_scale3 = v_setall_s32(INTER_TAB_SIZE); - int span = VTraits::vlanes(); - for (; x <= size.width - span; x += span ) - { - v_float32x4 v_src0[2], v_src1[2]; - v_load_deinterleave(src1f + (x << 1), v_src0[0], v_src0[1]); - v_load_deinterleave(src1f + (x << 1) + span, v_src1[0], v_src1[1]); - v_int32x4 v_ix0 = v_round(v_mul(v_src0[0], v_scale)); - v_int32x4 v_ix1 = v_round(v_mul(v_src1[0], v_scale)); - v_int32x4 v_iy0 = v_round(v_mul(v_src0[1], v_scale)); - v_int32x4 v_iy1 = v_round(v_mul(v_src1[1], v_scale)); - - v_int16x8 v_dst[2]; - v_dst[0] = v_pack(v_shr(v_ix0), v_shr(v_ix1)); - v_dst[1] = v_pack(v_shr(v_iy0), v_shr(v_iy1)); - v_store_interleave(dst1 + (x << 1), v_dst[0], v_dst[1]); - - v_store(dst2 + x, v_pack_u( - v_muladd(v_scale3, (v_and(v_iy0, v_mask)), (v_and(v_ix0, v_mask))), - v_muladd(v_scale3, (v_and(v_iy1, v_mask)), (v_and(v_ix1, v_mask))))); - } - } - #endif - for( ; x < size.width; x++ ) - { - int ix = saturate_cast(src1f[x*2]*INTER_TAB_SIZE); - int iy = saturate_cast(src1f[x*2+1]*INTER_TAB_SIZE); - dst1[x*2] = saturate_cast(ix >> INTER_BITS); - dst1[x*2+1] = saturate_cast(iy >> INTER_BITS); - dst2[x] = (ushort)((iy & (INTER_TAB_SIZE-1))*INTER_TAB_SIZE + (ix & (INTER_TAB_SIZE-1))); - } + int ix = saturate_cast(src1f[x*2]*INTER_TAB_SIZE); + int iy = saturate_cast(src1f[x*2+1]*INTER_TAB_SIZE); + dst1[x*2] = saturate_cast(ix >> INTER_BITS); + dst1[x*2+1] = saturate_cast(iy >> INTER_BITS); + dst2[x] = (ushort)((iy & (INTER_TAB_SIZE-1))*INTER_TAB_SIZE + (ix & (INTER_TAB_SIZE-1))); } } }