@@ -595,7 +595,7 @@ yuv2rgb_1_template_lsx(SwsInternal *c, const int16_t *buf0,
int len_count = ( dstW + 1 ) >> 1 ;
const void * r , * g , * b ;
if ( uvalpha < 2048 ) {
if ( uvalpha == 0 ) {
int count = 0 ;
int head = YUVRGB_TABLE_HEADROOM ;
__m128i headroom = __lsx_vreplgr2vr_h ( head ) ;
@@ -659,61 +659,46 @@ yuv2rgb_1_template_lsx(SwsInternal *c, const int16_t *buf0,
const int16_t * ubuf1 = ubuf [ 1 ] , * vbuf1 = vbuf [ 1 ] ;
int count = 0 ;
int HEADROOM = YUVRGB_TABLE_HEADROOM ;
int uvalpha1 = 4096 - uvalpha ;
__m128i headroom = __lsx_vreplgr2vr_w ( HEADROOM ) ;
__m128i uvalpha_tmp1 = __lsx_vreplgr2vr_h ( uvalpha1 ) ;
__m128i uvalpha_tmp = __lsx_vreplgr2vr_h ( uvalpha ) ;
for ( i = 0 ; i < len ; i += 8 ) {
int Y1 , Y2 , U , V ;
int i_dex = i << 1 ;
int c_dex = count << 1 ;
__m128i src_y , src_u0 , src_v0 , src_u1 , src_v1 ;
__m128i y_l , y_h , u1 , u2 , v1 , v2 ;
__m128i y_l , y_h , u1 , u2 , v1 , v2 , u_ev , v_od ;
DUP4_ARG2 ( __lsx_vldx , buf0 , i_dex , ubuf0 , c_dex , vbuf0 , c_dex ,
ubuf1 , c_dex , src_y , src_u0 , src_v0 , src_u1 ) ;
src_v1 = __lsx_vldx ( vbuf1 , c_dex ) ;
src_y = __lsx_vsrari_h ( src_y , 7 ) ;
u1 = __lsx_vaddwev_w_h ( src_u0 , src_u1 ) ;
v1 = __lsx_vaddwod_w_h ( src_u0 , src_u1 ) ;
u2 = __lsx_vaddwev_w_h ( src_v0 , src_v1 ) ;
v2 = __lsx_vaddwod_w_h ( src_v0 , src_v1 ) ;
u_ev = __lsx_vmulwev_w_h ( src_u0 , uvalpha_tmp1 ) ;
v_od = __lsx_vmulwod_w_h ( src_u0 , uvalpha_tmp1 ) ;
u1 = __lsx_vmaddwev_w_h ( u_ev , src_u1 , uvalpha_tmp ) ;
v1 = __lsx_vmaddwod_w_h ( v_od , src_u1 , uvalpha_tmp ) ;
u_ev = __lsx_vmulwev_w_h ( src_v0 , uvalpha_tmp1 ) ;
v_od = __lsx_vmulwod_w_h ( src_v0 , uvalpha_tmp1 ) ;
u2 = __lsx_vmaddwev_w_h ( u_ev , src_v1 , uvalpha_tmp ) ;
v2 = __lsx_vmaddwod_w_h ( v_od , src_v1 , uvalpha_tmp ) ;
y_l = __lsx_vsllwil_w_h ( src_y , 0 ) ;
y_h = __lsx_vexth_w_h ( src_y ) ;
u1 = __lsx_vsrari_w ( u1 , 8 ) ;
v1 = __lsx_vsrari_w ( v1 , 8 ) ;
u2 = __lsx_vsrari_w ( u2 , 8 ) ;
v2 = __lsx_vsrari_w ( v2 , 8 ) ;
u1 = __lsx_vsrari_w ( u1 , 19 ) ;
v1 = __lsx_vsrari_w ( v1 , 19 ) ;
u2 = __lsx_vsrari_w ( u2 , 19 ) ;
v2 = __lsx_vsrari_w ( v2 , 19 ) ;
u1 = __lsx_vadd_w ( u1 , headroom ) ;
v1 = __lsx_vadd_w ( v1 , headroom ) ;
u2 = __lsx_vadd_w ( u2 , headroom ) ;
v2 = __lsx_vadd_w ( v2 , headroom ) ;
WRITE_YUV2RGB_LSX ( y_l , y_l , u1 , v1 , 0 , 1 , 0 , 0 ) ;
WRITE_YUV2RGB_LSX ( y_l , y_l , u2 , v2 , 2 , 3 , 0 , 0 ) ;
WRITE_YUV2RGB_LSX ( y_h , y_h , u1 , v1 , 0 , 1 , 1 , 1 ) ;
WRITE_YUV2RGB_LSX ( y_h , y_h , u2 , v2 , 2 , 3 , 1 , 1 ) ;
}
if ( dstW - i >= 4 ) {
int Y1 , Y2 , U , V ;
int i_dex = i << 1 ;
__m128i src_y , src_u0 , src_v0 , src_u1 , src_v1 ;
__m128i uv ;
src_y = __lsx_vldx ( buf0 , i_dex ) ;
src_u0 = __lsx_vldrepl_d ( ( ubuf0 + count ) , 0 ) ;
src_v0 = __lsx_vldrepl_d ( ( vbuf0 + count ) , 0 ) ;
src_u1 = __lsx_vldrepl_d ( ( ubuf1 + count ) , 0 ) ;
src_v1 = __lsx_vldrepl_d ( ( vbuf1 + count ) , 0 ) ;
src_u0 = __lsx_vilvl_h ( src_u1 , src_u0 ) ;
src_v0 = __lsx_vilvl_h ( src_v1 , src_v0 ) ;
src_y = __lsx_vsrari_h ( src_y , 7 ) ;
src_y = __lsx_vsllwil_w_h ( src_y , 0 ) ;
uv = __lsx_vilvl_h ( src_v0 , src_u0 ) ;
uv = __lsx_vhaddw_w_h ( uv , uv ) ;
uv = __lsx_vsrari_w ( uv , 8 ) ;
uv = __lsx_vadd_w ( uv , headroom ) ;
WRITE_YUV2RGB_LSX ( src_y , src_y , uv , uv , 0 , 1 , 0 , 1 ) ;
WRITE_YUV2RGB_LSX ( src_y , src_y , uv , uv , 2 , 3 , 2 , 3 ) ;
i += 4 ;
WRITE_YUV2RGB_LSX ( y_l , y_l , u1 , u2 , 0 , 1 , 0 , 0 ) ;
WRITE_YUV2RGB_LSX ( y_l , y_l , v1 , v2 , 2 , 3 , 0 , 0 ) ;
WRITE_YUV2RGB_LSX ( y_h , y_h , u1 , u2 , 0 , 1 , 1 , 1 ) ;
WRITE_YUV2RGB_LSX ( y_h , y_h , v1 , v2 , 2 , 3 , 1 , 1 ) ;
}
for ( ; count < len_count ; count ++ ) {
int Y1 = ( buf0 [ count * 2 ] + 64 ) >> 7 ;