@ -1,5 +1,5 @@
/*
* Copyright ( c ) 2015 Parag Salasakar ( Parag . Salasakar @ imgtec . com )
* Copyright ( c ) 2015 - 2017 Parag Salasakar ( Parag . Salasakar @ imgtec . com )
*
* This file is part of FFmpeg .
*
@ -644,96 +644,69 @@ static void avc_loopfilter_luma_intra_edge_hor_msa(uint8_t *data,
uint8_t beta_in ,
uint32_t img_width )
{
v16u8 p2_asub_p0 , q2_asub_q0 , p0_asub_q0 ;
v16u8 alpha , beta ;
v16u8 is_less_than , is_less_than_beta , negate_is_less_than_beta ;
v16u8 p2 , p1 , p0 , q0 , q1 , q2 ;
v16u8 p3_org , p2_org , p1_org , p0_org , q0_org , q1_org , q2_org , q3_org ;
v8i16 p1_org_r , p0_org_r , q0_org_r , q1_org_r ;
v8i16 p1_org_l , p0_org_l , q0_org_l , q1_org_l ;
v8i16 p2_r = { 0 } ;
v8i16 p1_r = { 0 } ;
v8i16 p0_r = { 0 } ;
v8i16 q0_r = { 0 } ;
v8i16 q1_r = { 0 } ;
v8i16 q2_r = { 0 } ;
v8i16 p2_l = { 0 } ;
v8i16 p1_l = { 0 } ;
v8i16 p0_l = { 0 } ;
v8i16 q0_l = { 0 } ;
v8i16 q1_l = { 0 } ;
v8i16 q2_l = { 0 } ;
v16u8 tmp_flag ;
v16i8 zero = { 0 } ;
alpha = ( v16u8 ) __msa_fill_b ( alpha_in ) ;
beta = ( v16u8 ) __msa_fill_b ( beta_in ) ;
v16u8 p0_asub_q0 , p1_asub_p0 , q1_asub_q0 ;
v16u8 is_less_than , is_less_than_beta , is_less_than_alpha ;
v16u8 p1_org , p0_org , q0_org , q1_org ;
LD_UB4 ( data - ( img_width < < 1 ) , img_width , p1_org , p0_org , q0_org , q1_org ) ;
{
v16u8 p1_asub_p0 , q1_asub_q0 , is_less_than_alpha ;
p0_asub_q0 = __msa_asub_u_b ( p0_org , q0_org ) ;
p1_asub_p0 = __msa_asub_u_b ( p1_org , p0_org ) ;
q1_asub_q0 = __msa_asub_u_b ( q1_org , q0_org ) ;
p0_asub_q0 = __msa_asub_u_b ( p0_org , q0_org ) ;
p1_asub_p0 = __msa_asub_u_b ( p1_org , p0_org ) ;
q1_asub_q0 = __msa_asub_u_b ( q1_org , q0_org ) ;
is_less_than_alpha = ( p0_asub_q0 < alpha ) ;
is_less_than_beta = ( p1_asub_p0 < beta ) ;
is_less_than = is_less_than_beta & is_less_than_alpha ;
is_less_than_beta = ( q1_asub_q0 < beta ) ;
is_less_than = is_less_than_beta & is_less_than ;
}
is_less_than_alpha = ( p0_asub_q0 < alpha_in ) ;
is_less_than_beta = ( p1_asub_p0 < beta_in ) ;
is_less_than = is_less_than_beta & is_less_than_alpha ;
is_less_than_beta = ( q1_asub_q0 < beta_in ) ;
is_less_than = is_less_than_beta & is_less_than ;
if ( ! __msa_test_bz_v ( is_less_than ) ) {
q2_org = LD_UB ( data + ( 2 * img_width ) ) ;
p3_org = LD_UB ( data - ( img_width < < 2 ) ) ;
p2_org = LD_UB ( data - ( 3 * img_width ) ) ;
v16u8 p2_asub_p0 , q2_asub_q0 , p0 , q0 , negate_is_less_than_beta ;
v8i16 p0_r = { 0 } ;
v8i16 q0_r = { 0 } ;
v8i16 p0_l = { 0 } ;
v8i16 q0_l = { 0 } ;
v16i8 zero = { 0 } ;
v8i16 p1_org_r , p0_org_r , q0_org_r , q1_org_r ;
v8i16 p1_org_l , p0_org_l , q0_org_l , q1_org_l ;
v16u8 q2_org = LD_UB ( data + ( 2 * img_width ) ) ;
v16u8 p2_org = LD_UB ( data - ( 3 * img_width ) ) ;
v16u8 tmp_flag = ( v16u8 ) __msa_fill_b ( ( alpha_in > > 2 ) + 2 ) ;
UNPCK_UB_SH ( p1_org , p1_org_r , p1_org_l ) ;
UNPCK_UB_SH ( p0_org , p0_org_r , p0_org_l ) ;
UNPCK_UB_SH ( q0_org , q0_org_r , q0_org_l ) ;
tmp_flag = alpha > > 2 ;
tmp_flag = tmp_flag + 2 ;
tmp_flag = ( p0_asub_q0 < tmp_flag ) ;
p2_asub_p0 = __msa_asub_u_b ( p2_org , p0_org ) ;
is_less_than_beta = ( p2_asub_p0 < beta ) ;
is_less_than_beta = ( p2_asub_p0 < beta_in ) ;
is_less_than_beta = is_less_than_beta & tmp_flag ;
negate_is_less_than_beta = __msa_xori_b ( is_less_than_beta , 0xff ) ;
is_less_than_beta = is_less_than_beta & is_less_than ;
negate_is_less_than_beta = negate_is_less_than_beta & is_less_than ;
{
v8u16 is_less_than_beta_l , is_less_than_beta_r ;
q1_org_r = ( v8i16 ) __msa_ilvr_b ( zero , ( v16i8 ) q1_org ) ;
is_less_than_beta_r =
( v8u16 ) __msa_sldi_b ( ( v16i8 ) is_less_than_beta , zero , 8 ) ;
if ( ! __msa_test_bz_v ( ( v16u8 ) is_less_than_beta_r ) ) {
v8i16 p3_org_r ;
ILVR_B2_SH ( zero , p3_org , zero , p2_org , p3_org_r , p2_r ) ;
AVC_LPF_P0P1P2_OR_Q0Q1Q2 ( p3_org_r , p0_org_r , q0_org_r , p1_org_r ,
p2_r , q1_org_r , p0_r , p1_r , p2_r ) ;
}
q1_org_l = ( v8i16 ) __msa_ilvl_b ( zero , ( v16i8 ) q1_org ) ;
is_less_than_beta_l =
( v8u 16 ) __msa_sld i_b ( zero , ( v16i8 ) is_less_than_beta , 8 ) ;
q1_org_r = ( v8i16 ) __msa_ilvr_b ( zero , ( v16i8 ) q1_org ) ;
q1_org_l = ( v8i16 ) __msa_ilvl_b ( zero , ( v16i8 ) q1_org ) ;
if ( ! __msa_test_bz_v ( ( v16u8 ) is_less_than_beta_l ) ) {
v8i16 p3_org_l ;
ILVL_B2_SH ( zero , p3_org , zero , p2_org , p3_org_l , p2_l ) ;
AVC_LPF_P0P1P2_OR_Q0Q1Q2 ( p3_org_l , p0_org_l , q0_org_l , p1_org_l ,
p2_l , q1_org_l , p0_l , p1_l , p2_l ) ;
}
}
/* combine and store */
if ( ! __msa_test_bz_v ( is_less_than_beta ) ) {
v8i16 p3_org_l , p3_org_r ;
v16u8 p3_org = LD_UB ( data - ( img_width < < 2 ) ) ;
v16u8 p2 , p1 ;
v8i16 p2_r = { 0 } ;
v8i16 p2_l = { 0 } ;
v8i16 p1_r = { 0 } ;
v8i16 p1_l = { 0 } ;
ILVR_B2_SH ( zero , p3_org , zero , p2_org , p3_org_r , p2_r ) ;
AVC_LPF_P0P1P2_OR_Q0Q1Q2 ( p3_org_r , p0_org_r , q0_org_r , p1_org_r ,
p2_r , q1_org_r , p0_r , p1_r , p2_r ) ;
ILVL_B2_SH ( zero , p3_org , zero , p2_org , p3_org_l , p2_l ) ;
AVC_LPF_P0P1P2_OR_Q0Q1Q2 ( p3_org_l , p0_org_l , q0_org_l , p1_org_l ,
p2_l , q1_org_l , p0_l , p1_l , p2_l ) ;
PCKEV_B3_UB ( p0_l , p0_r , p1_l , p1_r , p2_l , p2_r , p0 , p1 , p2 ) ;
p0_org = __msa_bmnz_v ( p0_org , p0 , is_less_than_beta ) ;
@ -743,62 +716,42 @@ static void avc_loopfilter_luma_intra_edge_hor_msa(uint8_t *data,
ST_UB ( p1_org , data - ( 2 * img_width ) ) ;
ST_UB ( p2_org , data - ( 3 * img_width ) ) ;
}
{
v8u16 negate_is_less_than_beta_r , negate_is_less_than_beta_l ;
negate_is_less_than_beta_r =
( v8u16 ) __msa_sldi_b ( ( v16i8 ) negate_is_less_than_beta , zero , 8 ) ;
if ( ! __msa_test_bz_v ( ( v16u8 ) negate_is_less_than_beta_r ) ) {
AVC_LPF_P0_OR_Q0 ( p0_org_r , q1_org_r , p1_org_r , p0_r ) ;
}
AVC_LPF_P0_OR_Q0 ( p0_org_r , q1_org_r , p1_org_r , p0_r ) ;
AVC_LPF_P0_OR_Q0 ( p0_org_l , q1_org_l , p1_org_l , p0_l ) ;
negate_is_less_than_beta_l =
( v8u16 ) __msa_sldi_b ( zero , ( v16i8 ) negate_is_less_than_beta , 8 ) ;
if ( ! __msa_test_bz_v ( ( v16u8 ) negate_is_less_than_beta_l ) ) {
AVC_LPF_P0_OR_Q0 ( p0_org_l , q1_org_l , p1_org_l , p0_l ) ;
}
}
/* combine */
if ( ! __msa_test_bz_v ( negate_is_less_than_beta ) ) {
p0 = ( v16u8 ) __msa_pckev_b ( ( v16i8 ) p0_l , ( v16i8 ) p0_r ) ;
p0_org = __msa_bmnz_v ( p0_org , p0 , negate_is_less_than_beta ) ;
}
p0 = ( v16u8 ) __msa_pckev_b ( ( v16i8 ) p0_l , ( v16i8 ) p0_r ) ;
p0_org = __msa_bmnz_v ( p0_org , p0 , negate_is_less_than_beta ) ;
ST_UB ( p0_org , data - img_width ) ;
/* if (tmpFlag && (unsigned)ABS(q2-q0) < thresholds->beta_in) */
q3_org = LD_UB ( data + ( 3 * img_width ) ) ;
q2_asub_q0 = __msa_asub_u_b ( q2_org , q0_org ) ;
is_less_than_beta = ( q2_asub_q0 < beta ) ;
is_less_than_beta = ( q2_asub_q0 < beta_in ) ;
is_less_than_beta = is_less_than_beta & tmp_flag ;
negate_is_less_than_beta = __msa_xori_b ( is_less_than_beta , 0xff ) ;
is_less_than_beta = is_less_than_beta & is_less_than ;
negate_is_less_than_beta = negate_is_less_than_beta & is_less_than ;
{
v8u16 is_less_than_beta_l , is_less_than_beta_r ;
is_less_than_beta_r =
( v8u16 ) __msa_sldi_b ( ( v16i8 ) is_less_than_beta , zero , 8 ) ;
if ( ! __msa_test_bz_v ( ( v16u8 ) is_less_than_beta_r ) ) {
v8i16 q3_org_r ;
ILVR_B2_SH ( zero , q3_org , zero , q2_org , q3_org_r , q2_r ) ;
AVC_LPF_P0P1P2_OR_Q0Q1Q2 ( q3_org_r , q0_org_r , p0_org_r , q1_org_r ,
q2_r , p1_org_r , q0_r , q1_r , q2_r ) ;
}
is_less_than_beta_l =
( v8u16 ) __msa_sldi_b ( zero , ( v16i8 ) is_less_than_beta , 8 ) ;
if ( ! __msa_test_bz_v ( ( v16u8 ) is_less_than_beta_l ) ) {
v8i16 q3_org_l ;
ILVL_B2_SH ( zero , q3_org , zero , q2_org , q3_org_l , q2_l ) ;
AVC_LPF_P0P1P2_OR_Q0Q1Q2 ( q3_org_l , q0_org_l , p0_org_l , q1_org_l ,
q2_l , p1_org_l , q0_l , q1_l , q2_l ) ;
}
}
/* combine and store */
if ( ! __msa_test_bz_v ( is_less_than_beta ) ) {
v8i16 q3_org_r , q3_org_l ;
v16u8 q3_org = LD_UB ( data + ( 3 * img_width ) ) ;
v16u8 q1 , q2 ;
v8i16 q2_r = { 0 } ;
v8i16 q2_l = { 0 } ;
v8i16 q1_r = { 0 } ;
v8i16 q1_l = { 0 } ;
ILVR_B2_SH ( zero , q3_org , zero , q2_org , q3_org_r , q2_r ) ;
AVC_LPF_P0P1P2_OR_Q0Q1Q2 ( q3_org_r , q0_org_r , p0_org_r , q1_org_r ,
q2_r , p1_org_r , q0_r , q1_r , q2_r ) ;
ILVL_B2_SH ( zero , q3_org , zero , q2_org , q3_org_l , q2_l ) ;
AVC_LPF_P0P1P2_OR_Q0Q1Q2 ( q3_org_l , q0_org_l , p0_org_l , q1_org_l ,
q2_l , p1_org_l , q0_l , q1_l , q2_l ) ;
PCKEV_B3_UB ( q0_l , q0_r , q1_l , q1_r , q2_l , q2_r , q0 , q1 , q2 ) ;
q0_org = __msa_bmnz_v ( q0_org , q0 , is_less_than_beta ) ;
q1_org = __msa_bmnz_v ( q1_org , q1 , is_less_than_beta ) ;
@ -807,25 +760,14 @@ static void avc_loopfilter_luma_intra_edge_hor_msa(uint8_t *data,
ST_UB ( q1_org , data + img_width ) ;
ST_UB ( q2_org , data + 2 * img_width ) ;
}
{
v8u16 negate_is_less_than_beta_r , negate_is_less_than_beta_l ;
negate_is_less_than_beta_r =
( v8u16 ) __msa_sldi_b ( ( v16i8 ) negate_is_less_than_beta , zero , 8 ) ;
if ( ! __msa_test_bz_v ( ( v16u8 ) negate_is_less_than_beta_r ) ) {
AVC_LPF_P0_OR_Q0 ( q0_org_r , p1_org_r , q1_org_r , q0_r ) ;
}
negate_is_less_than_beta_l =
( v8u16 ) __msa_sldi_b ( zero , ( v16i8 ) negate_is_less_than_beta , 8 ) ;
if ( ! __msa_test_bz_v ( ( v16u8 ) negate_is_less_than_beta_l ) ) {
AVC_LPF_P0_OR_Q0 ( q0_org_l , p1_org_l , q1_org_l , q0_l ) ;
}
}
AVC_LPF_P0_OR_Q0 ( q0_org_r , p1_org_r , q1_org_r , q0_r ) ;
AVC_LPF_P0_OR_Q0 ( q0_org_l , p1_org_l , q1_org_l , q0_l ) ;
/* combine */
if ( ! __msa_test_bz_v ( negate_is_less_than_beta ) ) {
q0 = ( v16u8 ) __msa_pckev_b ( ( v16i8 ) q0_l , ( v16i8 ) q0_r ) ;
q0_org = __msa_bmnz_v ( q0_org , q0 , negate_is_less_than_beta ) ;
}
q0 = ( v16u8 ) __msa_pckev_b ( ( v16i8 ) q0_l , ( v16i8 ) q0_r ) ;
q0_org = __msa_bmnz_v ( q0_org , q0 , negate_is_less_than_beta ) ;
ST_UB ( q0_org , data ) ;
}
}
@ -835,29 +777,13 @@ static void avc_loopfilter_luma_intra_edge_ver_msa(uint8_t *data,
uint8_t beta_in ,
uint32_t img_width )
{
uint8_t * src ;
uint8_t * src = data - 4 ;
v16u8 alpha , beta , p0_asub_q0 ;
v16u8 is_less_than_alpha , is_less_than ;
v16u8 is_less_than_beta , negate_is_less_than_beta ;
v16u8 is_less_than_alpha , is_less_than , is_less_than_beta ;
v16u8 p3_org , p2_org , p1_org , p0_org , q0_org , q1_org , q2_org , q3_org ;
v8i16 p1_org_r , p0_org_r , q0_org_r , q1_org_r ;
v8i16 p1_org_l , p0_org_l , q0_org_l , q1_org_l ;
v8i16 p2_r = { 0 } ;
v8i16 p1_r = { 0 } ;
v8i16 p0_r = { 0 } ;
v8i16 q0_r = { 0 } ;
v8i16 q1_r = { 0 } ;
v8i16 q2_r = { 0 } ;
v8i16 p2_l = { 0 } ;
v8i16 p1_l = { 0 } ;
v8i16 p0_l = { 0 } ;
v8i16 q0_l = { 0 } ;
v8i16 q1_l = { 0 } ;
v8i16 q2_l = { 0 } ;
v16i8 zero = { 0 } ;
v16u8 tmp_flag ;
v16u8 p1_asub_p0 , q1_asub_q0 ;
src = data - 4 ;
{
v16u8 row0 , row1 , row2 , row3 , row4 , row5 , row6 , row7 ;
v16u8 row8 , row9 , row10 , row11 , row12 , row13 , row14 , row15 ;
@ -873,119 +799,77 @@ static void avc_loopfilter_luma_intra_edge_ver_msa(uint8_t *data,
p3_org , p2_org , p1_org , p0_org ,
q0_org , q1_org , q2_org , q3_org ) ;
}
UNPCK_UB_SH ( p1_org , p1_org_r , p1_org_l ) ;
UNPCK_UB_SH ( p0_org , p0_org_r , p0_org_l ) ;
UNPCK_UB_SH ( q0_org , q0_org_r , q0_org_l ) ;
UNPCK_UB_SH ( q1_org , q1_org_r , q1_org_l ) ;
/* if ( ((unsigned)ABS(p0-q0) < thresholds->alpha_in) &&
( ( unsigned ) ABS ( p1 - p0 ) < thresholds - > beta_in ) & &
( ( unsigned ) ABS ( q1 - q0 ) < thresholds - > beta_in ) ) */
{
v16u8 p1_asub_p0 , q1_asub_q0 ;
p0_asub_q0 = __msa_asub_u_b ( p0_org , q0_org ) ;
p1_asub_p0 = __msa_asub_u_b ( p1_org , p0_org ) ;
q1_asub_q0 = __msa_asub_u_b ( q1_org , q0_org ) ;
p0_asub_q0 = __msa_asub_u_b ( p0_org , q0_org ) ;
p1_asub_p0 = __msa_asub_u_b ( p1_org , p0_org ) ;
q1_asub_q0 = __msa_asub_u_b ( q1_org , q0_org ) ;
alpha = ( v16u8 ) __msa_fill_b ( alpha_in ) ;
beta = ( v16u8 ) __msa_fill_b ( beta_in ) ;
alpha = ( v16u8 ) __msa_fill_b ( alpha_in ) ;
beta = ( v16u8 ) __msa_fill_b ( beta_in ) ;
is_less_than_alpha = ( p0_asub_q0 < alpha ) ;
is_less_than_beta = ( p1_asub_p0 < beta ) ;
is_less_than = is_less_than_beta & is_less_than_alpha ;
is_less_than_beta = ( q1_asub_q0 < beta ) ;
is_less_than = is_less_than_beta & is_less_than ;
}
is_less_than_alpha = ( p0_asub_q0 < alpha ) ;
is_less_than_beta = ( p1_asub_p0 < beta ) ;
is_less_than = is_less_than_beta & is_less_than_alpha ;
is_less_than_beta = ( q1_asub_q0 < beta ) ;
is_less_than = is_less_than_beta & is_less_than ;
if ( ! __msa_test_bz_v ( is_less_than ) ) {
v8i16 p0_r = { 0 } ;
v8i16 q0_r = { 0 } ;
v8i16 p0_l = { 0 } ;
v8i16 q0_l = { 0 } ;
v16i8 zero = { 0 } ;
v16u8 tmp_flag , p0 , q0 , p2_asub_p0 , q2_asub_q0 ;
v16u8 negate_is_less_than_beta ;
v8i16 p1_org_r , p0_org_r , q0_org_r , q1_org_r ;
v8i16 p1_org_l , p0_org_l , q0_org_l , q1_org_l ;
UNPCK_UB_SH ( p1_org , p1_org_r , p1_org_l ) ;
UNPCK_UB_SH ( p0_org , p0_org_r , p0_org_l ) ;
UNPCK_UB_SH ( q0_org , q0_org_r , q0_org_l ) ;
UNPCK_UB_SH ( q1_org , q1_org_r , q1_org_l ) ;
tmp_flag = alpha > > 2 ;
tmp_flag = tmp_flag + 2 ;
tmp_flag = ( p0_asub_q0 < tmp_flag ) ;
{
v16u8 p2_asub_p0 ;
p2_asub_p0 = __msa_asub_u_b ( p2_org , p0_org ) ;
is_less_than_beta = ( p2_asub_p0 < beta ) ;
}
p2_asub_p0 = __msa_asub_u_b ( p2_org , p0_org ) ;
is_less_than_beta = ( p2_asub_p0 < beta ) ;
is_less_than_beta = tmp_flag & is_less_than_beta ;
negate_is_less_than_beta = __msa_xori_b ( is_less_than_beta , 0xff ) ;
is_less_than_beta = is_less_than_beta & is_less_than ;
negate_is_less_than_beta = negate_is_less_than_beta & is_less_than ;
/* right */
{
v16u8 is_less_than_beta_r ;
is_less_than_beta_r =
( v16u8 ) __msa_sldi_b ( ( v16i8 ) is_less_than_beta , zero , 8 ) ;
if ( ! __msa_test_bz_v ( is_less_than_beta_r ) ) {
v8i16 p3_org_r ;
ILVR_B2_SH ( zero , p3_org , zero , p2_org , p3_org_r , p2_r ) ;
AVC_LPF_P0P1P2_OR_Q0Q1Q2 ( p3_org_r , p0_org_r , q0_org_r , p1_org_r ,
p2_r , q1_org_r , p0_r , p1_r , p2_r ) ;
}
}
/* left */
{
v16u8 is_less_than_beta_l ;
is_less_than_beta_l =
( v16u8 ) __msa_sldi_b ( zero , ( v16i8 ) is_less_than_beta , 8 ) ;
if ( ! __msa_test_bz_v ( is_less_than_beta_l ) ) {
v8i16 p3_org_l ;
ILVL_B2_SH ( zero , p3_org , zero , p2_org , p3_org_l , p2_l ) ;
AVC_LPF_P0P1P2_OR_Q0Q1Q2 ( p3_org_l , p0_org_l , q0_org_l , p1_org_l ,
p2_l , q1_org_l , p0_l , p1_l , p2_l ) ;
}
}
/* combine and store */
if ( ! __msa_test_bz_v ( is_less_than_beta ) ) {
v16u8 p0 , p2 , p1 ;
v16u8 p2 , p1 ;
v8i16 p3_org_r , p3_org_l ;
v8i16 p2_l = { 0 } ;
v8i16 p2_r = { 0 } ;
v8i16 p1_l = { 0 } ;
v8i16 p1_r = { 0 } ;
ILVR_B2_SH ( zero , p3_org , zero , p2_org , p3_org_r , p2_r ) ;
AVC_LPF_P0P1P2_OR_Q0Q1Q2 ( p3_org_r , p0_org_r , q0_org_r , p1_org_r ,
p2_r , q1_org_r , p0_r , p1_r , p2_r ) ;
ILVL_B2_SH ( zero , p3_org , zero , p2_org , p3_org_l , p2_l ) ;
AVC_LPF_P0P1P2_OR_Q0Q1Q2 ( p3_org_l , p0_org_l , q0_org_l , p1_org_l ,
p2_l , q1_org_l , p0_l , p1_l , p2_l ) ;
PCKEV_B3_UB ( p0_l , p0_r , p1_l , p1_r , p2_l , p2_r , p0 , p1 , p2 ) ;
p0_org = __msa_bmnz_v ( p0_org , p0 , is_less_than_beta ) ;
p1_org = __msa_bmnz_v ( p1_org , p1 , is_less_than_beta ) ;
p2_org = __msa_bmnz_v ( p2_org , p2 , is_less_than_beta ) ;
}
/* right */
{
v16u8 negate_is_less_than_beta_r ;
negate_is_less_than_beta_r =
( v16u8 ) __msa_sldi_b ( ( v16i8 ) negate_is_less_than_beta , zero , 8 ) ;
if ( ! __msa_test_bz_v ( negate_is_less_than_beta_r ) ) {
AVC_LPF_P0_OR_Q0 ( p0_org_r , q1_org_r , p1_org_r , p0_r ) ;
}
}
/* left */
{
v16u8 negate_is_less_than_beta_l ;
negate_is_less_than_beta_l =
( v16u8 ) __msa_sldi_b ( zero , ( v16i8 ) negate_is_less_than_beta , 8 ) ;
if ( ! __msa_test_bz_v ( negate_is_less_than_beta_l ) ) {
AVC_LPF_P0_OR_Q0 ( p0_org_l , q1_org_l , p1_org_l , p0_l ) ;
}
}
AVC_LPF_P0_OR_Q0 ( p0_org_r , q1_org_r , p1_org_r , p0_r ) ;
AVC_LPF_P0_OR_Q0 ( p0_org_l , q1_org_l , p1_org_l , p0_l ) ;
if ( ! __msa_test_bz_v ( negate_is_less_than_beta ) ) {
v16u8 p0 ;
p0 = ( v16u8 ) __msa_pckev_b ( ( v16i8 ) p0_l , ( v16i8 ) p0_r ) ;
p0_org = __msa_bmnz_v ( p0_org , p0 , negate_is_less_than_beta ) ;
p0 = ( v16u8 ) __msa_pckev_b ( ( v16i8 ) p0_l , ( v16i8 ) p0_r ) ;
p0_org = __msa_bmnz_v ( p0_org , p0 , negate_is_less_than_beta ) ;
}
{
v16u8 q2_asub_q0 ;
q2_asub_q0 = __msa_asub_u_b ( q2_org , q0_org ) ;
is_less_than_beta = ( q2_asub_q0 < beta ) ;
}
q2_asub_q0 = __msa_asub_u_b ( q2_org , q0_org ) ;
is_less_than_beta = ( q2_asub_q0 < beta ) ;
is_less_than_beta = is_less_than_beta & tmp_flag ;
negate_is_less_than_beta = __msa_xori_b ( is_less_than_beta , 0xff ) ;
@ -993,37 +877,21 @@ static void avc_loopfilter_luma_intra_edge_ver_msa(uint8_t *data,
is_less_than_beta = is_less_than_beta & is_less_than ;
negate_is_less_than_beta = negate_is_less_than_beta & is_less_than ;
/* right */
{
v16u8 is_less_than_beta_r ;
is_less_than_beta_r =
( v16u8 ) __msa_sldi_b ( ( v16i8 ) is_less_than_beta , zero , 8 ) ;
if ( ! __msa_test_bz_v ( is_less_than_beta_r ) ) {
v8i16 q3_org_r ;
ILVR_B2_SH ( zero , q3_org , zero , q2_org , q3_org_r , q2_r ) ;
AVC_LPF_P0P1P2_OR_Q0Q1Q2 ( q3_org_r , q0_org_r , p0_org_r , q1_org_r ,
q2_r , p1_org_r , q0_r , q1_r , q2_r ) ;
}
}
/* left */
{
v16u8 is_less_than_beta_l ;
if ( ! __msa_test_bz_v ( is_less_than_beta ) ) {
v16u8 q1 , q2 ;
v8i16 q3_org_r , q3_org_l ;
v8i16 q1_l = { 0 } ;
v8i16 q1_r = { 0 } ;
v8i16 q2_l = { 0 } ;
v8i16 q2_r = { 0 } ;
is_less_than_beta_l =
( v16u8 ) __msa_sldi_b ( zero , ( v16i8 ) is_less_than_beta , 8 ) ;
if ( ! __msa_test_bz_v ( is_less_than_beta_l ) ) {
v8i16 q3_org_l ;
ILVR_B2_SH ( zero , q3_org , zero , q2_org , q3_org_r , q2_r ) ;
AVC_LPF_P0P1P2_OR_Q0Q1Q2 ( q3_org_r , q0_org_r , p0_org_r , q1_org_r ,
q2_r , p1_org_r , q0_r , q1_r , q2_r ) ;
ILVL_B2_SH ( zero , q3_org , zero , q2_org , q3_org_l , q2_l ) ;
AVC_LPF_P0P1P2_OR_Q0Q1Q2 ( q3_org_l , q0_org_l , p0_org_l , q1_org_l ,
q2_l , p1_org_l , q0_l , q1_l , q2_l ) ;
}
}
/* combine and store */
if ( ! __msa_test_bz_v ( is_less_than_beta ) ) {
v16u8 q0 , q1 , q2 ;
ILVL_B2_SH ( zero , q3_org , zero , q2_org , q3_org_l , q2_l ) ;
AVC_LPF_P0P1P2_OR_Q0Q1Q2 ( q3_org_l , q0_org_l , p0_org_l , q1_org_l ,
q2_l , p1_org_l , q0_l , q1_l , q2_l ) ;
PCKEV_B3_UB ( q0_l , q0_r , q1_l , q1_r , q2_l , q2_r , q0 , q1 , q2 ) ;
q0_org = __msa_bmnz_v ( q0_org , q0 , is_less_than_beta ) ;
@ -1031,33 +899,12 @@ static void avc_loopfilter_luma_intra_edge_ver_msa(uint8_t *data,
q2_org = __msa_bmnz_v ( q2_org , q2 , is_less_than_beta ) ;
}
/* right */
{
v16u8 negate_is_less_than_beta_r ;
AVC_LPF_P0_OR_Q0 ( q0_org_r , p1_org_r , q1_org_r , q0_r ) ;
AVC_LPF_P0_OR_Q0 ( q0_org_l , p1_org_l , q1_org_l , q0_l ) ;
negate_is_less_than_beta_r =
( v16u8 ) __msa_sldi_b ( ( v16i8 ) negate_is_less_than_beta , zero , 8 ) ;
if ( ! __msa_test_bz_v ( negate_is_less_than_beta_r ) ) {
AVC_LPF_P0_OR_Q0 ( q0_org_r , p1_org_r , q1_org_r , q0_r ) ;
}
}
/* left */
{
v16u8 negate_is_less_than_beta_l ;
q0 = ( v16u8 ) __msa_pckev_b ( ( v16i8 ) q0_l , ( v16i8 ) q0_r ) ;
q0_org = __msa_bmnz_v ( q0_org , q0 , negate_is_less_than_beta ) ;
negate_is_less_than_beta_l =
( v16u8 ) __msa_sldi_b ( zero , ( v16i8 ) negate_is_less_than_beta , 8 ) ;
if ( ! __msa_test_bz_v ( negate_is_less_than_beta_l ) ) {
AVC_LPF_P0_OR_Q0 ( q0_org_l , p1_org_l , q1_org_l , q0_l ) ;
}
}
if ( ! __msa_test_bz_v ( negate_is_less_than_beta ) ) {
v16u8 q0 ;
q0 = ( v16u8 ) __msa_pckev_b ( ( v16i8 ) q0_l , ( v16i8 ) q0_r ) ;
q0_org = __msa_bmnz_v ( q0_org , q0 , negate_is_less_than_beta ) ;
}
}
{
v8i16 tp0 , tp1 , tp2 , tp3 , tmp2 , tmp3 , tmp4 , tmp5 , tmp6 , tmp7 ;
@ -1082,6 +929,7 @@ static void avc_loopfilter_luma_intra_edge_ver_msa(uint8_t *data,
ST4x4_UB ( tmp7 , tmp7 , 0 , 1 , 2 , 3 , src , img_width ) ;
ST2x4_UB ( tmp5 , 4 , src + 4 , img_width ) ;
}
}
}
static void avc_h_loop_filter_luma_mbaff_intra_msa ( uint8_t * src , int32_t stride ,