@@ -21,7 +21,7 @@
 #include "libavutil/mips/generic_macros_msa.h"
 #include "h264dsp_mips.h"
 
-static void avc_wgt_4x2_msa(uint8_t *data, int32_t stride,
+static void avc_wgt_4x2_msa(uint8_t *data, ptrdiff_t stride,
                             int32_t log2_denom, int32_t src_weight,
                             int32_t offset_in)
 {
@@ -48,8 +48,9 @@ static void avc_wgt_4x2_msa(uint8_t *data, int32_t stride,
     ST_W2(src0, 0, 1, data, stride);
 }
 
-static void avc_wgt_4x4_msa(uint8_t *data, int32_t stride, int32_t log2_denom,
-                            int32_t src_weight, int32_t offset_in)
+static void avc_wgt_4x4_msa(uint8_t *data, ptrdiff_t stride,
+                            int32_t log2_denom, int32_t src_weight,
+                            int32_t offset_in)
 {
     uint32_t tp0, tp1, tp2, tp3, offset_val;
     v16u8 src0 = { 0 };
@@ -74,8 +75,9 @@ static void avc_wgt_4x4_msa(uint8_t *data, int32_t stride, int32_t log2_denom,
     ST_W4(src0, 0, 1, 2, 3, data, stride);
 }
 
-static void avc_wgt_4x8_msa(uint8_t *data, int32_t stride, int32_t log2_denom,
-                            int32_t src_weight, int32_t offset_in)
+static void avc_wgt_4x8_msa(uint8_t *data, ptrdiff_t stride,
+                            int32_t log2_denom, int32_t src_weight,
+                            int32_t offset_in)
 {
     uint32_t tp0, tp1, tp2, tp3, offset_val;
     v16u8 src0 = { 0 }, src1 = { 0 };
@@ -105,8 +107,9 @@ static void avc_wgt_4x8_msa(uint8_t *data, int32_t stride, int32_t log2_denom,
     ST_W8(src0, src1, 0, 1, 2, 3, 0, 1, 2, 3, data, stride);
 }
 
-static void avc_wgt_8x4_msa(uint8_t *data, int32_t stride, int32_t log2_denom,
-                            int32_t src_weight, int32_t offset_in)
+static void avc_wgt_8x4_msa(uint8_t *data, ptrdiff_t stride,
+                            int32_t log2_denom, int32_t src_weight,
+                            int32_t offset_in)
 {
     uint32_t offset_val;
     uint64_t tp0, tp1, tp2, tp3;
@@ -136,7 +139,7 @@ static void avc_wgt_8x4_msa(uint8_t *data, int32_t stride, int32_t log2_denom,
     ST_D4(src0, src1, 0, 1, 0, 1, data, stride);
 }
 
-static void avc_wgt_8x8_msa(uint8_t *data, int32_t stride, int32_t log2_denom,
+static void avc_wgt_8x8_msa(uint8_t *data, ptrdiff_t stride, int32_t log2_denom,
                             int32_t src_weight, int32_t offset_in)
 {
     uint32_t offset_val;
@@ -178,8 +181,9 @@ static void avc_wgt_8x8_msa(uint8_t *data, int32_t stride, int32_t log2_denom,
     ST_D8(src0, src1, src2, src3, 0, 1, 0, 1, 0, 1, 0, 1, data, stride);
 }
 
-static void avc_wgt_8x16_msa(uint8_t *data, int32_t stride, int32_t log2_denom,
-                             int32_t src_weight, int32_t offset_in)
+static void avc_wgt_8x16_msa(uint8_t *data, ptrdiff_t stride,
+                             int32_t log2_denom, int32_t src_weight,
+                             int32_t offset_in)
 {
     uint32_t offset_val, cnt;
     uint64_t tp0, tp1, tp2, tp3;
@@ -223,7 +227,7 @@ static void avc_wgt_8x16_msa(uint8_t *data, int32_t stride, int32_t log2_denom,
     }
 }
 
-static void avc_biwgt_4x2_msa(uint8_t *src, uint8_t *dst, int32_t stride,
+static void avc_biwgt_4x2_msa(uint8_t *src, uint8_t *dst, ptrdiff_t stride,
                               int32_t log2_denom, int32_t src_weight,
                               int32_t dst_weight, int32_t offset_in)
 {
@@ -256,7 +260,7 @@ static void avc_biwgt_4x2_msa(uint8_t *src, uint8_t *dst, int32_t stride,
     ST_W2(dst0, 0, 1, dst, stride);
 }
 
-static void avc_biwgt_4x4_msa(uint8_t *src, uint8_t *dst, int32_t stride,
+static void avc_biwgt_4x4_msa(uint8_t *src, uint8_t *dst, ptrdiff_t stride,
                               int32_t log2_denom, int32_t src_weight,
                               int32_t dst_weight, int32_t offset_in)
 {
@@ -290,7 +294,7 @@ static void avc_biwgt_4x4_msa(uint8_t *src, uint8_t *dst, int32_t stride,
     ST_W4(dst0, 0, 1, 2, 3, dst, stride);
 }
 
-static void avc_biwgt_4x8_msa(uint8_t *src, uint8_t *dst, int32_t stride,
+static void avc_biwgt_4x8_msa(uint8_t *src, uint8_t *dst, ptrdiff_t stride,
                               int32_t log2_denom, int32_t src_weight,
                               int32_t dst_weight, int32_t offset_in)
 {
@@ -330,7 +334,7 @@ static void avc_biwgt_4x8_msa(uint8_t *src, uint8_t *dst, int32_t stride,
     ST_W8(dst0, dst1, 0, 1, 2, 3, 0, 1, 2, 3, dst, stride);
 }
 
-static void avc_biwgt_8x4_msa(uint8_t *src, uint8_t *dst, int32_t stride,
+static void avc_biwgt_8x4_msa(uint8_t *src, uint8_t *dst, ptrdiff_t stride,
                               int32_t log2_denom, int32_t src_weight,
                               int32_t dst_weight, int32_t offset_in)
 {
@@ -368,7 +372,7 @@ static void avc_biwgt_8x4_msa(uint8_t *src, uint8_t *dst, int32_t stride,
     ST_D4(dst0, dst1, 0, 1, 0, 1, dst, stride);
 }
 
-static void avc_biwgt_8x8_msa(uint8_t *src, uint8_t *dst, int32_t stride,
+static void avc_biwgt_8x8_msa(uint8_t *src, uint8_t *dst, ptrdiff_t stride,
                               int32_t log2_denom, int32_t src_weight,
                               int32_t dst_weight, int32_t offset_in)
 {
@@ -419,7 +423,7 @@ static void avc_biwgt_8x8_msa(uint8_t *src, uint8_t *dst, int32_t stride,
     ST_D8(dst0, dst1, dst2, dst3, 0, 1, 0, 1, 0, 1, 0, 1, dst, stride);
 }
 
-static void avc_biwgt_8x16_msa(uint8_t *src, uint8_t *dst, int32_t stride,
+static void avc_biwgt_8x16_msa(uint8_t *src, uint8_t *dst, ptrdiff_t stride,
                                int32_t log2_denom, int32_t src_weight,
                                int32_t dst_weight, int32_t offset_in)
 {
@@ -679,7 +683,7 @@ static void avc_biwgt_8x16_msa(uint8_t *src, uint8_t *dst, int32_t stride,
 static void avc_loopfilter_luma_intra_edge_hor_msa(uint8_t *data,
                                                    uint8_t alpha_in,
                                                    uint8_t beta_in,
-                                                   uint32_t img_width)
+                                                   ptrdiff_t img_width)
 {
     v16u8 p0_asub_q0, p1_asub_p0, q1_asub_q0;
     v16u8 is_less_than, is_less_than_beta, is_less_than_alpha;
@@ -812,7 +816,7 @@ static void avc_loopfilter_luma_intra_edge_hor_msa(uint8_t *data,
 static void avc_loopfilter_luma_intra_edge_ver_msa(uint8_t *data,
                                                    uint8_t alpha_in,
                                                    uint8_t beta_in,
-                                                   uint32_t img_width)
+                                                   ptrdiff_t img_width)
 {
     uint8_t *src = data - 4;
     v16u8 alpha, beta, p0_asub_q0;
@@ -969,7 +973,8 @@ static void avc_loopfilter_luma_intra_edge_ver_msa(uint8_t *data,
     }
 }
 
-static void avc_h_loop_filter_luma_mbaff_intra_msa(uint8_t *src, int32_t stride,
+static void avc_h_loop_filter_luma_mbaff_intra_msa(uint8_t *src,
+                                                   ptrdiff_t stride,
                                                    int32_t alpha_in,
                                                    int32_t beta_in)
 {
@@ -1171,7 +1176,7 @@ static void avc_h_loop_filter_luma_mbaff_intra_msa(uint8_t *src, int32_t stride,
 static void avc_loopfilter_cb_or_cr_intra_edge_hor_msa(uint8_t *data_cb_or_cr,
                                                        uint8_t alpha_in,
                                                        uint8_t beta_in,
-                                                       uint32_t img_width)
+                                                       ptrdiff_t img_width)
 {
     v16u8 alpha, beta;
     v16u8 is_less_than;
@@ -1220,7 +1225,7 @@ static void avc_loopfilter_cb_or_cr_intra_edge_hor_msa(uint8_t *data_cb_or_cr,
 static void avc_loopfilter_cb_or_cr_intra_edge_ver_msa(uint8_t *data_cb_or_cr,
                                                        uint8_t alpha_in,
                                                        uint8_t beta_in,
-                                                       uint32_t img_width)
+                                                       ptrdiff_t img_width)
 {
     v8i16 tmp1;
     v16u8 alpha, beta, is_less_than;
@@ -1286,7 +1291,7 @@ static void avc_loopfilter_luma_inter_edge_ver_msa(uint8_t *data,
                                                    uint8_t tc2, uint8_t tc3,
                                                    uint8_t alpha_in,
                                                    uint8_t beta_in,
-                                                   uint32_t img_width)
+                                                   ptrdiff_t img_width)
 {
     v16u8 tmp_vec, bs = { 0 };
 
@@ -1566,7 +1571,7 @@ static void avc_loopfilter_luma_inter_edge_hor_msa(uint8_t *data,
                                                    uint8_t tc2, uint8_t tc3,
                                                    uint8_t alpha_in,
                                                    uint8_t beta_in,
-                                                   uint32_t image_width)
+                                                   ptrdiff_t image_width)
 {
     v16u8 tmp_vec;
     v16u8 bs = { 0 };
@@ -1715,7 +1720,7 @@ static void avc_loopfilter_luma_inter_edge_hor_msa(uint8_t *data,
     }
 }
 
-static void avc_h_loop_filter_luma_mbaff_msa(uint8_t *in, int32_t stride,
+static void avc_h_loop_filter_luma_mbaff_msa(uint8_t *in, ptrdiff_t stride,
                                              int32_t alpha_in, int32_t beta_in,
                                              int8_t *tc0)
 {
@@ -1941,7 +1946,7 @@ static void avc_loopfilter_cb_or_cr_inter_edge_hor_msa(uint8_t *data,
                                                        uint8_t tc2, uint8_t tc3,
                                                        uint8_t alpha_in,
                                                        uint8_t beta_in,
-                                                       uint32_t img_width)
+                                                       ptrdiff_t img_width)
 {
     v16u8 alpha, beta;
     v8i16 tmp_vec;
@@ -2027,7 +2032,7 @@ static void avc_loopfilter_cb_or_cr_inter_edge_ver_msa(uint8_t *data,
                                                        uint8_t tc2, uint8_t tc3,
                                                        uint8_t alpha_in,
                                                        uint8_t beta_in,
-                                                       uint32_t img_width)
+                                                       ptrdiff_t img_width)
 {
     uint8_t *src;
     v16u8 alpha, beta;
@@ -2115,7 +2120,7 @@ static void avc_loopfilter_cb_or_cr_inter_edge_ver_msa(uint8_t *data,
     }
 }
 
-static void avc_h_loop_filter_chroma422_msa(uint8_t *src, int32_t stride,
+static void avc_h_loop_filter_chroma422_msa(uint8_t *src, ptrdiff_t stride,
                                             int32_t alpha_in, int32_t beta_in,
                                             int8_t *tc0)
 {
@@ -2139,7 +2144,8 @@ static void avc_h_loop_filter_chroma422_msa(uint8_t *src, int32_t stride,
     }
 }
 
-static void avc_h_loop_filter_chroma422_mbaff_msa(uint8_t *src, int32_t stride,
+static void avc_h_loop_filter_chroma422_mbaff_msa(uint8_t *src,
+                                                  ptrdiff_t stride,
                                                   int32_t alpha_in,
                                                   int32_t beta_in,
                                                   int8_t *tc0)
@@ -2171,7 +2177,7 @@ static void avc_h_loop_filter_chroma422_mbaff_msa(uint8_t *src, int32_t stride,
     }
 }
 
-void ff_h264_h_lpf_luma_inter_msa(uint8_t *data, int img_width,
+void ff_h264_h_lpf_luma_inter_msa(uint8_t *data, ptrdiff_t img_width,
                                   int alpha, int beta, int8_t *tc)
 {
     uint8_t bs0 = 1;
@@ -2193,7 +2199,7 @@ void ff_h264_h_lpf_luma_inter_msa(uint8_t *data, int img_width,
                                            alpha, beta, img_width);
 }
 
-void ff_h264_v_lpf_luma_inter_msa(uint8_t *data, int img_width,
+void ff_h264_v_lpf_luma_inter_msa(uint8_t *data, ptrdiff_t img_width,
                                   int alpha, int beta, int8_t *tc)
 {
 
@@ -2216,7 +2222,7 @@ void ff_h264_v_lpf_luma_inter_msa(uint8_t *data, int img_width,
                                            alpha, beta, img_width);
 }
 
-void ff_h264_h_lpf_chroma_inter_msa(uint8_t *data, int img_width,
+void ff_h264_h_lpf_chroma_inter_msa(uint8_t *data, ptrdiff_t img_width,
                                     int alpha, int beta, int8_t *tc)
 {
     uint8_t bs0 = 1;
@@ -2238,7 +2244,7 @@ void ff_h264_h_lpf_chroma_inter_msa(uint8_t *data, int img_width,
                                                alpha, beta, img_width);
 }
 
-void ff_h264_v_lpf_chroma_inter_msa(uint8_t *data, int img_width,
+void ff_h264_v_lpf_chroma_inter_msa(uint8_t *data, ptrdiff_t img_width,
                                     int alpha, int beta, int8_t *tc)
 {
     uint8_t bs0 = 1;
@@ -2260,40 +2266,40 @@ void ff_h264_v_lpf_chroma_inter_msa(uint8_t *data, int img_width,
                                                alpha, beta, img_width);
 }
 
-void ff_h264_h_lpf_luma_intra_msa(uint8_t *data, int img_width,
+void ff_h264_h_lpf_luma_intra_msa(uint8_t *data, ptrdiff_t img_width,
                                   int alpha, int beta)
 {
     avc_loopfilter_luma_intra_edge_ver_msa(data, (uint8_t) alpha,
                                            (uint8_t) beta,
-                                           (unsigned int) img_width);
+                                           img_width);
 }
 
-void ff_h264_v_lpf_luma_intra_msa(uint8_t *data, int img_width,
+void ff_h264_v_lpf_luma_intra_msa(uint8_t *data, ptrdiff_t img_width,
                                   int alpha, int beta)
 {
     avc_loopfilter_luma_intra_edge_hor_msa(data, (uint8_t) alpha,
                                            (uint8_t) beta,
-                                           (unsigned int) img_width);
+                                           img_width);
 }
 
-void ff_h264_h_lpf_chroma_intra_msa(uint8_t *data, int img_width,
+void ff_h264_h_lpf_chroma_intra_msa(uint8_t *data, ptrdiff_t img_width,
                                     int alpha, int beta)
 {
     avc_loopfilter_cb_or_cr_intra_edge_ver_msa(data, (uint8_t) alpha,
                                                (uint8_t) beta,
-                                               (unsigned int) img_width);
+                                               img_width);
 }
 
-void ff_h264_v_lpf_chroma_intra_msa(uint8_t *data, int img_width,
+void ff_h264_v_lpf_chroma_intra_msa(uint8_t *data, ptrdiff_t img_width,
                                     int alpha, int beta)
 {
     avc_loopfilter_cb_or_cr_intra_edge_hor_msa(data, (uint8_t) alpha,
                                                (uint8_t) beta,
-                                               (unsigned int) img_width);
+                                               img_width);
 }
 
 void ff_h264_h_loop_filter_chroma422_msa(uint8_t *src,
-                                         int32_t ystride,
+                                         ptrdiff_t ystride,
                                          int32_t alpha, int32_t beta,
                                          int8_t *tc0)
 {
@@ -2301,7 +2307,7 @@ void ff_h264_h_loop_filter_chroma422_msa(uint8_t *src,
 }
 
 void ff_h264_h_loop_filter_chroma422_mbaff_msa(uint8_t *src,
-                                               int32_t ystride,
+                                               ptrdiff_t ystride,
                                                int32_t alpha,
                                                int32_t beta,
                                                int8_t *tc0)
@@ -2310,7 +2316,7 @@ void ff_h264_h_loop_filter_chroma422_mbaff_msa(uint8_t *src,
 }
 
 void ff_h264_h_loop_filter_luma_mbaff_msa(uint8_t *src,
-                                          int32_t ystride,
+                                          ptrdiff_t ystride,
                                           int32_t alpha,
                                           int32_t beta,
                                           int8_t *tc0)
@@ -2319,7 +2325,7 @@ void ff_h264_h_loop_filter_luma_mbaff_msa(uint8_t *src,
 }
 
 void ff_h264_h_loop_filter_luma_mbaff_intra_msa(uint8_t *src,
-                                                int32_t ystride,
+                                                ptrdiff_t ystride,
                                                 int32_t alpha,
                                                 int32_t beta)
 {