diff --git a/libavcodec/x86/h264_intrapred.asm b/libavcodec/x86/h264_intrapred.asm
index 7ab7231f45..d01b134635 100644
--- a/libavcodec/x86/h264_intrapred.asm
+++ b/libavcodec/x86/h264_intrapred.asm
@@ -1605,6 +1605,123 @@ cglobal pred8x8l_down_right_mmxext, 4,5
     por         mm0, mm1
     movq [r0+r3*1], mm0
     RET
+
+%macro PRED8x8L_DOWN_RIGHT 1 ; %1 = function-name suffix (instantiated below with sse2 and ssse3); emits pred8x8l_down_right_%1
+cglobal pred8x8l_down_right_%1, 4,5 ; C prototype: (uint8_t *src, int has_topleft, int has_topright, int stride); assumed x86inc mapping r0=src, r1=has_topleft, r2=has_topright, r3=stride, r4=scratch -- TODO confirm
+    sub          r0, r3                  ; r0 = src - stride (the row above the block)
+    lea          r4, [r0+r3*2]           ; r4 = src + stride
+    movq        mm0, [r0+r3*1-8]         ; 8 bytes ending just left of row 0; byte 7 is row 0's left neighbour
+    punpckhbw   mm0, [r0+r3*0-8]         ; interleave with the row above: bytes 6,7 = left of row 0, top-left
+    movq        mm1, [r4+r3*1-8]         ; same for row 2 ...
+    punpckhbw   mm1, [r0+r3*2-8]         ; ... paired with row 1
+    mov          r4, r0                  ; r4 = src - stride, kept until the final stores
+    punpckhwd   mm1, mm0                 ; high dword = left of rows 2,1,0 + top-left
+    lea          r0, [r0+r3*4]           ; r0 = src + 3*stride
+    movq        mm2, [r0+r3*1-8]         ; row 4 ...
+    punpckhbw   mm2, [r0+r3*0-8]         ; ... paired with row 3
+    lea          r0, [r0+r3*2]           ; r0 = src + 5*stride
+    movq        mm3, [r0+r3*1-8]         ; row 6 ...
+    punpckhbw   mm3, [r0+r3*0-8]         ; ... paired with row 5
+    punpckhwd   mm3, mm2                 ; high dword = left of rows 6,5,4,3
+    punpckhdq   mm3, mm1                 ; mm3 bytes 0..7 = left of rows 6..0, then top-left
+    lea          r0, [r0+r3*2]           ; r0 = src + 7*stride
+    movq        mm0, [r0+r3*0-8]         ; byte 7 = left neighbour of row 7
+    movq        mm1, [r4]                ; the 8 pixels of the row above the block
+    mov          r0, r4                  ; r0 back to src - stride
+    movq        mm4, mm3
+    movq        mm2, mm3
+    PALIGNR     mm4, mm0, 7, mm0         ; mm4 = left of rows 7..0 (mm3 moved up one byte, row 7 in byte 0) -- assumes PALIGNR_* follow palignr semantics
+    PALIGNR     mm1, mm2, 1, mm2         ; mm1 = mm3 moved down one byte, first top pixel in byte 7
+    test        r1, r1
+    jz .fix_lt_1                         ; r1 == 0: patch the top-left sample first
+    jmp .do_left
+.fix_lt_1:
+    movq        mm5, mm3
+    pxor        mm5, mm4                 ; byte 7 = top-left XOR left-of-row-0
+    psrlq       mm5, 56
+    psllq       mm5, 48                  ; isolate that difference in byte 6
+    pxor        mm1, mm5                 ; byte 6 of mm1 (top-left) becomes left-of-row-0
+    jmp .do_left
+.fix_lt_2:
+    movq        mm5, mm3
+    pxor        mm5, mm2
+    psllq       mm5, 56
+    psrlq       mm5, 56                  ; isolate the byte-0 difference
+    pxor        mm2, mm5                 ; byte 0 of mm2 (top-left) becomes the first top pixel
+    test         r2, r2
+    jnz .do_top                          ; r2 != 0: skip the .fix_tr_1 patch; else fall through into it
+.fix_tr_1:
+    movq        mm5, mm3
+    pxor        mm5, mm1
+    psrlq       mm5, 56
+    psllq       mm5, 56                  ; isolate the byte-7 difference
+    pxor        mm1, mm5                 ; byte 7 of mm1 (pixel right of the top row) becomes the last top pixel
+    jmp .do_top
+.do_left:
+    movq        mm0, mm4                 ; save mm4 across the first macro call (restored right after)
+    PRED4x4_LOWPASS mm2, mm1, mm4, mm3, mm5 ; macro defined outside this hunk; presumably a 3-tap smoothing of mm3 with its shifted neighbours into mm2 -- TODO confirm operand roles
+    movq        mm4, mm0
+    movq        mm7, mm2                 ; keep the first filter result
+    movq2dq    xmm3, mm2                 ; ... and a copy in the low qword of xmm3
+    PRED4x4_LOWPASS mm1, mm3, mm0, mm4, mm5 ; second filter pass; only its lowest byte is used below
+    psllq       mm1, 56                  ; keep only byte 0, moved to byte 7
+    PALIGNR     mm7, mm1, 7, mm3         ; mm7 = that byte, then bytes 0..6 of the first filter result
+    movq2dq    xmm1, mm7
+    movq        mm0, [r0-8]              ; row above the block: 8 bytes to the left (byte 7 = top-left)
+    movq        mm3, [r0]                ; the 8 top pixels
+    movq        mm1, [r0+8]              ; the 8 bytes to the right of them
+    movq        mm2, mm3
+    movq        mm4, mm3
+    PALIGNR     mm2, mm0, 7, mm0         ; mm2 = top-left followed by top pixels 0..6
+    PALIGNR     mm1, mm4, 1, mm4         ; mm1 = top pixels 1..7 followed by the first byte to their right
+    test         r1, r1
+    jz .fix_lt_2                         ; r1 == 0: patch top-left (and the top-right byte too if r2 == 0)
+    test         r2, r2
+    jz .fix_tr_1                         ; r2 == 0: patch only the top-right byte
+.do_top:
+    PRED4x4_LOWPASS mm4, mm2, mm1, mm3, mm5 ; filter the top row into mm4 (same external macro as above)
+    movq2dq   xmm4, mm4
+    lea         r1, [r0+r3*2]            ; r1 = src + stride (has_topleft is not read again)
+    movdqa    xmm0, xmm3
+    pslldq    xmm4, 8                    ; filtered top row into the high qword
+    por       xmm3, xmm4                 ; xmm3 = first left-filter result (low qword) | filtered top (high qword)
+    lea         r2, [r1+r3*2]            ; r2 = src + 3*stride (has_topright is not read again)
+    pslldq    xmm4, 1
+    por       xmm1, xmm4                 ; top row, one byte higher, merged above mm7's bytes
+    psrldq    xmm0, 7
+    pslldq    xmm0, 15
+    psrldq    xmm0, 7                    ; the three shifts leave only original byte 7, now at byte 8
+    por       xmm1, xmm0                 ; xmm1 = xmm3 moved up one byte, with mm7's byte 0 at the bottom
+    lea         r0, [r2+r3*2]            ; r0 = src + 5*stride
+    movdqa    xmm2, xmm3
+    psrldq    xmm2, 1                    ; xmm2 = xmm3 moved down one byte
+INIT_XMM                                 ; change x86inc mode mid-function; presumably so the macro below expands to 128-bit ops -- TODO confirm
+    PRED4x4_LOWPASS xmm0, xmm1, xmm2, xmm3, xmm4 ; diagonal filter over the 16-byte edge vector, result in xmm0
+    movdqa    xmm1, xmm0
+    psrldq    xmm1, 1                    ; xmm1 = result advanced by one byte
+    movq [r0+r3*2], xmm0                 ; row 7
+    movq [r0+r3*1], xmm1                 ; row 6
+    psrldq    xmm0, 2                    ; each following row pair starts two bytes further along
+    psrldq    xmm1, 2
+    movq [r2+r3*2], xmm0                 ; row 5
+    movq [r2+r3*1], xmm1                 ; row 4
+    psrldq    xmm0, 2
+    psrldq    xmm1, 2
+    movq [r1+r3*2], xmm0                 ; row 3
+    movq [r1+r3*1], xmm1                 ; row 2
+    psrldq    xmm0, 2
+    psrldq    xmm1, 2
+    movq [r4+r3*2], xmm0                 ; row 1
+    movq [r4+r3*1], xmm1                 ; row 0
+    RET
+%endmacro
+
+INIT_MMX                                 ; macro body starts in MMX mode (it switches to XMM mode itself)
+%define PALIGNR PALIGNR_MMX
+PRED8x8L_DOWN_RIGHT sse2                 ; emits pred8x8l_down_right_sse2
+INIT_MMX                                 ; back to MMX mode: the previous expansion ended after INIT_XMM
+%define PALIGNR PALIGNR_SSSE3
+PRED8x8L_DOWN_RIGHT ssse3                ; emits pred8x8l_down_right_ssse3
 %endif
 
 ;-----------------------------------------------------------------------------
diff --git a/libavcodec/x86/h264_intrapred_init.c b/libavcodec/x86/h264_intrapred_init.c
index 4f7f2aa0f0..5068a81215 100644
--- a/libavcodec/x86/h264_intrapred_init.c
+++ b/libavcodec/x86/h264_intrapred_init.c
@@ -70,6 +70,8 @@ void ff_pred8x8l_vertical_ssse3 (uint8_t *src, int has_topleft, int has_topri
 void ff_pred8x8l_down_left_sse2 (uint8_t *src, int has_topleft, int has_topright, int stride);
 void ff_pred8x8l_down_left_ssse3 (uint8_t *src, int has_topleft, int has_topright, int stride);
 void ff_pred8x8l_down_right_mmxext (uint8_t *src, int has_topleft, int has_topright, int stride);
+void ff_pred8x8l_down_right_sse2 (uint8_t *src, int has_topleft, int has_topright, int stride);
+void ff_pred8x8l_down_right_ssse3 (uint8_t *src, int has_topleft, int has_topright, int stride);
 void ff_pred4x4_dc_mmxext (uint8_t *src, const uint8_t *topright, int stride);
 void ff_pred4x4_down_left_mmxext (uint8_t *src, const uint8_t *topright, int stride);
 void ff_pred4x4_tm_vp8_mmx (uint8_t *src, const uint8_t *topright, int stride);
@@ -149,6 +151,7 @@ void ff_h264_pred_init_x86(H264PredContext *h, int codec_id)
         h->pred16x16[DC_PRED8x8 ] = ff_pred16x16_dc_sse2;
 #if CONFIG_GPL
         h->pred8x8l [DIAG_DOWN_LEFT_PRED] = ff_pred8x8l_down_left_sse2;
+        h->pred8x8l [DIAG_DOWN_RIGHT_PRED] = ff_pred8x8l_down_right_sse2;
 #endif
         if (codec_id == CODEC_ID_VP8) {
             h->pred16x16[PLANE_PRED8x8] = ff_pred16x16_tm_vp8_sse2;
@@ -175,6 +178,7 @@ void ff_h264_pred_init_x86(H264PredContext *h, int codec_id)
         h->pred8x8l [HOR_PRED ] = ff_pred8x8l_horizontal_ssse3;
         h->pred8x8l [VERT_PRED ] = ff_pred8x8l_vertical_ssse3;
         h->pred8x8l [DIAG_DOWN_LEFT_PRED] = ff_pred8x8l_down_left_ssse3;
+        h->pred8x8l [DIAG_DOWN_RIGHT_PRED] = ff_pred8x8l_down_right_ssse3;
 #endif
         if (codec_id == CODEC_ID_VP8) {
             h->pred8x8 [PLANE_PRED8x8] = ff_pred8x8_tm_vp8_ssse3;