|
|
|
@ -360,50 +360,50 @@ static av_noinline void OPNAME ## h264_qpel8or16_v_lowpass_ ## MMX(uint8_t *dst, |
|
|
|
|
src -= 2*srcStride;\
|
|
|
|
|
\
|
|
|
|
|
while(w--){\
|
|
|
|
|
__asm__ volatile(\
|
|
|
|
|
"pxor %%mm7, %%mm7 \n\t"\
|
|
|
|
|
"movd (%0), %%mm0 \n\t"\
|
|
|
|
|
"add %2, %0 \n\t"\
|
|
|
|
|
"movd (%0), %%mm1 \n\t"\
|
|
|
|
|
"add %2, %0 \n\t"\
|
|
|
|
|
"movd (%0), %%mm2 \n\t"\
|
|
|
|
|
"add %2, %0 \n\t"\
|
|
|
|
|
"movd (%0), %%mm3 \n\t"\
|
|
|
|
|
"add %2, %0 \n\t"\
|
|
|
|
|
"movd (%0), %%mm4 \n\t"\
|
|
|
|
|
"add %2, %0 \n\t"\
|
|
|
|
|
"punpcklbw %%mm7, %%mm0 \n\t"\
|
|
|
|
|
"punpcklbw %%mm7, %%mm1 \n\t"\
|
|
|
|
|
"punpcklbw %%mm7, %%mm2 \n\t"\
|
|
|
|
|
"punpcklbw %%mm7, %%mm3 \n\t"\
|
|
|
|
|
"punpcklbw %%mm7, %%mm4 \n\t"\
|
|
|
|
|
QPEL_H264V(%%mm0, %%mm1, %%mm2, %%mm3, %%mm4, %%mm5, OP)\
|
|
|
|
|
QPEL_H264V(%%mm1, %%mm2, %%mm3, %%mm4, %%mm5, %%mm0, OP)\
|
|
|
|
|
QPEL_H264V(%%mm2, %%mm3, %%mm4, %%mm5, %%mm0, %%mm1, OP)\
|
|
|
|
|
QPEL_H264V(%%mm3, %%mm4, %%mm5, %%mm0, %%mm1, %%mm2, OP)\
|
|
|
|
|
QPEL_H264V(%%mm4, %%mm5, %%mm0, %%mm1, %%mm2, %%mm3, OP)\
|
|
|
|
|
QPEL_H264V(%%mm5, %%mm0, %%mm1, %%mm2, %%mm3, %%mm4, OP)\
|
|
|
|
|
QPEL_H264V(%%mm0, %%mm1, %%mm2, %%mm3, %%mm4, %%mm5, OP)\
|
|
|
|
|
QPEL_H264V(%%mm1, %%mm2, %%mm3, %%mm4, %%mm5, %%mm0, OP)\
|
|
|
|
|
"cmpl $16, %4 \n\t"\
|
|
|
|
|
"jne 2f \n\t"\
|
|
|
|
|
__asm__ volatile(\
|
|
|
|
|
"pxor %%mm7, %%mm7 \n\t"\
|
|
|
|
|
"movd (%0), %%mm0 \n\t"\
|
|
|
|
|
"add %2, %0 \n\t"\
|
|
|
|
|
"movd (%0), %%mm1 \n\t"\
|
|
|
|
|
"add %2, %0 \n\t"\
|
|
|
|
|
"movd (%0), %%mm2 \n\t"\
|
|
|
|
|
"add %2, %0 \n\t"\
|
|
|
|
|
"movd (%0), %%mm3 \n\t"\
|
|
|
|
|
"add %2, %0 \n\t"\
|
|
|
|
|
"movd (%0), %%mm4 \n\t"\
|
|
|
|
|
"add %2, %0 \n\t"\
|
|
|
|
|
"punpcklbw %%mm7, %%mm0 \n\t"\
|
|
|
|
|
"punpcklbw %%mm7, %%mm1 \n\t"\
|
|
|
|
|
"punpcklbw %%mm7, %%mm2 \n\t"\
|
|
|
|
|
"punpcklbw %%mm7, %%mm3 \n\t"\
|
|
|
|
|
"punpcklbw %%mm7, %%mm4 \n\t"\
|
|
|
|
|
QPEL_H264V(%%mm0, %%mm1, %%mm2, %%mm3, %%mm4, %%mm5, OP)\
|
|
|
|
|
QPEL_H264V(%%mm1, %%mm2, %%mm3, %%mm4, %%mm5, %%mm0, OP)\
|
|
|
|
|
QPEL_H264V(%%mm2, %%mm3, %%mm4, %%mm5, %%mm0, %%mm1, OP)\
|
|
|
|
|
QPEL_H264V(%%mm3, %%mm4, %%mm5, %%mm0, %%mm1, %%mm2, OP)\
|
|
|
|
|
QPEL_H264V(%%mm4, %%mm5, %%mm0, %%mm1, %%mm2, %%mm3, OP)\
|
|
|
|
|
QPEL_H264V(%%mm5, %%mm0, %%mm1, %%mm2, %%mm3, %%mm4, OP)\
|
|
|
|
|
QPEL_H264V(%%mm0, %%mm1, %%mm2, %%mm3, %%mm4, %%mm5, OP)\
|
|
|
|
|
QPEL_H264V(%%mm1, %%mm2, %%mm3, %%mm4, %%mm5, %%mm0, OP)\
|
|
|
|
|
"cmpl $16, %4 \n\t"\
|
|
|
|
|
"jne 2f \n\t"\
|
|
|
|
|
QPEL_H264V(%%mm2, %%mm3, %%mm4, %%mm5, %%mm0, %%mm1, OP)\
|
|
|
|
|
QPEL_H264V(%%mm3, %%mm4, %%mm5, %%mm0, %%mm1, %%mm2, OP)\
|
|
|
|
|
"2: \n\t"\
|
|
|
|
|
QPEL_H264V(%%mm4, %%mm5, %%mm0, %%mm1, %%mm2, %%mm3, OP)\
|
|
|
|
|
QPEL_H264V(%%mm5, %%mm0, %%mm1, %%mm2, %%mm3, %%mm4, OP)\
|
|
|
|
|
QPEL_H264V(%%mm0, %%mm1, %%mm2, %%mm3, %%mm4, %%mm5, OP)\
|
|
|
|
|
QPEL_H264V(%%mm1, %%mm2, %%mm3, %%mm4, %%mm5, %%mm0, OP)\
|
|
|
|
|
QPEL_H264V(%%mm2, %%mm3, %%mm4, %%mm5, %%mm0, %%mm1, OP)\
|
|
|
|
|
QPEL_H264V(%%mm3, %%mm4, %%mm5, %%mm0, %%mm1, %%mm2, OP)\
|
|
|
|
|
"2: \n\t"\
|
|
|
|
|
\
|
|
|
|
|
: "+a"(src), "+c"(dst)\
|
|
|
|
|
: "S"((x86_reg)srcStride), "D"((x86_reg)dstStride), "g"(h)\
|
|
|
|
|
: "memory"\
|
|
|
|
|
: "+a"(src), "+c"(dst)\
|
|
|
|
|
: "S"((x86_reg)srcStride), "D"((x86_reg)dstStride), "g"(h)\
|
|
|
|
|
: "memory"\
|
|
|
|
|
);\
|
|
|
|
|
src += 4-(h+5)*srcStride;\
|
|
|
|
|
dst += 4-h*dstStride;\
|
|
|
|
|
}\
|
|
|
|
|
src += 4-(h+5)*srcStride;\
|
|
|
|
|
dst += 4-h*dstStride;\
|
|
|
|
|
}\
|
|
|
|
|
}\
|
|
|
|
|
static av_always_inline void OPNAME ## h264_qpel8or16_hv1_lowpass_ ## MMX(int16_t *tmp, uint8_t *src, int tmpStride, int srcStride, int size){\
|
|
|
|
|
int w = (size+8)>>2;\
|
|
|
|
@ -436,18 +436,18 @@ static av_always_inline void OPNAME ## h264_qpel8or16_hv1_lowpass_ ## MMX(int16_ |
|
|
|
|
QPEL_H264HV(%%mm1, %%mm2, %%mm3, %%mm4, %%mm5, %%mm0, 7*48)\
|
|
|
|
|
"cmpl $16, %3 \n\t"\
|
|
|
|
|
"jne 2f \n\t"\
|
|
|
|
|
QPEL_H264HV(%%mm2, %%mm3, %%mm4, %%mm5, %%mm0, %%mm1, 8*48)\
|
|
|
|
|
QPEL_H264HV(%%mm3, %%mm4, %%mm5, %%mm0, %%mm1, %%mm2, 9*48)\
|
|
|
|
|
QPEL_H264HV(%%mm4, %%mm5, %%mm0, %%mm1, %%mm2, %%mm3, 10*48)\
|
|
|
|
|
QPEL_H264HV(%%mm5, %%mm0, %%mm1, %%mm2, %%mm3, %%mm4, 11*48)\
|
|
|
|
|
QPEL_H264HV(%%mm0, %%mm1, %%mm2, %%mm3, %%mm4, %%mm5, 12*48)\
|
|
|
|
|
QPEL_H264HV(%%mm1, %%mm2, %%mm3, %%mm4, %%mm5, %%mm0, 13*48)\
|
|
|
|
|
QPEL_H264HV(%%mm2, %%mm3, %%mm4, %%mm5, %%mm0, %%mm1, 14*48)\
|
|
|
|
|
QPEL_H264HV(%%mm3, %%mm4, %%mm5, %%mm0, %%mm1, %%mm2, 15*48)\
|
|
|
|
|
QPEL_H264HV(%%mm2, %%mm3, %%mm4, %%mm5, %%mm0, %%mm1, 8*48)\
|
|
|
|
|
QPEL_H264HV(%%mm3, %%mm4, %%mm5, %%mm0, %%mm1, %%mm2, 9*48)\
|
|
|
|
|
QPEL_H264HV(%%mm4, %%mm5, %%mm0, %%mm1, %%mm2, %%mm3, 10*48)\
|
|
|
|
|
QPEL_H264HV(%%mm5, %%mm0, %%mm1, %%mm2, %%mm3, %%mm4, 11*48)\
|
|
|
|
|
QPEL_H264HV(%%mm0, %%mm1, %%mm2, %%mm3, %%mm4, %%mm5, 12*48)\
|
|
|
|
|
QPEL_H264HV(%%mm1, %%mm2, %%mm3, %%mm4, %%mm5, %%mm0, 13*48)\
|
|
|
|
|
QPEL_H264HV(%%mm2, %%mm3, %%mm4, %%mm5, %%mm0, %%mm1, 14*48)\
|
|
|
|
|
QPEL_H264HV(%%mm3, %%mm4, %%mm5, %%mm0, %%mm1, %%mm2, 15*48)\
|
|
|
|
|
"2: \n\t"\
|
|
|
|
|
: "+a"(src)\
|
|
|
|
|
: "c"(tmp), "S"((x86_reg)srcStride), "g"(size)\
|
|
|
|
|
: "memory"\
|
|
|
|
|
: "+a"(src)\
|
|
|
|
|
: "c"(tmp), "S"((x86_reg)srcStride), "g"(size)\
|
|
|
|
|
: "memory"\
|
|
|
|
|
);\
|
|
|
|
|
tmp += 4;\
|
|
|
|
|
src += 4 - (size+5)*srcStride;\
|
|
|
|
@ -804,20 +804,20 @@ static av_noinline void OPNAME ## h264_qpel8or16_v_lowpass_ ## MMX(uint8_t *dst, |
|
|
|
|
QPEL_H264V_XMM(%%xmm1, %%xmm2, %%xmm3, %%xmm4, %%xmm5, %%xmm0, OP)\
|
|
|
|
|
"cmpl $16, %4 \n\t"\
|
|
|
|
|
"jne 2f \n\t"\
|
|
|
|
|
QPEL_H264V_XMM(%%xmm2, %%xmm3, %%xmm4, %%xmm5, %%xmm0, %%xmm1, OP)\
|
|
|
|
|
QPEL_H264V_XMM(%%xmm3, %%xmm4, %%xmm5, %%xmm0, %%xmm1, %%xmm2, OP)\
|
|
|
|
|
QPEL_H264V_XMM(%%xmm4, %%xmm5, %%xmm0, %%xmm1, %%xmm2, %%xmm3, OP)\
|
|
|
|
|
QPEL_H264V_XMM(%%xmm5, %%xmm0, %%xmm1, %%xmm2, %%xmm3, %%xmm4, OP)\
|
|
|
|
|
QPEL_H264V_XMM(%%xmm0, %%xmm1, %%xmm2, %%xmm3, %%xmm4, %%xmm5, OP)\
|
|
|
|
|
QPEL_H264V_XMM(%%xmm1, %%xmm2, %%xmm3, %%xmm4, %%xmm5, %%xmm0, OP)\
|
|
|
|
|
QPEL_H264V_XMM(%%xmm2, %%xmm3, %%xmm4, %%xmm5, %%xmm0, %%xmm1, OP)\
|
|
|
|
|
QPEL_H264V_XMM(%%xmm3, %%xmm4, %%xmm5, %%xmm0, %%xmm1, %%xmm2, OP)\
|
|
|
|
|
QPEL_H264V_XMM(%%xmm2, %%xmm3, %%xmm4, %%xmm5, %%xmm0, %%xmm1, OP)\
|
|
|
|
|
QPEL_H264V_XMM(%%xmm3, %%xmm4, %%xmm5, %%xmm0, %%xmm1, %%xmm2, OP)\
|
|
|
|
|
QPEL_H264V_XMM(%%xmm4, %%xmm5, %%xmm0, %%xmm1, %%xmm2, %%xmm3, OP)\
|
|
|
|
|
QPEL_H264V_XMM(%%xmm5, %%xmm0, %%xmm1, %%xmm2, %%xmm3, %%xmm4, OP)\
|
|
|
|
|
QPEL_H264V_XMM(%%xmm0, %%xmm1, %%xmm2, %%xmm3, %%xmm4, %%xmm5, OP)\
|
|
|
|
|
QPEL_H264V_XMM(%%xmm1, %%xmm2, %%xmm3, %%xmm4, %%xmm5, %%xmm0, OP)\
|
|
|
|
|
QPEL_H264V_XMM(%%xmm2, %%xmm3, %%xmm4, %%xmm5, %%xmm0, %%xmm1, OP)\
|
|
|
|
|
QPEL_H264V_XMM(%%xmm3, %%xmm4, %%xmm5, %%xmm0, %%xmm1, %%xmm2, OP)\
|
|
|
|
|
"2: \n\t"\
|
|
|
|
|
\
|
|
|
|
|
: "+a"(src), "+c"(dst)\
|
|
|
|
|
: "S"((x86_reg)srcStride), "D"((x86_reg)dstStride), "g"(h)\
|
|
|
|
|
: "memory"\
|
|
|
|
|
);\
|
|
|
|
|
\
|
|
|
|
|
: "+a"(src), "+c"(dst)\
|
|
|
|
|
: "S"((x86_reg)srcStride), "D"((x86_reg)dstStride), "g"(h)\
|
|
|
|
|
: "memory"\
|
|
|
|
|
);\
|
|
|
|
|
}\
|
|
|
|
|
static void OPNAME ## h264_qpel8_v_lowpass_ ## MMX(uint8_t *dst, uint8_t *src, int dstStride, int srcStride){\
|
|
|
|
|
OPNAME ## h264_qpel8or16_v_lowpass_ ## MMX(dst , src , dstStride, srcStride, 8);\
|
|
|
|
@ -858,19 +858,19 @@ static av_always_inline void put_h264_qpel8or16_hv1_lowpass_sse2(int16_t *tmp, u |
|
|
|
|
QPEL_H264HV_XMM(%%xmm1, %%xmm2, %%xmm3, %%xmm4, %%xmm5, %%xmm0, 7*48) |
|
|
|
|
"cmpl $16, %3 \n\t" |
|
|
|
|
"jne 2f \n\t" |
|
|
|
|
QPEL_H264HV_XMM(%%xmm2, %%xmm3, %%xmm4, %%xmm5, %%xmm0, %%xmm1, 8*48) |
|
|
|
|
QPEL_H264HV_XMM(%%xmm3, %%xmm4, %%xmm5, %%xmm0, %%xmm1, %%xmm2, 9*48) |
|
|
|
|
QPEL_H264HV_XMM(%%xmm4, %%xmm5, %%xmm0, %%xmm1, %%xmm2, %%xmm3, 10*48) |
|
|
|
|
QPEL_H264HV_XMM(%%xmm5, %%xmm0, %%xmm1, %%xmm2, %%xmm3, %%xmm4, 11*48) |
|
|
|
|
QPEL_H264HV_XMM(%%xmm0, %%xmm1, %%xmm2, %%xmm3, %%xmm4, %%xmm5, 12*48) |
|
|
|
|
QPEL_H264HV_XMM(%%xmm1, %%xmm2, %%xmm3, %%xmm4, %%xmm5, %%xmm0, 13*48) |
|
|
|
|
QPEL_H264HV_XMM(%%xmm2, %%xmm3, %%xmm4, %%xmm5, %%xmm0, %%xmm1, 14*48) |
|
|
|
|
QPEL_H264HV_XMM(%%xmm3, %%xmm4, %%xmm5, %%xmm0, %%xmm1, %%xmm2, 15*48) |
|
|
|
|
QPEL_H264HV_XMM(%%xmm2, %%xmm3, %%xmm4, %%xmm5, %%xmm0, %%xmm1, 8*48) |
|
|
|
|
QPEL_H264HV_XMM(%%xmm3, %%xmm4, %%xmm5, %%xmm0, %%xmm1, %%xmm2, 9*48) |
|
|
|
|
QPEL_H264HV_XMM(%%xmm4, %%xmm5, %%xmm0, %%xmm1, %%xmm2, %%xmm3, 10*48) |
|
|
|
|
QPEL_H264HV_XMM(%%xmm5, %%xmm0, %%xmm1, %%xmm2, %%xmm3, %%xmm4, 11*48) |
|
|
|
|
QPEL_H264HV_XMM(%%xmm0, %%xmm1, %%xmm2, %%xmm3, %%xmm4, %%xmm5, 12*48) |
|
|
|
|
QPEL_H264HV_XMM(%%xmm1, %%xmm2, %%xmm3, %%xmm4, %%xmm5, %%xmm0, 13*48) |
|
|
|
|
QPEL_H264HV_XMM(%%xmm2, %%xmm3, %%xmm4, %%xmm5, %%xmm0, %%xmm1, 14*48) |
|
|
|
|
QPEL_H264HV_XMM(%%xmm3, %%xmm4, %%xmm5, %%xmm0, %%xmm1, %%xmm2, 15*48) |
|
|
|
|
"2: \n\t" |
|
|
|
|
: "+a"(src) |
|
|
|
|
: "c"(tmp), "S"((x86_reg)srcStride), "g"(size) |
|
|
|
|
: "memory" |
|
|
|
|
); |
|
|
|
|
: "+a"(src) |
|
|
|
|
: "c"(tmp), "S"((x86_reg)srcStride), "g"(size) |
|
|
|
|
: "memory" |
|
|
|
|
); |
|
|
|
|
tmp += 8; |
|
|
|
|
src += 8 - (size+5)*srcStride; |
|
|
|
|
} |
|
|
|
|