@ -25,44 +25,6 @@
SECTION .text
;-------------------------------------------------------------------------------
; int ff_pixelutils_sad_8x8_mmx(const uint8_t *src1, ptrdiff_t stride1,
;                               const uint8_t *src2, ptrdiff_t stride2);
;
; Sum of absolute differences between two 8x8 blocks of bytes.
;
; Register roles:
;   m6 = running total, kept as four 16-bit partial sums
;   m7 = all-zero, used to widen bytes to words
;   m0-m5 = per-row scratch
;
; Overflow: every 16-bit lane of m6 receives 4 values per iteration, i.e.
; 16 values of at most 255 in total (<= 4080); the final sum is <= 16320.
;
; NOTE(review): no emms is visible in this body; whether RET or the caller is
; responsible for clearing the MMX state cannot be determined from here.
;-------------------------------------------------------------------------------
INIT_MMX mmx
cglobal pixelutils_sad_8x8, 4,4,0, src1, stride1, src2, stride2
    pxor        m6, m6                      ; total = 0
    pxor        m7, m7                      ; zero for byte -> word unpacking
%rep 4                                      ; two rows per iteration, 8 rows total
    ; ---- first row of the pair ----
    mova        m0, [src1q]
    mova        m1, [src2q]
    psubusb     m4, m0, m1                  ; max(src1 - src2, 0)
    psubusb     m1, m0                      ; max(src2 - src1, 0)
    por         m1, m4                      ; |src1 - src2| (one side is always 0)
    punpcklbw   m0, m1, m7                  ; low  4 differences as words
    punpckhbw   m1, m7                      ; high 4 differences as words
    paddw       m0, m1

    ; ---- second row of the pair ----
    mova        m2, [src1q + stride1q]
    mova        m3, [src2q + stride2q]
    psubusb     m5, m2, m3
    psubusb     m3, m2
    por         m3, m5
    punpcklbw   m2, m3, m7
    punpckhbw   m3, m7
    paddw       m2, m3

    ; ---- fold both rows into the running total ----
    paddw       m0, m2
    paddw       m6, m0

    lea         src1q, [src1q + 2*stride1q] ; step both sources down two rows
    lea         src2q, [src2q + 2*stride2q]
%endrep

    ; Horizontal reduction of the four word lanes w0..w3 of m6:
    ; after the two shift/add steps word 0 holds w0+w1+w2+w3, while word 1
    ; holds a partial sum that must be discarded.
    psrlq       m0, m6, 32
    paddw       m6, m0
    psrlq       m0, m6, 16
    paddw       m6, m0
    movd        eax, m6
    movzx       eax, ax                     ; keep word 0 only
    RET
;-------------------------------------------------------------------------------
; int ff_pixelutils_sad_8x8_mmxext(const uint8_t *src1, ptrdiff_t stride1,
; const uint8_t *src2, ptrdiff_t stride2);
@ -83,26 +45,6 @@ cglobal pixelutils_sad_8x8, 4,4,0, src1, stride1, src2, stride2
movd eax , m2
RET
;-------------------------------------------------------------------------------
; int ff_pixelutils_sad_16x16_mmxext(const uint8_t *src1, ptrdiff_t stride1,
;                                    const uint8_t *src2, ptrdiff_t stride2);
;
; Sum of absolute differences between two 16x16 blocks of bytes.
;
; Each row is handled as two 8-byte halves; psadbw yields the SAD of one half
; (at most 8 * 255 = 2040) and m2 collects all 32 of them, so the total is at
; most 65280 and still fits the 16-bit lane that paddw operates on.
;
; NOTE(review): no emms is visible in this body; whether RET or the caller is
; responsible for clearing the MMX state cannot be determined from here.
;-------------------------------------------------------------------------------
INIT_MMX mmxext
cglobal pixelutils_sad_16x16, 4,4,0, src1, stride1, src2, stride2
    pxor        m2, m2                      ; total = 0
%rep 16                                     ; one row per iteration
    ; left 8 bytes of the row
    mova        m0, [src1q]
    psadbw      m0, [src2q]
    ; right 8 bytes of the row
    mova        m1, [src1q + 8]
    psadbw      m1, [src2q + 8]
    ; accumulate both halves
    paddw       m2, m0
    paddw       m2, m1
    ; next row
    add         src1q, stride1q
    add         src2q, stride2q
%endrep
    movd        eax, m2                     ; return the accumulated SAD
    RET
;-------------------------------------------------------------------------------
; int ff_pixelutils_sad_16x16_sse2(const uint8_t *src1, ptrdiff_t stride1,
; const uint8_t *src2, ptrdiff_t stride2);