|
|
|
@ -32,8 +32,8 @@ SECTION_TEXT |
|
|
|
|
|
|
|
|
|
; %1 = aligned/unaligned |
|
|
|
|
%macro BSWAP_LOOPS 1 |
|
|
|
|
mov r3, r2 |
|
|
|
|
sar r2, 3 |
|
|
|
|
mov r3d, r2d |
|
|
|
|
sar r2d, 3 |
|
|
|
|
jz .left4_%1 |
|
|
|
|
.loop8_%1: |
|
|
|
|
mov%1 m0, [r1 + 0] |
|
|
|
@ -61,11 +61,11 @@ SECTION_TEXT |
|
|
|
|
%endif |
|
|
|
|
add r0, 32 |
|
|
|
|
add r1, 32 |
|
|
|
|
dec r2 |
|
|
|
|
dec r2d |
|
|
|
|
jnz .loop8_%1 |
|
|
|
|
.left4_%1: |
|
|
|
|
mov r2, r3 |
|
|
|
|
and r3, 4 |
|
|
|
|
mov r2d, r3d |
|
|
|
|
test r3d, 4 |
|
|
|
|
jz .left |
|
|
|
|
mov%1 m0, [r1] |
|
|
|
|
%if cpuflag(ssse3) |
|
|
|
@ -95,7 +95,7 @@ cglobal bswap32_buf, 3,4,5 |
|
|
|
|
mov r3, r1 |
|
|
|
|
%endif |
|
|
|
|
or r3, r0 |
|
|
|
|
and r3, 15 |
|
|
|
|
test r3, 15 |
|
|
|
|
jz .start_align |
|
|
|
|
BSWAP_LOOPS u |
|
|
|
|
jmp .left |
|
|
|
@ -103,8 +103,7 @@ cglobal bswap32_buf, 3,4,5 |
|
|
|
|
BSWAP_LOOPS a |
|
|
|
|
.left: |
|
|
|
|
%if cpuflag(ssse3) |
|
|
|
|
mov r3, r2 |
|
|
|
|
and r2, 2 |
|
|
|
|
test r2d, 2 |
|
|
|
|
jz .left1 |
|
|
|
|
movq m0, [r1] |
|
|
|
|
pshufb m0, m2 |
|
|
|
@ -112,13 +111,13 @@ cglobal bswap32_buf, 3,4,5 |
|
|
|
|
add r1, 8 |
|
|
|
|
add r0, 8 |
|
|
|
|
.left1: |
|
|
|
|
and r3, 1 |
|
|
|
|
test r2d, 1 |
|
|
|
|
jz .end |
|
|
|
|
mov r2d, [r1] |
|
|
|
|
bswap r2d |
|
|
|
|
mov [r0], r2d |
|
|
|
|
%else |
|
|
|
|
and r2, 3 |
|
|
|
|
and r2d, 3 |
|
|
|
|
jz .end |
|
|
|
|
.loop2: |
|
|
|
|
mov r3d, [r1] |
|
|
|
@ -126,7 +125,7 @@ cglobal bswap32_buf, 3,4,5 |
|
|
|
|
mov [r0], r3d |
|
|
|
|
add r1, 4 |
|
|
|
|
add r0, 4 |
|
|
|
|
dec r2 |
|
|
|
|
dec r2d |
|
|
|
|
jnz .loop2 |
|
|
|
|
%endif |
|
|
|
|
.end: |
|
|
|
|