|
|
|
@ -20,39 +20,29 @@ |
|
|
|
|
|
|
|
|
|
%include "libavutil/x86/x86util.asm" |
|
|
|
|
|
|
|
|
|
SECTION_RODATA |
|
|
|
|
|
|
|
|
|
; Constant set named by the four-argument
; "CHRCONVERTRANGE chrRangeToJpeg, ..." invocations in this listing:
; multiplier table, offset table, shift count.
; 4 x (word 4663, word 0) = 16 bytes; the macro loads it with VBROADCASTI128.
; NOTE(review): the zero upper word presumably pairs with a zero-interleaved
; sample under pmaddwd - the chroma multiply is not visible here; confirm.
chr_to_mult: times 4 dw 4663, 0 |
|
|
|
|
; 4 x dword -9289992; the macro adds this to the dword lanes with paddd.
chr_to_offset: times 4 dd -9289992 |
|
|
|
|
; Immediate count for the psrad that follows the offset add.
%define chr_to_shift 12 |
|
|
|
|
|
|
|
|
|
; Constant set named by the four-argument
; "CHRCONVERTRANGE chrRangeFromJpeg, ..." invocations in this listing:
; multiplier table, offset table, shift count.
; 4 x (word 1799, word 0) = 16 bytes; the macro loads it with VBROADCASTI128.
; NOTE(review): the zero upper word presumably pairs with a zero-interleaved
; sample under pmaddwd - the chroma multiply is not visible here; confirm.
chr_from_mult: times 4 dw 1799, 0 |
|
|
|
|
; 4 x dword 4081085; the macro adds this to the dword lanes with paddd.
chr_from_offset: times 4 dd 4081085 |
|
|
|
|
; Immediate count for the psrad that follows the offset add.
%define chr_from_shift 11 |
|
|
|
|
|
|
|
|
|
; Constant set named by the four-argument
; "LUMCONVERTRANGE lumRangeToJpeg, ..." invocations in this listing:
; multiplier table, offset table, shift count.
; 4 x (word 19077, word 0) = 16 bytes; the macro loads it into m2 with
; VBROADCASTI128 and uses m2 as the pmaddwd multiplier.
; NOTE(review): the zero upper word presumably pairs with a zero-interleaved
; sample - the unpack step is in lines elided from this listing; confirm.
lum_to_mult: times 4 dw 19077, 0 |
|
|
|
|
; 4 x dword -39057361; the macro adds this to the dword lanes with paddd.
lum_to_offset: times 4 dd -39057361 |
|
|
|
|
; Immediate count for the psrad that follows the offset add.
%define lum_to_shift 14 |
|
|
|
|
|
|
|
|
|
; Constant set named by the four-argument
; "LUMCONVERTRANGE lumRangeFromJpeg, ..." invocations in this listing:
; multiplier table, offset table, shift count.
; 4 x (word 14071, word 0) = 16 bytes; the macro loads it into m2 with
; VBROADCASTI128 and uses m2 as the pmaddwd multiplier.
; NOTE(review): the zero upper word presumably pairs with a zero-interleaved
; sample - the unpack step is in lines elided from this listing; confirm.
lum_from_mult: times 4 dw 14071, 0 |
|
|
|
|
; 4 x dword 33561947; the macro adds this to the dword lanes with paddd.
lum_from_offset: times 4 dd 33561947 |
|
|
|
|
; Immediate count for the psrad that follows the offset add.
%define lum_from_shift 14 |
|
|
|
|
|
|
|
|
|
SECTION .text |
|
|
|
|
|
|
|
|
|
;----------------------------------------------------------------------------- |
|
|
|
|
; lumConvertRange |
|
|
|
|
; |
|
|
|
|
; void ff_lumRangeToJpeg_<opt>(int16_t *dst, int width); |
|
|
|
|
; void ff_lumRangeFromJpeg_<opt>(int16_t *dst, int width); |
|
|
|
|
; void ff_lumRangeToJpeg_<opt>(int16_t *dst, int width, |
|
|
|
|
; uint32_t coeff, int64_t offset); |
|
|
|
|
; void ff_lumRangeFromJpeg_<opt>(int16_t *dst, int width, |
|
|
|
|
; uint32_t coeff, int64_t offset); |
|
|
|
|
; |
|
|
|
|
;----------------------------------------------------------------------------- |
|
|
|
|
|
|
|
|
|
%macro LUMCONVERTRANGE 4 |
|
|
|
|
cglobal %1, 2, 2, 5, dst, width |
|
|
|
|
%macro LUMCONVERTRANGE 1 |
|
|
|
|
cglobal lumRange%1Jpeg, 4, 4, 5, dst, width, coeff, offset |
|
|
|
|
shl widthd, 1 |
|
|
|
|
VBROADCASTI128 m2, [%2] |
|
|
|
|
VBROADCASTI128 m3, [%3] |
|
|
|
|
movd xm2, coeffd |
|
|
|
|
VBROADCASTSS m2, xm2 |
|
|
|
|
%if ARCH_X86_64 |
|
|
|
|
movq xm3, offsetq |
|
|
|
|
%else |
|
|
|
|
movq xm3, offsetm |
|
|
|
|
%endif |
|
|
|
|
VBROADCASTSS m3, xm3 |
|
|
|
|
pxor m4, m4 |
|
|
|
|
add dstq, widthq |
|
|
|
|
neg widthq |
|
|
|
@ -64,8 +54,8 @@ cglobal %1, 2, 2, 5, dst, width |
|
|
|
|
pmaddwd m1, m2 |
|
|
|
|
paddd m0, m3 |
|
|
|
|
paddd m1, m3 |
|
|
|
|
psrad m0, %4 |
|
|
|
|
psrad m1, %4 |
|
|
|
|
psrad m0, 14 |
|
|
|
|
psrad m1, 14 |
|
|
|
|
packssdw m0, m1 |
|
|
|
|
movu [dstq+widthq], m0 |
|
|
|
|
add widthq, mmsize |
|
|
|
@ -76,16 +66,24 @@ cglobal %1, 2, 2, 5, dst, width |
|
|
|
|
;----------------------------------------------------------------------------- |
|
|
|
|
; chrConvertRange |
|
|
|
|
; |
|
|
|
|
; void ff_chrRangeToJpeg_<opt>(int16_t *dstU, int16_t *dstV, int width); |
|
|
|
|
; void ff_chrRangeFromJpeg_<opt>(int16_t *dstU, int16_t *dstV, int width); |
|
|
|
|
; void ff_chrRangeToJpeg_<opt>(int16_t *dstU, int16_t *dstV, int width, |
|
|
|
|
; uint32_t coeff, int64_t offset); |
|
|
|
|
; void ff_chrRangeFromJpeg_<opt>(int16_t *dstU, int16_t *dstV, int width, |
|
|
|
|
; uint32_t coeff, int64_t offset); |
|
|
|
|
; |
|
|
|
|
;----------------------------------------------------------------------------- |
|
|
|
|
|
|
|
|
|
; CHRCONVERTRANGE: emits one chroma range-conversion function operating on the
; dstU / dstV buffers.
; NOTE(review): this listing carries two macro headers and two cglobal lines
; back to back, duplicated constant-load and shift sequences, and hunk markers
; ("@ -a,b +c,d @") where lines are elided. It reads as two revisions of the
; macro interleaved - confirm against the real file before relying on it.
;   4-argument form: %1 = function name, %2 = multiplier table,
;                    %3 = offset table,  %4 = shift count
;   1-argument form: %1 is pasted into chrRange%1Jpeg; the multiplier and
;                    offset arrive as the coeff / offset function arguments
%macro CHRCONVERTRANGE 4 |
|
|
|
|
; NOTE(review): cglobal is not defined in this listing; the numeric fields are
; presumably argument / GPR / vector-register counts - confirm.
cglobal %1, 3, 3, 7, dstU, dstV, width |
|
|
|
|
%macro CHRCONVERTRANGE 1 |
|
|
|
|
cglobal chrRange%1Jpeg, 5, 5, 7, dstU, dstV, width, coeff, offset |
|
|
|
|
; width *= 2: the prototype comment declares the buffers as int16_t, so this
; turns a sample count into a byte count.
shl widthd, 1 |
|
|
|
|
; 4-argument form: m4 = multiplier table, m5 = offset table.
VBROADCASTI128 m4, [%2] |
|
|
|
|
VBROADCASTI128 m5, [%3] |
|
|
|
|
; 1-argument form: m4 is built from the coeff argument instead.
; NOTE(review): VBROADCASTSS is defined outside this listing; presumably it
; replicates the low dword of xm4 across m4 - confirm.
movd xm4, coeffd |
|
|
|
|
VBROADCASTSS m4, xm4 |
|
|
|
|
; Fetch the offset argument: from its register name on x86-64, from the
; offsetm operand otherwise.
%if ARCH_X86_64 |
|
|
|
|
movq xm5, offsetq |
|
|
|
|
%else |
|
|
|
|
movq xm5, offsetm |
|
|
|
|
%endif |
|
|
|
|
; NOTE(review): the prototype comment declares offset as int64_t and movq moves
; 64 bits, but a single-dword broadcast would keep only the low 32 bits -
; confirm that is intended.
VBROADCASTSS m5, xm5 |
|
|
|
|
; m6 = 0
pxor m6, m6 |
|
|
|
|
; Advance both pointers by the byte count. The store below addresses
; [dstUq+widthq], so widthq is presumably negated in the elided lines to act as
; an index counting up toward zero - confirm.
add dstUq, widthq |
|
|
|
|
add dstVq, widthq |
|
|
|
@ -105,10 +103,10 @@ cglobal %1, 3, 3, 7, dstU, dstV, width |
|
|
|
|
; Add the offset (m5) to the dword lanes of m1..m3; the preceding lines,
; including whatever fills m0..m3, are elided from this listing.
paddd m1, m5 |
|
|
|
|
paddd m2, m5 |
|
|
|
|
paddd m3, m5 |
|
|
|
|
; Arithmetic right shift of every dword lane. 4-argument form: count = %4.
psrad m0, %4 |
|
|
|
|
psrad m1, %4 |
|
|
|
|
psrad m2, %4 |
|
|
|
|
psrad m3, %4 |
|
|
|
|
; 1-argument form: count is the literal 14.
psrad m0, 14 |
|
|
|
|
psrad m1, 14 |
|
|
|
|
psrad m2, 14 |
|
|
|
|
psrad m3, 14 |
|
|
|
|
; Narrow the dword lanes back to words with signed saturation:
; m0:m1 -> m0 and m2:m3 -> m2.
packssdw m0, m1 |
|
|
|
|
packssdw m2, m3 |
|
|
|
|
; Unaligned store of m0 to the U buffer; the store of m2 and the loop tail are
; in the elided lines.
movu [dstUq+widthq], m0 |
|
|
|
@ -119,15 +117,15 @@ cglobal %1, 3, 3, 7, dstU, dstV, width |
|
|
|
|
%endmacro |
|
|
|
|
|
|
|
|
|
; SSE2 instantiations of the range-conversion macros.
; NOTE(review): INIT_XMM is not defined in this listing; presumably it comes
; via the x86util.asm include and selects the register width and the function
; name suffix - confirm.
INIT_XMM sse2 |
|
|
|
|
; Four-argument invocations, matching "%macro LUMCONVERTRANGE 4" and
; "%macro CHRCONVERTRANGE 4": function name, multiplier table, offset table,
; shift count.
LUMCONVERTRANGE lumRangeToJpeg, lum_to_mult, lum_to_offset, lum_to_shift |
|
|
|
|
CHRCONVERTRANGE chrRangeToJpeg, chr_to_mult, chr_to_offset, chr_to_shift |
|
|
|
|
LUMCONVERTRANGE lumRangeFromJpeg, lum_from_mult, lum_from_offset, lum_from_shift |
|
|
|
|
CHRCONVERTRANGE chrRangeFromJpeg, chr_from_mult, chr_from_offset, chr_from_shift |
|
|
|
|
; One-argument invocations, matching "%macro LUMCONVERTRANGE 1" and
; "%macro CHRCONVERTRANGE 1": the argument is pasted into lumRange%1Jpeg /
; chrRange%1Jpeg.
; NOTE(review): both invocation styles appear back to back; together with the
; hunk markers elsewhere in the listing this looks like two revisions
; interleaved rather than eight distinct instantiations - confirm.
LUMCONVERTRANGE To |
|
|
|
|
CHRCONVERTRANGE To |
|
|
|
|
LUMCONVERTRANGE From |
|
|
|
|
CHRCONVERTRANGE From |
|
|
|
|
|
|
|
|
|
; AVX2 instantiations of the range-conversion macros, assembled only when
; HAVE_AVX2_EXTERNAL is non-zero.
%if HAVE_AVX2_EXTERNAL |
|
|
|
|
; NOTE(review): INIT_YMM is not defined in this listing; presumably it comes
; via the x86util.asm include and selects the register width and the function
; name suffix - confirm.
INIT_YMM avx2 |
|
|
|
|
; Four-argument invocations, matching "%macro LUMCONVERTRANGE 4" and
; "%macro CHRCONVERTRANGE 4": function name, multiplier table, offset table,
; shift count.
LUMCONVERTRANGE lumRangeToJpeg, lum_to_mult, lum_to_offset, lum_to_shift |
|
|
|
|
CHRCONVERTRANGE chrRangeToJpeg, chr_to_mult, chr_to_offset, chr_to_shift |
|
|
|
|
LUMCONVERTRANGE lumRangeFromJpeg, lum_from_mult, lum_from_offset, lum_from_shift |
|
|
|
|
CHRCONVERTRANGE chrRangeFromJpeg, chr_from_mult, chr_from_offset, chr_from_shift |
|
|
|
|
; One-argument invocations, matching "%macro LUMCONVERTRANGE 1" and
; "%macro CHRCONVERTRANGE 1": the argument is pasted into lumRange%1Jpeg /
; chrRange%1Jpeg.
; NOTE(review): both invocation styles appear back to back; together with the
; hunk markers elsewhere in the listing this looks like two revisions
; interleaved rather than eight distinct instantiations - confirm.
LUMCONVERTRANGE To |
|
|
|
|
CHRCONVERTRANGE To |
|
|
|
|
LUMCONVERTRANGE From |
|
|
|
|
CHRCONVERTRANGE From |
|
|
|
|
%endif |
|
|
|
|