|
|
|
@ -2239,6 +2239,7 @@ static void int32_to_float_fmul_scalar_sse2(float *dst, const int *src, float mu |
|
|
|
|
} |
|
|
|
|
|
|
|
|
|
static void float_to_int16_3dnow(int16_t *dst, const float *src, long len){ |
|
|
|
|
x86_reg reglen = len; |
|
|
|
|
// not bit-exact: pf2id uses different rounding than C and SSE
|
|
|
|
|
__asm__ volatile( |
|
|
|
|
"add %0 , %0 \n\t" |
|
|
|
@ -2257,10 +2258,11 @@ static void float_to_int16_3dnow(int16_t *dst, const float *src, long len){ |
|
|
|
|
"add $16 , %0 \n\t" |
|
|
|
|
" js 1b \n\t" |
|
|
|
|
"femms \n\t" |
|
|
|
|
:"+r"(len), "+r"(dst), "+r"(src) |
|
|
|
|
:"+r"(reglen), "+r"(dst), "+r"(src) |
|
|
|
|
); |
|
|
|
|
} |
|
|
|
|
static void float_to_int16_sse(int16_t *dst, const float *src, long len){ |
|
|
|
|
x86_reg reglen = len; |
|
|
|
|
__asm__ volatile( |
|
|
|
|
"add %0 , %0 \n\t" |
|
|
|
|
"lea (%2,%0,2) , %2 \n\t" |
|
|
|
@ -2278,11 +2280,12 @@ static void float_to_int16_sse(int16_t *dst, const float *src, long len){ |
|
|
|
|
"add $16 , %0 \n\t" |
|
|
|
|
" js 1b \n\t" |
|
|
|
|
"emms \n\t" |
|
|
|
|
:"+r"(len), "+r"(dst), "+r"(src) |
|
|
|
|
:"+r"(reglen), "+r"(dst), "+r"(src) |
|
|
|
|
); |
|
|
|
|
} |
|
|
|
|
|
|
|
|
|
static void float_to_int16_sse2(int16_t *dst, const float *src, long len){ |
|
|
|
|
x86_reg reglen = len; |
|
|
|
|
__asm__ volatile( |
|
|
|
|
"add %0 , %0 \n\t" |
|
|
|
|
"lea (%2,%0,2) , %2 \n\t" |
|
|
|
@ -2295,7 +2298,7 @@ static void float_to_int16_sse2(int16_t *dst, const float *src, long len){ |
|
|
|
|
"movdqa %%xmm0 , (%1,%0) \n\t" |
|
|
|
|
"add $16 , %0 \n\t" |
|
|
|
|
" js 1b \n\t" |
|
|
|
|
:"+r"(len), "+r"(dst), "+r"(src) |
|
|
|
|
:"+r"(reglen), "+r"(dst), "+r"(src) |
|
|
|
|
); |
|
|
|
|
} |
|
|
|
|
|
|
|
|
@ -2326,6 +2329,7 @@ static void float_to_int16_interleave_##cpu(int16_t *dst, const float **src, lon |
|
|
|
|
if(channels==1)\
|
|
|
|
|
float_to_int16_##cpu(dst, src[0], len);\
|
|
|
|
|
else if(channels==2){\
|
|
|
|
|
x86_reg reglen = len; \
|
|
|
|
|
const float *src0 = src[0];\
|
|
|
|
|
const float *src1 = src[1];\
|
|
|
|
|
__asm__ volatile(\
|
|
|
|
@ -2335,7 +2339,7 @@ static void float_to_int16_interleave_##cpu(int16_t *dst, const float **src, lon |
|
|
|
|
"add %0, %3 \n"\
|
|
|
|
|
"neg %0 \n"\
|
|
|
|
|
body\
|
|
|
|
|
:"+r"(len), "+r"(dst), "+r"(src0), "+r"(src1)\
|
|
|
|
|
:"+r"(reglen), "+r"(dst), "+r"(src0), "+r"(src1)\
|
|
|
|
|
);\
|
|
|
|
|
}else if(channels==6){\
|
|
|
|
|
ff_float_to_int16_interleave6_##cpu(dst, src, len);\
|
|
|
|
|