mirror of https://github.com/FFmpeg/FFmpeg.git
parent
ca1e36a8e4
commit
9a9e2f1c8a
35 changed files with 694 additions and 404 deletions
@ -0,0 +1,26 @@ |
||||
/*
|
||||
* This file is part of Libav. |
||||
* |
||||
* Libav is free software; you can redistribute it and/or |
||||
* modify it under the terms of the GNU Lesser General Public |
||||
* License as published by the Free Software Foundation; either |
||||
* version 2.1 of the License, or (at your option) any later version. |
||||
* |
||||
* Libav is distributed in the hope that it will be useful, |
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of |
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
||||
* Lesser General Public License for more details. |
||||
* |
||||
* You should have received a copy of the GNU Lesser General Public |
||||
* License along with Libav; if not, write to the Free Software |
||||
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA |
||||
*/ |
||||
|
||||
#ifndef AVCODEC_ARM_AUDIODSP_ARM_H |
||||
#define AVCODEC_ARM_AUDIODSP_ARM_H |
||||
|
||||
#include "libavcodec/audiodsp.h" |
||||
|
||||
void ff_audiodsp_init_neon(AudioDSPContext *c); |
||||
|
||||
#endif /* AVCODEC_ARM_AUDIODSP_ARM_H */ |
@ -0,0 +1,33 @@ |
||||
/*
|
||||
* ARM optimized audio functions |
||||
* |
||||
* This file is part of Libav. |
||||
* |
||||
* Libav is free software; you can redistribute it and/or |
||||
* modify it under the terms of the GNU Lesser General Public |
||||
* License as published by the Free Software Foundation; either |
||||
* version 2.1 of the License, or (at your option) any later version. |
||||
* |
||||
* Libav is distributed in the hope that it will be useful, |
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of |
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
||||
* Lesser General Public License for more details. |
||||
* |
||||
* You should have received a copy of the GNU Lesser General Public |
||||
* License along with Libav; if not, write to the Free Software |
||||
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA |
||||
*/ |
||||
|
||||
#include "libavutil/attributes.h" |
||||
#include "libavutil/cpu.h" |
||||
#include "libavutil/arm/cpu.h" |
||||
#include "libavcodec/audiodsp.h" |
||||
#include "audiodsp_arm.h" |
||||
|
||||
/**
 * Install ARM-specific routines into an AudioDSPContext.
 *
 * The only ARM variant wired up here is the NEON one. It is installed when
 * have_neon() accepts the flags returned by av_get_cpu_flags(); otherwise
 * the context is left exactly as the caller passed it in.
 *
 * @param c context whose function pointers may be replaced
 */
av_cold void ff_audiodsp_init_arm(AudioDSPContext *c)
{
    const int flags = av_get_cpu_flags();

    if (!have_neon(flags))
        return;

    ff_audiodsp_init_neon(c);
}
@ -0,0 +1,41 @@ |
||||
/*
|
||||
* ARM NEON optimised audio functions |
||||
* Copyright (c) 2008 Mans Rullgard <mans@mansr.com> |
||||
* |
||||
* This file is part of Libav. |
||||
* |
||||
* Libav is free software; you can redistribute it and/or |
||||
* modify it under the terms of the GNU Lesser General Public |
||||
* License as published by the Free Software Foundation; either |
||||
* version 2.1 of the License, or (at your option) any later version. |
||||
* |
||||
* Libav is distributed in the hope that it will be useful, |
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of |
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
||||
* Lesser General Public License for more details. |
||||
* |
||||
* You should have received a copy of the GNU Lesser General Public |
||||
* License along with Libav; if not, write to the Free Software |
||||
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA |
||||
*/ |
||||
|
||||
#include <stdint.h> |
||||
|
||||
#include "libavutil/attributes.h" |
||||
#include "libavcodec/audiodsp.h" |
||||
#include "audiodsp_arm.h" |
||||
|
||||
void ff_vector_clipf_neon(float *dst, const float *src, float min, float max, |
||||
int len); |
||||
void ff_vector_clip_int32_neon(int32_t *dst, const int32_t *src, int32_t min, |
||||
int32_t max, unsigned int len); |
||||
|
||||
int32_t ff_scalarproduct_int16_neon(const int16_t *v1, const int16_t *v2, int len); |
||||
|
||||
/**
 * Point the three AudioDSPContext function pointers at their NEON
 * implementations (declared just above, implemented in assembly).
 *
 * No CPU check is done here.
 *
 * @param c context to fill in
 */
av_cold void ff_audiodsp_init_neon(AudioDSPContext *c)
{
    c->scalarproduct_int16 = ff_scalarproduct_int16_neon;

    c->vector_clipf        = ff_vector_clipf_neon;
    c->vector_clip_int32   = ff_vector_clip_int32_neon;
}
@ -0,0 +1,64 @@ |
||||
/* |
||||
* ARM NEON optimised audio functions |
||||
* Copyright (c) 2008 Mans Rullgard <mans@mansr.com>
|
||||
* |
||||
* This file is part of Libav. |
||||
* |
||||
* Libav is free software; you can redistribute it and/or
|
||||
* modify it under the terms of the GNU Lesser General Public |
||||
* License as published by the Free Software Foundation; either
|
||||
* version 2.1 of the License, or (at your option) any later version. |
||||
* |
||||
* Libav is distributed in the hope that it will be useful, |
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
||||
* Lesser General Public License for more details. |
||||
* |
||||
* You should have received a copy of the GNU Lesser General Public |
||||
* License along with Libav; if not, write to the Free Software
|
||||
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA |
||||
*/ |
||||
|
||||
#include "libavutil/arm/asm.S" |
||||
|
||||
@ void ff_vector_clipf_neon(float *dst, const float *src,
@                           float min, float max, int len)
@
@ r0 = dst, r1 = src, r2 = element counter (decremented by 8 per pass).
@ q0 holds min in all four lanes, q1 holds max in all four lanes.
@ Loads and stores use the :128 alignment qualifier, and the loop always
@ handles 8 floats per pass and exits only when the counter hits exactly 0.
@
@ NOTE(review): VFP/NOVFP are prefixes defined in asm.S; presumably they
@ select the hard-float vs. soft-float argument-passing variant. The VFP
@ lines take min/max from d0[0]/d0[1] and leave r2 untouched; the NOVFP
@ lines take min/max from r2/r3 and then load the counter from the stack.
function ff_vector_clipf_neon, export=1
        @ max must be broadcast first: writing q0 would overwrite d0
VFP     vdup.32         q1,  d0[1]
VFP     vdup.32         q0,  d0[0]
NOVFP   vdup.32         q0,  r2
NOVFP   vdup.32         q1,  r3
NOVFP   ldr             r2,  [sp]
        @ prologue: fetch the first 8 floats and apply the upper bound
        vld1.f32        {q2},[r1,:128]!
        vmin.f32        q10, q2,  q1
        vld1.f32        {q3},[r1,:128]!
        vmin.f32        q11, q3,  q1
        @ loop: finish the pending 8 (lower bound), then, unless they were
        @ the last ones, start the next 8 before storing the finished ones
1:      vmax.f32        q8,  q10, q0
        vmax.f32        q9,  q11, q0
        subs            r2,  r2,  #8
        beq             2f
        vld1.f32        {q2},[r1,:128]!
        vmin.f32        q10, q2,  q1
        vld1.f32        {q3},[r1,:128]!
        vmin.f32        q11, q3,  q1
        vst1.f32        {q8},[r0,:128]!
        vst1.f32        {q9},[r0,:128]!
        b               1b
        @ epilogue: store the final 8 results
2:      vst1.f32        {q8},[r0,:128]!
        vst1.f32        {q9},[r0,:128]!
        bx              lr
endfunc
||||
|
||||
@ void ff_vector_clip_int32_neon(int32_t *dst, const int32_t *src,
@                                int32_t min, int32_t max, unsigned int len)
@
@ r0 = dst, r1 = src, r2 = min, r3 = max; the counter is read from [sp]
@ into r2 once min has been broadcast. Each pass clips 8 values, so at
@ least 8 elements are always processed. Loads and stores use the :128
@ alignment qualifier.
function ff_vector_clip_int32_neon, export=1
        vdup.32         q0,  r2                 @ q0 = min in every lane
        vdup.32         q1,  r3                 @ q1 = max in every lane
        ldr             r2,  [sp]               @ r2 = remaining elements
1:
        vld1.32         {q2-q3},  [r1,:128]!
        vmin.s32        q2,  q2,  q1            @ apply upper bound
        vmin.s32        q3,  q3,  q1
        vmax.s32        q2,  q2,  q0            @ apply lower bound
        vmax.s32        q3,  q3,  q0
        vst1.32         {q2-q3},  [r0,:128]!
        subs            r2,  r2,  #8
        bgt             1b                      @ signed compare against 0
        bx              lr
endfunc
@ -0,0 +1,118 @@ |
||||
/*
|
||||
* This file is part of Libav. |
||||
* |
||||
* Libav is free software; you can redistribute it and/or |
||||
* modify it under the terms of the GNU Lesser General Public |
||||
* License as published by the Free Software Foundation; either |
||||
* version 2.1 of the License, or (at your option) any later version. |
||||
* |
||||
* Libav is distributed in the hope that it will be useful, |
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of |
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
||||
* Lesser General Public License for more details. |
||||
* |
||||
* You should have received a copy of the GNU Lesser General Public |
||||
* License along with Libav; if not, write to the Free Software |
||||
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA |
||||
*/ |
||||
|
||||
#include <stdint.h> |
||||
|
||||
#include "libavutil/attributes.h" |
||||
#include "libavutil/common.h" |
||||
#include "audiodsp.h" |
||||
|
||||
/**
 * Clip one IEEE-754 single-precision value, given as its raw bit pattern,
 * to a range whose lower bound is negative and upper bound is positive.
 *
 * With the sign bit set, a larger unsigned pattern means a more negative
 * float, so "a > mini" detects values below the lower bound. Flipping the
 * sign bit of both a and the upper bound turns the "above max" test into
 * another unsigned comparison.
 *
 * @param a        bit pattern of the value to clip
 * @param mini     bit pattern of the (negative) lower bound
 * @param maxi     bit pattern of the (positive) upper bound
 * @param maxisign maxi with its sign bit flipped
 * @return bit pattern of the clipped value
 */
static inline uint32_t clipf_c_one(uint32_t a, uint32_t mini,
                                   uint32_t maxi, uint32_t maxisign)
{
    if (a > mini)
        return mini;
    if ((a ^ (1U << 31)) > maxisign)
        return maxi;
    return a;
}

/** Return the raw 32-bit pattern of a float (union punning, no aliasing UB). */
static inline uint32_t clipf_c_float_bits(float f)
{
    union { float f; uint32_t u; } pun;

    pun.f = f;
    return pun.u;
}

/** Return the float whose raw 32-bit pattern is u. */
static inline float clipf_c_bits_float(uint32_t u)
{
    union { float f; uint32_t u; } pun;

    pun.u = u;
    return pun.f;
}

/**
 * Clip a float vector to [*min, *max] using integer comparisons only.
 * Valid when *min is negative and *max is positive.
 *
 * The previous version accessed the float objects through uint32_t pointers,
 * which violates the strict-aliasing rule; every reinterpretation now goes
 * through a union instead. Results are bit-for-bit the same.
 *
 * @param dst destination array
 * @param src source array
 * @param min pointer to the lower bound
 * @param max pointer to the upper bound
 * @param len number of elements; processed in groups of 8, so a length that
 *            is not a multiple of 8 is rounded up to the next group
 */
static void vector_clipf_c_opposite_sign(float *dst, const float *src,
                                         float *min, float *max, int len)
{
    const uint32_t mini     = clipf_c_float_bits(*min);
    const uint32_t maxi     = clipf_c_float_bits(*max);
    const uint32_t maxisign = maxi ^ (1U << 31);
    int i, j;

    for (i = 0; i < len; i += 8) {
        for (j = 0; j < 8; j++) {
            const uint32_t in = clipf_c_float_bits(src[i + j]);

            dst[i + j] = clipf_c_bits_float(clipf_c_one(in, mini, maxi,
                                                        maxisign));
        }
    }
}
||||
|
||||
/**
 * Portable implementation of AudioDSPContext.vector_clipf.
 *
 * When the bounds straddle zero (min < 0 < max) the work is handed to
 * vector_clipf_c_opposite_sign(); every other case clips element by
 * element with av_clipf().
 *
 * @param dst destination array
 * @param src source array
 * @param min lower bound
 * @param max upper bound
 * @param len number of elements; handled in groups of 8
 */
static void vector_clipf_c(float *dst, const float *src,
                           float min, float max, int len)
{
    int base, k;

    if (min < 0 && max > 0) {
        vector_clipf_c_opposite_sign(dst, src, &min, &max, len);
        return;
    }

    for (base = 0; base < len; base += 8)
        for (k = 0; k < 8; k++)
            dst[base + k] = av_clipf(src[base + k], min, max);
}
||||
|
||||
/**
 * Portable implementation of AudioDSPContext.scalarproduct_int16:
 * the sum of v1[i] * v2[i] for i in [0, order).
 *
 * The sum is accumulated in an unsigned variable so that a total exceeding
 * the int range wraps modulo 2^32 instead of being undefined behaviour.
 * Each individual product of two int16_t values always fits in an int.
 * A zero or negative order yields 0.
 *
 * @param v1    first vector
 * @param v2    second vector
 * @param order number of elements to multiply and accumulate
 * @return the (possibly wrapped) scalar product
 */
static int32_t scalarproduct_int16_c(const int16_t *v1, const int16_t *v2,
                                     int order)
{
    unsigned res = 0;
    int i;

    for (i = 0; i < order; i++)
        res += v1[i] * v2[i];

    return (int32_t) res;
}
||||
|
||||
/**
 * Portable implementation of AudioDSPContext.vector_clip_int32:
 * dst[i] = av_clip(src[i], min, max) for every i in [0, len).
 *
 * The earlier hand-unrolled do/while always processed at least 8 elements
 * and decremented an unsigned counter by 8, so len == 0 or a length that is
 * not a multiple of 8 wrapped the counter and ran far past both buffers.
 * A plain bounded loop gives the same result for every valid length and is
 * harmless for the others.
 *
 * @param dst destination array
 * @param src source array
 * @param min lower bound
 * @param max upper bound
 * @param len number of elements
 */
static void vector_clip_int32_c(int32_t *dst, const int32_t *src, int32_t min,
                                int32_t max, unsigned int len)
{
    unsigned int i;

    for (i = 0; i < len; i++)
        dst[i] = av_clip(src[i], min, max);
}
||||
|
||||
/**
 * Fill an AudioDSPContext with the portable C routines, then let the
 * architecture-specific initialisers replace whichever entries they can.
 *
 * @param c context to initialise
 */
av_cold void ff_audiodsp_init(AudioDSPContext *c)
{
    /* C fallbacks come first so every pointer is valid whatever follows. */
    c->vector_clipf        = vector_clipf_c;
    c->vector_clip_int32   = vector_clip_int32_c;
    c->scalarproduct_int16 = scalarproduct_int16_c;

    /* ARCH_* are presumably build-configuration constants -- TODO confirm. */
    if (ARCH_ARM)
        ff_audiodsp_init_arm(c);

    if (ARCH_PPC)
        ff_audiodsp_init_ppc(c);

    if (ARCH_X86)
        ff_audiodsp_init_x86(c);
}
@ -0,0 +1,59 @@ |
||||
/*
|
||||
* This file is part of Libav. |
||||
* |
||||
* Libav is free software; you can redistribute it and/or |
||||
* modify it under the terms of the GNU Lesser General Public |
||||
* License as published by the Free Software Foundation; either |
||||
* version 2.1 of the License, or (at your option) any later version. |
||||
* |
||||
* Libav is distributed in the hope that it will be useful, |
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of |
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
||||
* Lesser General Public License for more details. |
||||
* |
||||
* You should have received a copy of the GNU Lesser General Public |
||||
* License along with Libav; if not, write to the Free Software |
||||
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA |
||||
*/ |
||||
|
||||
#ifndef AVCODEC_AUDIODSP_H |
||||
#define AVCODEC_AUDIODSP_H |
||||
|
||||
#include <stdint.h> |
||||
|
||||
/**
 * Table of audio helper routines. ff_audiodsp_init() fills it with C
 * implementations and lets per-architecture code replace entries.
 */
typedef struct AudioDSPContext {
    /**
     * Compute the scalar (dot) product of two int16_t vectors.
     *
     * @param v1  first vector
     * @param v2  second vector
     *            constraints: 16-byte aligned
     * @param len number of elements in each vector
     *            constraints: should be a multiple of 16
     * @return sum over all i of v1[i] * v2[i]
     */
    int32_t (*scalarproduct_int16)(const int16_t *v1,
                                   const int16_t *v2 /* align 16 */, int len);

    /**
     * Clip every element of an int32_t array to the range [min, max].
     *
     * @param dst destination array
     *            constraints: 16-byte aligned
     * @param src source array
     *            constraints: 16-byte aligned
     * @param min lower bound
     *            constraints: must be in the range [-(1 << 24), 1 << 24]
     * @param max upper bound
     *            constraints: must be in the range [-(1 << 24), 1 << 24]
     * @param len number of elements in the array
     *            constraints: multiple of 32 greater than zero
     */
    void (*vector_clip_int32)(int32_t *dst, const int32_t *src, int32_t min,
                              int32_t max, unsigned int len);

    /**
     * Clip every element of a float array to the range [min, max].
     *
     * @param dst destination array
     *            constraints: 16-byte aligned
     * @param src source array
     *            constraints: 16-byte aligned
     * @param min lower bound
     * @param max upper bound
     * @param len number of elements
     *            constraints: a multiple of 8 at the very least; the
     *            parameter is additionally annotated "align 16" and
     *            ff_vector_clipf_sse() consumes 16 floats per iteration,
     *            so callers should pass a multiple of 16
     */
    void (*vector_clipf)(float *dst /* align 16 */,
                         const float *src /* align 16 */,
                         float min, float max, int len /* align 16 */);
} AudioDSPContext;
||||
|
||||
void ff_audiodsp_init(AudioDSPContext *c); |
||||
void ff_audiodsp_init_arm(AudioDSPContext *c); |
||||
void ff_audiodsp_init_ppc(AudioDSPContext *c); |
||||
void ff_audiodsp_init_x86(AudioDSPContext *c); |
||||
|
||||
#endif /* AVCODEC_AUDIODSP_H */ |
@ -0,0 +1,137 @@ |
||||
;****************************************************************************** |
||||
;* optimized audio functions |
||||
;* Copyright (c) 2008 Loren Merritt |
||||
;* |
||||
;* This file is part of Libav. |
||||
;* |
||||
;* Libav is free software; you can redistribute it and/or |
||||
;* modify it under the terms of the GNU Lesser General Public |
||||
;* License as published by the Free Software Foundation; either |
||||
;* version 2.1 of the License, or (at your option) any later version. |
||||
;* |
||||
;* Libav is distributed in the hope that it will be useful, |
||||
;* but WITHOUT ANY WARRANTY; without even the implied warranty of |
||||
;* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
||||
;* Lesser General Public License for more details. |
||||
;* |
||||
;* You should have received a copy of the GNU Lesser General Public |
||||
;* License along with Libav; if not, write to the Free Software |
||||
;* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA |
||||
;****************************************************************************** |
||||
|
||||
%include "libavutil/x86/x86util.asm" |
||||
|
||||
SECTION_TEXT |
||||
|
||||
%macro SCALARPRODUCT 0
; int32_t ff_scalarproduct_int16(const int16_t *v1, const int16_t *v2,
;                                int order)
;
; Both pointers are advanced to the end of the vectors and a negative byte
; offset counts up towards zero, so a single register serves as index and
; loop counter. Each iteration multiplies 2*mmsize bytes of v1 with v2 and
; accumulates the pairwise sums (pmaddwd) as dwords in m2.
; v1 is read with unaligned loads; v2 is used as a direct memory operand.
cglobal scalarproduct_int16, 3,3,3, v1, v2, order
    ; order is a 32-bit int: double it with a 32-bit operation so that the
    ; upper half of the 64-bit register is cleared before orderq is used for
    ; addressing (the upper bits of an int argument are unspecified on x86-64).
    add     orderd, orderd
    add     v1q, orderq
    add     v2q, orderq
    neg     orderq
    pxor    m2, m2
.loop:
    movu    m0, [v1q + orderq]
    movu    m1, [v1q + orderq + mmsize]
    pmaddwd m0, [v2q + orderq]
    pmaddwd m1, [v2q + orderq + mmsize]
    paddd   m2, m0
    paddd   m2, m1
    add     orderq, mmsize*2
    jl .loop
    ; horizontal add of the dword partial sums into the low dword of m2
%if mmsize == 16
    movhlps m0, m2
    paddd   m2, m0
    pshuflw m0, m2, 0x4e
%else
    pshufw  m0, m2, 0x4e
%endif
    paddd   m2, m0
    movd    eax, m2
    RET
%endmacro

INIT_MMX mmxext
SCALARPRODUCT
INIT_XMM sse2
SCALARPRODUCT
||||
|
||||
|
||||
;-----------------------------------------------------------------------------
; void ff_vector_clip_int32(int32_t *dst, const int32_t *src, int32_t min,
;                           int32_t max, unsigned int len)
;-----------------------------------------------------------------------------

; %1 = number of xmm registers used
; %2 = number of inline load/process/store loops per asm loop
; %3 = process 4*mmsize (%3=0) or 8*mmsize (%3=1) bytes per loop
; %4 = CLIPD function takes min/max as float instead of int (CLIPD_SSE2)
; %5 = suffix
;
; %%i is the vector offset of the current inline repetition. It starts at 0
; and grows by the number of vectors one repetition handles (4, or 8 when
; %3=1). The former multiplicative form (mmsize*N*%%i with %%i starting at 1)
; made the second repetition of the %2=2 variant touch vectors 0,2,4,6
; instead of 4,5,6,7, leaving vectors 5 and 7 of every pass unwritten.
;
; The pointer/counter step mmsize*4*(%2+%3) equals the work done per pass
; only while %2 > 1 and %3 = 1 are not combined; none of the instantiations
; below combines them.
%macro VECTOR_CLIP_INT32 4-5
cglobal vector_clip_int32%5, 5,5,%1, dst, src, min, max, len
%if %4
    cvtsi2ss  m4, minm
    cvtsi2ss  m5, maxm
%else
    movd      m4, minm
    movd      m5, maxm
%endif
    SPLATD    m4
    SPLATD    m5
.loop:
%assign %%i 0
%rep %2
    mova      m0,  [srcq+mmsize*(0+%%i)]
    mova      m1,  [srcq+mmsize*(1+%%i)]
    mova      m2,  [srcq+mmsize*(2+%%i)]
    mova      m3,  [srcq+mmsize*(3+%%i)]
%if %3
    mova      m7,  [srcq+mmsize*(4+%%i)]
    mova      m8,  [srcq+mmsize*(5+%%i)]
    mova      m9,  [srcq+mmsize*(6+%%i)]
    mova      m10, [srcq+mmsize*(7+%%i)]
%endif
    CLIPD  m0,  m4, m5, m6
    CLIPD  m1,  m4, m5, m6
    CLIPD  m2,  m4, m5, m6
    CLIPD  m3,  m4, m5, m6
%if %3
    CLIPD  m7,  m4, m5, m6
    CLIPD  m8,  m4, m5, m6
    CLIPD  m9,  m4, m5, m6
    CLIPD  m10, m4, m5, m6
%endif
    mova  [dstq+mmsize*(0+%%i)], m0
    mova  [dstq+mmsize*(1+%%i)], m1
    mova  [dstq+mmsize*(2+%%i)], m2
    mova  [dstq+mmsize*(3+%%i)], m3
%if %3
    mova  [dstq+mmsize*(4+%%i)], m7
    mova  [dstq+mmsize*(5+%%i)], m8
    mova  [dstq+mmsize*(6+%%i)], m9
    mova  [dstq+mmsize*(7+%%i)], m10
%endif
%assign %%i %%i+4*(%3+1)
%endrep
    add     srcq, mmsize*4*(%2+%3)
    add     dstq, mmsize*4*(%2+%3)
    sub     lend, mmsize*(%2+%3)
    jg .loop
    REP_RET
%endmacro

INIT_MMX mmx
%define CLIPD CLIPD_MMX
VECTOR_CLIP_INT32 0, 1, 0, 0
INIT_XMM sse2
VECTOR_CLIP_INT32 6, 1, 0, 0, _int
%define CLIPD CLIPD_SSE2
VECTOR_CLIP_INT32 6, 2, 0, 1
INIT_XMM sse4
%define CLIPD CLIPD_SSE41
%ifdef m8
VECTOR_CLIP_INT32 11, 1, 1, 0
%else
VECTOR_CLIP_INT32 6, 1, 0, 0
%endif
@ -0,0 +1,25 @@ |
||||
/*
|
||||
* This file is part of Libav. |
||||
* |
||||
* Libav is free software; you can redistribute it and/or |
||||
* modify it under the terms of the GNU Lesser General Public |
||||
* License as published by the Free Software Foundation; either |
||||
* version 2.1 of the License, or (at your option) any later version. |
||||
* |
||||
* Libav is distributed in the hope that it will be useful, |
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of |
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
||||
* Lesser General Public License for more details. |
||||
* |
||||
* You should have received a copy of the GNU Lesser General Public |
||||
* License along with Libav; if not, write to the Free Software |
||||
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA |
||||
*/ |
||||
|
||||
#ifndef AVCODEC_X86_AUDIODSP_H |
||||
#define AVCODEC_X86_AUDIODSP_H |
||||
|
||||
void ff_vector_clipf_sse(float *dst, const float *src, |
||||
float min, float max, int len); |
||||
|
||||
#endif /* AVCODEC_X86_AUDIODSP_H */ |
@ -0,0 +1,66 @@ |
||||
/*
|
||||
* This file is part of Libav. |
||||
* |
||||
* Libav is free software; you can redistribute it and/or |
||||
* modify it under the terms of the GNU Lesser General Public |
||||
* License as published by the Free Software Foundation; either |
||||
* version 2.1 of the License, or (at your option) any later version. |
||||
* |
||||
* Libav is distributed in the hope that it will be useful, |
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of |
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
||||
* Lesser General Public License for more details. |
||||
* |
||||
* You should have received a copy of the GNU Lesser General Public |
||||
* License along with Libav; if not, write to the Free Software |
||||
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA |
||||
*/ |
||||
|
||||
#include <stdint.h> |
||||
|
||||
#include "config.h" |
||||
#include "libavutil/attributes.h" |
||||
#include "libavutil/cpu.h" |
||||
#include "libavutil/x86/asm.h" |
||||
#include "libavutil/x86/cpu.h" |
||||
#include "libavcodec/audiodsp.h" |
||||
#include "audiodsp.h" |
||||
|
||||
int32_t ff_scalarproduct_int16_mmxext(const int16_t *v1, const int16_t *v2, |
||||
int order); |
||||
int32_t ff_scalarproduct_int16_sse2(const int16_t *v1, const int16_t *v2, |
||||
int order); |
||||
|
||||
void ff_vector_clip_int32_mmx(int32_t *dst, const int32_t *src, |
||||
int32_t min, int32_t max, unsigned int len); |
||||
void ff_vector_clip_int32_sse2(int32_t *dst, const int32_t *src, |
||||
int32_t min, int32_t max, unsigned int len); |
||||
void ff_vector_clip_int32_int_sse2(int32_t *dst, const int32_t *src, |
||||
int32_t min, int32_t max, unsigned int len); |
||||
void ff_vector_clip_int32_sse4(int32_t *dst, const int32_t *src, |
||||
int32_t min, int32_t max, unsigned int len); |
||||
|
||||
/**
 * Install x86-specific routines into an AudioDSPContext.
 *
 * The checks run in a fixed order and each one overwrites whatever an
 * earlier one stored, so the last matching check decides each pointer.
 *
 * @param c context whose function pointers may be replaced
 */
av_cold void ff_audiodsp_init_x86(AudioDSPContext *c)
{
    const int flags = av_get_cpu_flags();

    if (EXTERNAL_MMX(flags))
        c->vector_clip_int32 = ff_vector_clip_int32_mmx;

    if (EXTERNAL_MMXEXT(flags))
        c->scalarproduct_int16 = ff_scalarproduct_int16_mmxext;

    if (INLINE_SSE(flags))
        c->vector_clipf = ff_vector_clipf_sse;

    if (EXTERNAL_SSE2(flags)) {
        c->scalarproduct_int16 = ff_scalarproduct_int16_sse2;
        /* CPUs flagged as Atom get the integer-compare SSE2 variant. */
        c->vector_clip_int32   = (flags & AV_CPU_FLAG_ATOM)
                                 ? ff_vector_clip_int32_int_sse2
                                 : ff_vector_clip_int32_sse2;
    }

    if (EXTERNAL_SSE4(flags))
        c->vector_clip_int32 = ff_vector_clip_int32_sse4;
}
@ -0,0 +1,58 @@ |
||||
/*
|
||||
* This file is part of Libav. |
||||
* |
||||
* Libav is free software; you can redistribute it and/or |
||||
* modify it under the terms of the GNU Lesser General Public |
||||
* License as published by the Free Software Foundation; either |
||||
* version 2.1 of the License, or (at your option) any later version. |
||||
* |
||||
* Libav is distributed in the hope that it will be useful, |
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of |
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
||||
* Lesser General Public License for more details. |
||||
* |
||||
* You should have received a copy of the GNU Lesser General Public |
||||
* License along with Libav; if not, write to the Free Software |
||||
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA |
||||
*/ |
||||
|
||||
#include "config.h" |
||||
#include "libavutil/x86/asm.h" |
||||
#include "audiodsp.h" |
||||
|
||||
#if HAVE_INLINE_ASM |
||||
|
||||
void ff_vector_clipf_sse(float *dst, const float *src, |
||||
float min, float max, int len) |
||||
{ |
||||
x86_reg i = (len - 16) * 4; |
||||
__asm__ volatile ( |
||||
"movss %3, %%xmm4 \n\t" |
||||
"movss %4, %%xmm5 \n\t" |
||||
"shufps $0, %%xmm4, %%xmm4 \n\t" |
||||
"shufps $0, %%xmm5, %%xmm5 \n\t" |
||||
"1: \n\t" |
||||
"movaps (%2, %0), %%xmm0 \n\t" // 3/1 on intel
|
||||
"movaps 16(%2, %0), %%xmm1 \n\t" |
||||
"movaps 32(%2, %0), %%xmm2 \n\t" |
||||
"movaps 48(%2, %0), %%xmm3 \n\t" |
||||
"maxps %%xmm4, %%xmm0 \n\t" |
||||
"maxps %%xmm4, %%xmm1 \n\t" |
||||
"maxps %%xmm4, %%xmm2 \n\t" |
||||
"maxps %%xmm4, %%xmm3 \n\t" |
||||
"minps %%xmm5, %%xmm0 \n\t" |
||||
"minps %%xmm5, %%xmm1 \n\t" |
||||
"minps %%xmm5, %%xmm2 \n\t" |
||||
"minps %%xmm5, %%xmm3 \n\t" |
||||
"movaps %%xmm0, (%1, %0) \n\t" |
||||
"movaps %%xmm1, 16(%1, %0) \n\t" |
||||
"movaps %%xmm2, 32(%1, %0) \n\t" |
||||
"movaps %%xmm3, 48(%1, %0) \n\t" |
||||
"sub $64, %0 \n\t" |
||||
"jge 1b \n\t" |
||||
: "+&r" (i) |
||||
: "r" (dst), "r" (src), "m" (min), "m" (max) |
||||
: "memory"); |
||||
} |
||||
|
||||
#endif /* HAVE_INLINE_ASM */ |
Loading…
Reference in new issue