mirror of https://github.com/FFmpeg/FFmpeg.git
parent
ca1e36a8e4
commit
9a9e2f1c8a
35 changed files with 694 additions and 404 deletions
@ -0,0 +1,26 @@ |
|||||||
|
/*
|
||||||
|
* This file is part of Libav. |
||||||
|
* |
||||||
|
* Libav is free software; you can redistribute it and/or |
||||||
|
* modify it under the terms of the GNU Lesser General Public |
||||||
|
* License as published by the Free Software Foundation; either |
||||||
|
* version 2.1 of the License, or (at your option) any later version. |
||||||
|
* |
||||||
|
* Libav is distributed in the hope that it will be useful, |
||||||
|
* but WITHOUT ANY WARRANTY; without even the implied warranty of |
||||||
|
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
||||||
|
* Lesser General Public License for more details. |
||||||
|
* |
||||||
|
* You should have received a copy of the GNU Lesser General Public |
||||||
|
* License along with Libav; if not, write to the Free Software |
||||||
|
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA |
||||||
|
*/ |
||||||
|
|
||||||
|
#ifndef AVCODEC_ARM_AUDIODSP_ARM_H |
||||||
|
#define AVCODEC_ARM_AUDIODSP_ARM_H |
||||||
|
|
||||||
|
#include "libavcodec/audiodsp.h" |
||||||
|
|
||||||
|
void ff_audiodsp_init_neon(AudioDSPContext *c); |
||||||
|
|
||||||
|
#endif /* AVCODEC_ARM_AUDIODSP_ARM_H */ |
@ -0,0 +1,33 @@ |
|||||||
|
/*
|
||||||
|
* ARM optimized audio functions |
||||||
|
* |
||||||
|
* This file is part of Libav. |
||||||
|
* |
||||||
|
* Libav is free software; you can redistribute it and/or |
||||||
|
* modify it under the terms of the GNU Lesser General Public |
||||||
|
* License as published by the Free Software Foundation; either |
||||||
|
* version 2.1 of the License, or (at your option) any later version. |
||||||
|
* |
||||||
|
* Libav is distributed in the hope that it will be useful, |
||||||
|
* but WITHOUT ANY WARRANTY; without even the implied warranty of |
||||||
|
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
||||||
|
* Lesser General Public License for more details. |
||||||
|
* |
||||||
|
* You should have received a copy of the GNU Lesser General Public |
||||||
|
* License along with Libav; if not, write to the Free Software |
||||||
|
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA |
||||||
|
*/ |
||||||
|
|
||||||
|
#include "libavutil/attributes.h" |
||||||
|
#include "libavutil/cpu.h" |
||||||
|
#include "libavutil/arm/cpu.h" |
||||||
|
#include "libavcodec/audiodsp.h" |
||||||
|
#include "audiodsp_arm.h" |
||||||
|
|
||||||
|
/**
 * Install ARM-specific implementations into an AudioDSPContext.
 *
 * Reads the runtime CPU flags once; when have_neon() accepts them,
 * ff_audiodsp_init_neon() is called on @p c. Otherwise @p c is left as
 * the caller set it up.
 *
 * @param c context whose function pointers may be replaced
 */
av_cold void ff_audiodsp_init_arm(AudioDSPContext *c)
{
    const int flags = av_get_cpu_flags();

    if (!have_neon(flags))
        return;

    ff_audiodsp_init_neon(c);
}
@ -0,0 +1,41 @@ |
|||||||
|
/*
|
||||||
|
* ARM NEON optimised audio functions |
||||||
|
* Copyright (c) 2008 Mans Rullgard <mans@mansr.com> |
||||||
|
* |
||||||
|
* This file is part of Libav. |
||||||
|
* |
||||||
|
* Libav is free software; you can redistribute it and/or |
||||||
|
* modify it under the terms of the GNU Lesser General Public |
||||||
|
* License as published by the Free Software Foundation; either |
||||||
|
* version 2.1 of the License, or (at your option) any later version. |
||||||
|
* |
||||||
|
* Libav is distributed in the hope that it will be useful, |
||||||
|
* but WITHOUT ANY WARRANTY; without even the implied warranty of |
||||||
|
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
||||||
|
* Lesser General Public License for more details. |
||||||
|
* |
||||||
|
* You should have received a copy of the GNU Lesser General Public |
||||||
|
* License along with Libav; if not, write to the Free Software |
||||||
|
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA |
||||||
|
*/ |
||||||
|
|
||||||
|
#include <stdint.h> |
||||||
|
|
||||||
|
#include "libavutil/attributes.h" |
||||||
|
#include "libavcodec/audiodsp.h" |
||||||
|
#include "audiodsp_arm.h" |
||||||
|
|
||||||
|
void ff_vector_clipf_neon(float *dst, const float *src, float min, float max, |
||||||
|
int len); |
||||||
|
void ff_vector_clip_int32_neon(int32_t *dst, const int32_t *src, int32_t min, |
||||||
|
int32_t max, unsigned int len); |
||||||
|
|
||||||
|
int32_t ff_scalarproduct_int16_neon(const int16_t *v1, const int16_t *v2, int len); |
||||||
|
|
||||||
|
/**
 * Point every AudioDSPContext entry at its NEON implementation.
 *
 * The caller is responsible for checking that NEON is usable; this
 * function assigns unconditionally.
 *
 * @param c context to fill in
 */
av_cold void ff_audiodsp_init_neon(AudioDSPContext *c)
{
    c->scalarproduct_int16 = ff_scalarproduct_int16_neon;

    /* float and int32 clipping */
    c->vector_clipf        = ff_vector_clipf_neon;
    c->vector_clip_int32   = ff_vector_clip_int32_neon;
}
@ -0,0 +1,64 @@ |
|||||||
|
/* |
||||||
|
* ARM NEON optimised audio functions |
||||||
|
* Copyright (c) 2008 Mans Rullgard <mans@mansr.com>
|
||||||
|
* |
||||||
|
* This file is part of Libav. |
||||||
|
* |
||||||
|
* Libav is free software; you can redistribute it and/or
|
||||||
|
* modify it under the terms of the GNU Lesser General Public |
||||||
|
* License as published by the Free Software Foundation; either
|
||||||
|
* version 2.1 of the License, or (at your option) any later version. |
||||||
|
* |
||||||
|
* Libav is distributed in the hope that it will be useful, |
||||||
|
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||||
|
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
||||||
|
* Lesser General Public License for more details. |
||||||
|
* |
||||||
|
* You should have received a copy of the GNU Lesser General Public |
||||||
|
* License along with Libav; if not, write to the Free Software
|
||||||
|
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA |
||||||
|
*/ |
||||||
|
|
||||||
|
#include "libavutil/arm/asm.S" |
||||||
|
|
||||||
|
/*
 * void ff_vector_clipf_neon(float *dst, const float *src,
 *                           float min, float max, int len)
 *
 * Clamp floats read through r1 (src) and store them through r0 (dst),
 * eight values per loop pass. q0 holds the lower bound in every lane and
 * q1 the upper bound. Loads and stores carry a :128 alignment qualifier.
 * The loop leaves only when the counter in r2 reaches exactly zero
 * (subs/beq), so len must be a positive multiple of 8.
 */
function ff_vector_clipf_neon, export=1 |
||||||
|
/* VFP-prefixed lines: min/max are taken from d0[0]/d0[1]. d0[1] is
 * broadcast first because q0 contains d0 and would be overwritten.
 * NOTE(review): VFP/NOVFP are defined in asm.S (not visible here) and
 * presumably select hard-float vs. soft-float argument passing. */
VFP vdup.32 q1, d0[1] |
||||||
|
VFP vdup.32 q0, d0[0] |
||||||
|
/* NOVFP-prefixed lines: min is in r2, max in r3, len is loaded from [sp]. */
NOVFP vdup.32 q0, r2 |
||||||
|
NOVFP vdup.32 q1, r3 |
||||||
|
NOVFP ldr r2, [sp] |
||||||
|
/* Prologue: load the first eight floats and apply the upper bound. */
vld1.f32 {q2},[r1,:128]! |
||||||
|
vmin.f32 q10, q2, q1 |
||||||
|
vld1.f32 {q3},[r1,:128]! |
||||||
|
vmin.f32 q11, q3, q1 |
||||||
|
/* Loop: apply the lower bound to the pending eight values, then load and
 * upper-clamp the next eight before storing the finished ones. */
1: vmax.f32 q8, q10, q0 |
||||||
|
vmax.f32 q9, q11, q0 |
||||||
|
subs r2, r2, #8 |
||||||
|
beq 2f |
||||||
|
vld1.f32 {q2},[r1,:128]! |
||||||
|
vmin.f32 q10, q2, q1 |
||||||
|
vld1.f32 {q3},[r1,:128]! |
||||||
|
vmin.f32 q11, q3, q1 |
||||||
|
vst1.f32 {q8},[r0,:128]! |
||||||
|
vst1.f32 {q9},[r0,:128]! |
||||||
|
b 1b |
||||||
|
/* Epilogue: store the last eight results; nothing more is loaded. */
2: vst1.f32 {q8},[r0,:128]! |
||||||
|
vst1.f32 {q9},[r0,:128]! |
||||||
|
bx lr |
||||||
|
endfunc |
||||||
|
|
||||||
|
/*
 * void ff_vector_clip_int32_neon(int32_t *dst, const int32_t *src,
 *                                int32_t min, int32_t max, unsigned int len)
 *
 * Clamp signed 32-bit integers read through r1 and store them through r0,
 * eight per loop pass. min arrives in r2, max in r3, and len is loaded
 * from [sp]. The body runs before the counter is tested, so at least
 * eight elements are always processed; the loop repeats while the
 * remaining count is greater than zero (bgt, a signed comparison).
 */
function ff_vector_clip_int32_neon, export=1 |
||||||
|
/* q0 = min in all lanes, q1 = max in all lanes, r2 = len */
vdup.32 q0, r2 |
||||||
|
vdup.32 q1, r3 |
||||||
|
ldr r2, [sp] |
||||||
|
1: |
||||||
|
vld1.32 {q2-q3}, [r1,:128]! |
||||||
|
/* upper bound first, then lower bound */
vmin.s32 q2, q2, q1 |
||||||
|
vmin.s32 q3, q3, q1 |
||||||
|
vmax.s32 q2, q2, q0 |
||||||
|
vmax.s32 q3, q3, q0 |
||||||
|
vst1.32 {q2-q3}, [r0,:128]! |
||||||
|
subs r2, r2, #8 |
||||||
|
bgt 1b |
||||||
|
bx lr |
||||||
|
endfunc |
@ -0,0 +1,118 @@ |
|||||||
|
/*
|
||||||
|
* This file is part of Libav. |
||||||
|
* |
||||||
|
* Libav is free software; you can redistribute it and/or |
||||||
|
* modify it under the terms of the GNU Lesser General Public |
||||||
|
* License as published by the Free Software Foundation; either |
||||||
|
* version 2.1 of the License, or (at your option) any later version. |
||||||
|
* |
||||||
|
* Libav is distributed in the hope that it will be useful, |
||||||
|
* but WITHOUT ANY WARRANTY; without even the implied warranty of |
||||||
|
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
||||||
|
* Lesser General Public License for more details. |
||||||
|
* |
||||||
|
* You should have received a copy of the GNU Lesser General Public |
||||||
|
* License along with Libav; if not, write to the Free Software |
||||||
|
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA |
||||||
|
*/ |
||||||
|
|
||||||
|
#include <stdint.h>
#include <string.h>

#include "libavutil/attributes.h"
#include "libavutil/common.h"
#include "audiodsp.h"
||||||
|
|
||||||
|
/**
 * Clip one single-precision value given as its raw 32-bit pattern.
 *
 * Only meaningful when the lower bound is negative and the upper bound is
 * positive. Bit patterns with the sign bit set compare, as unsigned
 * integers, in order of increasing magnitude, so:
 *  - a > mini is true exactly for negative inputs below the lower bound
 *    (inputs with a clear sign bit are always smaller than mini);
 *  - after flipping the sign bit the same comparison detects positive
 *    inputs above the upper bound.
 *
 * @param a        bit pattern of the input value
 * @param mini     bit pattern of the (negative) lower bound
 * @param maxi     bit pattern of the (positive) upper bound
 * @param maxisign maxi with the sign bit flipped
 * @return bit pattern of the clipped value
 */
static inline uint32_t clipf_c_one(uint32_t a, uint32_t mini,
                                   uint32_t maxi, uint32_t maxisign)
{
    if (a > mini)
        return mini;
    else if ((a ^ (1U << 31)) > maxisign)
        return maxi;
    else
        return a;
}

/**
 * Clip a float vector to [*min, *max] using integer comparisons.
 *
 * Must only be called with *min < 0 and *max > 0. Elements are handled in
 * groups of eight, so len is effectively rounded up to a multiple of 8;
 * callers are expected to pass a multiple of 8. dst may equal src.
 *
 * Bit patterns are moved with memcpy() rather than read through
 * uint32_t pointers: accessing float objects through an incompatible
 * pointer type violates the C effective-type (strict aliasing) rules.
 *
 * @param dst destination array
 * @param src source array
 * @param min pointer to the lower bound
 * @param max pointer to the upper bound
 * @param len number of elements
 */
static void vector_clipf_c_opposite_sign(float *dst, const float *src,
                                         float *min, float *max, int len)
{
    uint32_t mini, maxi, maxisign;
    int i, j;

    memcpy(&mini, min, sizeof(mini));
    memcpy(&maxi, max, sizeof(maxi));
    maxisign = maxi ^ (1U << 31);

    for (i = 0; i < len; i += 8) {
        for (j = 0; j < 8; j++) {
            uint32_t bits;

            memcpy(&bits, &src[i + j], sizeof(bits));
            bits = clipf_c_one(bits, mini, maxi, maxisign);
            memcpy(&dst[i + j], &bits, sizeof(bits));
        }
    }
}
||||||
|
|
||||||
|
/**
 * Portable implementation of AudioDSPContext.vector_clipf.
 *
 * When the bounds straddle zero the integer-comparison variant is used;
 * otherwise every element goes through av_clipf(). Elements are handled
 * in groups of eight, so len is expected to be a multiple of 8.
 *
 * @param dst destination array
 * @param src source array
 * @param min lower bound
 * @param max upper bound
 * @param len number of elements
 */
static void vector_clipf_c(float *dst, const float *src,
                           float min, float max, int len)
{
    int base, k;

    if (min < 0 && max > 0) {
        vector_clipf_c_opposite_sign(dst, src, &min, &max, len);
        return;
    }

    for (base = 0; base < len; base += 8)
        for (k = 0; k < 8; k++)
            dst[base + k] = av_clipf(src[base + k], min, max);
}
||||||
|
|
||||||
|
/**
 * Portable implementation of AudioDSPContext.scalarproduct_int16.
 *
 * The sum is accumulated in an unsigned integer so that it wraps modulo
 * 2^32 instead of triggering signed-overflow undefined behaviour. Each
 * product of two int16_t values fits in an int.
 *
 * @param v1    first vector
 * @param v2    second vector
 * @param order number of elements; values <= 0 yield 0
 * @return sum of v1[i] * v2[i], wrapped to 32 bits
 */
static int32_t scalarproduct_int16_c(const int16_t *v1, const int16_t *v2,
                                     int order)
{
    unsigned res = 0;
    int i;

    for (i = 0; i < order; i++)
        res += v1[i] * v2[i];

    return (int32_t) res;
}
||||||
|
|
||||||
|
/**
 * Portable implementation of AudioDSPContext.vector_clip_int32.
 *
 * Processes exactly len elements. A do/while loop that subtracts 8 from
 * an unsigned counter would wrap around for len == 0 or any len that is
 * not a multiple of 8 and run far past both arrays; an index loop avoids
 * that while giving the same result for valid lengths.
 *
 * @param dst destination array
 * @param src source array
 * @param min lower bound
 * @param max upper bound
 * @param len number of elements
 */
static void vector_clip_int32_c(int32_t *dst, const int32_t *src, int32_t min,
                                int32_t max, unsigned int len)
{
    unsigned int i;

    for (i = 0; i < len; i++)
        dst[i] = av_clip(src[i], min, max);
}
||||||
|
|
||||||
|
/**
 * Initialise an AudioDSPContext.
 *
 * The portable C routines are installed first; each architecture init
 * function enabled by its ARCH_* constant is then called and may replace
 * individual entries.
 *
 * @param c context to initialise
 */
av_cold void ff_audiodsp_init(AudioDSPContext *c)
{
    /* C fallbacks */
    c->vector_clipf        = vector_clipf_c;
    c->vector_clip_int32   = vector_clip_int32_c;
    c->scalarproduct_int16 = scalarproduct_int16_c;

    /* architecture-specific overrides */
    if (ARCH_ARM) {
        ff_audiodsp_init_arm(c);
    }
    if (ARCH_PPC) {
        ff_audiodsp_init_ppc(c);
    }
    if (ARCH_X86) {
        ff_audiodsp_init_x86(c);
    }
}
@ -0,0 +1,59 @@ |
|||||||
|
/*
|
||||||
|
* This file is part of Libav. |
||||||
|
* |
||||||
|
* Libav is free software; you can redistribute it and/or |
||||||
|
* modify it under the terms of the GNU Lesser General Public |
||||||
|
* License as published by the Free Software Foundation; either |
||||||
|
* version 2.1 of the License, or (at your option) any later version. |
||||||
|
* |
||||||
|
* Libav is distributed in the hope that it will be useful, |
||||||
|
* but WITHOUT ANY WARRANTY; without even the implied warranty of |
||||||
|
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
||||||
|
* Lesser General Public License for more details. |
||||||
|
* |
||||||
|
* You should have received a copy of the GNU Lesser General Public |
||||||
|
* License along with Libav; if not, write to the Free Software |
||||||
|
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA |
||||||
|
*/ |
||||||
|
|
||||||
|
#ifndef AVCODEC_AUDIODSP_H |
||||||
|
#define AVCODEC_AUDIODSP_H |
||||||
|
|
||||||
|
#include <stdint.h> |
||||||
|
|
||||||
|
/**
 * Table of audio DSP primitives. ff_audiodsp_init() fills it with C
 * implementations and lets the architecture init functions replace
 * individual entries.
 */
typedef struct AudioDSPContext { |
||||||
|
/**
|
||||||
|
* Calculate scalar product of two vectors. |
||||||
|
* @param v1  first vector
* @param v2  second vector, 16-byte aligned
* @param len length of vectors, should be multiple of 16 |
||||||
|
* @return sum of v1[i] * v2[i] over all elements
*/ |
||||||
|
int32_t (*scalarproduct_int16)(const int16_t *v1, |
||||||
|
const int16_t *v2 /* align 16 */, int len); |
||||||
|
|
||||||
|
/**
|
||||||
|
* Clip each element in an array of int32_t to a given minimum and |
||||||
|
* maximum value. |
||||||
|
* @param dst destination array |
||||||
|
* constraints: 16-byte aligned |
||||||
|
* @param src source array |
||||||
|
* constraints: 16-byte aligned |
||||||
|
* @param min minimum value |
||||||
|
* constraints: must be in the range [-(1 << 24), 1 << 24] |
||||||
|
* @param max maximum value |
||||||
|
* constraints: must be in the range [-(1 << 24), 1 << 24] |
||||||
|
* @param len number of elements in the array |
||||||
|
* constraints: multiple of 32 greater than zero |
||||||
|
*
* NOTE(review): the min/max range presumably exists because one x86
* variant converts the bounds to single-precision float - confirm.
*/ |
||||||
|
void (*vector_clip_int32)(int32_t *dst, const int32_t *src, int32_t min, |
||||||
|
int32_t max, unsigned int len); |
||||||
|
/**
 * Clip each element in an array of float to [min, max].
 * Arrays must be 16-byte aligned. len must be a positive multiple of 16:
 * the C and NEON versions consume 8 floats per pass, the SSE version
 * consumes 16 per pass.
 */ |
||||||
|
void (*vector_clipf)(float *dst /* align 16 */, |
||||||
|
const float *src /* align 16 */, |
||||||
|
float min, float max, int len /* align 16 */); |
||||||
|
} AudioDSPContext; |
||||||
|
|
||||||
|
void ff_audiodsp_init(AudioDSPContext *c); |
||||||
|
void ff_audiodsp_init_arm(AudioDSPContext *c); |
||||||
|
void ff_audiodsp_init_ppc(AudioDSPContext *c); |
||||||
|
void ff_audiodsp_init_x86(AudioDSPContext *c); |
||||||
|
|
||||||
|
#endif /* AVCODEC_AUDIODSP_H */ |
@ -0,0 +1,137 @@ |
|||||||
|
;****************************************************************************** |
||||||
|
;* optimized audio functions |
||||||
|
;* Copyright (c) 2008 Loren Merritt |
||||||
|
;* |
||||||
|
;* This file is part of Libav. |
||||||
|
;* |
||||||
|
;* Libav is free software; you can redistribute it and/or |
||||||
|
;* modify it under the terms of the GNU Lesser General Public |
||||||
|
;* License as published by the Free Software Foundation; either |
||||||
|
;* version 2.1 of the License, or (at your option) any later version. |
||||||
|
;* |
||||||
|
;* Libav is distributed in the hope that it will be useful, |
||||||
|
;* but WITHOUT ANY WARRANTY; without even the implied warranty of |
||||||
|
;* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
||||||
|
;* Lesser General Public License for more details. |
||||||
|
;* |
||||||
|
;* You should have received a copy of the GNU Lesser General Public |
||||||
|
;* License along with Libav; if not, write to the Free Software |
||||||
|
;* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA |
||||||
|
;****************************************************************************** |
||||||
|
|
||||||
|
%include "libavutil/x86/x86util.asm" |
||||||
|
|
||||||
|
SECTION_TEXT |
||||||
|
|
||||||
|
;-----------------------------------------------------------------------------
; int ff_scalarproduct_int16(int16_t *v1, int16_t *v2, int order)
;
; Sum of v1[i] * v2[i] over order elements. Each loop pass consumes
; 2 * mmsize bytes from both vectors, so order must be a positive multiple
; of mmsize (in int16 elements). v1 is loaded unaligned (movu); v2 is used
; directly as a pmaddwd memory operand.
;
; order is a 32-bit int. Doubling it with a 32-bit add zero-extends the
; result into orderq on x86-64, so the 64-bit pointer arithmetic below never
; sees undefined upper bits of the argument register.
;
; NOTE(review): the MMX instantiation returns without emms; confirm that
; callers restore the x87 state themselves.
;-----------------------------------------------------------------------------
%macro SCALARPRODUCT 0
cglobal scalarproduct_int16, 3,3,3, v1, v2, order
    add     orderd, orderd              ; element count -> byte count
    add     v1q, orderq                 ; point both vectors at their end
    add     v2q, orderq
    neg     orderq                      ; negative offset counting up to 0
    pxor    m2, m2                      ; accumulator
.loop:
    movu    m0, [v1q + orderq]
    movu    m1, [v1q + orderq + mmsize]
    pmaddwd m0, [v2q + orderq]
    pmaddwd m1, [v2q + orderq + mmsize]
    paddd   m2, m0
    paddd   m2, m1
    add     orderq, mmsize*2
    jl .loop
    ; horizontal add of the dword lanes in m2
%if mmsize == 16
    movhlps m0, m2
    paddd   m2, m0
    pshuflw m0, m2, 0x4e
%else
    pshufw  m0, m2, 0x4e
%endif
    paddd   m2, m0
    movd    eax, m2
    RET
%endmacro

INIT_MMX mmxext
SCALARPRODUCT
INIT_XMM sse2
SCALARPRODUCT
||||||
|
|
||||||
|
|
||||||
|
;----------------------------------------------------------------------------- |
||||||
|
; void ff_vector_clip_int32(int32_t *dst, const int32_t *src, int32_t min, |
||||||
|
; int32_t max, unsigned int len) |
||||||
|
;----------------------------------------------------------------------------- |
||||||
|
|
||||||
|
; %1 = number of xmm registers used
; %2 = number of inline load/process/store loops per asm loop
; %3 = process 4*mmsize (%3=0) or 8*mmsize (%3=1) bytes per loop
; %4 = CLIPD function takes min/max as float instead of int (CLIPD_SSE2)
; %5 = suffix
;
; Each repetition handles 4 * (1 + %3) consecutive registers. %%i is the
; register offset of the current repetition and advances by that amount, so
; repetitions cover adjacent, non-overlapping data. (Multiplying the offsets
; by a repetition counter made the second repetition of a 2x unroll touch
; registers 0, 2, 4, 6 instead of 4..7.)
%macro VECTOR_CLIP_INT32 4-5
cglobal vector_clip_int32%5, 5,5,%1, dst, src, min, max, len
%if %4
    cvtsi2ss  m4, minm
    cvtsi2ss  m5, maxm
%else
    movd      m4, minm
    movd      m5, maxm
%endif
    SPLATD    m4                        ; m4 = min in every lane
    SPLATD    m5                        ; m5 = max in every lane
.loop:
%assign %%i 0
%rep %2
    mova      m0,  [srcq + mmsize * (0 + %%i)]
    mova      m1,  [srcq + mmsize * (1 + %%i)]
    mova      m2,  [srcq + mmsize * (2 + %%i)]
    mova      m3,  [srcq + mmsize * (3 + %%i)]
%if %3
    mova      m7,  [srcq + mmsize * (4 + %%i)]
    mova      m8,  [srcq + mmsize * (5 + %%i)]
    mova      m9,  [srcq + mmsize * (6 + %%i)]
    mova      m10, [srcq + mmsize * (7 + %%i)]
%endif
    CLIPD  m0,  m4, m5, m6
    CLIPD  m1,  m4, m5, m6
    CLIPD  m2,  m4, m5, m6
    CLIPD  m3,  m4, m5, m6
%if %3
    CLIPD  m7,  m4, m5, m6
    CLIPD  m8,  m4, m5, m6
    CLIPD  m9,  m4, m5, m6
    CLIPD  m10, m4, m5, m6
%endif
    mova  [dstq + mmsize * (0 + %%i)], m0
    mova  [dstq + mmsize * (1 + %%i)], m1
    mova  [dstq + mmsize * (2 + %%i)], m2
    mova  [dstq + mmsize * (3 + %%i)], m3
%if %3
    mova  [dstq + mmsize * (4 + %%i)], m7
    mova  [dstq + mmsize * (5 + %%i)], m8
    mova  [dstq + mmsize * (6 + %%i)], m9
    mova  [dstq + mmsize * (7 + %%i)], m10
%endif
%assign %%i (%%i + 4 * (1 + %3))
%endrep
    ; bytes consumed per pass: mmsize * 4 * (1 + %3) * %2;
    ; elements consumed per pass: a quarter of that
    add     srcq, mmsize*4*%2*(1+%3)
    add     dstq, mmsize*4*%2*(1+%3)
    sub     lend, mmsize*%2*(1+%3)
    jg .loop
    REP_RET
%endmacro

INIT_MMX mmx
%define CLIPD CLIPD_MMX
VECTOR_CLIP_INT32 0, 1, 0, 0
INIT_XMM sse2
; the _int variant is assembled while CLIPD still names CLIPD_MMX
VECTOR_CLIP_INT32 6, 1, 0, 0, _int
%define CLIPD CLIPD_SSE2
VECTOR_CLIP_INT32 6, 2, 0, 1
INIT_XMM sse4
%define CLIPD CLIPD_SSE41
%ifdef m8
VECTOR_CLIP_INT32 11, 1, 1, 0
%else
VECTOR_CLIP_INT32 6, 1, 0, 0
%endif
@ -0,0 +1,25 @@ |
|||||||
|
/*
|
||||||
|
* This file is part of Libav. |
||||||
|
* |
||||||
|
* Libav is free software; you can redistribute it and/or |
||||||
|
* modify it under the terms of the GNU Lesser General Public |
||||||
|
* License as published by the Free Software Foundation; either |
||||||
|
* version 2.1 of the License, or (at your option) any later version. |
||||||
|
* |
||||||
|
* Libav is distributed in the hope that it will be useful, |
||||||
|
* but WITHOUT ANY WARRANTY; without even the implied warranty of |
||||||
|
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
||||||
|
* Lesser General Public License for more details. |
||||||
|
* |
||||||
|
* You should have received a copy of the GNU Lesser General Public |
||||||
|
* License along with Libav; if not, write to the Free Software |
||||||
|
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA |
||||||
|
*/ |
||||||
|
|
||||||
|
#ifndef AVCODEC_X86_AUDIODSP_H |
||||||
|
#define AVCODEC_X86_AUDIODSP_H |
||||||
|
|
||||||
|
void ff_vector_clipf_sse(float *dst, const float *src, |
||||||
|
float min, float max, int len); |
||||||
|
|
||||||
|
#endif /* AVCODEC_X86_AUDIODSP_H */ |
@ -0,0 +1,66 @@ |
|||||||
|
/*
|
||||||
|
* This file is part of Libav. |
||||||
|
* |
||||||
|
* Libav is free software; you can redistribute it and/or |
||||||
|
* modify it under the terms of the GNU Lesser General Public |
||||||
|
* License as published by the Free Software Foundation; either |
||||||
|
* version 2.1 of the License, or (at your option) any later version. |
||||||
|
* |
||||||
|
* Libav is distributed in the hope that it will be useful, |
||||||
|
* but WITHOUT ANY WARRANTY; without even the implied warranty of |
||||||
|
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
||||||
|
* Lesser General Public License for more details. |
||||||
|
* |
||||||
|
* You should have received a copy of the GNU Lesser General Public |
||||||
|
* License along with Libav; if not, write to the Free Software |
||||||
|
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA |
||||||
|
*/ |
||||||
|
|
||||||
|
#include <stdint.h> |
||||||
|
|
||||||
|
#include "config.h" |
||||||
|
#include "libavutil/attributes.h" |
||||||
|
#include "libavutil/cpu.h" |
||||||
|
#include "libavutil/x86/asm.h" |
||||||
|
#include "libavutil/x86/cpu.h" |
||||||
|
#include "libavcodec/audiodsp.h" |
||||||
|
#include "audiodsp.h" |
||||||
|
|
||||||
|
int32_t ff_scalarproduct_int16_mmxext(const int16_t *v1, const int16_t *v2, |
||||||
|
int order); |
||||||
|
int32_t ff_scalarproduct_int16_sse2(const int16_t *v1, const int16_t *v2, |
||||||
|
int order); |
||||||
|
|
||||||
|
void ff_vector_clip_int32_mmx(int32_t *dst, const int32_t *src, |
||||||
|
int32_t min, int32_t max, unsigned int len); |
||||||
|
void ff_vector_clip_int32_sse2(int32_t *dst, const int32_t *src, |
||||||
|
int32_t min, int32_t max, unsigned int len); |
||||||
|
void ff_vector_clip_int32_int_sse2(int32_t *dst, const int32_t *src, |
||||||
|
int32_t min, int32_t max, unsigned int len); |
||||||
|
void ff_vector_clip_int32_sse4(int32_t *dst, const int32_t *src, |
||||||
|
int32_t min, int32_t max, unsigned int len); |
||||||
|
|
||||||
|
/**
 * Install x86-specific implementations into an AudioDSPContext.
 *
 * The checks run from the oldest instruction set to the newest, so a
 * later match overrides the pointer chosen by an earlier one.
 *
 * @param c context whose function pointers may be replaced
 */
av_cold void ff_audiodsp_init_x86(AudioDSPContext *c)
{
    const int flags = av_get_cpu_flags();

    if (EXTERNAL_MMX(flags)) {
        c->vector_clip_int32 = ff_vector_clip_int32_mmx;
    }

    if (EXTERNAL_MMXEXT(flags)) {
        c->scalarproduct_int16 = ff_scalarproduct_int16_mmxext;
    }

    if (INLINE_SSE(flags)) {
        c->vector_clipf = ff_vector_clipf_sse;
    }

    if (EXTERNAL_SSE2(flags)) {
        c->scalarproduct_int16 = ff_scalarproduct_int16_sse2;
        /* CPUs flagged as Atom get the integer-clipping SSE2 variant */
        c->vector_clip_int32   = (flags & AV_CPU_FLAG_ATOM)
                                 ? ff_vector_clip_int32_int_sse2
                                 : ff_vector_clip_int32_sse2;
    }

    if (EXTERNAL_SSE4(flags)) {
        c->vector_clip_int32 = ff_vector_clip_int32_sse4;
    }
}
@ -0,0 +1,58 @@ |
|||||||
|
/*
|
||||||
|
* This file is part of Libav. |
||||||
|
* |
||||||
|
* Libav is free software; you can redistribute it and/or |
||||||
|
* modify it under the terms of the GNU Lesser General Public |
||||||
|
* License as published by the Free Software Foundation; either |
||||||
|
* version 2.1 of the License, or (at your option) any later version. |
||||||
|
* |
||||||
|
* Libav is distributed in the hope that it will be useful, |
||||||
|
* but WITHOUT ANY WARRANTY; without even the implied warranty of |
||||||
|
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
||||||
|
* Lesser General Public License for more details. |
||||||
|
* |
||||||
|
* You should have received a copy of the GNU Lesser General Public |
||||||
|
* License along with Libav; if not, write to the Free Software |
||||||
|
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA |
||||||
|
*/ |
||||||
|
|
||||||
|
#include "config.h" |
||||||
|
#include "libavutil/x86/asm.h" |
||||||
|
#include "audiodsp.h" |
||||||
|
|
||||||
|
#if HAVE_INLINE_ASM |
||||||
|
|
||||||
|
/**
 * Clip a float vector to [min, max] with SSE inline assembly.
 *
 * The arrays are walked from the end towards the start, 16 floats
 * (64 bytes) per pass, using aligned loads and stores (movaps). len must
 * therefore be a positive multiple of 16 and both arrays must be 16-byte
 * aligned.
 *
 * @param dst destination array
 * @param src source array
 * @param min lower bound
 * @param max upper bound
 * @param len number of elements
 */
void ff_vector_clipf_sse(float *dst, const float *src, |
||||||
|
float min, float max, int len) |
||||||
|
{ |
||||||
|
/* byte offset of the last 16-float group; the loop counts it down */
x86_reg i = (len - 16) * 4; |
||||||
|
__asm__ volatile ( |
||||||
|
/* broadcast min into xmm4 and max into xmm5 */
"movss %3, %%xmm4 \n\t" |
||||||
|
"movss %4, %%xmm5 \n\t" |
||||||
|
"shufps $0, %%xmm4, %%xmm4 \n\t" |
||||||
|
"shufps $0, %%xmm5, %%xmm5 \n\t" |
||||||
|
"1: \n\t" |
||||||
|
"movaps (%2, %0), %%xmm0 \n\t" // 3/1 on intel
|
||||||
|
"movaps 16(%2, %0), %%xmm1 \n\t" |
||||||
|
"movaps 32(%2, %0), %%xmm2 \n\t" |
||||||
|
"movaps 48(%2, %0), %%xmm3 \n\t" |
||||||
|
/* lower bound first, then upper bound */
"maxps %%xmm4, %%xmm0 \n\t" |
||||||
|
"maxps %%xmm4, %%xmm1 \n\t" |
||||||
|
"maxps %%xmm4, %%xmm2 \n\t" |
||||||
|
"maxps %%xmm4, %%xmm3 \n\t" |
||||||
|
"minps %%xmm5, %%xmm0 \n\t" |
||||||
|
"minps %%xmm5, %%xmm1 \n\t" |
||||||
|
"minps %%xmm5, %%xmm2 \n\t" |
||||||
|
"minps %%xmm5, %%xmm3 \n\t" |
||||||
|
"movaps %%xmm0, (%1, %0) \n\t" |
||||||
|
"movaps %%xmm1, 16(%1, %0) \n\t" |
||||||
|
"movaps %%xmm2, 32(%1, %0) \n\t" |
||||||
|
"movaps %%xmm3, 48(%1, %0) \n\t" |
||||||
|
/* step back one group; repeat while the offset is still >= 0 */
"sub $64, %0 \n\t" |
||||||
|
"jge 1b \n\t" |
||||||
|
: "+&r" (i) |
||||||
|
: "r" (dst), "r" (src), "m" (min), "m" (max) |
||||||
|
: "memory"); |
||||||
|
} |
||||||
|
|
||||||
|
#endif /* HAVE_INLINE_ASM */ |
Loading…
Reference in new issue