mirror of https://github.com/FFmpeg/FFmpeg.git
Also add an SSE2 version. Signed-off-by: James Almer <jamrial@gmail.com>; Signed-off-by: Michael Niedermayer <michaelni@gmx.at> (branch: pull/76/merge)
parent
19b79c1429
commit
dad31083ae
6 changed files with 107 additions and 78 deletions
@ -0,0 +1,61 @@ |
||||
;****************************************************************************** |
||||
;* SIMD-optimized SVQ1 encoder functions |
||||
;* Copyright (c) 2007 Loren Merritt |
||||
;* |
||||
;* This file is part of FFmpeg. |
||||
;* |
||||
;* FFmpeg is free software; you can redistribute it and/or |
||||
;* modify it under the terms of the GNU Lesser General Public |
||||
;* License as published by the Free Software Foundation; either |
||||
;* version 2.1 of the License, or (at your option) any later version. |
||||
;* |
||||
;* FFmpeg is distributed in the hope that it will be useful, |
||||
;* but WITHOUT ANY WARRANTY; without even the implied warranty of |
||||
;* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
||||
;* Lesser General Public License for more details. |
||||
;* |
||||
;* You should have received a copy of the GNU Lesser General Public |
||||
;* License along with FFmpeg; if not, write to the Free Software |
||||
;* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA |
||||
;****************************************************************************** |
||||
|
||||
%include "libavutil/x86/x86util.asm" |
||||
|
||||
SECTION_TEXT |
||||
|
||||
;------------------------------------------------------------------------------
; int ff_ssd_int8_vs_int16_<cpu>(const int8_t *pix1, const int16_t *pix2,
;                                intptr_t size)
;
; Sums the squares of the 16-bit differences (pix2[i] - pix1[i]) and returns
; the total in eax. Each iteration handles 8 elements, walking from the end of
; the buffers towards index 0; the loop exits once size has been decremented
; to zero or below.
;
; NOTE(review): size is presumably a positive multiple of 8; any other value
; would make the last iteration index before the start of the buffers.
; NOTE(review): mova is assumed to be an aligned load when mmsize == 16, so
; pix2 would need 16-byte alignment in the SSE2 build -- confirm against
; x86inc.asm and the callers.
;
; cglobal declares 3 vector registers; the mmsize == 8 path also uses m3/m4.
;------------------------------------------------------------------------------
%macro SSD_INT8_VS_INT16 0
cglobal ssd_int8_vs_int16, 3, 3, 3, pix1, pix2, size
    pxor      m0, m0                        ; m0 = running dword partial sums
.loop:
    sub       sizeq, 8                      ; step back 8 elements; flags feed the jg below
    movq      m1, [pix1q + sizeq]           ; load 8 int8 values
    mova      m2, [pix2q + sizeq*2]         ; load the first mmsize bytes of int16 values
%if mmsize == 8
    ; 64-bit registers hold only 4 words, so the 8 elements are split in two.
    movq      m3, [pix2q + sizeq*2 + mmsize] ; remaining 4 int16 values
    punpckhbw m4, m1                        ; upper 4 bytes of m1 -> high byte of each word;
                                            ; m4's old contents sit in the low bytes
    punpcklbw m1, m1                        ; lower 4 bytes of m1 -> high byte of each word
    psraw     m4, 8                         ; arithmetic shift: sign-extend to int16,
    psraw     m1, 8                         ; discarding the low bytes
    psubw     m3, m4                        ; differences, upper half
    psubw     m2, m1                        ; differences, lower half
    pmaddwd   m3, m3                        ; square and add adjacent pairs into dwords
    pmaddwd   m2, m2
    paddd     m0, m3
    paddd     m0, m2
%else
    punpcklbw m1, m1                        ; 8 bytes -> high byte of each of 8 words
    psraw     m1, 8                         ; sign-extend to int16
    psubw     m2, m1                        ; differences
    pmaddwd   m2, m2                        ; square and add adjacent pairs into dwords
    paddd     m0, m2
%endif
    jg .loop                                ; uses the flags from "sub"; the SIMD ops leave them intact
    HADDD     m0, m1                        ; x86util.asm helper; presumably folds the dword
                                            ; lanes of m0 into one sum using m1 as scratch
    movd     eax, m0                        ; return value
    RET
%endmacro

; Instantiate the routine once per instruction set.
INIT_MMX mmx
SSD_INT8_VS_INT16
INIT_XMM sse2
SSD_INT8_VS_INT16
@ -1,75 +0,0 @@ |
||||
/*
|
||||
* Copyright (c) 2007 Loren Merritt |
||||
* |
||||
* This file is part of FFmpeg. |
||||
* |
||||
* FFmpeg is free software; you can redistribute it and/or |
||||
* modify it under the terms of the GNU Lesser General Public |
||||
* License as published by the Free Software Foundation; either |
||||
* version 2.1 of the License, or (at your option) any later version. |
||||
* |
||||
* FFmpeg is distributed in the hope that it will be useful, |
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of |
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
||||
* Lesser General Public License for more details. |
||||
* |
||||
* You should have received a copy of the GNU Lesser General Public |
||||
* License along with FFmpeg; if not, write to the Free Software |
||||
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA |
||||
*/ |
||||
|
||||
#include "config.h" |
||||
#include "libavutil/attributes.h" |
||||
#include "libavutil/cpu.h" |
||||
#include "libavutil/x86/asm.h" |
||||
#include "libavutil/x86/cpu.h" |
||||
#include "libavcodec/svq1enc.h" |
||||
|
||||
#if HAVE_INLINE_ASM |
||||
|
||||
static int ssd_int8_vs_int16_mmx(const int8_t *pix1, const int16_t *pix2, |
||||
int size) |
||||
{ |
||||
int sum; |
||||
x86_reg i = size; |
||||
|
||||
__asm__ volatile ( |
||||
"pxor %%mm4, %%mm4 \n" |
||||
"1: \n" |
||||
"sub $8, %0 \n" |
||||
"movq (%2, %0), %%mm2 \n" |
||||
"movq (%3, %0, 2), %%mm0 \n" |
||||
"movq 8(%3, %0, 2), %%mm1 \n" |
||||
"punpckhbw %%mm2, %%mm3 \n" |
||||
"punpcklbw %%mm2, %%mm2 \n" |
||||
"psraw $8, %%mm3 \n" |
||||
"psraw $8, %%mm2 \n" |
||||
"psubw %%mm3, %%mm1 \n" |
||||
"psubw %%mm2, %%mm0 \n" |
||||
"pmaddwd %%mm1, %%mm1 \n" |
||||
"pmaddwd %%mm0, %%mm0 \n" |
||||
"paddd %%mm1, %%mm4 \n" |
||||
"paddd %%mm0, %%mm4 \n" |
||||
"jg 1b \n" |
||||
"movq %%mm4, %%mm3 \n" |
||||
"psrlq $32, %%mm3 \n" |
||||
"paddd %%mm3, %%mm4 \n" |
||||
"movd %%mm4, %1 \n" |
||||
: "+r" (i), "=r" (sum) |
||||
: "r" (pix1), "r" (pix2)); |
||||
|
||||
return sum; |
||||
} |
||||
|
||||
#endif /* HAVE_INLINE_ASM */ |
||||
|
||||
/**
 * Hook the x86 inline-assembly routine into an SVQ1 encoder context.
 *
 * When inline assembly is compiled in and INLINE_MMX() accepts the runtime
 * CPU flags, c->ssd_int8_vs_int16 is pointed at the MMX implementation;
 * otherwise the context is left untouched.
 *
 * @param c encoder context whose function pointer may be replaced
 */
av_cold void ff_svq1enc_init_x86(SVQ1EncContext *c)
{
#if HAVE_INLINE_ASM
    const int flags = av_get_cpu_flags();

    if (!(INLINE_MMX(flags))) {
        return;
    }

    c->ssd_int8_vs_int16 = ssd_int8_vs_int16_mmx;
#endif /* HAVE_INLINE_ASM */
}
@ -0,0 +1,42 @@ |
||||
/*
|
||||
* Copyright (c) 2007 Loren Merritt |
||||
* |
||||
* This file is part of FFmpeg. |
||||
* |
||||
* FFmpeg is free software; you can redistribute it and/or |
||||
* modify it under the terms of the GNU Lesser General Public |
||||
* License as published by the Free Software Foundation; either |
||||
* version 2.1 of the License, or (at your option) any later version. |
||||
* |
||||
* FFmpeg is distributed in the hope that it will be useful, |
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of |
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
||||
* Lesser General Public License for more details. |
||||
* |
||||
* You should have received a copy of the GNU Lesser General Public |
||||
* License along with FFmpeg; if not, write to the Free Software |
||||
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA |
||||
*/ |
||||
|
||||
#include "config.h" |
||||
#include "libavutil/attributes.h" |
||||
#include "libavutil/cpu.h" |
||||
#include "libavutil/x86/cpu.h" |
||||
#include "libavcodec/svq1enc.h" |
||||
|
||||
int ff_ssd_int8_vs_int16_mmx(const int8_t *pix1, const int16_t *pix2, |
||||
intptr_t size); |
||||
int ff_ssd_int8_vs_int16_sse2(const int8_t *pix1, const int16_t *pix2, |
||||
intptr_t size); |
||||
|
||||
av_cold void ff_svq1enc_init_x86(SVQ1EncContext *c) |
||||
{ |
||||
int cpu_flags = av_get_cpu_flags(); |
||||
|
||||
if (EXTERNAL_MMX(cpu_flags)) { |
||||
c->ssd_int8_vs_int16 = ff_ssd_int8_vs_int16_mmx; |
||||
} |
||||
if (EXTERNAL_SSE2(cpu_flags)) { |
||||
c->ssd_int8_vs_int16 = ff_ssd_int8_vs_int16_sse2; |
||||
} |
||||
} |
Loading…
Reference in new issue