mirror of https://github.com/FFmpeg/FFmpeg.git
Signed-off-by: Michael Niedermayer <michaelni@gmx.at>pull/54/head
parent
78e39aa7ee
commit
f70d7eb20c
16 changed files with 244 additions and 118 deletions
@ -0,0 +1,69 @@ |
||||
/*
|
||||
* Lossless video DSP utils |
||||
* |
||||
* This file is part of FFmpeg. |
||||
* |
||||
* FFmpeg is free software; you can redistribute it and/or |
||||
* modify it under the terms of the GNU Lesser General Public |
||||
* License as published by the Free Software Foundation; either |
||||
* version 2.1 of the License, or (at your option) any later version. |
||||
* |
||||
* FFmpeg is distributed in the hope that it will be useful, |
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of |
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
||||
* Lesser General Public License for more details. |
||||
* |
||||
* You should have received a copy of the GNU Lesser General Public |
||||
* License along with FFmpeg; if not, write to the Free Software |
||||
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA |
||||
*/ |
||||
#include "avcodec.h" |
||||
#include "lossless_videodsp.h" |
||||
|
||||
/**
 * Add a row of 16-bit samples into another row, in place.
 *
 * The scalar tail computes dst[i] = (dst[i] + src[i]) & mask. The leading
 * part of the row is handled one native long at a time, treating the long as
 * several packed 16-bit lanes and adding all of them with a single integer
 * addition.
 * NOTE(review): the packed path appears to rely on mask having the form
 * 2^n - 1 in order to match the scalar tail - confirm against callers.
 *
 * @param dst  destination row, also the first operand; read and written as
 *             long, so it has to be suitably aligned for that
 * @param src  second operand; read as long, same alignment expectation
 * @param mask bit mask applied to every resulting sample
 * @param w    number of 16-bit samples to process
 */
static void add_int16_c(uint16_t *dst, const uint16_t *src, unsigned mask, int w)
{
    /* How many 16-bit samples fit into one long. */
    const int lanes = (int)sizeof(long) / 2;
    /* mask without its top bit, replicated into every 16-bit lane. */
    const unsigned long low_bits = (mask >> 1) * 0x0001000100010001ULL;
    /* low_bits + 1 in every lane: the bit just above the low bits. */
    const unsigned long top_bit  = low_bits + 0x0001000100010001ULL;
    long pos = 0;

    /* Packed part: as many whole longs as fit into the row. */
    while (pos <= w - lanes) {
        long *out = (long *)(dst + pos);
        long s    = *(const long *)(src + pos);
        long d    = *out;

        /* Sum the low bits of each lane (cannot carry into the neighbouring
         * lane), then patch in the top bit with a carry-less XOR. */
        *out = ((s & low_bits) + (d & low_bits)) ^ ((s ^ d) & top_bit);
        pos += lanes;
    }

    /* Scalar part: whatever did not fill a whole long. */
    while (pos < w) {
        dst[pos] = (dst[pos] + src[pos]) & mask;
        pos++;
    }
}
||||
|
||||
/**
 * Subtract one row of 16-bit samples from another.
 *
 * The scalar form is dst[i] = (src1[i] - src2[i]) & mask. When src2 is
 * suitably aligned (or the target has fast unaligned access) the row is
 * processed one native long at a time, treating the long as several packed
 * 16-bit lanes.
 * NOTE(review): the packed path appears to rely on mask having the form
 * 2^n - 1 and on the inputs already being within mask - confirm against
 * callers.
 *
 * @param dst  destination row
 * @param src1 minuend row
 * @param src2 subtrahend row; may be unaligned
 * @param mask bit mask applied to every resulting sample
 * @param w    number of 16-bit samples to process
 */
static void diff_int16_c(uint16_t *dst, const uint16_t *src1, const uint16_t *src2, unsigned mask, int w)
{
    long i;
#if !HAVE_FAST_UNALIGNED
    if ((uintptr_t)src2 & (sizeof(long) - 1)) {
        /* src2 cannot be read as long here, so go sample by sample, unrolled
         * by four. The loop bound and stride must match the unroll factor:
         * stepping by eight while handling four samples would leave
         * dst[i+4..i+7] unwritten. */
        for (i = 0; i + 3 < w; i += 4) {
            dst[i + 0] = (src1[i + 0] - src2[i + 0]) & mask;
            dst[i + 1] = (src1[i + 1] - src2[i + 1]) & mask;
            dst[i + 2] = (src1[i + 2] - src2[i + 2]) & mask;
            dst[i + 3] = (src1[i + 3] - src2[i + 3]) & mask;
        }
    } else
#endif
    {
        /* mask without its top bit, replicated into every 16-bit lane. */
        unsigned long pw_lsb = (mask >> 1) * 0x0001000100010001ULL;
        /* pw_lsb + 1 in every lane: the bit just above the low bits. */
        unsigned long pw_msb = pw_lsb + 0x0001000100010001ULL;

        for (i = 0; i <= w - (int)sizeof(long) / 2; i += sizeof(long) / 2) {
            long a = *(long *)(src1 + i);
            long b = *(long *)(src2 + i);
            /* Forcing the top bit of each minuend lane on keeps a borrow from
             * leaving its lane; the XOR then restores the real top bit. */
            *(long *)(dst + i) = ((a | pw_msb) - (b & pw_lsb)) ^ ((a ^ b ^ pw_msb) & pw_msb);
        }
    }
    /* Remaining samples that did not fill a whole group. */
    for (; i < w; i++)
        dst[i] = (src1[i] - src2[i]) & mask;
}
||||
|
||||
/**
 * Populate a lossless video DSP context with function pointers.
 *
 * The C implementations from this file are installed first; on x86 builds
 * the x86 initializer is then called with the same context.
 *
 * @param c context to fill in
 */
void ff_llviddsp_init(LLVidDSPContext *c)
{
    /* Portable C implementations. */
    c->diff_int16 = diff_int16_c;
    c->add_int16  = add_int16_c;

    /* Hand the context to the x86 initializer on x86 builds. */
    if (ARCH_X86) {
        ff_llviddsp_init_x86(c);
    }
}
@ -0,0 +1,36 @@ |
||||
/*
|
||||
* Lossless video DSP utils |
||||
* |
||||
* This file is part of FFmpeg. |
||||
* |
||||
* FFmpeg is free software; you can redistribute it and/or |
||||
* modify it under the terms of the GNU Lesser General Public |
||||
* License as published by the Free Software Foundation; either |
||||
* version 2.1 of the License, or (at your option) any later version. |
||||
* |
||||
* FFmpeg is distributed in the hope that it will be useful, |
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of |
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
||||
* Lesser General Public License for more details. |
||||
* |
||||
* You should have received a copy of the GNU Lesser General Public |
||||
* License along with FFmpeg; if not, write to the Free Software |
||||
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA |
||||
*/ |
||||
|
||||
|
||||
#ifndef AVCODEC_LOSSLESS_VIDEODSP_H |
||||
#define AVCODEC_LOSSLESS_VIDEODSP_H |
||||
|
||||
#include "avcodec.h" |
||||
#include "libavutil/cpu.h" |
||||
|
||||
/**
 * Table of lossless video DSP routines operating on rows of 16-bit samples.
 */
typedef struct LLVidDSPContext {
    /**
     * Add the row src into the row dst in place, masking every sample
     * with mask; w is the number of samples.
     */
    void (*add_int16)(uint16_t *dst       /* align 16 */,
                      const uint16_t *src /* align 16 */,
                      unsigned mask,
                      int w);

    /**
     * Store the masked per-sample difference src1 - src2 into dst;
     * w is the number of samples.
     */
    void (*diff_int16)(uint16_t *dst        /* align 16 */,
                       const uint16_t *src1 /* align 16 */,
                       const uint16_t *src2 /* align 1 */,
                       unsigned mask,
                       int w);
} LLVidDSPContext;
||||
|
||||
void ff_llviddsp_init(LLVidDSPContext *llviddsp); |
||||
void ff_llviddsp_init_x86(LLVidDSPContext *llviddsp); |
||||
|
||||
#endif //AVCODEC_LOSSLESS_VIDEODSP_H
|
@ -0,0 +1,88 @@ |
||||
;****************************************************************************** |
||||
;* SIMD lossless video DSP utils |
||||
;* Copyright (c) 2014 Michael Niedermayer |
||||
;* |
||||
;* This file is part of FFmpeg. |
||||
;* |
||||
;* FFmpeg is free software; you can redistribute it and/or |
||||
;* modify it under the terms of the GNU Lesser General Public |
||||
;* License as published by the Free Software Foundation; either |
||||
;* version 2.1 of the License, or (at your option) any later version. |
||||
;* |
||||
;* FFmpeg is distributed in the hope that it will be useful, |
||||
;* but WITHOUT ANY WARRANTY; without even the implied warranty of |
||||
;* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
||||
;* Lesser General Public License for more details. |
||||
;* |
||||
;* You should have received a copy of the GNU Lesser General Public |
||||
;* License along with FFmpeg; if not, write to the Free Software |
||||
;* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA |
||||
;****************************************************************************** |
||||
|
||||
%include "libavutil/x86/x86util.asm" |
||||
|
||||
SECTION_TEXT |
||||
|
||||
; ADD_INT16_LOOP is_aligned
;
; Body of add_int16(dst, src, mask, w): dst[i] = (dst[i] + src[i]) & mask for
; w 16-bit samples. Expects the enclosing cglobal to name the registers
; dst, src, mask, w and a scratch register tmp, and to provide m0-m4.
;   %1 = 1: the main loop uses aligned loads/stores (mova)
;   %1 = 0: the main loop uses unaligned loads/stores (movu)
; The macro ends the function (RET).
%macro ADD_INT16_LOOP 1 ; %1 = is_aligned
    ; Broadcast the low 16 bits of mask into every word lane of m4.
    ; Only the low word is needed, so the 32-bit view of the register is used.
    movd      m4, maskd
    punpcklwd m4, m4
    punpcklwd m4, m4
    punpcklwd m4, m4
    ; Turn the sample count into a byte count. w is a C int, so the doubling
    ; is done on the 32-bit register: the write zero-extends into wq and
    ; discards whatever the caller left in the upper half.
    add       wd, wd
    test      wq, 2*mmsize - 1
    jz %%.tomainloop
    ; Tail: peel single samples off the end of the row until the remaining
    ; byte count is a multiple of the main loop's stride (2*mmsize).
    ; A dedicated scratch register is used; ax must not be touched because
    ; it is the low half of the dst pointer on x86-32.
%%.wordloop:
    sub       wq, 2
    mov       tmpw, [srcq+wq]
    add       tmpw, [dstq+wq]
    and       tmpw, maskw
    mov       [dstq+wq], tmpw
    test      wq, 2*mmsize - 1
    jnz %%.wordloop
%%.tomainloop:
    ; Point both rows at the end of the remaining data and count a negative
    ; offset up towards zero.
    add       srcq, wq
    add       dstq, wq
    neg       wq
    jz %%.end
%%.loop:
    ; Two vectors per iteration.
%if %1
    mova      m0, [srcq+wq]
    mova      m1, [dstq+wq]
    mova      m2, [srcq+wq+mmsize]
    mova      m3, [dstq+wq+mmsize]
%else
    movu      m0, [srcq+wq]
    movu      m1, [dstq+wq]
    movu      m2, [srcq+wq+mmsize]
    movu      m3, [dstq+wq+mmsize]
%endif
    paddw     m0, m1
    paddw     m2, m3
    pand      m0, m4
    pand      m2, m4
%if %1
    mova      [dstq+wq]       , m0
    mova      [dstq+wq+mmsize], m2
%else
    movu      [dstq+wq]       , m0
    movu      [dstq+wq+mmsize], m2
%endif
    add       wq, 2*mmsize
    jl %%.loop
%%.end:
    RET
%endmacro

; MMX version: four arguments plus one scratch register for the tail loop.
INIT_MMX mmx
cglobal add_int16, 4,5,5, dst, src, mask, w, tmp
    ADD_INT16_LOOP 1

; SSE2 version: picks the aligned or unaligned main loop at run time,
; depending on whether both src and dst are mmsize-aligned.
INIT_XMM sse2
cglobal add_int16, 4,5,5, dst, src, mask, w, tmp
    test      srcq, mmsize-1
    jnz .unaligned
    test      dstq, mmsize-1
    jnz .unaligned
    ADD_INT16_LOOP 1
.unaligned:
    ADD_INT16_LOOP 0
@ -0,0 +1,38 @@ |
||||
/*
|
||||
* Lossless video DSP utils |
||||
* |
||||
* This file is part of FFmpeg. |
||||
* |
||||
* FFmpeg is free software; you can redistribute it and/or |
||||
* modify it under the terms of the GNU Lesser General Public |
||||
* License as published by the Free Software Foundation; either |
||||
* version 2.1 of the License, or (at your option) any later version. |
||||
* |
||||
* FFmpeg is distributed in the hope that it will be useful, |
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of |
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
||||
* Lesser General Public License for more details. |
||||
* |
||||
* You should have received a copy of the GNU Lesser General Public |
||||
* License along with FFmpeg; if not, write to the Free Software |
||||
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA |
||||
*/ |
||||
|
||||
#include "../lossless_videodsp.h" |
||||
#include "libavutil/x86/cpu.h" |
||||
|
||||
void ff_add_int16_mmx(uint16_t *dst, const uint16_t *src, unsigned mask, int w); |
||||
void ff_add_int16_sse2(uint16_t *dst, const uint16_t *src, unsigned mask, int w); |
||||
|
||||
/**
 * Install the x86 assembly implementations that the running CPU supports.
 *
 * Only add_int16 is replaced here; every other entry of the context is left
 * as the caller set it.
 *
 * @param c context whose function pointers are updated
 */
void ff_llviddsp_init_x86(LLVidDSPContext *c)
{
    const int flags = av_get_cpu_flags();

    /* MMX version. */
    if (EXTERNAL_MMX(flags))
        c->add_int16 = ff_add_int16_mmx;

    /* Checked last so that it replaces the MMX version when both are
     * available. */
    if (EXTERNAL_SSE2(flags))
        c->add_int16 = ff_add_int16_sse2;
}
Loading…
Reference in new issue