mirror of https://github.com/FFmpeg/FFmpeg.git
* commit '0d439fbede03854eac8a978cccf21a3425a3c82d': dsputil: Split off HuffYUV decoding bits into their own context. Conflicts: configure, libavcodec/dsputil.c, libavcodec/dsputil.h, libavcodec/huffyuv.h, libavcodec/huffyuvdec.c, libavcodec/lagarith.c, libavcodec/vble.c, libavcodec/x86/Makefile, libavcodec/x86/dsputil.asm, libavcodec/x86/dsputil_init.c, libavcodec/x86/dsputil_mmx.c. Merged-by: Michael Niedermayer <michaelni@gmx.at> (pull/72/merge)
commit
e2abc0d5ca
23 changed files with 581 additions and 381 deletions
@ -0,0 +1,132 @@ |
||||
/*
|
||||
* This file is part of FFmpeg. |
||||
* |
||||
* FFmpeg is free software; you can redistribute it and/or |
||||
* modify it under the terms of the GNU Lesser General Public |
||||
* License as published by the Free Software Foundation; either |
||||
* version 2.1 of the License, or (at your option) any later version. |
||||
* |
||||
* FFmpeg is distributed in the hope that it will be useful, |
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of |
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
||||
* Lesser General Public License for more details. |
||||
* |
||||
* You should have received a copy of the GNU Lesser General Public |
||||
* License along with FFmpeg; if not, write to the Free Software |
||||
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA |
||||
*/ |
||||
|
||||
#include <stdint.h> |
||||
|
||||
#include "config.h" |
||||
#include "libavutil/attributes.h" |
||||
#include "mathops.h" |
||||
#include "huffyuvdsp.h" |
||||
|
||||
/*
 * Byte masks replicated across one native `unsigned long`:
 * pb_7f is 0x7f in every byte (0x7f7f7f7f or 0x7f7f7f7f7f7f7f7f, depending
 * on the width of long), pb_80 is 0x80 in every byte.
 */
#define pb_7f (~0UL / 255 * 0x7f)
#define pb_80 (~0UL / 255 * 0x80)

/**
 * Add each byte of src to the corresponding byte of dst (modulo 256),
 * storing the result in dst.
 *
 * @param dst destination buffer, updated in place
 * @param src bytes to add
 * @param w   number of bytes to process
 */
static void add_bytes_c(uint8_t *dst, uint8_t *src, int w)
{
    const int step = (int) sizeof(long);
    long pos       = 0;

    /* Process one native word at a time for as long as a full word fits. */
    while (pos <= w - step) {
        long *out    = (long *) (dst + pos);
        const long s = *(long *) (src + pos);
        const long d = *out;

        /* Add the low 7 bits of every byte (cannot carry into the next
         * byte), then restore each byte's top bit with an XOR. */
        *out = ((s & pb_7f) + (d & pb_7f)) ^ ((s ^ d) & pb_80);
        pos += step;
    }

    /* Remaining bytes that do not fill a whole word. */
    while (pos < w) {
        dst[pos] += src[pos];
        pos++;
    }
}
||||
|
||||
/**
 * Undo median prediction for one row.
 *
 * For every position the predictor is the median (mid_pred) of the value to
 * the left, the value above, and (left + above - above_left) masked to 8 bits;
 * the residual from diff is added on top and the sum is truncated to a byte.
 *
 * @param dst      output row
 * @param src1     row above the one being reconstructed
 * @param diff     residuals for this row
 * @param w        number of samples
 * @param left     in: value left of the first sample; out: last reconstructed
 *                 sample
 * @param left_top in: value above-left of the first sample; out: last sample
 *                 read from src1
 */
static void add_hfyu_median_pred_c(uint8_t *dst, const uint8_t *src1,
                                   const uint8_t *diff, int w,
                                   int *left, int *left_top)
{
    /* Both running values are deliberately kept as bytes so they wrap. */
    uint8_t cur      = *left;
    uint8_t top_left = *left_top;
    int x;

    for (x = 0; x < w; x++) {
        const uint8_t top  = src1[x];
        const int gradient = (cur + top - top_left) & 0xFF;

        cur      = mid_pred(cur, top, gradient) + diff[x];
        top_left = top;
        dst[x]   = cur;
    }

    *left     = cur;
    *left_top = top_left;
}
||||
|
||||
/**
 * Undo left prediction for one row: dst[i] is the running sum of acc and
 * src[0..i], truncated to 8 bits on store.
 *
 * @param dst output row
 * @param src residuals
 * @param w   number of samples
 * @param acc starting value of the accumulator (the sample to the left)
 * @return the accumulator after the last sample; it is an int and is NOT
 *         truncated to 8 bits
 */
static int add_hfyu_left_pred_c(uint8_t *dst, const uint8_t *src, int w,
                                int acc)
{
    int pos = 0;

    while (pos < w) {
        acc     += src[pos];
        dst[pos] = acc;
        pos++;
    }

    return acc;
}
||||
|
||||
/* Byte offset of each channel inside one 4-byte pixel; the order is mirrored
 * on big-endian builds. */
#if HAVE_BIGENDIAN
#define B 3
#define G 2
#define R 1
#define A 0
#else
#define B 0
#define G 1
#define R 2
#define A 3
#endif
/**
 * Undo left prediction on a row of 4-byte pixels, keeping an independent
 * running sum per channel.
 *
 * @param dst   output row, 4 bytes per pixel
 * @param src   residuals, 4 bytes per pixel
 * @param w     number of pixels
 * @param red   in/out running sum of the red channel
 * @param green in/out running sum of the green channel
 * @param blue  in/out running sum of the blue channel
 * @param alpha in/out running sum of the alpha channel
 *
 * The running sums are ints and are written back untruncated; only the
 * stores to dst wrap to 8 bits.
 */
static void add_hfyu_left_pred_bgr32_c(uint8_t *dst, const uint8_t *src,
                                       int w, int *red, int *green,
                                       int *blue, int *alpha)
{
    int sum_r = *red;
    int sum_g = *green;
    int sum_b = *blue;
    int sum_a = *alpha;
    const uint8_t *in = src;
    uint8_t *out      = dst;
    int remaining;

    for (remaining = w; remaining > 0; remaining--, in += 4, out += 4) {
        /* Read the whole input pixel before writing the output pixel. */
        sum_b += in[B];
        sum_g += in[G];
        sum_r += in[R];
        sum_a += in[A];

        out[B] = sum_b;
        out[G] = sum_g;
        out[R] = sum_r;
        out[A] = sum_a;
    }

    *red   = sum_r;
    *green = sum_g;
    *blue  = sum_b;
    *alpha = sum_a;
}
#undef B
#undef G
#undef R
#undef A
||||
|
||||
/**
 * Populate a HuffYUVDSPContext.
 *
 * The portable C implementations are installed first; the per-architecture
 * initializers are then given the chance to replace individual entries with
 * optimized versions.
 *
 * @param c context to fill in; every function pointer is overwritten
 */
av_cold void ff_huffyuvdsp_init(HuffYUVDSPContext *c)
{
    c->add_bytes                = add_bytes_c;
    c->add_hfyu_median_pred     = add_hfyu_median_pred_c;
    c->add_hfyu_left_pred       = add_hfyu_left_pred_c;
    c->add_hfyu_left_pred_bgr32 = add_hfyu_left_pred_bgr32_c;

    /* ff_huffyuvdsp_init_ppc() is declared in huffyuvdsp.h and implemented
     * for PowerPC, so it has to be dispatched here just like the x86 one;
     * otherwise the AltiVec add_bytes is never installed.
     * ARCH_PPC is presumably a 0/1 constant from config.h like ARCH_X86 —
     * confirm against configure. */
    if (ARCH_PPC)
        ff_huffyuvdsp_init_ppc(c);
    if (ARCH_X86)
        ff_huffyuvdsp_init_x86(c);
}
@ -0,0 +1,41 @@ |
||||
/*
|
||||
* This file is part of FFmpeg. |
||||
* |
||||
* FFmpeg is free software; you can redistribute it and/or |
||||
* modify it under the terms of the GNU Lesser General Public |
||||
* License as published by the Free Software Foundation; either |
||||
* version 2.1 of the License, or (at your option) any later version. |
||||
* |
||||
* FFmpeg is distributed in the hope that it will be useful, |
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of |
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
||||
* Lesser General Public License for more details. |
||||
* |
||||
* You should have received a copy of the GNU Lesser General Public |
||||
* License along with FFmpeg; if not, write to the Free Software |
||||
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA |
||||
*/ |
||||
|
||||
#ifndef AVCODEC_HUFFYUVDSP_H |
||||
#define AVCODEC_HUFFYUVDSP_H |
||||
|
||||
#include <stdint.h> |
||||
|
||||
/**
 * Table of HuffYUV decoding primitives. It is filled in by
 * ff_huffyuvdsp_init(), which may substitute architecture-specific versions.
 */
typedef struct HuffYUVDSPContext {
    /** dst[i] += src[i] for i in [0, w); both buffers are expected to be
     *  16-byte aligned. */
    void (*add_bytes)(uint8_t *dst /* align 16 */,
                      uint8_t *src /* align 16 */,
                      int w);

    /** Undo median prediction for one row, using the row above (top) and the
     *  residuals (diff); *left and *left_top carry state in and out. */
    void (*add_hfyu_median_pred)(uint8_t *dst,
                                 const uint8_t *top,
                                 const uint8_t *diff,
                                 int w,
                                 int *left,
                                 int *left_top);

    /** Undo left prediction for one row; returns the final accumulator. */
    int (*add_hfyu_left_pred)(uint8_t *dst,
                              const uint8_t *src,
                              int w,
                              int left);

    /** Undo left prediction on 4-byte pixels with one running sum per
     *  channel, carried in and out through the four int pointers. */
    void (*add_hfyu_left_pred_bgr32)(uint8_t *dst,
                                     const uint8_t *src,
                                     int w,
                                     int *red,
                                     int *green,
                                     int *blue,
                                     int *alpha);
} HuffYUVDSPContext;
||||
|
||||
/** Fill c with the HuffYUV DSP routines to use on the running system. */
void ff_huffyuvdsp_init(HuffYUVDSPContext *c);

/** Override entries of c with PowerPC-specific implementations. */
void ff_huffyuvdsp_init_ppc(HuffYUVDSPContext *c);

/** Override entries of c with x86-specific implementations. */
void ff_huffyuvdsp_init_x86(HuffYUVDSPContext *c);
||||
|
||||
#endif /* AVCODEC_HUFFYUVDSP_H */ |
@ -0,0 +1,57 @@ |
||||
/*
|
||||
* Copyright (c) 2002 Brian Foley |
||||
* Copyright (c) 2002 Dieter Shirley |
||||
* Copyright (c) 2003-2004 Romain Dolbeau <romain@dolbeau.org> |
||||
* |
||||
* This file is part of FFmpeg. |
||||
* |
||||
* FFmpeg is free software; you can redistribute it and/or |
||||
* modify it under the terms of the GNU Lesser General Public |
||||
* License as published by the Free Software Foundation; either |
||||
* version 2.1 of the License, or (at your option) any later version. |
||||
* |
||||
* FFmpeg is distributed in the hope that it will be useful, |
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of |
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
||||
* Lesser General Public License for more details. |
||||
* |
||||
* You should have received a copy of the GNU Lesser General Public |
||||
* License along with FFmpeg; if not, write to the Free Software |
||||
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA |
||||
*/ |
||||
|
||||
#include "config.h" |
||||
#if HAVE_ALTIVEC_H |
||||
#include <altivec.h> |
||||
#endif |
||||
|
||||
#include "libavutil/attributes.h" |
||||
#include "libavutil/ppc/types_altivec.h" |
||||
#include "libavutil/ppc/util_altivec.h" |
||||
#include "libavcodec/huffyuvdsp.h" |
||||
|
||||
#if HAVE_ALTIVEC
/**
 * AltiVec version of add_bytes: dst[i] += src[i] (modulo 256) for
 * i in [0, w).
 *
 * @param dst destination buffer, updated in place; 16-byte aligned
 * @param src bytes to add; 16-byte aligned
 * @param w   number of bytes to process
 */
static void add_bytes_altivec(uint8_t *dst, uint8_t *src, int w)
{
    register int i;
    register vector unsigned char vdst, vsrc;

    /* dst and src are 16 bytes-aligned (guaranteed). */
    for (i = 0; i + 15 < w; i += 16) {
        vdst = vec_ld(i, (unsigned char *) dst);
        vsrc = vec_ld(i, (unsigned char *) src);
        vdst = vec_add(vsrc, vdst);
        vec_st(vdst, i, (unsigned char *) dst);
    }
    /* If w is not a multiple of 16: the leftover bytes must be accumulated
     * into dst exactly like the vector part does, not overwritten by src. */
    for (; i < w; i++)
        dst[i] += src[i];
}
#endif /* HAVE_ALTIVEC */
||||
|
||||
/**
 * Install the PowerPC implementations into c.
 *
 * Only add_bytes is replaced, and only when the build has AltiVec enabled;
 * otherwise c is left untouched.
 */
av_cold void ff_huffyuvdsp_init_ppc(HuffYUVDSPContext *c)
{
#if !HAVE_ALTIVEC
    /* Nothing to install in a build without AltiVec. */
    (void) c;
#else
    c->add_bytes = add_bytes_altivec;
#endif /* !HAVE_ALTIVEC */
}
@ -0,0 +1,165 @@ |
||||
;****************************************************************************** |
||||
;* SIMD-optimized HuffYUV functions |
||||
;* Copyright (c) 2008 Loren Merritt |
||||
;* |
||||
;* This file is part of FFmpeg. |
||||
;* |
||||
;* FFmpeg is free software; you can redistribute it and/or |
||||
;* modify it under the terms of the GNU Lesser General Public |
||||
;* License as published by the Free Software Foundation; either |
||||
;* version 2.1 of the License, or (at your option) any later version. |
||||
;* |
||||
;* FFmpeg is distributed in the hope that it will be useful, |
||||
;* but WITHOUT ANY WARRANTY; without even the implied warranty of |
||||
;* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
||||
;* Lesser General Public License for more details. |
||||
;* |
||||
;* You should have received a copy of the GNU Lesser General Public |
||||
;* License along with FFmpeg; if not, write to the Free Software |
||||
;* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA |
||||
;****************************************************************************** |
||||
|
||||
%include "libavutil/x86/x86util.asm" |
||||
|
||||
SECTION_RODATA |
||||
; pshufb control masks used by the add_hfyu_left_pred variants below.
; In the label names each character describes one output byte: a hex digit is
; the index of the source byte copied there, 'z' means the control byte is -1
; (high bit set), which makes pshufb write zero.

; All 16 control bytes = 15: copies byte 15 of the source to every byte.
pb_f: times 16 db 15 |
||||
; Only 8 bytes (all -1) are emitted under this label.
; NOTE(review): the SSE4 entry loads it with a 16-byte mova, so the 8 bytes of
; pb_7 that follow appear to be the "77777777" half of the mask — keep the two
; definitions adjacent and in this order.
pb_zzzzzzzz77777777: times 8 db -1 |
||||
; 8 control bytes = 7: copies byte 7 of the source to every byte (MMX width).
pb_7: times 8 db 7 |
||||
; Copies byte 3 into bytes 4..7 and byte 11 into bytes 12..15, zero elsewhere.
pb_zzzz3333zzzzbbbb: db -1,-1,-1,-1,3,3,3,3,-1,-1,-1,-1,11,11,11,11 |
||||
; Copies byte 1 into bytes 2..3, byte 5 into 6..7, byte 9 into 10..11 and
; byte 13 into 14..15, zero elsewhere.
pb_zz11zz55zz99zzdd: db -1,-1,1,1,-1,-1,5,5,-1,-1,9,9,-1,-1,13,13 |
||||
|
||||
SECTION_TEXT |
||||
|
||||
; void ff_add_hfyu_median_pred_mmxext(uint8_t *dst, const uint8_t *top, |
||||
; const uint8_t *diff, int w, |
||||
; int *left, int *left_top) |
||||
INIT_MMX mmxext |
||||
; Median prediction for one row, 8 bytes per loop iteration.
; Register roles as used below:
;   mm0 = top - topleft (per byte)      mm1 = top
;   mm2 = residuals (diff)              mm3 = running "left" value (low byte)
;   mm7 = output bytes being assembled  mm4/mm5/mm6 = scratch
; dst/top/diff are advanced by w up front and wq is negated, so [ptr+wq] walks
; the row while wq counts up towards zero.
; NOTE(review): every load/store is a full 8-byte movq and the loop only stops
; once wq >= 0, so a w that is not a multiple of 8 appears to touch up to 7
; bytes past the row end — confirm the callers pad their buffers.
cglobal add_hfyu_median_pred, 6,6,0, dst, top, diff, w, left, left_top |
||||
movq mm0, [topq] |
||||
movq mm2, mm0 |
||||
movd mm4, [left_topq] |
||||
psllq mm2, 8 |
||||
movq mm1, mm0 |
||||
; mm4 = (top << 8 bits) | *left_top, i.e. the top row shifted by one pixel
por mm4, mm2 |
||||
movd mm3, [leftq] |
||||
psubb mm0, mm4 ; t-tl |
||||
add dstq, wq |
||||
add topq, wq |
||||
add diffq, wq |
||||
neg wq |
||||
; first group was prepared above from *left_top; skip the in-loop setup
jmp .skip |
||||
.loop: |
||||
movq mm4, [topq+wq] |
||||
movq mm0, mm4 |
||||
psllq mm4, 8 |
||||
; after the 7 right shifts of the previous group, the low byte of mm1 is the
; last top byte of that group, which is the topleft of the first pixel here
por mm4, mm1 |
||||
movq mm1, mm0 ; t |
||||
psubb mm0, mm4 ; t-tl |
||||
.skip: |
||||
movq mm2, [diffq+wq] |
||||
%assign i 0 |
||||
; The 8 pixels are processed serially because each one needs the previous
; result as its "left"; only the low byte of mm0..mm3 is meaningful per step.
%rep 8 |
||||
movq mm4, mm0 |
||||
paddb mm4, mm3 ; t-tl+l |
||||
movq mm5, mm3 |
||||
; median(l, t, t-tl+l) = max(min(max(l, t), t-tl+l), min(l, t))
pmaxub mm3, mm1 |
||||
pminub mm5, mm1 |
||||
pminub mm3, mm4 |
||||
pmaxub mm3, mm5 ; median |
||||
paddb mm3, mm2 ; +residual |
||||
; shift the new low byte into the top of mm7; after 8 steps the bytes end up
; in memory order
%if i==0 |
||||
movq mm7, mm3 |
||||
psllq mm7, 56 |
||||
%else |
||||
movq mm6, mm3 |
||||
psrlq mm7, 8 |
||||
psllq mm6, 56 |
||||
por mm7, mm6 |
||||
%endif |
||||
; bring the next pixel's inputs down into the low byte
%if i<7 |
||||
psrlq mm0, 8 |
||||
psrlq mm1, 8 |
||||
psrlq mm2, 8 |
||||
%endif |
||||
%assign i i+1 |
||||
%endrep |
||||
movq [dstq+wq], mm7 |
||||
add wq, 8 |
||||
jl .loop |
||||
; dstq/topq point one past the row (base + w): write back dst[w-1] to *left
; and top[w-1] to *left_top. r2 (the diff argument) is reused as scratch.
movzx r2d, byte [dstq-1] |
||||
mov [leftq], r2d |
||||
movzx r2d, byte [topq-1] |
||||
mov [left_topq], r2d |
||||
RET |
||||
|
||||
|
||||
; Body shared by the add_hfyu_left_pred entry points: running byte sum over
; src, written to dst, mmsize bytes per iteration.
; Expected on entry (set up by the callers below):
;   m0 = previous "left" value in its highest byte
;   m3, m4 (and m6 when mmsize == 16) = the pb_zz... shuffle masks
;   m5 = mask that broadcasts the highest byte of a register
; The macro ends in RET, so code placed after an expansion is only reached
; through a label.
%macro ADD_HFYU_LEFT_LOOP 2 ; %1 = dst_is_aligned, %2 = src_is_aligned |
||||
add srcq, wq |
||||
add dstq, wq |
||||
neg wq |
||||
%%.loop: |
||||
%if %2 |
||||
mova m1, [srcq+wq] |
||||
%else |
||||
movu m1, [srcq+wq] |
||||
%endif |
||||
; in-register prefix sum, doubling the span each step:
; step 1: within each 16-bit word, add the low byte into the high byte
mova m2, m1 |
||||
psllw m1, 8 |
||||
paddb m1, m2 |
||||
; step 2: add byte 1/5/9/13 into the two bytes that follow it
mova m2, m1 |
||||
pshufb m1, m3 |
||||
paddb m1, m2 |
||||
; broadcast the last output byte of the previous iteration (or the initial
; left value) to every byte of m0
pshufb m0, m5 |
||||
; step 3: add byte 3/11 into the four bytes that follow it
mova m2, m1 |
||||
pshufb m1, m4 |
||||
paddb m1, m2 |
||||
; step 4 (16-byte registers only): add byte 7 into the upper eight bytes
%if mmsize == 16 |
||||
mova m2, m1 |
||||
pshufb m1, m6 |
||||
paddb m1, m2 |
||||
%endif |
||||
paddb m0, m1 |
||||
%if %1 |
||||
mova [dstq+wq], m0 |
||||
%else |
||||
; unaligned destination: store as two 8-byte halves
movq [dstq+wq], m0 |
||||
movhps [dstq+wq+8], m0 |
||||
%endif |
||||
add wq, mmsize |
||||
jl %%.loop |
||||
; wq is now the overshoot past the row end (0..mmsize-1); mmsize-1-wq is the
; index, inside the last vector, of the final valid byte. Shuffle that byte
; into byte 0 and return it.
; NOTE(review): the control register holds that index only in byte 0 and zero
; elsewhere, so bytes 1..3 of the returned eax are copies of m0's byte 0, not
; zero — callers presumably use only the low 8 bits; confirm.
mov eax, mmsize-1 |
||||
sub eax, wd |
||||
movd m1, eax |
||||
pshufb m0, m1 |
||||
movd eax, m0 |
||||
RET |
||||
%endmacro |
||||
|
||||
; int ff_add_hfyu_left_pred(uint8_t *dst, const uint8_t *src, int w, int left) |
||||
; SSSE3 variant working on 8-byte MMX registers.
; Loads the shuffle masks expected by ADD_HFYU_LEFT_LOOP (only the first 8
; bytes of each 16-byte table are read at this register width) and places the
; incoming left value in the highest byte of m0.
INIT_MMX ssse3 |
||||
cglobal add_hfyu_left_pred, 3,3,7, dst, src, w, left |
||||
.skip_prologue: |
||||
mova m5, [pb_7] |
||||
mova m4, [pb_zzzz3333zzzzbbbb] |
||||
mova m3, [pb_zz11zz55zz99zzdd] |
||||
movd m0, leftm |
||||
; move the left value from byte 0 to byte 7
psllq m0, 56 |
||||
ADD_HFYU_LEFT_LOOP 1, 1 |
||||
|
||||
; Variant working on 16-byte XMM registers.
; Loads all four shuffle masks, places the incoming left value in byte 15 of
; m0, then picks one of three loop expansions based on pointer alignment.
; Each expansion ends in RET, so the labelled copies below are reached only
; through the jumps.
INIT_XMM sse4 |
||||
cglobal add_hfyu_left_pred, 3,3,7, dst, src, w, left |
||||
mova m5, [pb_f] |
||||
mova m6, [pb_zzzzzzzz77777777] |
||||
mova m4, [pb_zzzz3333zzzzbbbb] |
||||
mova m3, [pb_zz11zz55zz99zzdd] |
||||
movd m0, leftm |
||||
; byte shift: move the left value from byte 0 to byte 15
pslldq m0, 15 |
||||
test srcq, 15 |
||||
jnz .src_unaligned |
||||
test dstq, 15 |
||||
jnz .dst_unaligned |
||||
; both pointers 16-byte aligned
ADD_HFYU_LEFT_LOOP 1, 1 |
||||
.dst_unaligned: |
||||
; aligned source, unaligned destination
ADD_HFYU_LEFT_LOOP 0, 1 |
||||
.src_unaligned: |
||||
; unaligned source: the destination is treated as unaligned too, whatever
; its actual alignment
ADD_HFYU_LEFT_LOOP 0, 0 |
@ -0,0 +1,30 @@ |
||||
/*
|
||||
* This file is part of FFmpeg. |
||||
* |
||||
* FFmpeg is free software; you can redistribute it and/or |
||||
* modify it under the terms of the GNU Lesser General Public |
||||
* License as published by the Free Software Foundation; either |
||||
* version 2.1 of the License, or (at your option) any later version. |
||||
* |
||||
* FFmpeg is distributed in the hope that it will be useful, |
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of |
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
||||
* Lesser General Public License for more details. |
||||
* |
||||
* You should have received a copy of the GNU Lesser General Public |
||||
* License along with FFmpeg; if not, write to the Free Software |
||||
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA |
||||
*/ |
||||
|
||||
#ifndef AVCODEC_X86_HUFFYUVDSP_H |
||||
#define AVCODEC_X86_HUFFYUVDSP_H |
||||
|
||||
#include <stdint.h> |
||||
|
||||
/** MMX version of add_bytes: dst[i] += src[i] for i in [0, w). */
void ff_add_bytes_mmx(uint8_t *dst,
                      uint8_t *src,
                      int w);

/** CMOV-based version of add_hfyu_median_pred. */
void ff_add_hfyu_median_pred_cmov(uint8_t *dst,
                                  const uint8_t *top,
                                  const uint8_t *diff,
                                  int w,
                                  int *left,
                                  int *left_top);
||||
|
||||
#endif /* AVCODEC_X86_HUFFYUVDSP_H */ |
@ -0,0 +1,63 @@ |
||||
/*
|
||||
* This file is part of FFmpeg. |
||||
* |
||||
* FFmpeg is free software; you can redistribute it and/or |
||||
* modify it under the terms of the GNU Lesser General Public |
||||
* License as published by the Free Software Foundation; either |
||||
* version 2.1 of the License, or (at your option) any later version. |
||||
* |
||||
* FFmpeg is distributed in the hope that it will be useful, |
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of |
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
||||
* Lesser General Public License for more details. |
||||
* |
||||
* You should have received a copy of the GNU Lesser General Public |
||||
* License along with FFmpeg; if not, write to the Free Software |
||||
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA |
||||
*/ |
||||
|
||||
#include "config.h" |
||||
#include "libavutil/attributes.h" |
||||
#include "libavutil/cpu.h" |
||||
#include "libavutil/x86/asm.h" |
||||
#include "libavutil/x86/cpu.h" |
||||
#include "libavcodec/huffyuvdsp.h" |
||||
|
||||
/* Implementations selected by ff_huffyuvdsp_init_x86() below. */

/* add_bytes */
void ff_add_bytes_mmx(uint8_t *dst,
                      uint8_t *src,
                      int w);

/* add_hfyu_median_pred */
void ff_add_hfyu_median_pred_cmov(uint8_t *dst,
                                  const uint8_t *top,
                                  const uint8_t *diff,
                                  int w,
                                  int *left,
                                  int *left_top);
void ff_add_hfyu_median_pred_mmxext(uint8_t *dst,
                                    const uint8_t *top,
                                    const uint8_t *diff,
                                    int w,
                                    int *left,
                                    int *left_top);

/* add_hfyu_left_pred */
int ff_add_hfyu_left_pred_ssse3(uint8_t *dst,
                                const uint8_t *src,
                                int w,
                                int left);
int ff_add_hfyu_left_pred_sse4(uint8_t *dst,
                               const uint8_t *src,
                               int w,
                               int left);
||||
|
||||
/**
 * Replace entries of c with x86-optimized implementations, according to the
 * CPU flags reported by av_get_cpu_flags().
 *
 * Later assignments deliberately override earlier ones, so the order of the
 * checks below matters.
 */
av_cold void ff_huffyuvdsp_init_x86(HuffYUVDSPContext *c)
{
    const int flags = av_get_cpu_flags();

#if HAVE_7REGS && HAVE_INLINE_ASM
    if (flags & AV_CPU_FLAG_CMOV)
        c->add_hfyu_median_pred = ff_add_hfyu_median_pred_cmov;
#endif

    if (INLINE_MMX(flags))
        c->add_bytes = ff_add_bytes_mmx;

    /* The MMXEXT median predictor is slower than the cmov version on AMD,
     * so it is only taken when 3DNow! is not reported. */
    if (EXTERNAL_MMXEXT(flags) && !(flags & AV_CPU_FLAG_3DNOW))
        c->add_hfyu_median_pred = ff_add_hfyu_median_pred_mmxext;

    if (EXTERNAL_SSSE3(flags)) {
        /* The "sse4" routine is not really SSE4; the flag is only used
         * because that version is slow on Conroe. */
        c->add_hfyu_left_pred = (flags & AV_CPU_FLAG_SSE4)
                                ? ff_add_hfyu_left_pred_sse4
                                : ff_add_hfyu_left_pred_ssse3;
    }
}
Loading…
Reference in new issue