mirror of https://github.com/FFmpeg/FFmpeg.git
* commit '79dad2a932534d1155079f937649e099f9e5cc27': dsputil: Separate h264chroma Conflicts: libavcodec/dsputil_template.c libavcodec/ppc/dsputil_ppc.c libavcodec/vc1dec.c libavcodec/vc1dsp.c Merged-by: Michael Niedermayer <michaelni@gmx.at>pull/9/merge
commit
c4e394e460
42 changed files with 982 additions and 684 deletions
@ -0,0 +1,51 @@ |
||||
/*
|
||||
* ARM NEON optimised H.264 chroma functions |
||||
* Copyright (c) 2008 Mans Rullgard <mans@mansr.com> |
||||
* |
||||
* This file is part of FFmpeg. |
||||
* |
||||
* FFmpeg is free software; you can redistribute it and/or |
||||
* modify it under the terms of the GNU Lesser General Public |
||||
* License as published by the Free Software Foundation; either |
||||
* version 2.1 of the License, or (at your option) any later version. |
||||
* |
||||
* FFmpeg is distributed in the hope that it will be useful, |
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of |
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
||||
* Lesser General Public License for more details. |
||||
* |
||||
* You should have received a copy of the GNU Lesser General Public |
||||
* License along with FFmpeg; if not, write to the Free Software |
||||
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA |
||||
*/ |
||||
|
||||
#include <stdint.h> |
||||
|
||||
#include "libavutil/attributes.h" |
||||
#include "libavutil/cpu.h" |
||||
#include "libavutil/arm/cpu.h" |
||||
#include "libavcodec/h264chroma.h" |
||||
|
||||
/*
 * NEON-optimised chroma kernels, defined outside this file.
 * ff_h264chroma_init_arm() stores them in the put/avg function tables,
 * so their signature matches h264_chroma_mc_func.
 */

/* "put" variants, 8/4/2 pixels wide */
void ff_put_h264_chroma_mc8_neon(uint8_t *dst, uint8_t *src, int stride,
                                 int h, int x, int y);
void ff_put_h264_chroma_mc4_neon(uint8_t *dst, uint8_t *src, int stride,
                                 int h, int x, int y);
void ff_put_h264_chroma_mc2_neon(uint8_t *dst, uint8_t *src, int stride,
                                 int h, int x, int y);

/* "avg" variants, 8/4/2 pixels wide */
void ff_avg_h264_chroma_mc8_neon(uint8_t *dst, uint8_t *src, int stride,
                                 int h, int x, int y);
void ff_avg_h264_chroma_mc4_neon(uint8_t *dst, uint8_t *src, int stride,
                                 int h, int x, int y);
void ff_avg_h264_chroma_mc2_neon(uint8_t *dst, uint8_t *src, int stride,
                                 int h, int x, int y);
||||
|
||||
/**
 * Install the NEON chroma kernels into @p c.
 *
 * The tables are left untouched unless the running CPU reports NEON and
 * bit_depth is 8 or less.
 *
 * @param c         context whose put/avg tables may be overwritten
 * @param bit_depth sample bit depth the caller is decoding
 */
av_cold void ff_h264chroma_init_arm(H264ChromaContext *c, int bit_depth)
{
    int cpu_flags = av_get_cpu_flags();

    if (bit_depth > 8 || !have_neon(cpu_flags))
        return;

    /* index 0/1/2 <-> block width 8/4/2 */
    c->put_h264_chroma_pixels_tab[0] = ff_put_h264_chroma_mc8_neon;
    c->put_h264_chroma_pixels_tab[1] = ff_put_h264_chroma_mc4_neon;
    c->put_h264_chroma_pixels_tab[2] = ff_put_h264_chroma_mc2_neon;

    c->avg_h264_chroma_pixels_tab[0] = ff_avg_h264_chroma_mc8_neon;
    c->avg_h264_chroma_pixels_tab[1] = ff_avg_h264_chroma_mc4_neon;
    c->avg_h264_chroma_pixels_tab[2] = ff_avg_h264_chroma_mc2_neon;
}
@ -0,0 +1,64 @@ |
||||
/*
|
||||
* This file is part of FFmpeg. |
||||
* |
||||
* FFmpeg is free software; you can redistribute it and/or |
||||
* modify it under the terms of the GNU Lesser General Public |
||||
* License as published by the Free Software Foundation; either |
||||
* version 2.1 of the License, or (at your option) any later version. |
||||
* |
||||
* FFmpeg is distributed in the hope that it will be useful, |
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of |
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
||||
* Lesser General Public License for more details. |
||||
* |
||||
* You should have received a copy of the GNU Lesser General Public |
||||
* License along with FFmpeg; if not, write to the Free Software |
||||
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA |
||||
*/ |
||||
|
||||
#include "config.h" |
||||
#include "h264chroma.h" |
||||
|
||||
/*
 * Pull in the C chroma MC template once per bit depth handled by
 * ff_h264chroma_init(). BIT_DEPTH selects the variant generated by each
 * inclusion and is undefined again right after it.
 */

/* 8-bit variant */
#define BIT_DEPTH 8
#include "h264chroma_template.c"
#undef BIT_DEPTH

/* 9-bit variant */
#define BIT_DEPTH 9
#include "h264chroma_template.c"
#undef BIT_DEPTH

/* 10-bit variant */
#define BIT_DEPTH 10
#include "h264chroma_template.c"
#undef BIT_DEPTH
||||
|
||||
/*
 * Point every entry of both chroma MC tables at the C implementation for
 * the given bit depth.
 *
 * Expects a pointer named `c`, with put_h264_chroma_pixels_tab[] and
 * avg_h264_chroma_pixels_tab[] members, to be in scope at the expansion
 * site. Table index 0/1/2 receives the mc8/mc4/mc2 function.
 *
 * The body is wrapped in do { } while (0) so that the expansion is exactly
 * one statement: `SET_CHROMA(n);` is then safe as the unbraced body of an
 * if/else, and the definition no longer ends in a dangling line
 * continuation.
 */
#define SET_CHROMA(depth)                                                       \
    do {                                                                        \
        c->put_h264_chroma_pixels_tab[0] = put_h264_chroma_mc8_ ## depth ## _c; \
        c->put_h264_chroma_pixels_tab[1] = put_h264_chroma_mc4_ ## depth ## _c; \
        c->put_h264_chroma_pixels_tab[2] = put_h264_chroma_mc2_ ## depth ## _c; \
        c->avg_h264_chroma_pixels_tab[0] = avg_h264_chroma_mc8_ ## depth ## _c; \
        c->avg_h264_chroma_pixels_tab[1] = avg_h264_chroma_mc4_ ## depth ## _c; \
        c->avg_h264_chroma_pixels_tab[2] = avg_h264_chroma_mc2_ ## depth ## _c; \
    } while (0)
||||
|
||||
/**
 * Fill the chroma motion-compensation tables of @p c.
 *
 * The C implementations are installed first: the 10-bit set for
 * bit_depth == 10, the 9-bit set for bit_depth == 9, and the 8-bit set for
 * every other value. Each architecture-specific initialiser whose ARCH_*
 * condition is true is then called and may overwrite individual entries.
 *
 * @param c         context to initialise
 * @param bit_depth sample bit depth
 */
void ff_h264chroma_init(H264ChromaContext *c, int bit_depth)
{
    if (bit_depth == 10) {
        SET_CHROMA(10);
    } else if (bit_depth == 9) {
        SET_CHROMA(9);
    } else {
        SET_CHROMA(8);
    }

    if (ARCH_ARM)
        ff_h264chroma_init_arm(c, bit_depth);
    if (ARCH_PPC)
        ff_h264chroma_init_ppc(c, bit_depth);
    if (ARCH_SH4)
        ff_h264chroma_init_sh4(c, bit_depth);
    if (ARCH_X86)
        ff_h264chroma_init_x86(c, bit_depth);
}
@ -0,0 +1,38 @@ |
||||
/*
|
||||
* This file is part of FFmpeg. |
||||
* |
||||
* FFmpeg is free software; you can redistribute it and/or |
||||
* modify it under the terms of the GNU Lesser General Public |
||||
* License as published by the Free Software Foundation; either |
||||
* version 2.1 of the License, or (at your option) any later version. |
||||
* |
||||
* FFmpeg is distributed in the hope that it will be useful, |
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of |
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
||||
* Lesser General Public License for more details. |
||||
* |
||||
* You should have received a copy of the GNU Lesser General Public |
||||
* License along with FFmpeg; if not, write to the Free Software |
||||
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA |
||||
*/ |
||||
|
||||
#ifndef AVCODEC_H264CHROMA_H |
||||
#define AVCODEC_H264CHROMA_H |
||||
|
||||
#include <stdint.h> |
||||
|
||||
/**
 * Chroma motion-compensation kernel.
 *
 * @param dst       destination block (annotated "align 8")
 * @param src       source block (annotated "align 1")
 * @param srcStride line stride, used for both src and dst by the C
 *                  implementations
 * @param h         number of rows to process
 * @param x         horizontal fractional offset; implementations assert 0..7
 * @param y         vertical fractional offset; implementations assert 0..7
 */
typedef void (*h264_chroma_mc_func)(uint8_t *dst /*align 8*/,
                                    uint8_t *src /*align 1*/,
                                    int srcStride, int h, int x, int y);

/**
 * Function tables for chroma motion compensation.
 * In both tables index 0, 1 and 2 hold the kernels for 8-, 4- and
 * 2-pixel-wide blocks respectively.
 */
typedef struct H264ChromaContext {
    h264_chroma_mc_func put_h264_chroma_pixels_tab[3]; /* store result      */
    h264_chroma_mc_func avg_h264_chroma_pixels_tab[3]; /* average with dst  */
} H264ChromaContext;

/* Generic entry point: installs the C kernels, then calls the
 * architecture hooks below as selected by the ARCH_* conditions. */
void ff_h264chroma_init(H264ChromaContext *c, int bit_depth);

/* Per-architecture hooks; each may overwrite entries of the tables. */
void ff_h264chroma_init_arm(H264ChromaContext *c, int bit_depth);
void ff_h264chroma_init_ppc(H264ChromaContext *c, int bit_depth);
void ff_h264chroma_init_sh4(H264ChromaContext *c, int bit_depth);
void ff_h264chroma_init_x86(H264ChromaContext *c, int bit_depth);
||||
|
||||
#endif /* AVCODEC_H264CHROMA_H */ |
@ -0,0 +1,142 @@ |
||||
/*
|
||||
* Copyright (c) 2000, 2001 Fabrice Bellard |
||||
* Copyright (c) 2002-2004 Michael Niedermayer <michaelni@gmx.at> |
||||
* |
||||
* This file is part of FFmpeg. |
||||
* |
||||
* FFmpeg is free software; you can redistribute it and/or |
||||
* modify it under the terms of the GNU Lesser General Public |
||||
* License as published by the Free Software Foundation; either |
||||
* version 2.1 of the License, or (at your option) any later version. |
||||
* |
||||
* FFmpeg is distributed in the hope that it will be useful, |
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of |
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
||||
* Lesser General Public License for more details. |
||||
* |
||||
* You should have received a copy of the GNU Lesser General Public |
||||
* License along with FFmpeg; if not, write to the Free Software |
||||
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA |
||||
*/ |
||||
|
||||
#include "libavutil/avassert.h" |
||||
|
||||
#include "bit_depth_template.c" |
||||
|
||||
#define H264_CHROMA_MC(OPNAME, OP)\ |
||||
static void FUNCC(OPNAME ## h264_chroma_mc2)(uint8_t *_dst/*align 8*/, uint8_t *_src/*align 1*/, int stride, int h, int x, int y){\
|
||||
pixel *dst = (pixel*)_dst;\
|
||||
pixel *src = (pixel*)_src;\
|
||||
const int A=(8-x)*(8-y);\
|
||||
const int B=( x)*(8-y);\
|
||||
const int C=(8-x)*( y);\
|
||||
const int D=( x)*( y);\
|
||||
int i;\
|
||||
stride >>= sizeof(pixel)-1;\
|
||||
\
|
||||
av_assert2(x<8 && y<8 && x>=0 && y>=0);\
|
||||
\
|
||||
if(D){\
|
||||
for(i=0; i<h; i++){\
|
||||
OP(dst[0], (A*src[0] + B*src[1] + C*src[stride+0] + D*src[stride+1]));\
|
||||
OP(dst[1], (A*src[1] + B*src[2] + C*src[stride+1] + D*src[stride+2]));\
|
||||
dst+= stride;\
|
||||
src+= stride;\
|
||||
}\
|
||||
}else{\
|
||||
const int E= B+C;\
|
||||
const int step= C ? stride : 1;\
|
||||
for(i=0; i<h; i++){\
|
||||
OP(dst[0], (A*src[0] + E*src[step+0]));\
|
||||
OP(dst[1], (A*src[1] + E*src[step+1]));\
|
||||
dst+= stride;\
|
||||
src+= stride;\
|
||||
}\
|
||||
}\
|
||||
}\
|
||||
\
|
||||
static void FUNCC(OPNAME ## h264_chroma_mc4)(uint8_t *_dst/*align 8*/, uint8_t *_src/*align 1*/, int stride, int h, int x, int y){\
|
||||
pixel *dst = (pixel*)_dst;\
|
||||
pixel *src = (pixel*)_src;\
|
||||
const int A=(8-x)*(8-y);\
|
||||
const int B=( x)*(8-y);\
|
||||
const int C=(8-x)*( y);\
|
||||
const int D=( x)*( y);\
|
||||
int i;\
|
||||
stride >>= sizeof(pixel)-1;\
|
||||
\
|
||||
av_assert2(x<8 && y<8 && x>=0 && y>=0);\
|
||||
\
|
||||
if(D){\
|
||||
for(i=0; i<h; i++){\
|
||||
OP(dst[0], (A*src[0] + B*src[1] + C*src[stride+0] + D*src[stride+1]));\
|
||||
OP(dst[1], (A*src[1] + B*src[2] + C*src[stride+1] + D*src[stride+2]));\
|
||||
OP(dst[2], (A*src[2] + B*src[3] + C*src[stride+2] + D*src[stride+3]));\
|
||||
OP(dst[3], (A*src[3] + B*src[4] + C*src[stride+3] + D*src[stride+4]));\
|
||||
dst+= stride;\
|
||||
src+= stride;\
|
||||
}\
|
||||
}else{\
|
||||
const int E= B+C;\
|
||||
const int step= C ? stride : 1;\
|
||||
for(i=0; i<h; i++){\
|
||||
OP(dst[0], (A*src[0] + E*src[step+0]));\
|
||||
OP(dst[1], (A*src[1] + E*src[step+1]));\
|
||||
OP(dst[2], (A*src[2] + E*src[step+2]));\
|
||||
OP(dst[3], (A*src[3] + E*src[step+3]));\
|
||||
dst+= stride;\
|
||||
src+= stride;\
|
||||
}\
|
||||
}\
|
||||
}\
|
||||
\
|
||||
static void FUNCC(OPNAME ## h264_chroma_mc8)(uint8_t *_dst/*align 8*/, uint8_t *_src/*align 1*/, int stride, int h, int x, int y){\
|
||||
pixel *dst = (pixel*)_dst;\
|
||||
pixel *src = (pixel*)_src;\
|
||||
const int A=(8-x)*(8-y);\
|
||||
const int B=( x)*(8-y);\
|
||||
const int C=(8-x)*( y);\
|
||||
const int D=( x)*( y);\
|
||||
int i;\
|
||||
stride >>= sizeof(pixel)-1;\
|
||||
\
|
||||
av_assert2(x<8 && y<8 && x>=0 && y>=0);\
|
||||
\
|
||||
if(D){\
|
||||
for(i=0; i<h; i++){\
|
||||
OP(dst[0], (A*src[0] + B*src[1] + C*src[stride+0] + D*src[stride+1]));\
|
||||
OP(dst[1], (A*src[1] + B*src[2] + C*src[stride+1] + D*src[stride+2]));\
|
||||
OP(dst[2], (A*src[2] + B*src[3] + C*src[stride+2] + D*src[stride+3]));\
|
||||
OP(dst[3], (A*src[3] + B*src[4] + C*src[stride+3] + D*src[stride+4]));\
|
||||
OP(dst[4], (A*src[4] + B*src[5] + C*src[stride+4] + D*src[stride+5]));\
|
||||
OP(dst[5], (A*src[5] + B*src[6] + C*src[stride+5] + D*src[stride+6]));\
|
||||
OP(dst[6], (A*src[6] + B*src[7] + C*src[stride+6] + D*src[stride+7]));\
|
||||
OP(dst[7], (A*src[7] + B*src[8] + C*src[stride+7] + D*src[stride+8]));\
|
||||
dst+= stride;\
|
||||
src+= stride;\
|
||||
}\
|
||||
}else{\
|
||||
const int E= B+C;\
|
||||
const int step= C ? stride : 1;\
|
||||
for(i=0; i<h; i++){\
|
||||
OP(dst[0], (A*src[0] + E*src[step+0]));\
|
||||
OP(dst[1], (A*src[1] + E*src[step+1]));\
|
||||
OP(dst[2], (A*src[2] + E*src[step+2]));\
|
||||
OP(dst[3], (A*src[3] + E*src[step+3]));\
|
||||
OP(dst[4], (A*src[4] + E*src[step+4]));\
|
||||
OP(dst[5], (A*src[5] + E*src[step+5]));\
|
||||
OP(dst[6], (A*src[6] + E*src[step+6]));\
|
||||
OP(dst[7], (A*src[7] + E*src[step+7]));\
|
||||
dst+= stride;\
|
||||
src+= stride;\
|
||||
}\
|
||||
}\
|
||||
} |
||||
|
||||
#define op_avg(a, b) a = (((a)+(((b) + 32)>>6)+1)>>1) |
||||
#define op_put(a, b) a = (((b) + 32)>>6) |
||||
|
||||
H264_CHROMA_MC(put_ , op_put) |
||||
H264_CHROMA_MC(avg_ , op_avg) |
||||
#undef op_avg |
||||
#undef op_put |
@ -0,0 +1,64 @@ |
||||
/*
|
||||
* Copyright (c) 2004 Romain Dolbeau <romain@dolbeau.org> |
||||
* |
||||
* This file is part of FFmpeg. |
||||
* |
||||
* FFmpeg is free software; you can redistribute it and/or |
||||
* modify it under the terms of the GNU Lesser General Public |
||||
* License as published by the Free Software Foundation; either |
||||
* version 2.1 of the License, or (at your option) any later version. |
||||
* |
||||
* FFmpeg is distributed in the hope that it will be useful, |
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of |
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
||||
* Lesser General Public License for more details. |
||||
* |
||||
* You should have received a copy of the GNU Lesser General Public |
||||
* License along with FFmpeg; if not, write to the Free Software |
||||
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA |
||||
*/ |
||||
|
||||
#include "config.h" |
||||
#include "libavutil/attributes.h" |
||||
#include "libavcodec/h264chroma.h" |
||||
|
||||
#if HAVE_ALTIVEC |
||||
#include "libavutil/cpu.h" |
||||
#include "libavutil/intreadwrite.h" |
||||
#include "libavutil/ppc/types_altivec.h" |
||||
#include "libavutil/ppc/util_altivec.h" |
||||
#include "dsputil_altivec.h" |
||||
|
||||
/*
 * Final store step plugged into the shared AltiVec template through
 * OP_U8_ALTIVEC(d, s, dst): "put" assigns s to d unchanged, "avg" assigns
 * vec_avg(dst, s).
 */
#define PUT_OP_U8_ALTIVEC(d, s, dst) d = s
#define AVG_OP_U8_ALTIVEC(d, s, dst) d = vec_avg(dst, s)

/* First inclusion: generates put_h264_chroma_mc8_altivec(). */
#define OP_U8_ALTIVEC                          PUT_OP_U8_ALTIVEC
#define PREFIX_h264_chroma_mc8_altivec         put_h264_chroma_mc8_altivec
#define PREFIX_h264_chroma_mc8_num             altivec_put_h264_chroma_mc8_num
#include "h264chroma_template.c"
#undef OP_U8_ALTIVEC
#undef PREFIX_h264_chroma_mc8_altivec
#undef PREFIX_h264_chroma_mc8_num

/* Second inclusion: generates avg_h264_chroma_mc8_altivec(). */
#define OP_U8_ALTIVEC                          AVG_OP_U8_ALTIVEC
#define PREFIX_h264_chroma_mc8_altivec         avg_h264_chroma_mc8_altivec
#define PREFIX_h264_chroma_mc8_num             altivec_avg_h264_chroma_mc8_num
#include "h264chroma_template.c"
#undef OP_U8_ALTIVEC
#undef PREFIX_h264_chroma_mc8_altivec
#undef PREFIX_h264_chroma_mc8_num
||||
#endif /* HAVE_ALTIVEC */ |
||||
|
||||
/**
 * Install the AltiVec 8-pixel-wide chroma kernels into @p c.
 *
 * Does nothing when built without HAVE_ALTIVEC, when the running CPU does
 * not report AltiVec, or when bit_depth is above 8. Only table index 0
 * (mc8) is overwritten.
 *
 * @param c         context whose tables may be overwritten
 * @param bit_depth sample bit depth
 */
av_cold void ff_h264chroma_init_ppc(H264ChromaContext *c, int bit_depth)
{
#if HAVE_ALTIVEC
    const int cpu_flags = av_get_cpu_flags();

    if (!(cpu_flags & AV_CPU_FLAG_ALTIVEC) || bit_depth > 8)
        return;

    c->put_h264_chroma_pixels_tab[0] = put_h264_chroma_mc8_altivec;
    c->avg_h264_chroma_pixels_tab[0] = avg_h264_chroma_mc8_altivec;
#endif /* HAVE_ALTIVEC */
}
@ -0,0 +1,289 @@ |
||||
/*
|
||||
* Copyright (c) 2004 Romain Dolbeau <romain@dolbeau.org> |
||||
* |
||||
* This file is part of FFmpeg. |
||||
* |
||||
* FFmpeg is free software; you can redistribute it and/or |
||||
* modify it under the terms of the GNU Lesser General Public |
||||
* License as published by the Free Software Foundation; either |
||||
* version 2.1 of the License, or (at your option) any later version. |
||||
* |
||||
* FFmpeg is distributed in the hope that it will be useful, |
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of |
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
||||
* Lesser General Public License for more details. |
||||
* |
||||
* You should have received a copy of the GNU Lesser General Public |
||||
* License along with FFmpeg; if not, write to the Free Software |
||||
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA |
||||
*/ |
||||
|
||||
#include "libavutil/mem.h" |
||||
|
||||
/* this code assumes that stride % 16 == 0 */ |
||||
|
||||
/*
 * One output row of the four-tap filter.
 *
 * Expects the enclosing function to provide: vsrc0ssH/vsrc1ssH (current
 * row, already widened to 16 bits), vsrc2uc/vsrc3uc (next row as bytes),
 * the weights vA..vD, v6us, fperm, dst, src and stride.
 * BIAS1 is the initial accumulator, BIAS2() is applied to the finished sum
 * before the shift right by v6us.
 * On exit the next row has become the current row and dst/src have
 * advanced by stride.
 */
#define CHROMA_MC8_ALTIVEC_CORE(BIAS1, BIAS2)                       \
        /* interleave next-row bytes with zero -> 16-bit lanes */   \
        vsrc2ssH = (vec_s16)vec_mergeh(zero_u8v,(vec_u8)vsrc2uc);   \
        vsrc3ssH = (vec_s16)vec_mergeh(zero_u8v,(vec_u8)vsrc3uc);   \
                                                                    \
        psum = vec_mladd(vA, vsrc0ssH, BIAS1);                      \
        psum = vec_mladd(vB, vsrc1ssH, psum);                       \
        psum = vec_mladd(vC, vsrc2ssH, psum);                       \
        psum = vec_mladd(vD, vsrc3ssH, psum);                       \
        psum = vec_sr(BIAS2(psum), v6us);                           \
                                                                    \
        /* merge the 8 result bytes into the 16-byte line at dst */ \
        vdst = vec_ld(0, dst);                                      \
        ppsum = (vec_u8)vec_pack(psum, psum);                       \
        vfdst = vec_perm(vdst, ppsum, fperm);                       \
                                                                    \
        OP_U8_ALTIVEC(fsum, vfdst, vdst);                           \
                                                                    \
        vec_st(fsum, 0, dst);                                       \
                                                                    \
        /* next row becomes the current row */                      \
        vsrc0ssH = vsrc2ssH;                                        \
        vsrc1ssH = vsrc3ssH;                                        \
                                                                    \
        dst += stride;                                              \
        src += stride;

/*
 * One output row of the two-tap filter: vA * vsrc0uc + vE * vsrc1uc + 32,
 * shifted right by v6us. The caller loads vsrc0uc/vsrc1uc for every row;
 * dst/src advance by stride on exit.
 */
#define CHROMA_MC8_ALTIVEC_CORE_SIMPLE                              \
                                                                    \
        vsrc0ssH = (vec_s16)vec_mergeh(zero_u8v,(vec_u8)vsrc0uc);   \
        vsrc1ssH = (vec_s16)vec_mergeh(zero_u8v,(vec_u8)vsrc1uc);   \
                                                                    \
        psum = vec_mladd(vA, vsrc0ssH, v32ss);                      \
        psum = vec_mladd(vE, vsrc1ssH, psum);                       \
        psum = vec_sr(psum, v6us);                                  \
                                                                    \
        vdst = vec_ld(0, dst);                                      \
        ppsum = (vec_u8)vec_pack(psum, psum);                       \
        vfdst = vec_perm(vdst, ppsum, fperm);                       \
                                                                    \
        OP_U8_ALTIVEC(fsum, vfdst, vdst);                           \
                                                                    \
        vec_st(fsum, 0, dst);                                       \
                                                                    \
        dst += stride;                                              \
        src += stride;

/* BIAS2 choices for CHROMA_MC8_ALTIVEC_CORE */
#define noop(a) a
#define add28(a) vec_add(v28ss, a)
||||
|
||||
#ifdef PREFIX_h264_chroma_mc8_altivec
/*
 * 8-pixel-wide chroma interpolation, h rows, using AltiVec.
 *
 * Weights are A=(8-x)(8-y), B=x(8-y), C=(8-x)y, D=xy. Three paths exist:
 *   D != 0          four-tap filter (CHROMA_MC8_ALTIVEC_CORE)
 *   D == 0, C != 0  two taps, current row and next row
 *   D == 0, C == 0  two taps, pixel and its right neighbour
 * Each path has one loop for sources whose 16-byte offset is at most 7
 * (a single aligned load covers the needed bytes) and one for larger
 * offsets (two aligned loads per row).
 *
 * The result is written with a 16-byte load/permute/store; fperm selects
 * which half of that line is replaced according to dst % 16.
 */
static void PREFIX_h264_chroma_mc8_altivec(uint8_t * dst, uint8_t * src,
                                           int stride, int h, int x, int y) {
    DECLARE_ALIGNED(16, signed int, ABCD)[4] = {
        ((8 - x) * (8 - y)),
        ((    x) * (8 - y)),
        ((8 - x) * (    y)),
        ((    x) * (    y))
    };
    int row;
    vec_u8 fperm;
    const vec_s32 vABCD = vec_ld(0, ABCD);
    /* one 16-bit lane per 32-bit weight, replicated across the vector */
    const vec_s16 vA = vec_splat((vec_s16)vABCD, 1);
    const vec_s16 vB = vec_splat((vec_s16)vABCD, 3);
    const vec_s16 vC = vec_splat((vec_s16)vABCD, 5);
    const vec_s16 vD = vec_splat((vec_s16)vABCD, 7);
    LOAD_ZERO;
    const vec_s16 v32ss = vec_sl(vec_splat_s16(1), vec_splat_u16(5));
    const vec_u16 v6us  = vec_splat_u16(6);
    const unsigned long src_off = ((unsigned long)src) % 16;
    const int two_loads = src_off > 7;   /* row spans two aligned vectors */
    const int last_byte = src_off == 15; /* src + 1 is itself 16-aligned  */

    vec_u8 vsrcAuc, av_uninit(vsrcBuc), vsrcperm0, vsrcperm1;
    vec_u8 vsrc0uc, vsrc1uc;
    vec_s16 vsrc0ssH, vsrc1ssH;
    vec_u8 vsrcCuc, vsrc2uc, vsrc3uc;
    vec_s16 vsrc2ssH, vsrc3ssH, psum;
    vec_u8 vdst, ppsum, vfdst, fsum;

    if (((unsigned long)dst) % 16 == 0) {
        /* result goes to bytes 0..7, bytes 8..15 keep the old contents */
        fperm = (vec_u8){0x10, 0x11, 0x12, 0x13,
                         0x14, 0x15, 0x16, 0x17,
                         0x08, 0x09, 0x0A, 0x0B,
                         0x0C, 0x0D, 0x0E, 0x0F};
    } else {
        /* bytes 0..7 keep the old contents, result goes to bytes 8..15 */
        fperm = (vec_u8){0x00, 0x01, 0x02, 0x03,
                         0x04, 0x05, 0x06, 0x07,
                         0x18, 0x19, 0x1A, 0x1B,
                         0x1C, 0x1D, 0x1E, 0x1F};
    }

    /* Prime the first row: pixels at src (vsrc0) and at src + 1 (vsrc1). */
    vsrcAuc = vec_ld(0, src);
    if (two_loads)
        vsrcBuc = vec_ld(16, src);
    vsrcperm0 = vec_lvsl(0, src);
    vsrcperm1 = vec_lvsl(1, src);

    vsrc0uc = vec_perm(vsrcAuc, vsrcBuc, vsrcperm0);
    /* NOTE(review): with src % 16 == 15 the second aligned vector is used
     * directly instead of a permute - presumably because the shift for
     * src + 1 is zero in that case; confirm against vec_lvsl semantics. */
    if (last_byte)
        vsrc1uc = vsrcBuc;
    else
        vsrc1uc = vec_perm(vsrcAuc, vsrcBuc, vsrcperm1);

    vsrc0ssH = (vec_s16)vec_mergeh(zero_u8v,(vec_u8)vsrc0uc);
    vsrc1ssH = (vec_s16)vec_mergeh(zero_u8v,(vec_u8)vsrc1uc);

    /* ---- D != 0: four taps ---- */
    if (ABCD[3]) {
        if (two_loads) {
            vec_u8 vsrcDuc;
            for (row = 0; row < h; row++) {
                vsrcCuc = vec_ld(stride + 0, src);
                vsrcDuc = vec_ld(stride + 16, src);
                vsrc2uc = vec_perm(vsrcCuc, vsrcDuc, vsrcperm0);
                if (last_byte)
                    vsrc3uc = vsrcDuc;
                else
                    vsrc3uc = vec_perm(vsrcCuc, vsrcDuc, vsrcperm1);

                CHROMA_MC8_ALTIVEC_CORE(v32ss, noop)
            }
        } else { /* single load, implies !last_byte */
            for (row = 0; row < h; row++) {
                vsrcCuc = vec_ld(stride + 0, src);
                vsrc2uc = vec_perm(vsrcCuc, vsrcCuc, vsrcperm0);
                vsrc3uc = vec_perm(vsrcCuc, vsrcCuc, vsrcperm1);

                CHROMA_MC8_ALTIVEC_CORE(v32ss, noop)
            }
        }
        return;
    }

    /* ---- D == 0: two taps with combined weight E = B + C ---- */
    {
        const vec_s16 vE = vec_add(vB, vC);

        if (ABCD[2]) { /* x == 0, B == 0: taps are this row and the next */
            if (two_loads) {
                vec_u8 vsrcDuc;
                for (row = 0; row < h; row++) {
                    vsrcCuc = vec_ld(stride + 0, src);
                    vsrcDuc = vec_ld(stride + 15, src);
                    vsrc1uc = vec_perm(vsrcCuc, vsrcDuc, vsrcperm0);
                    CHROMA_MC8_ALTIVEC_CORE_SIMPLE

                    /* the row just loaded is the top tap of the next row */
                    vsrc0uc = vsrc1uc;
                }
            } else { /* single load, implies !last_byte */
                for (row = 0; row < h; row++) {
                    vsrcCuc = vec_ld(stride + 0, src);
                    vsrc1uc = vec_perm(vsrcCuc, vsrcCuc, vsrcperm0);
                    CHROMA_MC8_ALTIVEC_CORE_SIMPLE

                    vsrc0uc = vsrc1uc;
                }
            }
            return;
        }

        /* y == 0, C == 0: taps are the pixel and its right neighbour */
        if (two_loads) {
            vec_u8 vsrcDuc;
            for (row = 0; row < h; row++) {
                vsrcCuc = vec_ld(0, src);
                vsrcDuc = vec_ld(15, src);
                vsrc0uc = vec_perm(vsrcCuc, vsrcDuc, vsrcperm0);
                if (last_byte)
                    vsrc1uc = vsrcDuc;
                else
                    vsrc1uc = vec_perm(vsrcCuc, vsrcDuc, vsrcperm1);

                CHROMA_MC8_ALTIVEC_CORE_SIMPLE
            }
        } else { /* single load, implies !last_byte */
            for (row = 0; row < h; row++) {
                vsrcCuc = vec_ld(0, src);
                vsrc0uc = vec_perm(vsrcCuc, vsrcCuc, vsrcperm0);
                vsrc1uc = vec_perm(vsrcCuc, vsrcCuc, vsrcperm1);

                CHROMA_MC8_ALTIVEC_CORE_SIMPLE
            }
        }
    }
}
#endif
||||
|
||||
/* this code assumes that stride % 16 == 0 */ |
||||
#ifdef PREFIX_no_rnd_vc1_chroma_mc8_altivec
/*
 * 8-pixel-wide four-tap chroma interpolation for the "no_rnd" VC-1
 * variant, h rows, using AltiVec.
 *
 * Same weights as the H.264 function (A=(8-x)(8-y), B=x(8-y), C=(8-x)y,
 * D=xy), but the four-tap path is always taken and the bias added before
 * the shift right by 6 is 28 (v28ss = 32 - 4) instead of 32.
 */
static void PREFIX_no_rnd_vc1_chroma_mc8_altivec(uint8_t * dst, uint8_t * src, int stride, int h, int x, int y) {
    DECLARE_ALIGNED(16, signed int, ABCD)[4] = {
        ((8 - x) * (8 - y)),
        ((    x) * (8 - y)),
        ((8 - x) * (    y)),
        ((    x) * (    y))
    };
    int row;
    vec_u8 fperm;
    const vec_s32 vABCD = vec_ld(0, ABCD);
    /* one 16-bit lane per 32-bit weight, replicated across the vector */
    const vec_s16 vA = vec_splat((vec_s16)vABCD, 1);
    const vec_s16 vB = vec_splat((vec_s16)vABCD, 3);
    const vec_s16 vC = vec_splat((vec_s16)vABCD, 5);
    const vec_s16 vD = vec_splat((vec_s16)vABCD, 7);
    LOAD_ZERO;
    const vec_s16 v28ss = vec_sub(vec_sl(vec_splat_s16(1),vec_splat_u16(5)),vec_splat_s16(4));
    const vec_u16 v6us  = vec_splat_u16(6);
    const unsigned long src_off = ((unsigned long)src) % 16;
    const int two_loads = src_off > 7;   /* row spans two aligned vectors */
    const int last_byte = src_off == 15; /* src + 1 is itself 16-aligned  */

    vec_u8 vsrcAuc, av_uninit(vsrcBuc), vsrcperm0, vsrcperm1;
    vec_u8 vsrc0uc, vsrc1uc;
    vec_s16 vsrc0ssH, vsrc1ssH;
    vec_u8 vsrcCuc, vsrc2uc, vsrc3uc;
    vec_s16 vsrc2ssH, vsrc3ssH, psum;
    vec_u8 vdst, ppsum, vfdst, fsum;

    if (((unsigned long)dst) % 16 == 0) {
        /* result goes to bytes 0..7, bytes 8..15 keep the old contents */
        fperm = (vec_u8){0x10, 0x11, 0x12, 0x13,
                         0x14, 0x15, 0x16, 0x17,
                         0x08, 0x09, 0x0A, 0x0B,
                         0x0C, 0x0D, 0x0E, 0x0F};
    } else {
        /* bytes 0..7 keep the old contents, result goes to bytes 8..15 */
        fperm = (vec_u8){0x00, 0x01, 0x02, 0x03,
                         0x04, 0x05, 0x06, 0x07,
                         0x18, 0x19, 0x1A, 0x1B,
                         0x1C, 0x1D, 0x1E, 0x1F};
    }

    /* Prime the first row: pixels at src (vsrc0) and at src + 1 (vsrc1). */
    vsrcAuc = vec_ld(0, src);
    if (two_loads)
        vsrcBuc = vec_ld(16, src);
    vsrcperm0 = vec_lvsl(0, src);
    vsrcperm1 = vec_lvsl(1, src);

    vsrc0uc = vec_perm(vsrcAuc, vsrcBuc, vsrcperm0);
    if (last_byte)
        vsrc1uc = vsrcBuc;
    else
        vsrc1uc = vec_perm(vsrcAuc, vsrcBuc, vsrcperm1);

    vsrc0ssH = (vec_s16)vec_mergeh(zero_u8v, (vec_u8)vsrc0uc);
    vsrc1ssH = (vec_s16)vec_mergeh(zero_u8v, (vec_u8)vsrc1uc);

    if (two_loads) {
        vec_u8 vsrcDuc;
        for (row = 0; row < h; row++) {
            vsrcCuc = vec_ld(stride + 0, src);
            vsrcDuc = vec_ld(stride + 16, src);

            vsrc2uc = vec_perm(vsrcCuc, vsrcDuc, vsrcperm0);
            if (last_byte)
                vsrc3uc = vsrcDuc;
            else
                vsrc3uc = vec_perm(vsrcCuc, vsrcDuc, vsrcperm1);

            CHROMA_MC8_ALTIVEC_CORE(vec_splat_s16(0), add28)
        }
    } else { /* single load, implies !last_byte */
        for (row = 0; row < h; row++) {
            vsrcCuc = vec_ld(stride + 0, src);

            vsrc2uc = vec_perm(vsrcCuc, vsrcCuc, vsrcperm0);
            vsrc3uc = vec_perm(vsrcCuc, vsrcCuc, vsrcperm1);

            CHROMA_MC8_ALTIVEC_CORE(vec_splat_s16(0), add28)
        }
    }
}
#endif
||||
|
||||
/* Drop every helper macro defined by this template so that nothing leaks
 * into the including file and each inclusion starts from a clean slate. */
#undef noop
#undef add28
#undef CHROMA_MC8_ALTIVEC_CORE
#undef CHROMA_MC8_ALTIVEC_CORE_SIMPLE
@ -1,3 +1,5 @@ |
||||
# Objects appended to OBJS unconditionally.
OBJS += sh4/dsputil_align.o                                             \
        sh4/dsputil_sh4.o                                               \
        sh4/idct_sh4.o                                                  \

# Appended to the OBJS-<value of CONFIG_H264CHROMA> list.
OBJS-$(CONFIG_H264CHROMA)              += sh4/h264chroma_init.o         \
|
@ -0,0 +1,132 @@ |
||||
/*
|
||||
* aligned/packed access motion |
||||
* |
||||
* Copyright (c) 2001-2003 BERO <bero@geocities.co.jp> |
||||
* |
||||
* This file is part of FFmpeg. |
||||
* |
||||
* FFmpeg is free software; you can redistribute it and/or |
||||
* modify it under the terms of the GNU Lesser General Public |
||||
* License as published by the Free Software Foundation; either |
||||
* version 2.1 of the License, or (at your option) any later version. |
||||
* |
||||
* FFmpeg is distributed in the hope that it will be useful, |
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of |
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
||||
* Lesser General Public License for more details. |
||||
* |
||||
* You should have received a copy of the GNU Lesser General Public |
||||
* License along with FFmpeg; if not, write to the Free Software |
||||
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA |
||||
*/ |
||||
|
||||
#include <assert.h> |
||||
#include <stdint.h> |
||||
|
||||
#include "libavutil/attributes.h" |
||||
#include "libavcodec/h264chroma.h" |
||||
|
||||
/*
 * Generate the 2-, 4- and 8-pixel-wide chroma motion-compensation
 * functions for SH4.
 *
 * Weights: A=(8-x)(8-y), B=x(8-y), C=(8-x)y, D=xy (they add up to 64).
 * For every output row the code walks two source pointers, s0 on the
 * current row and s1 one stride below. t0/t1 hold two adjacent samples of
 * the current row and t2/t3 the two samples below them; the roles of
 * t0/t1 (and t2/t3) alternate from one output pixel to the next so that
 * each source byte is loaded exactly once per row.
 *
 * Width+1 samples are read from both rows for every output row,
 * whatever the values of x and y.
 *
 * A non-positive h is treated as "nothing to do". Without the guard the
 * do/while loop would run once for h == 0 and then keep decrementing h
 * past zero, reading and writing far outside both buffers.
 *
 * OP(dst_pixel, sum) stores or averages the unnormalised weighted sum.
 */
#define H264_CHROMA_MC(OPNAME, OP)                                             \
static void OPNAME ## h264_chroma_mc2_sh4(uint8_t *dst/*align 8*/,             \
                                          uint8_t *src/*align 1*/,             \
                                          int stride, int h, int x, int y)     \
{                                                                              \
    const int A = (8 - x) * (8 - y);                                           \
    const int B = (    x) * (8 - y);                                           \
    const int C = (8 - x) * (    y);                                           \
    const int D = (    x) * (    y);                                           \
                                                                               \
    assert(x<8 && y<8 && x>=0 && y>=0);                                        \
                                                                               \
    if (h <= 0)                                                                \
        return;                                                                \
                                                                               \
    do {                                                                       \
        int t0, t1, t2, t3;                                                    \
        uint8_t *s0 = src;                                                     \
        uint8_t *s1 = src + stride;                                            \
        t0 = *s0++; t2 = *s1++;                                                \
        t1 = *s0++; t3 = *s1++;                                                \
        OP(dst[0], (A*t0 + B*t1 + C*t2 + D*t3));                               \
        t0 = *s0++; t2 = *s1++;                                                \
        OP(dst[1], (A*t1 + B*t0 + C*t3 + D*t2));                               \
        dst += stride;                                                         \
        src += stride;                                                         \
    } while (--h);                                                             \
}                                                                              \
                                                                               \
static void OPNAME ## h264_chroma_mc4_sh4(uint8_t *dst/*align 8*/,             \
                                          uint8_t *src/*align 1*/,             \
                                          int stride, int h, int x, int y)     \
{                                                                              \
    const int A = (8 - x) * (8 - y);                                           \
    const int B = (    x) * (8 - y);                                           \
    const int C = (8 - x) * (    y);                                           \
    const int D = (    x) * (    y);                                           \
                                                                               \
    assert(x<8 && y<8 && x>=0 && y>=0);                                        \
                                                                               \
    if (h <= 0)                                                                \
        return;                                                                \
                                                                               \
    do {                                                                       \
        int t0, t1, t2, t3;                                                    \
        uint8_t *s0 = src;                                                     \
        uint8_t *s1 = src + stride;                                            \
        t0 = *s0++; t2 = *s1++;                                                \
        t1 = *s0++; t3 = *s1++;                                                \
        OP(dst[0], (A*t0 + B*t1 + C*t2 + D*t3));                               \
        t0 = *s0++; t2 = *s1++;                                                \
        OP(dst[1], (A*t1 + B*t0 + C*t3 + D*t2));                               \
        t1 = *s0++; t3 = *s1++;                                                \
        OP(dst[2], (A*t0 + B*t1 + C*t2 + D*t3));                               \
        t0 = *s0++; t2 = *s1++;                                                \
        OP(dst[3], (A*t1 + B*t0 + C*t3 + D*t2));                               \
        dst += stride;                                                         \
        src += stride;                                                         \
    } while (--h);                                                             \
}                                                                              \
                                                                               \
static void OPNAME ## h264_chroma_mc8_sh4(uint8_t *dst/*align 8*/,             \
                                          uint8_t *src/*align 1*/,             \
                                          int stride, int h, int x, int y)     \
{                                                                              \
    const int A = (8 - x) * (8 - y);                                           \
    const int B = (    x) * (8 - y);                                           \
    const int C = (8 - x) * (    y);                                           \
    const int D = (    x) * (    y);                                           \
                                                                               \
    assert(x<8 && y<8 && x>=0 && y>=0);                                        \
                                                                               \
    if (h <= 0)                                                                \
        return;                                                                \
                                                                               \
    do {                                                                       \
        int t0, t1, t2, t3;                                                    \
        uint8_t *s0 = src;                                                     \
        uint8_t *s1 = src + stride;                                            \
        t0 = *s0++; t2 = *s1++;                                                \
        t1 = *s0++; t3 = *s1++;                                                \
        OP(dst[0], (A*t0 + B*t1 + C*t2 + D*t3));                               \
        t0 = *s0++; t2 = *s1++;                                                \
        OP(dst[1], (A*t1 + B*t0 + C*t3 + D*t2));                               \
        t1 = *s0++; t3 = *s1++;                                                \
        OP(dst[2], (A*t0 + B*t1 + C*t2 + D*t3));                               \
        t0 = *s0++; t2 = *s1++;                                                \
        OP(dst[3], (A*t1 + B*t0 + C*t3 + D*t2));                               \
        t1 = *s0++; t3 = *s1++;                                                \
        OP(dst[4], (A*t0 + B*t1 + C*t2 + D*t3));                               \
        t0 = *s0++; t2 = *s1++;                                                \
        OP(dst[5], (A*t1 + B*t0 + C*t3 + D*t2));                               \
        t1 = *s0++; t3 = *s1++;                                                \
        OP(dst[6], (A*t0 + B*t1 + C*t2 + D*t3));                               \
        t0 = *s0++; t2 = *s1++;                                                \
        OP(dst[7], (A*t1 + B*t0 + C*t3 + D*t2));                               \
        dst += stride;                                                         \
        src += stride;                                                         \
    } while (--h);                                                             \
}

/* b is the weighted sum: round and divide by 64. op_avg additionally takes
 * the rounded-up mean with the value already stored in a. */
#define op_avg(a, b) a = (((a)+(((b) + 32)>>6)+1)>>1)
#define op_put(a, b) a = (((b) + 32)>>6)

H264_CHROMA_MC(put_       , op_put)
H264_CHROMA_MC(avg_       , op_avg)
#undef op_avg
#undef op_put
||||
|
||||
/**
 * Install the SH4 chroma kernels into @p c.
 *
 * All six table entries are overwritten when bit_depth is 8 or less;
 * for higher bit depths the tables are left as they are.
 *
 * @param c         context whose tables may be overwritten
 * @param bit_depth sample bit depth
 */
av_cold void ff_h264chroma_init_sh4(H264ChromaContext *c, int bit_depth)
{
    if (bit_depth > 8)
        return;

    /* index 0/1/2 <-> block width 8/4/2 */
    c->put_h264_chroma_pixels_tab[0] = put_h264_chroma_mc8_sh4;
    c->put_h264_chroma_pixels_tab[1] = put_h264_chroma_mc4_sh4;
    c->put_h264_chroma_pixels_tab[2] = put_h264_chroma_mc2_sh4;
    c->avg_h264_chroma_pixels_tab[0] = avg_h264_chroma_mc8_sh4;
    c->avg_h264_chroma_pixels_tab[1] = avg_h264_chroma_mc4_sh4;
    c->avg_h264_chroma_pixels_tab[2] = avg_h264_chroma_mc2_sh4;
}
@ -0,0 +1,116 @@ |
||||
/*
|
||||
* This file is part of FFmpeg. |
||||
* |
||||
* FFmpeg is free software; you can redistribute it and/or |
||||
* modify it under the terms of the GNU Lesser General Public |
||||
* License as published by the Free Software Foundation; either |
||||
* version 2.1 of the License, or (at your option) any later version. |
||||
* |
||||
* FFmpeg is distributed in the hope that it will be useful, |
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of |
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
||||
* Lesser General Public License for more details. |
||||
* |
||||
* You should have received a copy of the GNU Lesser General Public |
||||
* License along with FFmpeg; if not, write to the Free Software |
||||
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA |
||||
*/ |
||||
|
||||
#include <stdint.h> |
||||
|
||||
#include "config.h" |
||||
#include "libavutil/cpu.h" |
||||
#include "libavutil/x86/cpu.h" |
||||
#include "libavcodec/h264chroma.h" |
||||
|
||||
/*
 * Kernels installed for bit depths of 8 or less, defined outside this
 * file. ff_h264chroma_init_x86() stores them in the put/avg tables, so
 * they all share the h264_chroma_mc_func signature.
 */

/* 8 pixels wide: MMX / MMXEXT / 3DNow! */
void ff_put_h264_chroma_mc8_rnd_mmx(uint8_t *dst, uint8_t *src,
                                    int stride, int h, int x, int y);
void ff_avg_h264_chroma_mc8_rnd_mmxext(uint8_t *dst, uint8_t *src,
                                       int stride, int h, int x, int y);
void ff_avg_h264_chroma_mc8_rnd_3dnow(uint8_t *dst, uint8_t *src,
                                      int stride, int h, int x, int y);

/* 4 pixels wide: MMX / MMXEXT / 3DNow! */
void ff_put_h264_chroma_mc4_mmx(uint8_t *dst, uint8_t *src,
                                int stride, int h, int x, int y);
void ff_avg_h264_chroma_mc4_mmxext(uint8_t *dst, uint8_t *src,
                                   int stride, int h, int x, int y);
void ff_avg_h264_chroma_mc4_3dnow(uint8_t *dst, uint8_t *src,
                                  int stride, int h, int x, int y);

/* 2 pixels wide: MMXEXT */
void ff_put_h264_chroma_mc2_mmxext(uint8_t *dst, uint8_t *src,
                                   int stride, int h, int x, int y);
void ff_avg_h264_chroma_mc2_mmxext(uint8_t *dst, uint8_t *src,
                                   int stride, int h, int x, int y);

/* SSSE3, put */
void ff_put_h264_chroma_mc8_rnd_ssse3(uint8_t *dst, uint8_t *src,
                                      int stride, int h, int x, int y);
void ff_put_h264_chroma_mc4_ssse3(uint8_t *dst, uint8_t *src,
                                  int stride, int h, int x, int y);

/* SSSE3, avg */
void ff_avg_h264_chroma_mc8_rnd_ssse3(uint8_t *dst, uint8_t *src,
                                      int stride, int h, int x, int y);
void ff_avg_h264_chroma_mc4_ssse3(uint8_t *dst, uint8_t *src,
                                  int stride, int h, int x, int y);
||||
|
||||
/*
 * Declare one high-bit-depth kernel named
 *   ff_<KIND>_h264_chroma_mc<WIDTH>_<BITS>_<ISA>
 * with the h264_chroma_mc_func signature. The expansion already ends in a
 * semicolon, so invocations are written without one.
 */
#define CHROMA_MC(KIND, WIDTH, BITS, ISA)                                   \
void ff_ ## KIND ## _h264_chroma_mc ## WIDTH ## _ ## BITS ## _ ## ISA       \
                                      (uint8_t *dst, uint8_t *src,          \
                                       int stride, int h, int x, int y);

/* 10-bit kernels: 2 and 4 wide for MMXEXT, 8 wide for SSE2 and AVX */
CHROMA_MC(put, 2, 10, mmxext)
CHROMA_MC(avg, 2, 10, mmxext)
CHROMA_MC(put, 4, 10, mmxext)
CHROMA_MC(avg, 4, 10, mmxext)
CHROMA_MC(put, 8, 10, sse2)
CHROMA_MC(avg, 8, 10, sse2)
CHROMA_MC(put, 8, 10, avx)
CHROMA_MC(avg, 8, 10, avx)
||||
|
||||
/**
 * Install the x86 chroma kernels into @p c.
 *
 * Kernels exist for bit depths of 8 or less and for exactly 10 bits; for
 * any other depth the tables are left untouched. Within each group the
 * instruction sets are tested in a fixed order and later matches
 * overwrite entries set by earlier ones.
 *
 * @param c         context whose tables may be overwritten
 * @param bit_depth sample bit depth
 */
void ff_h264chroma_init_x86(H264ChromaContext *c, int bit_depth)
{
    int mm_flags = av_get_cpu_flags();

    if (bit_depth <= 8) {
        if (EXTERNAL_MMX(mm_flags)) {
            c->put_h264_chroma_pixels_tab[0] = ff_put_h264_chroma_mc8_rnd_mmx;
            c->put_h264_chroma_pixels_tab[1] = ff_put_h264_chroma_mc4_mmx;
        }

        if (EXTERNAL_AMD3DNOW(mm_flags)) {
            c->avg_h264_chroma_pixels_tab[0] = ff_avg_h264_chroma_mc8_rnd_3dnow;
            c->avg_h264_chroma_pixels_tab[1] = ff_avg_h264_chroma_mc4_3dnow;
        }

        if (EXTERNAL_MMXEXT(mm_flags)) {
            c->avg_h264_chroma_pixels_tab[0] = ff_avg_h264_chroma_mc8_rnd_mmxext;
            c->avg_h264_chroma_pixels_tab[1] = ff_avg_h264_chroma_mc4_mmxext;
            c->avg_h264_chroma_pixels_tab[2] = ff_avg_h264_chroma_mc2_mmxext;
            c->put_h264_chroma_pixels_tab[2] = ff_put_h264_chroma_mc2_mmxext;
        }

        if (EXTERNAL_SSSE3(mm_flags)) {
            c->put_h264_chroma_pixels_tab[0] = ff_put_h264_chroma_mc8_rnd_ssse3;
            c->avg_h264_chroma_pixels_tab[0] = ff_avg_h264_chroma_mc8_rnd_ssse3;
            c->put_h264_chroma_pixels_tab[1] = ff_put_h264_chroma_mc4_ssse3;
            c->avg_h264_chroma_pixels_tab[1] = ff_avg_h264_chroma_mc4_ssse3;
        }
    } else if (bit_depth == 10) {
        if (EXTERNAL_MMXEXT(mm_flags)) {
            c->put_h264_chroma_pixels_tab[2] = ff_put_h264_chroma_mc2_10_mmxext;
            c->avg_h264_chroma_pixels_tab[2] = ff_avg_h264_chroma_mc2_10_mmxext;
            c->put_h264_chroma_pixels_tab[1] = ff_put_h264_chroma_mc4_10_mmxext;
            c->avg_h264_chroma_pixels_tab[1] = ff_avg_h264_chroma_mc4_10_mmxext;
        }

        if (EXTERNAL_SSE2(mm_flags)) {
            c->put_h264_chroma_pixels_tab[0] = ff_put_h264_chroma_mc8_10_sse2;
            c->avg_h264_chroma_pixels_tab[0] = ff_avg_h264_chroma_mc8_10_sse2;
        }

        if (EXTERNAL_AVX(mm_flags)) {
            // AVX implies !cache64.
            // TODO: Port cache(32|64) detection from x264.
            c->put_h264_chroma_pixels_tab[0] = ff_put_h264_chroma_mc8_10_avx;
            c->avg_h264_chroma_pixels_tab[0] = ff_avg_h264_chroma_mc8_10_avx;
        }
    }
}
Loading…
Reference in new issue