mirror of https://github.com/FFmpeg/FFmpeg.git
* commit 'e3fcb14347466095839c2a3c47ebecff02da891e': dsputil: Split off IDCT bits into their own context Conflicts: configure libavcodec/aic.c libavcodec/arm/Makefile libavcodec/arm/dsputil_init_arm.c libavcodec/arm/dsputil_init_armv6.c libavcodec/asvdec.c libavcodec/dnxhdenc.c libavcodec/dsputil.c libavcodec/dvdec.c libavcodec/dxva2_mpeg2.c libavcodec/intrax8.c libavcodec/mdec.c libavcodec/mjpegdec.c libavcodec/mjpegenc_common.h libavcodec/mpegvideo.c libavcodec/ppc/dsputil_altivec.h libavcodec/ppc/dsputil_ppc.c libavcodec/ppc/idctdsp.c libavcodec/x86/Makefile libavcodec/x86/dsputil_init.c libavcodec/x86/dsputil_mmx.c libavcodec/x86/dsputil_x86.h Merged-by: Michael Niedermayer <michaelni@gmx.at>pull/76/merge
commit
581b5f0b9b
84 changed files with 1332 additions and 996 deletions
@ -1,5 +1,5 @@ |
||||
@
|
||||
@ ARMv4 optimized DSP utils
|
||||
@ ARMv4-optimized IDCT functions
|
||||
@ Copyright (c) 2004 AGAWA Koji <i (AT) atty (DOT) jp>
|
||||
@
|
||||
@ This file is part of FFmpeg.
|
@ -0,0 +1,34 @@ |
||||
/*
|
||||
* Copyright (c) 2009 Mans Rullgard <mans@mansr.com> |
||||
* |
||||
* This file is part of FFmpeg. |
||||
* |
||||
* FFmpeg is free software; you can redistribute it and/or |
||||
* modify it under the terms of the GNU Lesser General Public |
||||
* License as published by the Free Software Foundation; either |
||||
* version 2.1 of the License, or (at your option) any later version. |
||||
* |
||||
* FFmpeg is distributed in the hope that it will be useful, |
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of |
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
||||
* Lesser General Public License for more details. |
||||
* |
||||
* You should have received a copy of the GNU Lesser General Public |
||||
* License along with FFmpeg; if not, write to the Free Software |
||||
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA |
||||
*/ |
||||
|
||||
#ifndef AVCODEC_ARM_IDCTDSP_ARM_H |
||||
#define AVCODEC_ARM_IDCTDSP_ARM_H |
||||
|
||||
#include "libavcodec/avcodec.h" |
||||
#include "libavcodec/idctdsp.h" |
||||
|
||||
/*
 * ARM IDCTDSPContext initializers, one per instruction-set level.
 * ff_idctdsp_init_arm() calls each of them when the corresponding CPU
 * feature is detected, passing its own arguments through unchanged.
 */
void ff_idctdsp_init_armv5te(IDCTDSPContext *c,
                             AVCodecContext *avctx,
                             unsigned high_bit_depth);
void ff_idctdsp_init_armv6(IDCTDSPContext *c,
                           AVCodecContext *avctx,
                           unsigned high_bit_depth);
void ff_idctdsp_init_neon(IDCTDSPContext *c,
                          AVCodecContext *avctx,
                          unsigned high_bit_depth);
||||
|
||||
#endif /* AVCODEC_ARM_IDCTDSP_ARM_H */ |
@ -0,0 +1,48 @@ |
||||
/* |
||||
* Copyright (c) 2009 Mans Rullgard <mans@mansr.com>
|
||||
* |
||||
* This file is part of FFmpeg. |
||||
* |
||||
* FFmpeg is free software; you can redistribute it and/or
|
||||
* modify it under the terms of the GNU Lesser General Public |
||||
* License as published by the Free Software Foundation; either
|
||||
* version 2.1 of the License, or (at your option) any later version. |
||||
* |
||||
* FFmpeg is distributed in the hope that it will be useful, |
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
||||
* Lesser General Public License for more details. |
||||
* |
||||
* You should have received a copy of the GNU Lesser General Public |
||||
* License along with FFmpeg; if not, write to the Free Software
|
||||
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA |
||||
*/ |
||||
|
||||
#include "libavutil/arm/asm.S" |
||||
|
||||
@ void ff_add_pixels_clamped_armv6(const int16_t *block, uint8_t *pixels,
@                                  int line_size)
@
@ Adds eight rows of eight 16-bit coefficients to eight rows of eight bytes
@ at pixels, saturating every sum to 0..255.  Register roles assume the
@ standard ARM argument order: r0 = block, r1 = pixels, r2 = line_size.
function ff_add_pixels_clamped_armv6, export=1
        push            {r4-r8,lr}
        mov             r3,  #8                 @ r3 = rows left
1:
        ldm             r0!, {r4,r5,r12,lr}     @ one row: 8 coefficients, r0 advances
        ldrd            r6,  r7,  [r1]          @ one row: 8 destination bytes
        @ Split the coefficients into even/odd halfword pairs so they line
        @ up with the even/odd bytes that uxtab16 extracts below.
        pkhbt           r8,  r4,  r5,  lsl #16  @ r8 = coefficients 0 and 2
        pkhtb           r5,  r5,  r4,  asr #16  @ r5 = coefficients 1 and 3
        pkhbt           r4,  r12, lr,  lsl #16  @ r4 = coefficients 4 and 6
        pkhtb           lr,  lr,  r12, asr #16  @ lr = coefficients 5 and 7
        pld             [r1, r2]                @ prefetch the following row
        uxtab16         r8,  r8,  r6            @ add bytes 0 and 2
        uxtab16         r5,  r5,  r6,  ror #8   @ add bytes 1 and 3
        uxtab16         r4,  r4,  r7            @ add bytes 4 and 6
        uxtab16         lr,  lr,  r7,  ror #8   @ add bytes 5 and 7
        usat16          r8,  #8,  r8            @ clamp both halfwords to 0..255
        usat16          r5,  #8,  r5
        usat16          r4,  #8,  r4
        usat16          lr,  #8,  lr
        orr             r6,  r8,  r5,  lsl #8   @ re-interleave into bytes 0..3
        orr             r7,  r4,  lr,  lsl #8   @ re-interleave into bytes 4..7
        subs            r3,  r3,  #1
        @ strd_post is a macro from asm.S; presumably it stores r6/r7 at
        @ [r1] and then advances r1 by r2 -- confirm against asm.S.
        strd_post       r6,  r7,  r1,  r2
        bgt             1b
        pop             {r4-r8,pc}
endfunc
@ -0,0 +1,98 @@ |
||||
/*
|
||||
* ARM-optimized IDCT functions |
||||
* Copyright (c) 2001 Lionel Ulmer |
||||
* |
||||
* This file is part of FFmpeg. |
||||
* |
||||
* FFmpeg is free software; you can redistribute it and/or |
||||
* modify it under the terms of the GNU Lesser General Public |
||||
* License as published by the Free Software Foundation; either |
||||
* version 2.1 of the License, or (at your option) any later version. |
||||
* |
||||
* FFmpeg is distributed in the hope that it will be useful, |
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of |
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
||||
* Lesser General Public License for more details. |
||||
* |
||||
* You should have received a copy of the GNU Lesser General Public |
||||
* License along with FFmpeg; if not, write to the Free Software |
||||
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA |
||||
*/ |
||||
|
||||
#include <stdint.h> |
||||
|
||||
#include "libavutil/attributes.h" |
||||
#include "libavutil/cpu.h" |
||||
#include "libavutil/arm/cpu.h" |
||||
#include "libavcodec/avcodec.h" |
||||
#include "libavcodec/idctdsp.h" |
||||
#include "idctdsp_arm.h" |
||||
|
||||
/* IDCT routines implemented outside this file; each takes the coefficient
 * block as a writable int16_t pointer. */
void ff_j_rev_dct_arm(int16_t *data);
void ff_simple_idct_arm(int16_t *data);

/* Clamped-add routine installed into the context by ff_idctdsp_init_arm(). */
void ff_add_pixels_clamped_arm(const int16_t *block, uint8_t *dest,
                               int line_size);

/* XXX: local hack -- file-scope copies of the context's clamp routines.
 * ff_idctdsp_init_arm() fills them in so that the static IDCT wrappers in
 * this file, which receive no context argument, can still call them. */
static void (*ff_put_pixels_clamped)(const int16_t *block,
                                     uint8_t *pixels,
                                     int line_size);
static void (*ff_add_pixels_clamped)(const int16_t *block,
                                     uint8_t *pixels,
                                     int line_size);
||||
|
||||
/* XXX: these wrapper functions should be removed as soon as all IDCTs have
 * been converted */
||||
static void j_rev_dct_arm_put(uint8_t *dest, int line_size, int16_t *block) |
||||
{ |
||||
ff_j_rev_dct_arm(block); |
||||
ff_put_pixels_clamped(block, dest, line_size); |
||||
} |
||||
|
||||
static void j_rev_dct_arm_add(uint8_t *dest, int line_size, int16_t *block) |
||||
{ |
||||
ff_j_rev_dct_arm(block); |
||||
ff_add_pixels_clamped(block, dest, line_size); |
||||
} |
||||
|
||||
static void simple_idct_arm_put(uint8_t *dest, int line_size, int16_t *block) |
||||
{ |
||||
ff_simple_idct_arm(block); |
||||
ff_put_pixels_clamped(block, dest, line_size); |
||||
} |
||||
|
||||
static void simple_idct_arm_add(uint8_t *dest, int line_size, int16_t *block) |
||||
{ |
||||
ff_simple_idct_arm(block); |
||||
ff_add_pixels_clamped(block, dest, line_size); |
||||
} |
||||
|
||||
/**
 * Install the ARM implementations into an IDCTDSPContext.
 *
 * The clamp routines currently stored in @p c are first copied into the
 * file-scope pointers used by the static IDCT wrappers.  An ARM IDCT is
 * then selected (only when lowres is 0 and the content is not high bit
 * depth), the clamped-add routine is replaced, and finally the
 * ARMv5TE / ARMv6 / NEON initializers run, in that order, for every CPU
 * feature that is present.
 *
 * @param c              context to fill in
 * @param avctx          codec context providing lowres and idct_algo
 * @param high_bit_depth nonzero to skip the IDCT selection
 */
av_cold void ff_idctdsp_init_arm(IDCTDSPContext *c, AVCodecContext *avctx,
                                 unsigned high_bit_depth)
{
    const int cpu_flags = av_get_cpu_flags();

    /* Capture the routines before anything below overwrites them. */
    ff_put_pixels_clamped = c->put_pixels_clamped;
    ff_add_pixels_clamped = c->add_pixels_clamped;

    if (avctx->lowres == 0 && high_bit_depth == 0) {
        switch (avctx->idct_algo) {
        case FF_IDCT_AUTO:
        case FF_IDCT_ARM:
            c->idct_put              = j_rev_dct_arm_put;
            c->idct_add              = j_rev_dct_arm_add;
            c->idct                  = ff_j_rev_dct_arm;
            c->idct_permutation_type = FF_LIBMPEG2_IDCT_PERM;
            break;
        case FF_IDCT_SIMPLEARM:
            c->idct_put              = simple_idct_arm_put;
            c->idct_add              = simple_idct_arm_add;
            c->idct                  = ff_simple_idct_arm;
            c->idct_permutation_type = FF_NO_IDCT_PERM;
            break;
        default:
            /* any other algorithm: keep what the caller already set */
            break;
        }
    }

    c->add_pixels_clamped = ff_add_pixels_clamped_arm;

    /* Later initializers may override the choices of earlier ones. */
    if (have_armv5te(cpu_flags)) {
        ff_idctdsp_init_armv5te(c, avctx, high_bit_depth);
    }
    if (have_armv6(cpu_flags)) {
        ff_idctdsp_init_armv6(c, avctx, high_bit_depth);
    }
    if (have_neon(cpu_flags)) {
        ff_idctdsp_init_neon(c, avctx, high_bit_depth);
    }
}
@ -0,0 +1,48 @@ |
||||
/*
|
||||
* Copyright (c) 2009 Mans Rullgard <mans@mansr.com> |
||||
* |
||||
* This file is part of FFmpeg. |
||||
* |
||||
* FFmpeg is free software; you can redistribute it and/or |
||||
* modify it under the terms of the GNU Lesser General Public |
||||
* License as published by the Free Software Foundation; either |
||||
* version 2.1 of the License, or (at your option) any later version. |
||||
* |
||||
* FFmpeg is distributed in the hope that it will be useful, |
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of |
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
||||
* Lesser General Public License for more details. |
||||
* |
||||
* You should have received a copy of the GNU Lesser General Public |
||||
* License along with FFmpeg; if not, write to the Free Software |
||||
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA |
||||
*/ |
||||
|
||||
#include <stdint.h> |
||||
|
||||
#include "libavutil/attributes.h" |
||||
#include "libavcodec/avcodec.h" |
||||
#include "libavcodec/idctdsp.h" |
||||
#include "idctdsp_arm.h" |
||||
|
||||
/* ARMv6 IDCT entry points, defined outside this file: in-place transform,
 * transform-and-store, and transform-and-add variants. */
void ff_simple_idct_armv6(int16_t *data);
void ff_simple_idct_put_armv6(uint8_t *dest,
                              int line_size,
                              int16_t *data);
void ff_simple_idct_add_armv6(uint8_t *dest,
                              int line_size,
                              int16_t *data);

/* ARMv6 clamped-add routine installed by ff_idctdsp_init_armv6(). */
void ff_add_pixels_clamped_armv6(const int16_t *block,
                                 uint8_t *pixels,
                                 int line_size);
||||
|
||||
/**
 * Install the ARMv6 routines into an IDCTDSPContext.
 *
 * The ARMv6 simple IDCT is chosen when lowres is 0, the content is not
 * high bit depth, and the requested algorithm is FF_IDCT_AUTO or
 * FF_IDCT_SIMPLEARMV6.  The ARMv6 clamped-add routine is installed
 * unconditionally.
 *
 * @param c              context to fill in
 * @param avctx          codec context providing lowres and idct_algo
 * @param high_bit_depth nonzero to skip the IDCT selection
 */
av_cold void ff_idctdsp_init_armv6(IDCTDSPContext *c, AVCodecContext *avctx,
                                   unsigned high_bit_depth)
{
    const int eligible = !avctx->lowres && !high_bit_depth;

    if (eligible && (avctx->idct_algo == FF_IDCT_AUTO ||
                     avctx->idct_algo == FF_IDCT_SIMPLEARMV6)) {
        c->idct_put              = ff_simple_idct_put_armv6;
        c->idct_add              = ff_simple_idct_add_armv6;
        c->idct                  = ff_simple_idct_armv6;
        c->idct_permutation_type = FF_LIBMPEG2_IDCT_PERM;
    }

    c->add_pixels_clamped = ff_add_pixels_clamped_armv6;
}
@ -1,5 +1,5 @@ |
||||
/* |
||||
* ARM NEON optimised DSP functions |
||||
* ARM-NEON-optimized IDCT functions |
||||
* Copyright (c) 2008 Mans Rullgard <mans@mansr.com>
|
||||
* |
||||
* This file is part of FFmpeg. |
@ -0,0 +1,311 @@ |
||||
/*
|
||||
* This file is part of FFmpeg. |
||||
* |
||||
* FFmpeg is free software; you can redistribute it and/or |
||||
* modify it under the terms of the GNU Lesser General Public |
||||
* License as published by the Free Software Foundation; either |
||||
* version 2.1 of the License, or (at your option) any later version. |
||||
* |
||||
* FFmpeg is distributed in the hope that it will be useful, |
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of |
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
||||
* Lesser General Public License for more details. |
||||
* |
||||
* You should have received a copy of the GNU Lesser General Public |
||||
* License along with FFmpeg; if not, write to the Free Software |
||||
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA |
||||
*/ |
||||
|
||||
#include "config.h" |
||||
#include "libavutil/attributes.h" |
||||
#include "libavutil/common.h" |
||||
#include "avcodec.h" |
||||
#include "dct.h" |
||||
#include "faanidct.h" |
||||
#include "idctdsp.h" |
||||
#include "simple_idct.h" |
||||
|
||||
/**
 * Fill a ScanTable from a source scan order and an IDCT permutation.
 *
 * @param permutation   lookup table indexed by the entries of
 *                      @p src_scantable
 * @param st            table to initialize
 * @param src_scantable 64-entry scan order; the pointer itself is stored in
 *                      st->scantable
 */
av_cold void ff_init_scantable(uint8_t *permutation, ScanTable *st,
                               const uint8_t *src_scantable)
{
    int pos;
    int highest = -1;

    st->scantable = src_scantable;

    /* permutated[] is the scan order routed through the permutation. */
    for (pos = 0; pos < 64; pos++) {
        st->permutated[pos] = permutation[src_scantable[pos]];
    }

    /* raster_end[pos] is the largest permutated index among scan
     * positions 0..pos (a running maximum). */
    for (pos = 0; pos < 64; pos++) {
        const int cur = st->permutated[pos];

        if (cur > highest) {
            highest = cur;
        }
        st->raster_end[pos] = highest;
    }
}
||||
|
||||
av_cold void ff_init_scantable_permutation(uint8_t *idct_permutation, |
||||
int idct_permutation_type) |
||||
{ |
||||
int i; |
||||
|
||||
if (ARCH_X86) |
||||
if (ff_init_scantable_permutation_x86(idct_permutation, |
||||
idct_permutation_type)) |
||||
return; |
||||
|
||||
switch (idct_permutation_type) { |
||||
case FF_NO_IDCT_PERM: |
||||
for (i = 0; i < 64; i++) |
||||
idct_permutation[i] = i; |
||||
break; |
||||
case FF_LIBMPEG2_IDCT_PERM: |
||||
for (i = 0; i < 64; i++) |
||||
idct_permutation[i] = (i & 0x38) | ((i & 6) >> 1) | ((i & 1) << 2); |
||||
break; |
||||
case FF_TRANSPOSE_IDCT_PERM: |
||||
for (i = 0; i < 64; i++) |
||||
idct_permutation[i] = ((i & 7) << 3) | (i >> 3); |
||||
break; |
||||
case FF_PARTTRANS_IDCT_PERM: |
||||
for (i = 0; i < 64; i++) |
||||
idct_permutation[i] = (i & 0x24) | ((i & 3) << 3) | ((i >> 3) & 3); |
||||
break; |
||||
default: |
||||
av_log(NULL, AV_LOG_ERROR, |
||||
"Internal error, IDCT permutation not set\n"); |
||||
} |
||||
} |
||||
|
||||
/**
 * Store an 8x8 block of coefficients as pixels, clipping each value with
 * av_clip_uint8().
 *
 * @param block     64 coefficients, eight per row
 * @param pixels    destination; rows are @p line_size bytes apart
 * @param line_size destination stride in bytes
 */
static void put_pixels_clamped_c(const int16_t *block, uint8_t *av_restrict pixels,
                                 int line_size)
{
    int row, col;

    for (row = 0; row < 8; row++) {
        for (col = 0; col < 8; col++) {
            pixels[col] = av_clip_uint8(block[col]);
        }
        block  += 8;
        pixels += line_size;
    }
}
||||
|
||||
/**
 * Store the top-left 4x4 part of a block as pixels, clipping each value
 * with av_clip_uint8().  Source rows remain eight coefficients apart.
 *
 * @param block     coefficient block with a row pitch of 8
 * @param pixels    destination; rows are @p line_size bytes apart
 * @param line_size destination stride in bytes
 */
static void put_pixels_clamped4_c(const int16_t *block, uint8_t *av_restrict pixels,
                                  int line_size)
{
    int row, col;

    for (row = 0; row < 4; row++) {
        for (col = 0; col < 4; col++) {
            pixels[col] = av_clip_uint8(block[col]);
        }
        block  += 8;
        pixels += line_size;
    }
}
||||
|
||||
/**
 * Store the top-left 2x2 part of a block as pixels, clipping each value
 * with av_clip_uint8().  Source rows remain eight coefficients apart.
 *
 * @param block     coefficient block with a row pitch of 8
 * @param pixels    destination; rows are @p line_size bytes apart
 * @param line_size destination stride in bytes
 */
static void put_pixels_clamped2_c(const int16_t *block, uint8_t *av_restrict pixels,
                                  int line_size)
{
    int row, col;

    for (row = 0; row < 2; row++) {
        for (col = 0; col < 2; col++) {
            pixels[col] = av_clip_uint8(block[col]);
        }
        block  += 8;
        pixels += line_size;
    }
}
||||
|
||||
static void put_signed_pixels_clamped_c(const int16_t *block, |
||||
uint8_t *av_restrict pixels, |
||||
int line_size) |
||||
{ |
||||
int i, j; |
||||
|
||||
for (i = 0; i < 8; i++) { |
||||
for (j = 0; j < 8; j++) { |
||||
if (*block < -128) |
||||
*pixels = 0; |
||||
else if (*block > 127) |
||||
*pixels = 255; |
||||
else |
||||
*pixels = (uint8_t) (*block + 128); |
||||
block++; |
||||
pixels++; |
||||
} |
||||
pixels += (line_size - 8); |
||||
} |
||||
} |
||||
|
||||
/**
 * Add an 8x8 block of coefficients to the existing pixels, clipping each
 * sum with av_clip_uint8().
 *
 * @param block     64 coefficients, eight per row
 * @param pixels    pixels updated in place; rows are @p line_size bytes apart
 * @param line_size pixel stride in bytes
 */
static void add_pixels_clamped_c(const int16_t *block, uint8_t *av_restrict pixels,
                                 int line_size)
{
    int row, col;

    for (row = 0; row < 8; row++) {
        for (col = 0; col < 8; col++) {
            pixels[col] = av_clip_uint8(pixels[col] + block[col]);
        }
        block  += 8;
        pixels += line_size;
    }
}
||||
|
||||
/**
 * Add the top-left 4x4 part of a block to the existing pixels, clipping
 * each sum with av_clip_uint8().  Source rows remain eight coefficients
 * apart.
 *
 * @param block     coefficient block with a row pitch of 8
 * @param pixels    pixels updated in place; rows are @p line_size bytes apart
 * @param line_size pixel stride in bytes
 */
static void add_pixels_clamped4_c(const int16_t *block, uint8_t *av_restrict pixels,
                                  int line_size)
{
    int row, col;

    for (row = 0; row < 4; row++) {
        for (col = 0; col < 4; col++) {
            pixels[col] = av_clip_uint8(pixels[col] + block[col]);
        }
        block  += 8;
        pixels += line_size;
    }
}
||||
|
||||
/**
 * Add the top-left 2x2 part of a block to the existing pixels, clipping
 * each sum with av_clip_uint8().  Source rows remain eight coefficients
 * apart.
 *
 * @param block     coefficient block with a row pitch of 8
 * @param pixels    pixels updated in place; rows are @p line_size bytes apart
 * @param line_size pixel stride in bytes
 */
static void add_pixels_clamped2_c(const int16_t *block, uint8_t *av_restrict pixels,
                                  int line_size)
{
    int row, col;

    for (row = 0; row < 2; row++) {
        for (col = 0; col < 2; col++) {
            pixels[col] = av_clip_uint8(pixels[col] + block[col]);
        }
        block  += 8;
        pixels += line_size;
    }
}
||||
|
||||
/**
 * Run ff_j_rev_dct() on @p block, then store the result into @p dest with
 * put_pixels_clamped_c().
 */
static void jref_idct_put(uint8_t *dest, int line_size, int16_t *block)
{
    int16_t *const coeffs = block;

    ff_j_rev_dct(coeffs);
    put_pixels_clamped_c(coeffs, dest, line_size);
}
||||
|
||||
/**
 * Run ff_j_rev_dct() on @p block, then add the result onto @p dest with
 * add_pixels_clamped_c().
 */
static void jref_idct_add(uint8_t *dest, int line_size, int16_t *block)
{
    int16_t *const coeffs = block;

    ff_j_rev_dct(coeffs);
    add_pixels_clamped_c(coeffs, dest, line_size);
}
||||
/**
 * Run ff_j_rev_dct4() on @p block, then store the 4x4 result into @p dest
 * with put_pixels_clamped4_c().
 */
static void ff_jref_idct4_put(uint8_t *dest, int line_size, int16_t *block)
{
    int16_t *const coeffs = block;

    ff_j_rev_dct4(coeffs);
    put_pixels_clamped4_c(coeffs, dest, line_size);
}
||||
/**
 * Run ff_j_rev_dct4() on @p block, then add the 4x4 result onto @p dest
 * with add_pixels_clamped4_c().
 */
static void ff_jref_idct4_add(uint8_t *dest, int line_size, int16_t *block)
{
    int16_t *const coeffs = block;

    ff_j_rev_dct4(coeffs);
    add_pixels_clamped4_c(coeffs, dest, line_size);
}
||||
|
||||
/**
 * Run ff_j_rev_dct2() on @p block, then store the 2x2 result into @p dest
 * with put_pixels_clamped2_c().
 */
static void ff_jref_idct2_put(uint8_t *dest, int line_size, int16_t *block)
{
    int16_t *const coeffs = block;

    ff_j_rev_dct2(coeffs);
    put_pixels_clamped2_c(coeffs, dest, line_size);
}
||||
/**
 * Run ff_j_rev_dct2() on @p block, then add the 2x2 result onto @p dest
 * with add_pixels_clamped2_c().
 */
static void ff_jref_idct2_add(uint8_t *dest, int line_size, int16_t *block)
{
    int16_t *const coeffs = block;

    ff_j_rev_dct2(coeffs);
    add_pixels_clamped2_c(coeffs, dest, line_size);
}
||||
|
||||
/**
 * Single-pixel "IDCT": write the first coefficient, rounded and divided by
 * eight, to dest[0] after clipping.  @p line_size is not used.
 */
static void ff_jref_idct1_put(uint8_t *dest, int line_size, int16_t *block)
{
    const int dc = (block[0] + 4) >> 3;

    dest[0] = av_clip_uint8(dc);
}
||||
/**
 * Single-pixel "IDCT": add the first coefficient, rounded and divided by
 * eight, to dest[0] and clip the sum.  @p line_size is not used.
 */
static void ff_jref_idct1_add(uint8_t *dest, int line_size, int16_t *block)
{
    const int dc = (block[0] + 4) >> 3;

    dest[0] = av_clip_uint8(dest[0] + dc);
}
||||
|
||||
/**
 * Initialize an IDCTDSPContext.
 *
 * Selection order:
 *  - lowres 1, 2 or 3 picks the 4x4, 2x2 or 1x1 reference IDCT;
 *  - otherwise bits_per_raw_sample of 10 or 12 picks the matching simple
 *    IDCT;
 *  - otherwise idct_algo picks the integer reference IDCT, the FAAN IDCT,
 *    or (for anything else) the 8-bit simple IDCT.
 *
 * The C clamp routines are installed next, then the architecture-specific
 * initializers may override any of the above, and finally the permutation
 * table is built for whichever permutation type ended up selected.
 *
 * @param c     context to initialize
 * @param avctx codec context providing lowres, bits_per_raw_sample and
 *              idct_algo
 */
av_cold void ff_idctdsp_init(IDCTDSPContext *c, AVCodecContext *avctx)
{
    const unsigned high_bit_depth = avctx->bits_per_raw_sample > 8;

    switch (avctx->lowres) {
    case 1:
        c->idct_put              = ff_jref_idct4_put;
        c->idct_add              = ff_jref_idct4_add;
        c->idct                  = ff_j_rev_dct4;
        c->idct_permutation_type = FF_NO_IDCT_PERM;
        break;
    case 2:
        c->idct_put              = ff_jref_idct2_put;
        c->idct_add              = ff_jref_idct2_add;
        c->idct                  = ff_j_rev_dct2;
        c->idct_permutation_type = FF_NO_IDCT_PERM;
        break;
    case 3:
        c->idct_put              = ff_jref_idct1_put;
        c->idct_add              = ff_jref_idct1_add;
        c->idct                  = ff_j_rev_dct1;
        c->idct_permutation_type = FF_NO_IDCT_PERM;
        break;
    default:
        /* full resolution (or any other lowres value) */
        if (avctx->bits_per_raw_sample == 10) {
            c->idct_put              = ff_simple_idct_put_10;
            c->idct_add              = ff_simple_idct_add_10;
            c->idct                  = ff_simple_idct_10;
            c->idct_permutation_type = FF_NO_IDCT_PERM;
        } else if (avctx->bits_per_raw_sample == 12) {
            c->idct_put              = ff_simple_idct_put_12;
            c->idct_add              = ff_simple_idct_add_12;
            c->idct                  = ff_simple_idct_12;
            c->idct_permutation_type = FF_NO_IDCT_PERM;
        } else if (avctx->idct_algo == FF_IDCT_INT) {
            c->idct_put              = jref_idct_put;
            c->idct_add              = jref_idct_add;
            c->idct                  = ff_j_rev_dct;
            c->idct_permutation_type = FF_LIBMPEG2_IDCT_PERM;
        } else if (avctx->idct_algo == FF_IDCT_FAAN) {
            c->idct_put              = ff_faanidct_put;
            c->idct_add              = ff_faanidct_add;
            c->idct                  = ff_faanidct;
            c->idct_permutation_type = FF_NO_IDCT_PERM;
        } else {
            /* accurate/default */
            c->idct_put              = ff_simple_idct_put_8;
            c->idct_add              = ff_simple_idct_add_8;
            c->idct                  = ff_simple_idct_8;
            c->idct_permutation_type = FF_NO_IDCT_PERM;
        }
        break;
    }

    c->put_pixels_clamped        = put_pixels_clamped_c;
    c->put_signed_pixels_clamped = put_signed_pixels_clamped_c;
    c->add_pixels_clamped        = add_pixels_clamped_c;

    /* Architecture-specific overrides; ARCH_* are build-time constants. */
    if (ARCH_ARM) {
        ff_idctdsp_init_arm(c, avctx, high_bit_depth);
    }
    if (ARCH_PPC) {
        ff_idctdsp_init_ppc(c, avctx, high_bit_depth);
    }
    if (ARCH_X86) {
        ff_idctdsp_init_x86(c, avctx, high_bit_depth);
    }

    ff_init_scantable_permutation(c->idct_permutation,
                                  c->idct_permutation_type);
}
@ -0,0 +1,104 @@ |
||||
/*
|
||||
* This file is part of FFmpeg. |
||||
* |
||||
* FFmpeg is free software; you can redistribute it and/or |
||||
* modify it under the terms of the GNU Lesser General Public |
||||
* License as published by the Free Software Foundation; either |
||||
* version 2.1 of the License, or (at your option) any later version. |
||||
* |
||||
* FFmpeg is distributed in the hope that it will be useful, |
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of |
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
||||
* Lesser General Public License for more details. |
||||
* |
||||
* You should have received a copy of the GNU Lesser General Public |
||||
* License along with FFmpeg; if not, write to the Free Software |
||||
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA |
||||
*/ |
||||
|
||||
#ifndef AVCODEC_IDCTDSP_H |
||||
#define AVCODEC_IDCTDSP_H |
||||
|
||||
#include <stdint.h> |
||||
|
||||
#include "avcodec.h" |
||||
|
||||
/**
 * Scan table: a coefficient scan order together with the data derived from
 * it by ff_init_scantable().
 */
typedef struct ScanTable {
    /* source scan order, as passed to ff_init_scantable() */
    const uint8_t *scantable;
    /* scan order after the IDCT permutation has been applied */
    uint8_t permutated[64];
    /* raster_end[i]: largest permutated index among scan positions 0..i */
    uint8_t raster_end[64];
} ScanTable;
||||
|
||||
/* Fill st from the scan order src_scantable routed through permutation. */
void ff_init_scantable(uint8_t *permutation,
                       ScanTable *st,
                       const uint8_t *src_scantable);

/* Build the 64-entry permutation table for the given FF_*_IDCT_PERM type. */
void ff_init_scantable_permutation(uint8_t *idct_permutation,
                                   int idct_permutation_type);

/* x86-only permutation types; returns nonzero when the type was handled. */
int ff_init_scantable_permutation_x86(uint8_t *idct_permutation,
                                      int idct_permutation_type);
||||
|
||||
/**
 * Function pointers and permutation data for the IDCT stage; filled in by
 * ff_idctdsp_init().
 */
typedef struct IDCTDSPContext {
    /* Pixel operations: the interface between the DCT and the picture. */
    void (*put_pixels_clamped)(const int16_t *block /* align 16 */,
                               uint8_t *pixels /* align 8 */,
                               int line_size);
    void (*put_signed_pixels_clamped)(const int16_t *block /* align 16 */,
                                      uint8_t *pixels /* align 8 */,
                                      int line_size);
    void (*add_pixels_clamped)(const int16_t *block /* align 16 */,
                               uint8_t *pixels /* align 8 */,
                               int line_size);

    /* Inverse transform of one coefficient block. */
    void (*idct)(int16_t *block /* align 16 */);

    /**
     * block -> idct -> clip to unsigned 8 bit -> dest.
     * Example: (-1392, 0, 0, ...) -> idct -> (-174, -174, ...)
     *          -> put -> (0, 0, ...)
     * @param line_size size in bytes of a horizontal line of dest
     */
    void (*idct_put)(uint8_t *dest /* align 8 */,
                     int line_size, int16_t *block /* align 16 */);

    /**
     * block -> idct -> add dest -> clip to unsigned 8 bit -> dest.
     * @param line_size size in bytes of a horizontal line of dest
     */
    void (*idct_add)(uint8_t *dest /* align 8 */,
                     int line_size, int16_t *block /* align 16 */);

    /**
     * IDCT input permutation.
     * Several optimized IDCTs expect their input in a permutated order
     * (relative to the normal order of the reference IDCT).
     * The permutation must be applied before idct_put/idct_add is called.
     * Note: normally it can be merged with the zigzag/alternate scan.<br>
     * An example to avoid confusion:
     * - (->decode coeffs -> zigzag reorder -> dequant -> reference IDCT -> ...)
     * - (x -> reference DCT -> reference IDCT -> x)
     * - (x -> reference DCT -> simple_mmx_perm = idct_permutation
     *    -> simple_idct_mmx -> x)
     * - (-> decode coeffs -> zigzag reorder -> simple_mmx_perm -> dequant
     *    -> simple_idct_mmx -> ...)
     */
    uint8_t idct_permutation[64];
    /* one of the FF_*_IDCT_PERM values below */
    int idct_permutation_type;
#define FF_NO_IDCT_PERM 1
#define FF_LIBMPEG2_IDCT_PERM 2
#define FF_SIMPLE_IDCT_PERM 3
#define FF_TRANSPOSE_IDCT_PERM 4
#define FF_PARTTRANS_IDCT_PERM 5
#define FF_SSE2_IDCT_PERM 6
} IDCTDSPContext;
||||
|
||||
/* Generic initializer; selects the IDCT and calls the per-architecture
 * initializers below for the architecture the build targets. */
void ff_idctdsp_init(IDCTDSPContext *c, AVCodecContext *avctx);

/* Per-architecture initializers.  high_bit_depth is nonzero when
 * avctx->bits_per_raw_sample exceeds 8. */
void ff_idctdsp_init_arm(IDCTDSPContext *c,
                         AVCodecContext *avctx,
                         unsigned high_bit_depth);
void ff_idctdsp_init_ppc(IDCTDSPContext *c,
                         AVCodecContext *avctx,
                         unsigned high_bit_depth);
void ff_idctdsp_init_x86(IDCTDSPContext *c,
                         AVCodecContext *avctx,
                         unsigned high_bit_depth);
||||
|
||||
#endif /* AVCODEC_IDCTDSP_H */ |
@ -0,0 +1,33 @@ |
||||
/*
|
||||
* This file is part of FFmpeg. |
||||
* |
||||
* FFmpeg is free software; you can redistribute it and/or |
||||
* modify it under the terms of the GNU Lesser General Public |
||||
* License as published by the Free Software Foundation; either |
||||
* version 2.1 of the License, or (at your option) any later version. |
||||
* |
||||
* FFmpeg is distributed in the hope that it will be useful, |
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of |
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
||||
* Lesser General Public License for more details. |
||||
* |
||||
* You should have received a copy of the GNU Lesser General Public |
||||
* License along with FFmpeg; if not, write to the Free Software |
||||
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA |
||||
*/ |
||||
|
||||
#ifndef AVCODEC_X86_IDCTDSP_H |
||||
#define AVCODEC_X86_IDCTDSP_H |
||||
|
||||
#include <stdint.h> |
||||
|
||||
/* x86 clamp routines; all share the signature of the corresponding
 * IDCTDSPContext function pointers. */
void ff_add_pixels_clamped_mmx(const int16_t *block,
                               uint8_t *pixels,
                               int line_size);
void ff_put_pixels_clamped_mmx(const int16_t *block,
                               uint8_t *pixels,
                               int line_size);
void ff_put_signed_pixels_clamped_mmx(const int16_t *block,
                                      uint8_t *pixels,
                                      int line_size);
void ff_put_signed_pixels_clamped_sse2(const int16_t *block,
                                       uint8_t *pixels,
                                       int line_size);
||||
|
||||
#endif /* AVCODEC_X86_IDCTDSP_H */ |
@ -0,0 +1,112 @@ |
||||
/*
|
||||
* This file is part of FFmpeg. |
||||
* |
||||
* FFmpeg is free software; you can redistribute it and/or |
||||
* modify it under the terms of the GNU Lesser General Public |
||||
* License as published by the Free Software Foundation; either |
||||
* version 2.1 of the License, or (at your option) any later version. |
||||
* |
||||
* FFmpeg is distributed in the hope that it will be useful, |
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of |
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
||||
* Lesser General Public License for more details. |
||||
* |
||||
* You should have received a copy of the GNU Lesser General Public |
||||
* License along with FFmpeg; if not, write to the Free Software |
||||
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA |
||||
*/ |
||||
|
||||
#include "config.h" |
||||
#include "libavutil/attributes.h" |
||||
#include "libavutil/cpu.h" |
||||
#include "libavutil/x86/cpu.h" |
||||
#include "libavcodec/avcodec.h" |
||||
#include "libavcodec/idctdsp.h" |
||||
#include "libavcodec/simple_idct.h" |
||||
#include "idct_xvid.h" |
||||
#include "idctdsp.h" |
||||
|
||||
/* Input permutation expected by simple_idct_mmx: entry i is copied to
 * idct_permutation[i] for FF_SIMPLE_IDCT_PERM. */
static const uint8_t simple_mmx_permutation[64] = {
    /*  0.. 7 */ 0x00, 0x08, 0x04, 0x09, 0x01, 0x0C, 0x05, 0x0D,
    /*  8..15 */ 0x10, 0x18, 0x14, 0x19, 0x11, 0x1C, 0x15, 0x1D,
    /* 16..23 */ 0x20, 0x28, 0x24, 0x29, 0x21, 0x2C, 0x25, 0x2D,
    /* 24..31 */ 0x12, 0x1A, 0x16, 0x1B, 0x13, 0x1E, 0x17, 0x1F,
    /* 32..39 */ 0x02, 0x0A, 0x06, 0x0B, 0x03, 0x0E, 0x07, 0x0F,
    /* 40..47 */ 0x30, 0x38, 0x34, 0x39, 0x31, 0x3C, 0x35, 0x3D,
    /* 48..55 */ 0x22, 0x2A, 0x26, 0x2B, 0x23, 0x2E, 0x27, 0x2F,
    /* 56..63 */ 0x32, 0x3A, 0x36, 0x3B, 0x33, 0x3E, 0x37, 0x3F,
};
||||
|
||||
/* Column order within a row for FF_SSE2_IDCT_PERM; indexed by (i & 7). */
static const uint8_t idct_sse2_row_perm[8] = {
    0, 4, 1, 5, 2, 6, 3, 7,
};
||||
|
||||
av_cold int ff_init_scantable_permutation_x86(uint8_t *idct_permutation, |
||||
int idct_permutation_type) |
||||
{ |
||||
int i; |
||||
|
||||
switch (idct_permutation_type) { |
||||
case FF_SIMPLE_IDCT_PERM: |
||||
for (i = 0; i < 64; i++) |
||||
idct_permutation[i] = simple_mmx_permutation[i]; |
||||
return 1; |
||||
case FF_SSE2_IDCT_PERM: |
||||
for (i = 0; i < 64; i++) |
||||
idct_permutation[i] = (i & 0x38) | idct_sse2_row_perm[i & 7]; |
||||
return 1; |
||||
} |
||||
|
||||
return 0; |
||||
} |
||||
|
||||
/**
 * Install the x86 implementations into an IDCTDSPContext.
 *
 * Clamp routines are replaced whenever the matching CPU feature is
 * available.  An x86 IDCT is only selected when lowres is 0 and the content
 * is not high bit depth; the Xvid IDCT is progressively upgraded from MMX
 * to MMXEXT to SSE2 as features allow.
 *
 * @param c              context to fill in
 * @param avctx          codec context providing lowres and idct_algo
 * @param high_bit_depth nonzero to skip the IDCT selection
 */
av_cold void ff_idctdsp_init_x86(IDCTDSPContext *c, AVCodecContext *avctx,
                                 unsigned high_bit_depth)
{
    const int cpu_flags = av_get_cpu_flags();
    const int plain     = avctx->lowres == 0 && !high_bit_depth;
    const int want_xvid = plain && avctx->idct_algo == FF_IDCT_XVIDMMX;

    if (INLINE_MMX(cpu_flags)) {
        c->put_pixels_clamped = ff_put_pixels_clamped_mmx;
        c->add_pixels_clamped = ff_add_pixels_clamped_mmx;

        if (plain) {
            switch (avctx->idct_algo) {
            case FF_IDCT_AUTO:
            case FF_IDCT_SIMPLEAUTO:
            case FF_IDCT_SIMPLEMMX:
                c->idct_put              = ff_simple_idct_put_mmx;
                c->idct_add              = ff_simple_idct_add_mmx;
                c->idct                  = ff_simple_idct_mmx;
                c->idct_permutation_type = FF_SIMPLE_IDCT_PERM;
                break;
            case FF_IDCT_XVIDMMX:
                /* permutation type is left as the caller set it */
                c->idct_put = ff_idct_xvid_mmx_put;
                c->idct_add = ff_idct_xvid_mmx_add;
                c->idct     = ff_idct_xvid_mmx;
                break;
            default:
                break;
            }
        }
    }
    if (EXTERNAL_MMX(cpu_flags)) {
        c->put_signed_pixels_clamped = ff_put_signed_pixels_clamped_mmx;
    }

    if (INLINE_MMXEXT(cpu_flags) && want_xvid) {
        c->idct_put = ff_idct_xvid_mmxext_put;
        c->idct_add = ff_idct_xvid_mmxext_add;
        c->idct     = ff_idct_xvid_mmxext;
    }

    if (INLINE_SSE2(cpu_flags) && want_xvid) {
        c->idct_put              = ff_idct_xvid_sse2_put;
        c->idct_add              = ff_idct_xvid_sse2_add;
        c->idct                  = ff_idct_xvid_sse2;
        c->idct_permutation_type = FF_SSE2_IDCT_PERM;
    }
    if (EXTERNAL_SSE2(cpu_flags)) {
        c->put_signed_pixels_clamped = ff_put_signed_pixels_clamped_sse2;
    }
}
@ -0,0 +1,133 @@ |
||||
/*
|
||||
* SIMD-optimized IDCT-related routines |
||||
* Copyright (c) 2000, 2001 Fabrice Bellard |
||||
* Copyright (c) 2002-2004 Michael Niedermayer <michaelni@gmx.at> |
||||
* |
||||
* MMX optimization by Nick Kurshev <nickols_k@mail.ru> |
||||
* |
||||
* This file is part of FFmpeg. |
||||
* |
||||
* FFmpeg is free software; you can redistribute it and/or |
||||
* modify it under the terms of the GNU Lesser General Public |
||||
* License as published by the Free Software Foundation; either |
||||
* version 2.1 of the License, or (at your option) any later version. |
||||
* |
||||
* FFmpeg is distributed in the hope that it will be useful, |
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of |
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
||||
* Lesser General Public License for more details. |
||||
* |
||||
* You should have received a copy of the GNU Lesser General Public |
||||
* License along with FFmpeg; if not, write to the Free Software |
||||
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA |
||||
*/ |
||||
|
||||
#include "config.h" |
||||
#include "libavutil/cpu.h" |
||||
#include "libavutil/x86/asm.h" |
||||
#include "idctdsp.h" |
||||
#include "inline_asm.h" |
||||
|
||||
#if HAVE_INLINE_ASM |
||||
|
||||
void ff_put_pixels_clamped_mmx(const int16_t *block, uint8_t *pixels, |
||||
int line_size) |
||||
{ |
||||
const int16_t *p; |
||||
uint8_t *pix; |
||||
|
||||
/* read the pixels */ |
||||
p = block; |
||||
pix = pixels; |
||||
/* unrolled loop */ |
||||
__asm__ volatile ( |
||||
"movq (%3), %%mm0 \n\t" |
||||
"movq 8(%3), %%mm1 \n\t" |
||||
"movq 16(%3), %%mm2 \n\t" |
||||
"movq 24(%3), %%mm3 \n\t" |
||||
"movq 32(%3), %%mm4 \n\t" |
||||
"movq 40(%3), %%mm5 \n\t" |
||||
"movq 48(%3), %%mm6 \n\t" |
||||
"movq 56(%3), %%mm7 \n\t" |
||||
"packuswb %%mm1, %%mm0 \n\t" |
||||
"packuswb %%mm3, %%mm2 \n\t" |
||||
"packuswb %%mm5, %%mm4 \n\t" |
||||
"packuswb %%mm7, %%mm6 \n\t" |
||||
"movq %%mm0, (%0) \n\t" |
||||
"movq %%mm2, (%0, %1) \n\t" |
||||
"movq %%mm4, (%0, %1, 2) \n\t" |
||||
"movq %%mm6, (%0, %2) \n\t" |
||||
:: "r" (pix), "r" ((x86_reg) line_size), "r" ((x86_reg) line_size * 3), |
||||
"r" (p) |
||||
: "memory"); |
||||
pix += line_size * 4; |
||||
p += 32; |
||||
|
||||
// if here would be an exact copy of the code above
|
||||
// compiler would generate some very strange code
|
||||
// thus using "r"
|
||||
__asm__ volatile ( |
||||
"movq (%3), %%mm0 \n\t" |
||||
"movq 8(%3), %%mm1 \n\t" |
||||
"movq 16(%3), %%mm2 \n\t" |
||||
"movq 24(%3), %%mm3 \n\t" |
||||
"movq 32(%3), %%mm4 \n\t" |
||||
"movq 40(%3), %%mm5 \n\t" |
||||
"movq 48(%3), %%mm6 \n\t" |
||||
"movq 56(%3), %%mm7 \n\t" |
||||
"packuswb %%mm1, %%mm0 \n\t" |
||||
"packuswb %%mm3, %%mm2 \n\t" |
||||
"packuswb %%mm5, %%mm4 \n\t" |
||||
"packuswb %%mm7, %%mm6 \n\t" |
||||
"movq %%mm0, (%0) \n\t" |
||||
"movq %%mm2, (%0, %1) \n\t" |
||||
"movq %%mm4, (%0, %1, 2) \n\t" |
||||
"movq %%mm6, (%0, %2) \n\t" |
||||
:: "r" (pix), "r" ((x86_reg) line_size), "r" ((x86_reg) line_size * 3), |
||||
"r" (p) |
||||
: "memory"); |
||||
} |
||||
|
||||
void ff_add_pixels_clamped_mmx(const int16_t *block, uint8_t *pixels, |
||||
int line_size) |
||||
{ |
||||
const int16_t *p; |
||||
uint8_t *pix; |
||||
int i; |
||||
|
||||
/* read the pixels */ |
||||
p = block; |
||||
pix = pixels; |
||||
MOVQ_ZERO(mm7); |
||||
i = 4; |
||||
do { |
||||
__asm__ volatile ( |
||||
"movq (%2), %%mm0 \n\t" |
||||
"movq 8(%2), %%mm1 \n\t" |
||||
"movq 16(%2), %%mm2 \n\t" |
||||
"movq 24(%2), %%mm3 \n\t" |
||||
"movq %0, %%mm4 \n\t" |
||||
"movq %1, %%mm6 \n\t" |
||||
"movq %%mm4, %%mm5 \n\t" |
||||
"punpcklbw %%mm7, %%mm4 \n\t" |
||||
"punpckhbw %%mm7, %%mm5 \n\t" |
||||
"paddsw %%mm4, %%mm0 \n\t" |
||||
"paddsw %%mm5, %%mm1 \n\t" |
||||
"movq %%mm6, %%mm5 \n\t" |
||||
"punpcklbw %%mm7, %%mm6 \n\t" |
||||
"punpckhbw %%mm7, %%mm5 \n\t" |
||||
"paddsw %%mm6, %%mm2 \n\t" |
||||
"paddsw %%mm5, %%mm3 \n\t" |
||||
"packuswb %%mm1, %%mm0 \n\t" |
||||
"packuswb %%mm3, %%mm2 \n\t" |
||||
"movq %%mm0, %0 \n\t" |
||||
"movq %%mm2, %1 \n\t" |
||||
: "+m" (*pix), "+m" (*(pix + line_size)) |
||||
: "r" (p) |
||||
: "memory"); |
||||
pix += line_size * 2; |
||||
p += 16; |
||||
} while (--i); |
||||
} |
||||
|
||||
#endif /* HAVE_INLINE_ASM */ |
Loading…
Reference in new issue